In [None]:
import pandas as pd

In [None]:
# Economic calendar: one row per scheduled release/event.
df_news_numbers = pd.read_csv("economic_calendar.csv")

# BTC price candles; the listed columns are removed right after loading.
btc_columns_to_drop = ["Unnamed: 0", "symbol", "interval"]
df_btc_price = pd.read_csv("df_binance.csv").drop(columns=btc_columns_to_drop)

# Show both raw inputs (prices first, then the calendar).
display(df_btc_price, df_news_numbers)

In [None]:
# The 20 most frequent event names in the calendar (value_counts sorts by count, descending).
df_news_numbers["evento"].value_counts().iloc[:20]

In [None]:
# Restrict the calendar to three events; .copy() gives an independent frame
# so later column assignments on `df` do not touch `df_news_numbers`.
selected_events = [
    "Crude Oil Inventories",
    "Initial Jobless Claims",
    "Fed Interest Rate Decision",
]
is_selected_event = df_news_numbers["evento"].isin(selected_events)
df = df_news_numbers.loc[is_selected_event].copy()

In [None]:
# Convert the textual "atual" / "previsao" / "anterior" values of each event into
# plain floats and compute "diferenca" = atual - previsao.
#
# Per-event unit handling:
#   * Crude Oil Inventories      -> strip "M", multiply by 1_000_000
#   * Initial Jobless Claims     -> strip "K" and thousands commas, multiply by 1_000
#   * Fed Interest Rate Decision -> strip "%", divide by 100

# One boolean mask per event type kept in `df`.
mask_crude = df["evento"] == "Crude Oil Inventories"
mask_initial_jobless_claims = df["evento"] == "Initial Jobless Claims"
mask_fed_interest_rate_decision = df["evento"] == "Fed Interest Rate Decision"
mask = mask_crude | mask_initial_jobless_claims | mask_fed_interest_rate_decision


def _strip_unit(values, unit):
    """Return `values` (a Series of strings) as floats without `unit` and commas.

    Replacements are literal (regex=False) so characters such as "%" are never
    interpreted as a pattern. Missing values stay NaN.
    """
    return (
        values.str.replace(",", "", regex=False)
        .str.replace(unit, "", regex=False)
        .astype(float)
    )


for col in ["atual", "previsao", "anterior"]:
    if pd.api.types.is_numeric_dtype(df[col]):
        # Already numeric (e.g. this cell was run before): parsing and scaling
        # again would fail on `.str` or corrupt the values, so leave it alone.
        continue

    # Build a real float64 column instead of writing floats back into the
    # original object-dtype column; rows not covered by any mask stay NaN.
    numeric = pd.Series(float("nan"), index=df.index, dtype="float64")
    numeric.loc[mask_crude] = _strip_unit(df.loc[mask_crude, col], "M") * 1_000_000
    numeric.loc[mask_initial_jobless_claims] = (
        _strip_unit(df.loc[mask_initial_jobless_claims, col], "K") * 1_000
    )
    numeric.loc[mask_fed_interest_rate_decision] = (
        _strip_unit(df.loc[mask_fed_interest_rate_decision, col], "%") / 100
    )
    df[col] = numeric

# Difference between the released value and the forecast, in the scaled units above.
df["diferenca"] = df["atual"] - df["previsao"]
df

In [None]:
# One output column per event: the row's "diferenca" where the row belongs to
# that event, missing elsewhere. Missing cells are then filled with 0.
feature_events = [
    "Initial Jobless Claims",
    "Crude Oil Inventories",
    "Fed Interest Rate Decision",
]
difference_by_event = {
    event_name: df["diferenca"].where(df["evento"].eq(event_name))
    for event_name in feature_events
}
final_df = pd.DataFrame({"data": df["data"], **difference_by_event}).fillna(0)

display(final_df)
# Persist the feature table without the index column.
final_df.to_csv("economic_features.csv", index=False)

In [None]:
# Load the Powell classification table, parsing the "date" column as datetimes.
powell = pd.read_csv(
    "data/powell_classificacoes.csv",
    parse_dates=["date"],
)
powell

In [None]:
# BTC return features for every price row that falls on a day of the selected event.
#
# Output (`df_dict`), one row per matching BTC price row:
#   * Date                          - the price row's original "Date" value
#   * BTC_daily_returns             - (close - open) / open for that "Date"
#   * BTC_daily_volume              - the row's "volume"
#   * BTC_returns_since_last_event  - (today's open - close on the previous event
#                                     day) / close on the previous event day;
#                                     NaN when there is no earlier event or no
#                                     BTC price for that day
event_selection = "Fed Chair Powell Speaks"
df_selected_events = df_news_numbers.loc[df_news_numbers["evento"] == event_selection]

# Parse the event timestamps once and reduce them to calendar days.
event_days = pd.to_datetime(df_selected_events["data"]).dt.date
event_dates = event_days.unique()

df_btc_price["date"] = pd.to_datetime(df_btc_price["Date"]).dt.date
df_btc_price_events = df_btc_price.loc[df_btc_price["date"].isin(event_dates)]

# Open/close looked up by the "Date" value, taking the first row if a value repeats.
first_row_by_date = df_btc_price.drop_duplicates(subset="Date").set_index("Date")
open_today = df_btc_price_events["Date"].map(first_row_by_date["open"])
close_today = df_btc_price_events["Date"].map(first_row_by_date["close"])

# Map each event day to the event day right before it. Sorting first means the
# result does not depend on the row order of the calendar file.
ordered_event_days = sorted(event_days.dropna().unique())
previous_event_day = dict(zip(ordered_event_days[1:], ordered_event_days[:-1]))

# Close price per calendar day (first row of the day). Matching on the parsed
# day instead of a re-assembled string means a missing day yields NaN rather
# than an IndexError.
close_by_day = df_btc_price.drop_duplicates(subset="date").set_index("date")["close"]
previous_close = (
    df_btc_price_events["date"]
    .map(previous_event_day)
    .map(close_by_day, na_action="ignore")
)

daily_returns = (close_today - open_today) / open_today
returns_since_last_event = (open_today - previous_close) / previous_close

df_dict = pd.DataFrame({
    "Date": df_btc_price_events["Date"],
    "BTC_daily_returns": daily_returns,
    "BTC_daily_volume": df_btc_price_events["volume"],
    "BTC_returns_since_last_event": returns_since_last_event,
})

# Keep the list variables that the loop-based version of this cell exposed.
BTC_daily_returns = daily_returns.tolist()
BTC_returns_since_last_event = returns_since_last_event.tolist()

display(df_dict.head(30))