In [52]:
import pandas as pd
import numpy as np

In [11]:
df1 = pd.read_csv("../data/aapl_fundamentals.csv")

df2 = pd.read_csv("../data/aapl_model_dataset.csv")
df3 = pd.read_csv("../data/vix_with_daily_closing_return.csv")



In [12]:
df1.head()

Unnamed: 0,Quarter End Date,EBITDA (USD millions),EV (USD millions)
0,2025-09-30,35550,3790000
1,2025-06-30,31032,3060000
2,2025-03-31,32250,3340000
3,2024-12-31,45912,3920000
4,2024-09-30,32502,3550000


In [13]:
df2.head()

Unnamed: 0,date,close,volume,macd_line,macd_diff,macd_signal,rsi,ev_ebidta,vix_prct_returns,daily_prct_change
0,2016-03-31T00:00:00.000000000Z,24.73,114645816,0.559188,0.066089,0.493099,71.00847,38.940719,2.876106,-0.522928
1,2016-04-01T00:00:00.000000000Z,24.96,113856948,0.584986,0.07351,0.511476,73.167919,38.940719,-6.09319,0.930044
2,2016-04-04T00:00:00.000000000Z,25.21,157112212,0.618475,0.085599,0.532876,75.319795,38.940719,7.78626,1.001603
3,2016-04-05T00:00:00.000000000Z,24.92,111266284,0.61453,0.065323,0.549207,68.460985,38.940719,9.206799,-1.150337
4,2016-04-06T00:00:00.000000000Z,25.18,111477272,0.625177,0.060776,0.564401,71.009868,38.940719,-8.625162,1.043339


In [14]:
df3.head()

Unnamed: 0,DATE,OPEN,HIGH,LOW,CLOSE,Daily_Closing_Return
0,1990-01-02,17.24,17.24,17.24,17.24,
1,1990-01-03,18.19,18.19,18.19,18.19,0.055104
2,1990-01-04,19.22,19.22,19.22,19.22,0.056625
3,1990-01-05,20.11,20.11,20.11,20.11,0.046306
4,1990-01-08,20.26,20.26,20.26,20.26,0.007459


In [44]:
fund = pd.read_csv("../data/aapl_fundamentals.csv", parse_dates=["Quarter End Date"])
daily = pd.read_csv("../data/aapl_model_dataset.csv", parse_dates=["date"])
ohlc = pd.read_csv("../data/vix_with_daily_closing_return.csv", parse_dates=["DATE"])

print("Fundamentals shape:", fund.shape)
print("Daily engineered shape:", daily.shape)
print("OHLC shape:", ohlc.shape)


Fundamentals shape: (60, 3)
Daily engineered shape: (2416, 10)
OHLC shape: (9055, 6)


In [45]:
fund = fund.sort_values("Quarter End Date")

fund["EV (USD millions)"] = (
    fund["EV (USD millions)"]
    .astype(str)
    .str.replace(",", "", regex=False)
    .astype(float)
)

fund["EBITDA (USD millions)"] = (
    fund["EBITDA (USD millions)"]
    .astype(str)
    .str.replace(",", "", regex=False)
    .astype(float)
)

fund = fund.dropna(subset=["EV (USD millions)", "EBITDA (USD millions)"])

fund["ev_ebitda"] = fund["EV (USD millions)"] / fund["EBITDA (USD millions)"]

fund = fund.rename(columns={
    "Quarter End Date": "date",
    "EBITDA (USD millions)": "ebitda"
})[["date", "ebitda", "ev_ebitda"]]


In [46]:
print("Fundamentals shape:", fund.shape)

Fundamentals shape: (39, 3)


In [47]:
daily["date"] = pd.to_datetime(daily["date"])
fund["date"] = pd.to_datetime(fund["date"])

daily["date"] = daily["date"].dt.tz_localize(None)
# Re-sort after conversion (required by merge_asof)
daily = daily.sort_values("date")
fund = fund.sort_values("date")

# Verify types
print("daily date type:", daily["date"].dtype)
print("fund  date type:", fund["date"].dtype)

daily date type: datetime64[ns]
fund  date type: datetime64[ns]


In [48]:
daily = daily.sort_values("date")
daily.head()
daily = pd.merge_asof(
    daily,
    fund,
    on="date",
    direction="backward"
)

daily[["date", "close", "ebitda", "ev_ebitda"]].head(10)


Unnamed: 0,date,close,ebitda,ev_ebitda
0,2016-03-31,24.73,16464.0,38.940719
1,2016-04-01,24.96,16464.0,38.940719
2,2016-04-04,25.21,16464.0,38.940719
3,2016-04-05,24.92,16464.0,38.940719
4,2016-04-06,25.18,16464.0,38.940719
5,2016-04-07,24.63,16464.0,38.940719
6,2016-04-08,24.65,16464.0,38.940719
7,2016-04-11,24.74,16464.0,38.940719
8,2016-04-12,25.06,16464.0,38.940719
9,2016-04-13,25.42,16464.0,38.940719


In [49]:
ohlc = ohlc.rename(columns={
    "DATE": "date",
    "Daily_Closing_Return": "daily_return"
})

ohlc = ohlc[["date", "daily_return"]]
daily = pd.merge(daily, ohlc, on="date", how="left")

daily[["date", "close", "daily_return"]].head(10)

Unnamed: 0,date,close,daily_return
0,2016-03-31,24.73,0.028761
1,2016-04-01,24.96,-0.060932
2,2016-04-04,25.21,0.077863
3,2016-04-05,24.92,0.092068
4,2016-04-06,25.18,-0.086252
5,2016-04-07,24.63,0.146913
6,2016-04-08,24.65,-0.049505
7,2016-04-11,24.74,0.058594
8,2016-04-12,25.06,-0.086716
9,2016-04-13,25.42,-0.068013


In [51]:
daily = daily.drop(columns=["ev_ebidta"])

print("Daily engineered shape:", daily.shape)
daily.head()

Daily engineered shape: (2416, 12)


Unnamed: 0,date,close,volume,macd_line,macd_diff,macd_signal,rsi,vix_prct_returns,daily_prct_change,ebitda,ev_ebitda,daily_return
0,2016-03-31,24.73,114645816,0.559188,0.066089,0.493099,71.00847,2.876106,-0.522928,16464.0,38.940719,0.028761
1,2016-04-01,24.96,113856948,0.584986,0.07351,0.511476,73.167919,-6.09319,0.930044,16464.0,38.940719,-0.060932
2,2016-04-04,25.21,157112212,0.618475,0.085599,0.532876,75.319795,7.78626,1.001603,16464.0,38.940719,0.077863
3,2016-04-05,24.92,111266284,0.61453,0.065323,0.549207,68.460985,9.206799,-1.150337,16464.0,38.940719,0.092068
4,2016-04-06,25.18,111477272,0.625177,0.060776,0.564401,71.009868,-8.625162,1.043339,16464.0,38.940719,-0.086252


In [53]:
# log return at time t
daily["log_return"] = np.log(daily["close"] / daily["close"].shift(1))

# 5-day ahead forecast target (your baseline horizon)
daily["target"] = daily["log_return"].shift(-5)

daily = daily.dropna()

daily[["date", "close", "log_return", "target"]].head()

Unnamed: 0,date,close,log_return,target
1,2016-04-01,24.96,0.009257,0.000812
2,2016-04-04,25.21,0.009966,0.003644
3,2016-04-05,24.92,-0.01157,0.012852
4,2016-04-06,25.18,0.010379,0.014263
5,2016-04-07,24.63,-0.022085,0.000786


In [54]:
daily["time_idx"] = (daily["date"] - daily["date"].min()).dt.days
daily["symbol"] = "AAPL"
daily.to_csv("final_tft_dataset.csv", index=False)

print("✅ FINAL MODELING DATASET SAVED")
print("Shape:", daily.shape)
daily.head()

✅ FINAL MODELING DATASET SAVED
Shape: (2410, 16)


Unnamed: 0,date,close,volume,macd_line,macd_diff,macd_signal,rsi,vix_prct_returns,daily_prct_change,ebitda,ev_ebitda,daily_return,log_return,target,time_idx,symbol
1,2016-04-01,24.96,113856948,0.584986,0.07351,0.511476,73.167919,-6.09319,0.930044,16464.0,38.940719,-0.060932,0.009257,0.000812,0,AAPL
2,2016-04-04,25.21,157112212,0.618475,0.085599,0.532876,75.319795,7.78626,1.001603,16464.0,38.940719,0.077863,0.009966,0.003644,3,AAPL
3,2016-04-05,24.92,111266284,0.61453,0.065323,0.549207,68.460985,9.206799,-1.150337,16464.0,38.940719,0.092068,-0.01157,0.012852,4,AAPL
4,2016-04-06,25.18,111477272,0.625177,0.060776,0.564401,71.009868,-8.625162,1.043339,16464.0,38.940719,-0.086252,0.010379,0.014263,5,AAPL
5,2016-04-07,24.63,132677620,0.58252,0.014495,0.568025,59.969013,14.69127,-2.184273,16464.0,38.940719,0.146913,-0.022085,0.000786,6,AAPL
