In [15]:
import torch
from torch import nn
import numpy as np
import pandas as pd
import yfinance as yf

In [16]:
start_date = "1980-01-01"
end_date = "2022-04-11"

df = yf.download("AAPL", start_date, end_date, auto_adjust=False)
df

[*********************100%***********************]  1 of 1 completed


Price,Adj Close,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1980-12-12,0.098834,0.128348,0.128906,0.128348,0.128348,469033600
1980-12-15,0.093678,0.121652,0.122210,0.121652,0.122210,175884800
1980-12-16,0.086802,0.112723,0.113281,0.112723,0.113281,105728000
1980-12-17,0.088951,0.115513,0.116071,0.115513,0.115513,86441600
1980-12-18,0.091530,0.118862,0.119420,0.118862,0.118862,73449600
...,...,...,...,...,...,...
2022-04-04,175.787796,178.440002,178.490005,174.440002,174.570007,76468400
2022-04-05,172.458038,175.059998,178.300003,174.419998,177.500000,73401800
2022-04-06,169.276031,171.830002,173.630005,170.130005,172.360001,89058800
2022-04-07,169.581451,172.139999,173.360001,169.850006,171.160004,77594700


In [17]:
df.columns = df.columns.droplevel(1)
df["Ret"] = df.loc[:, "Adj Close"].pct_change()
df = df.dropna()

df.tail()

Price,Adj Close,Close,High,Low,Open,Volume,Ret
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-04-04,175.787796,178.440002,178.490005,174.440002,174.570007,76468400,0.023693
2022-04-05,172.458038,175.059998,178.300003,174.419998,177.5,73401800,-0.018942
2022-04-06,169.276031,171.830002,173.630005,170.130005,172.360001,89058800,-0.018451
2022-04-07,169.581451,172.139999,173.360001,169.850006,171.160004,77594700,0.001804
2022-04-08,167.56192,170.089996,171.779999,169.199997,171.779999,76575500,-0.011909


In [18]:
cum_returns_dict = {f"Ret_{window}":((1 + df["Ret"]).rolling(window).apply(np.prod) - 1) for window in [25, 60, 90, 120, 240]}
cum_returns_df = pd.DataFrame(cum_returns_dict)
cum_returns_df["Ret"] = df["Ret"]

In [19]:
cum_returns_df = cum_returns_df.iloc[:, [-1, 0, 1, 2, 3, 4]]
cum_returns_df.tail()

Unnamed: 0_level_0,Ret,Ret_25,Ret_60,Ret_90,Ret_120,Ret_240
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-04-04,0.023693,0.080668,0.038763,0.106916,0.26442,0.360471
2022-04-05,-0.018942,0.072672,0.018081,0.082395,0.245752,0.311052
2022-04-06,-0.018451,0.03164,-0.00082,0.097181,0.198525,0.283041
2022-04-07,0.001804,0.035553,-0.01554,0.075632,0.191735,0.288512
2022-04-08,-0.011909,0.04241,-0.029757,0.030289,0.163803,0.280887


In [20]:
cum_returns_df["future_120_ret"] = cum_returns_df["Ret"].shift(-120)
cum_returns_df["label"] = (cum_returns_df["future_120_ret"] > 0.0).astype(int)
cum_returns_df.drop(columns=["future_120_ret"], inplace=True)
df = cum_returns_df.dropna()
df.head(20)

Unnamed: 0_level_0,Ret,Ret_25,Ret_60,Ret_90,Ret_120,Ret_240,label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1981-11-24,-0.006897,-0.082808,-0.105594,-0.253888,-0.430831,-0.373913,0
1981-11-25,0.020832,-0.063702,-0.140353,-0.234378,-0.397544,-0.325692,0
1981-11-27,0.02721,-0.032061,-0.132186,-0.165746,-0.393577,-0.252478,1
1981-11-30,-0.013244,-0.019736,-0.096974,-0.198931,-0.408732,-0.280194,1
1981-12-01,0.0,-0.019736,-0.085895,-0.223962,-0.433461,-0.300474,1
1981-12-02,0.006712,-0.032267,-0.05064,-0.250002,-0.423078,-0.336285,0
1981-12-03,-0.013334,-0.075007,-0.063299,-0.233165,-0.428573,-0.375528,0
1981-12-04,0.027026,-0.037983,-0.044033,-0.200006,-0.401578,-0.384619,0
1981-12-07,0.006579,-0.043758,-0.025487,-0.223354,-0.388004,-0.41154,0
1981-12-08,-0.019606,-0.062507,-0.013156,-0.250002,-0.397593,-0.471832,0


In [21]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 10180 entries, 1981-11-24 to 2022-04-08
Data columns (total 7 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Ret      10180 non-null  float64
 1   Ret_25   10180 non-null  float64
 2   Ret_60   10180 non-null  float64
 3   Ret_90   10180 non-null  float64
 4   Ret_120  10180 non-null  float64
 5   Ret_240  10180 non-null  float64
 6   label    10180 non-null  int64  
dtypes: float64(6), int64(1)
memory usage: 636.2 KB


In [22]:
# 80% train, 20% test
df_train = df.iloc[:int(0.8 * len(df))]
df_test = df.iloc[int(0.8 * len(df)):]

print("df_train.shape", df_train.shape)
print("df_test.shape", df_test.shape)

X_train, y_train = df_train.iloc[:, :-1].values, df_train.iloc[:, -1].values
X_test, y_test = df_test.iloc[:, :-1].values, df_test.iloc[:, -1].values

print("X_train.shape", X_train.shape)
print("y_train.shape", y_train.shape)
print("X_test.shape", X_test.shape)
print("y_test.shape", y_test.shape)

df_train.shape (8144, 7)
df_test.shape (2036, 7)
X_train.shape (8144, 6)
y_train.shape (8144,)
X_test.shape (2036, 6)
y_test.shape (2036,)


In [23]:
import tensorflow as tf