In [None]:
import torch 
import torch.nn as nn
import numpy as np
import pandas as pd
import yfinance as yf

In [None]:
# Handle for the Tesla ticker on Yahoo Finance.
tsla = yf.Ticker("tsla")

# Daily price history.
# NOTE(review): both `period` and `start` are supplied; how yfinance reconciles
# the two may depend on the installed version — confirm the returned date range.
tsla_hist = tsla.history(
    period='5y',
    interval='1d',
    start='2020-01-01',
)
tsla_hist

In [None]:
# Remove the corporate-action columns, which are not used as features.
# errors='ignore' keeps this cell re-runnable: once the columns are gone a
# second execution would otherwise raise KeyError.
tsla_hist = tsla_hist.drop(columns=['Dividends', 'Stock Splits'], errors='ignore')

In [None]:
# Turn the index into plain numbers, then rescale it so its largest value is 1.
numeric_index = pd.to_numeric(tsla_hist.index)
tsla_hist.index = numeric_index / numeric_index.max()

In [None]:
# Scale every column by its own maximum so the largest value in each is 1.
# Series.max() skips NaN; the builtin max() returns NaN when the first element
# is NaN, which would turn the whole column into NaN.
for col in tsla_hist.columns:
    tsla_hist[col] = tsla_hist[col] / tsla_hist[col].max()

In [None]:
class MLP(nn.Module):
    """Two-stage perceptron: Linear -> ReLU -> Linear -> ReLU.

    The output layer is also followed by a ReLU, so the network never
    produces negative values.

    Args:
        input_features: Size of each input vector.
        hidden_features: Size of the hidden layer.
        output_features: Size of each output vector.
    """

    @staticmethod
    def _dense_relu(in_dim, out_dim):
        """Return a Linear(in_dim, out_dim) layer followed by a ReLU."""
        return nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU())

    def __init__(self, input_features, hidden_features, output_features):
        super().__init__()
        # l1 is created before l2 so parameter initialisation order is fixed.
        self.l1 = self._dense_relu(input_features, hidden_features)
        self.l2 = self._dense_relu(hidden_features, output_features)

    def forward(self, x):
        """Apply both Linear+ReLU stages to ``x`` and return the result."""
        return self.l2(self.l1(x))

In [None]:
# Target: the closing-price column.
y = tsla_hist['Close']
# Features: every other column of the frame.
X = tsla_hist.drop(columns='Close')

In [None]:
# Expose the index values as an ordinary feature column named 'Date'.
X = X.assign(Date=X.index)

In [None]:
from sklearn.decomposition import PCA

# Principal component analysis of the feature frame, keeping 5 components.
pca = PCA(n_components=5)

# PCA is unsupervised: fit() ignores any target argument, so only X is passed.
pca.fit(X)

In [None]:
import matplotlib.pyplot as plt

# explained_variance_ holds one value per principal component, not per input
# feature, so the bars are labelled PC1..PCn instead of with column names.
# Deriving the labels from the fitted PCA also keeps this cell runnable after
# more columns have been added to X.
component_labels = [f"PC{i}" for i in range(1, len(pca.explained_variance_) + 1)]

fig, ax = plt.subplots()
ax.bar(component_labels, pca.explained_variance_)
ax.set_xlabel('Principal component')
ax.set_ylabel('Explained variance')
ax.set_title("PCA Decomposition")
plt.show()

In [None]:
# Add lagged closing prices as features: "Delay k" at a given row holds the
# Close value from k rows earlier.
#
# Assigning a Series to a DataFrame column aligns on the index, so a plain
# slice of y would land back on its own rows (un-lagged Close). shift() moves
# the values while keeping the index, which produces a real lag.
#
# The first `max_delay` rows are left NaN in every Delay column so all of them
# share the same non-null rows; `.dropna().values` on each column therefore
# yields equally long, row-aligned arrays.
max_delay = 3
for delay in range(1, max_delay + 1):
    X[f"Delay {delay}"] = y.shift(delay).iloc[max_delay:]

In [None]:
# Display the feature frame, now including the Delay columns, for inspection.
X

In [None]:
# Rows from position 3 onward are used for the non-delay features.
_tail = slice(3, None)

# One array per feature, in the order the sample vectors are laid out:
# Date, High, Low, Open, Volume, Delay 1, Delay 2, Delay 3.
_feature_arrays = [
    X['Date'].iloc[_tail].values,
    X['High'].iloc[_tail].values,
    X['Low'].iloc[_tail].values,
    X['Open'].iloc[_tail].values,
    X['Volume'].iloc[_tail].values,
    X['Delay 1'].dropna().values,
    X['Delay 2'].dropna().values,
    X['Delay 3'].dropna().values,
]

# Pair the arrays positionally (zip stops at the shortest one); each sample is
# a list of 8 values.
X_sample = [list(row) for row in zip(*_feature_arrays)]
y_sample = y.iloc[_tail].values

In [None]:
# Sanity check: the number of rows from position 3 onward should equal the
# number of assembled samples.
len(X['Date'].iloc[3:]), len(X_sample)

In [None]:
# Compare lengths: targets and feature rows should pair one-to-one.
len(y_sample), len(X_sample)

In [None]:
# Build the tensors from X_sample / y_sample, the row-aligned samples prepared
# above, rather than from the X frame: the frame's Delay columns contain NaN
# rows, which would propagate NaN into the loss.
# float32 matches the default dtype of nn.Linear parameters; float64 input
# would raise a dtype-mismatch error in the forward pass.
# y stays 1-D, shape (n_samples,).
X = torch.from_numpy(np.asarray(X_sample, dtype=np.float32))
y = torch.from_numpy(np.asarray(y_sample, dtype=np.float32))

In [None]:
from sklearn.model_selection import train_test_split

# shuffle=False keeps row order, so the last 20% of rows form the test set.
# The features include lagged values of the target, so a shuffled split would
# place neighbouring rows on both sides of the boundary and leak information
# from the test rows into training. It also makes the split reproducible.
# NOTE(review): assumes rows are in chronological order — confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

In [None]:
# Number of rows that train_test_split placed in the test split.
len(X_test)

In [None]:
# Hold out the final rows as the test set. The hold-out size is taken from
# X_test instead of a hard-coded row count, so it follows the data length
# (the row count changes whenever the price history is re-downloaded).
n_test = len(X_test)

# Each dataset is a list of (features, target) pairs, as DataLoader expects.
train_data = list(zip(X[:-n_test], y[:-n_test]))
test_data = list(zip(X[-n_test:], y[-n_test:]))

In [None]:
from torch.utils.data import DataLoader

# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(train_data, batch_size=32, shuffle=True)

# Evaluation does not benefit from shuffling; a fixed order keeps the
# predictions in the same order as test_data and makes runs comparable.
test_dataloader = DataLoader(test_data, batch_size=32, shuffle=False)

# Backward-compatible alias for the original, misspelled name.
test_dataloaader = test_dataloader

In [27]:
# 8 inputs (one per feature in a sample), 32 hidden units, 1 output value.
mlpmodel = MLP(input_features=8, hidden_features=32, output_features=1)

In [30]:
# Mean squared error between predictions and targets.
# NOTE(review): mlpmodel emits shape (batch, 1) while y is 1-D; make sure the
# shapes are matched before calling loss_fn, otherwise MSELoss broadcasts.
loss_fn = nn.MSELoss()

# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(mlpmodel.parameters(), lr=0.01)