Imports

In [855]:
import pandas as pd
import yfinance as yf
import numpy as np
import torch
import torch
from sklearn.model_selection import train_test_split
from torch import nn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

Data Gathering

In [856]:
ticker_symbol = "GOOGL" # "TSLA", "GOOGL", "AAPL"
ticker = yf.Ticker(ticker_symbol)
df = ticker.history(period="2y")
df = df.reset_index()

Data Pre-processing

In [857]:
df.drop(columns="Date", inplace=True)
df.drop(columns="Dividends", inplace=True)
df.drop(columns="Stock Splits", inplace=True)
df= df.iloc[:,:5]
df['Prev_EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean().shift(1)
df['Prev_RSI'] = (100 - (100 / (1 + (df['Close'].diff().where(df['Close'].diff() > 0, 0).rolling(14).mean() /
                                    (-df['Close'].diff().where(df['Close'].diff() < 0, 0).rolling(14).mean()))))).shift(1)
df['Prev_UpDown'] = (df['Close'].diff() > 0).astype(int)
df.dropna(inplace=True)
df.head(5)


Unnamed: 0,Open,High,Low,Close,Volume,Prev_EMA_10,Prev_RSI,Prev_UpDown
14,87.892362,88.959718,87.224018,88.80011,23453800,92.952101,15.426998,1
15,88.859969,89.997153,88.101844,89.358734,24745600,92.197194,18.045884,1
16,87.942242,88.3213,86.106783,87.543228,27658300,91.68111,20.425093,0
17,86.894832,89.328807,86.85493,89.009598,23003000,90.928768,19.16342,1
18,88.580662,88.720315,86.795082,87.174141,20097300,90.579828,25.676312,0


Calculate Target

In [858]:
df['Target'] = 0
df.loc[df['Close'].shift(-1) > df['Close'], 'Target'] = 1
df.dropna(inplace=True)
print("length: ", len(df))
df.head(10)

length:  489


Unnamed: 0,Open,High,Low,Close,Volume,Prev_EMA_10,Prev_RSI,Prev_UpDown,Target
14,87.892362,88.959718,87.224018,88.80011,23453800,92.952101,15.426998,1,1
15,88.859969,89.997153,88.101844,89.358734,24745600,92.197194,18.045884,1,0
16,87.942242,88.3213,86.106783,87.543228,27658300,91.68111,20.425093,0,1
17,86.894832,89.328807,86.85493,89.009598,23003000,90.928768,19.16342,1,0
18,88.580662,88.720315,86.795082,87.174141,20097300,90.579828,25.676312,0,0
19,86.765161,87.82254,85.727729,85.807526,19523200,89.960612,26.506611,0,1
20,86.406048,88.630535,86.39607,88.231522,23333500,89.205505,27.406275,1,0
21,86.765157,88.081896,86.356166,88.01207,23986300,89.028417,37.4403,0,1
22,89.368705,90.825105,88.301348,88.899872,28131200,88.843627,38.658774,1,0
23,90.126824,90.426086,87.05443,87.862434,34854800,88.853853,39.87436,0,0


In [859]:
X_data=df.iloc[:, :-1].values
y=df.iloc[:, -1].values
len(X_data), len(y)
X_data.shape
# print(df.columns)
# print(X_data[:5])
# print(y[:5])


(489, 8)

Standardizing data

In [860]:
scaler=StandardScaler()
X=scaler.fit_transform(X_data)
X[:3]

array([[-1.86093124, -1.8739054 , -1.85235814, -1.84235106, -0.52594379,
        -1.65878616, -2.46125246,  0.87120193],
       [-1.82510799, -1.83559056, -1.8195761 , -1.82156128, -0.42119193,
        -1.68693925, -2.29986781,  0.87120193],
       [-1.85908456, -1.89748366, -1.89408082, -1.88912721, -0.18500155,
        -1.70618577, -2.1532529 , -1.14783951]])

Creating Torches

In [861]:
X = torch.from_numpy(X).type(torch.float)
y = torch.from_numpy(y).type(torch.float)
X.shape, y.shape
X_sample = X[0]
y_sample = y[0]
print(f"Values for sample 0 of X: {X_sample} and sample 0 for y: {y_sample}")
print(f"Shapes for sample 0 of X: {X_sample.shape} and same 0 for y: {y_sample.shape}")

Values for sample 0 of X: tensor([-1.8609, -1.8739, -1.8524, -1.8424, -0.5259, -1.6588, -2.4613,  0.8712]) and sample 0 for y: 1.0
Shapes for sample 0 of X: torch.Size([8]) and same 0 for y: torch.Size([])


Splitting test/train data

In [862]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,shuffle=True)

print(f"length of X_train, X_test, y_train, y_test: {len(X_train), len(X_test), len(y_train), len(y_test)}")

length of X_train, X_test, y_train, y_test: (391, 98, 391, 98)


In [863]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

Strock Prediction By Neural Network Class

In [864]:
class StockPredictionNN(nn.Module):
    def __init__(self, input_size):
        super(StockPredictionNN, self).__init__()
        self.model = nn.Sequential(
          nn.Linear(input_size, 64),
          nn.ReLU(),
          nn.Dropout(0.2),
          nn.Linear(64, 64),
          nn.ReLU(),
          nn.Dropout(0.2),
          nn.Linear(64, 1)
        )

    def forward(self, x):
        return self.model(x)

In [865]:
model_1 = StockPredictionNN(input_size=X.shape[1]).to(device)

Optimizers/loss functions setting up

In [866]:
loss_fn = nn.BCEWithLogitsLoss()

optimizer_1 = torch.optim.SGD(params=model_1.parameters(),lr=0.01)


Accuracy Function

In [867]:
def accuracy_fn(y_true, y_predicted):
    correct = torch.eq(y_true, y_predicted).sum().item()
    acc = (correct / len(y_predicted)) * 100
    return acc

Model Training/Testing

In [868]:
torch.manual_seed(42)
epochs = 1000

X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

test_losses_history = []
test_accuracies_history = []
test_precision_history = []
test_recall_history = []
test_f1_history = []
test_roc_auc_history = []

patience = 10
min_delta = 0.0001
best_loss = float('inf')
counter = 0

for epoch in range(epochs):
    y_logits = model_1(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits))

    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_true=y_train, y_predicted=y_pred)

    optimizer_1.zero_grad()
    loss.backward()
    optimizer_1.step()

    model_1.eval()
    with torch.inference_mode():
        test_logits = model_1(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))
        test_proba = torch.sigmoid(test_logits).cpu().numpy()
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_predicted=test_pred)

        test_labels_np = y_test.cpu().numpy()
        test_pred_np = test_pred.cpu().numpy()

        recall = recall_score(test_labels_np, test_pred_np)
        f1 = f1_score(test_labels_np, test_pred_np)
        roc_auc = roc_auc_score(test_labels_np, test_proba)
        precision = precision_score(test_labels_np, test_pred_np, zero_division=1)

        test_recall_history.append(recall)
        test_f1_history.append(f1)
        test_roc_auc_history.append(roc_auc)
        test_precision_history.append(precision)

    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test Loss: {test_loss:.5f}, "
              f"Test Accuracy: {test_acc:.2f}% | Precision: {precision:.2f} | Recall: {recall:.2f} | "
              f"F1-Score: {f1:.2f} | ROC-AUC: {roc_auc:.2f}")

    if test_loss < best_loss - min_delta:
        best_loss = test_loss
        counter = 0
    else:
        counter += 1

    if counter >= patience:
        print(f"Early stopping triggered at epoch {epoch}. Best Test Loss: {best_loss:.5f}")
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test Loss: {test_loss:.5f}, "
              f"Test Accuracy: {test_acc:.2f}% | Precision: {precision:.2f} | Recall: {recall:.2f} | "
              f"F1-Score: {f1:.2f} | ROC-AUC: {roc_auc:.2f}")
        break


Epoch: 0 | Loss: 0.69679, Accuracy: 49.62% | Test Loss: 0.69900, Test Accuracy: 47.96% | Precision: 0.59 | Recall: 0.33 | F1-Score: 0.43 | ROC-AUC: 0.49
Epoch: 10 | Loss: 0.69509, Accuracy: 46.80% | Test Loss: 0.69735, Test Accuracy: 45.92% | Precision: 0.55 | Recall: 0.39 | F1-Score: 0.45 | ROC-AUC: 0.49
Epoch: 20 | Loss: 0.69381, Accuracy: 46.80% | Test Loss: 0.69590, Test Accuracy: 44.90% | Precision: 0.53 | Recall: 0.40 | F1-Score: 0.46 | ROC-AUC: 0.49
Epoch: 30 | Loss: 0.69268, Accuracy: 49.10% | Test Loss: 0.69460, Test Accuracy: 45.92% | Precision: 0.54 | Recall: 0.47 | F1-Score: 0.50 | ROC-AUC: 0.49
Epoch: 40 | Loss: 0.69167, Accuracy: 51.92% | Test Loss: 0.69345, Test Accuracy: 47.96% | Precision: 0.56 | Recall: 0.51 | F1-Score: 0.53 | ROC-AUC: 0.49
Epoch: 50 | Loss: 0.69078, Accuracy: 53.96% | Test Loss: 0.69242, Test Accuracy: 53.06% | Precision: 0.59 | Recall: 0.61 | F1-Score: 0.60 | ROC-AUC: 0.49
Epoch: 60 | Loss: 0.68997, Accuracy: 52.94% | Test Loss: 0.69150, Test Accura