In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import torch as th
import torch.nn as nn

In [2]:
device = th.device("cuda" if th.cuda.is_available() else "cpu")

In [3]:
class Network(nn.Module):
    def __init__(self, n_inputs: int, n_outputs: int, hidden_size: int = 128, n_layers: int = 2):
        super().__init__()
        
        self.layers = nn.Sequential(
            nn.Linear(n_inputs, hidden_size),
            *[
                nn.ReLU() if i % 2 == 0 else nn.Linear(hidden_size, hidden_size) for i in range(n_layers*2)
            ],
            nn.ReLU(),
            nn.Linear(hidden_size, n_outputs),
        )
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x: th.Tensor) -> th.Tensor:
        return self.softmax(self.layers(x))

In [4]:
batch_size = 64

In [5]:
df = pd.read_csv("datasets/training/EURUSD.csv")
df["Lable"] = (df["Trading_Price"].shift(-1)-df["Trading_Price"]).apply(lambda x: 1 if x > 0 else 0)
df = df.dropna()
features = th.from_numpy(df.drop(["Date", "Trading_Price", "Lable"], axis=1).to_numpy()).to(device=device).type(th.float32)
lable = th.from_numpy(df["Lable"].to_numpy()).to(device=device)
lable = nn.functional.one_hot(lable, num_classes=2).type(th.float32)
df.head()

Unnamed: 0,Date,Trading_Price,1H_x1_0,1H_x2_0,1H_x3_0,1H_x4_0,1H_x5_0,1H_x1_1,1H_x2_1,1H_x3_1,...,15m_x2_14,15m_x3_14,15m_x4_14,15m_x5_14,15m_x1_15,15m_x2_15,15m_x3_15,15m_x4_15,15m_x5_15,Lable
0,2023-09-20 04:00:00+00:00,1.06792,-8.4e-05,-0.00029,0.0,0.000234,3.7e-05,-0.000328,-0.000515,-0.000421,...,0.000197,0.000225,0.000206,0.000112,0.000318,-9e-06,5.6e-05,2.8e-05,0.000318,0
1,2023-09-20 04:15:00+00:00,1.06791,-8.4e-05,-0.00029,0.0,0.000234,3.7e-05,-0.000328,-0.000515,-0.000421,...,9e-06,9.4e-05,9.4e-05,0.00014,1.9e-05,0.000197,0.000225,0.000206,0.000112,0
2,2023-09-20 04:30:00+00:00,1.06785,-8.4e-05,-0.00029,0.0,0.000234,3.7e-05,-0.000328,-0.000515,-0.000421,...,2.8e-05,-9e-06,0.000112,0.000159,0.000122,9e-06,9.4e-05,9.4e-05,0.00014,1
3,2023-09-20 04:45:00+00:00,1.06788,-8.4e-05,-0.00029,0.0,0.000234,3.7e-05,-0.000328,-0.000515,-0.000421,...,-9e-06,-0.0003,9.4e-05,0.000468,9e-06,2.8e-05,-9e-06,0.000112,0.000159,1
4,2023-09-20 05:00:00+00:00,1.06797,0.000215,0.000262,9e-06,0.000281,0.000243,-8.4e-05,-0.00029,0.0,...,0.00014,0.000318,0.000206,0.000178,9e-06,-9e-06,-0.0003,9.4e-05,0.000468,1


In [6]:
features.shape, lable.shape

(torch.Size([25063, 100]), torch.Size([25063, 2]))

In [7]:
X_train, X_test, y_train, y_test = train_test_split(features, lable, test_size=0.2, random_state=42)

In [8]:
epochs = 100000
learning_rate = 0.001

model = Network(
    n_inputs=features.shape[1],
    n_outputs=2,
    hidden_size=256,
    n_layers=3
).to(device=device)

lossfn = nn.CrossEntropyLoss()
optimiser = th.optim.Adam(model.parameters(), lr=learning_rate)

In [9]:
model.layers

Sequential(
  (0): Linear(in_features=100, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): ReLU()
  (4): Linear(in_features=256, out_features=256, bias=True)
  (5): ReLU()
  (6): Linear(in_features=256, out_features=256, bias=True)
  (7): ReLU()
  (8): Linear(in_features=256, out_features=2, bias=True)
)

In [10]:
confidence_threshold = 0.80

In [11]:
model.train()
for epoch in range(epochs):
    y_pred = model(X_train)
    optimiser.zero_grad()
    loss = lossfn(y_pred, y_train)
    loss.backward()
    th.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
    optimiser.step()
    
    
    if epoch % 100 == 0:
        model.eval()
        with th.inference_mode():
            y_pred = model(X_test)
        confident_indices = np.where(y_pred.cpu() >= confidence_threshold)[0]   
        acc = sum(y_test.argmax(dim=1).unsqueeze(dim=1).cpu()[confident_indices] == th.argmax(y_pred, dim=1).unsqueeze(dim=1).cpu()[confident_indices]) / max(len(y_pred[confident_indices]), 1)
        loss = lossfn(y_pred, y_test)
        
        print(f"Epoch: {epoch} | Loss: {loss} | Accuracy: {acc}")
        
    model.train()

Epoch: 0 | Loss: 0.6933451294898987 | Accuracy: 0.0
Epoch: 100 | Loss: 0.6876499056816101 | Accuracy: tensor([0.9000])
Epoch: 200 | Loss: 0.632662296295166 | Accuracy: tensor([0.7854])
Epoch: 300 | Loss: 0.6092607378959656 | Accuracy: tensor([0.8030])
Epoch: 400 | Loss: 0.6754302978515625 | Accuracy: tensor([0.6287])
Epoch: 500 | Loss: 0.6652454733848572 | Accuracy: tensor([0.6444])
Epoch: 600 | Loss: 0.613131046295166 | Accuracy: tensor([0.7272])
Epoch: 700 | Loss: 0.6150151491165161 | Accuracy: tensor([0.7192])
Epoch: 800 | Loss: 0.6191672682762146 | Accuracy: tensor([0.7101])
Epoch: 900 | Loss: 0.5936249494552612 | Accuracy: tensor([0.7549])
Epoch: 1000 | Loss: 0.758109450340271 | Accuracy: tensor([0.5474])
Epoch: 1100 | Loss: 0.6552843451499939 | Accuracy: tensor([0.6544])
Epoch: 1200 | Loss: 0.6045853495597839 | Accuracy: tensor([0.7343])
Epoch: 1300 | Loss: 0.6813902258872986 | Accuracy: tensor([0.6226])
Epoch: 1400 | Loss: 0.6311733722686768 | Accuracy: tensor([0.6910])
Epoch: 1

KeyboardInterrupt: 

In [33]:
columns = df.drop(["Date", "Trading_Price", "Lable"], axis=1).columns.to_numpy()

In [43]:
model.layers[0].weight.shape

torch.Size([256, 120])

In [51]:
columns[50]

'15m_x1_2'

In [12]:
th.save(model, "models/supervised_02.pt")