In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import torch as th
import torch.nn as nn

In [2]:
device = th.device("cuda" if th.cuda.is_available() else "cpu")

In [3]:
class Network(nn.Module):
    def __init__(self, n_inputs: int, n_outputs: int, hidden_size: int = 128, n_layers: int = 2):
        super().__init__()
        
        self.layers = nn.Sequential(
            nn.Linear(n_inputs, hidden_size),
            *[
                nn.ReLU() if i % 2 == 0 else nn.Linear(hidden_size, hidden_size) for i in range(n_layers*2)
            ],
            nn.ReLU(),
            nn.Linear(hidden_size, n_outputs),
        )
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x: th.Tensor) -> th.Tensor:
        return self.softmax(self.layers(x))

In [4]:
batch_size = 64

In [5]:
df = pd.read_csv("datasets/training/EURUSD.csv")
df["Lable"] = (df["Trading_Price"].shift(-1)-df["Trading_Price"]).apply(lambda x: 1 if x > 0 else 0)
df = df.dropna()
features = th.from_numpy(df.drop(["Date", "Trading_Price", "Lable"], axis=1).to_numpy()).to(device=device).type(th.float32)
lable = th.from_numpy(df["Lable"].to_numpy()).to(device=device)
lable = nn.functional.one_hot(lable, num_classes=2).type(th.float32)
df.head()

Unnamed: 0,Date,Trading_Price,4H_x1_0,4H_x2_0,4H_x3_0,4H_x4_0,4H_x5_0,4H_x1_1,4H_x2_1,4H_x3_1,...,15m_x2_14,15m_x3_14,15m_x4_14,15m_x5_14,15m_x1_15,15m_x2_15,15m_x3_15,15m_x4_15,15m_x5_15,Lable
0,2023-09-20 16:00:00+00:00,1.07259,-0.005189,0.000895,-0.003208,0.007327,6.6e-05,0.001187,0.002074,0.000243,...,-0.000327,-0.000411,0.000187,0.000533,0.000196,-9e-06,-4.7e-05,0.000327,0.000308,1
1,2023-09-20 16:15:00+00:00,1.07369,-0.005189,0.000895,-0.003208,0.007327,6.6e-05,0.001187,0.002074,0.000243,...,0.000131,0.000309,0.000318,0.000224,-0.000187,-0.000327,-0.000411,0.000187,0.000533,0
2,2023-09-20 16:30:00+00:00,1.07359,-0.005189,0.000895,-0.003208,0.007327,6.6e-05,0.001187,0.002074,0.000243,...,0.000794,0.00015,4.7e-05,0.001139,0.0,0.000131,0.000309,0.000318,0.000224,0
3,2023-09-20 16:45:00+00:00,1.07353,-0.005189,0.000895,-0.003208,0.007327,6.6e-05,0.001187,0.002074,0.000243,...,-1.9e-05,0.00057,0.00043,0.000168,0.001066,0.000794,0.00015,4.7e-05,0.001139,0
4,2023-09-20 17:00:00+00:00,1.07312,-0.005189,0.000895,-0.003208,0.007327,6.6e-05,0.001187,0.002074,0.000243,...,0.000411,3.7e-05,0.000327,0.000644,-0.000402,-1.9e-05,0.00057,0.00043,0.000168,0


In [6]:
features.shape, lable.shape

(torch.Size([25015, 120]), torch.Size([25015, 2]))

In [7]:
X_train, X_test, y_train, y_test = train_test_split(features, lable, test_size=0.2, random_state=42)

In [8]:
epochs = 100000
learning_rate = 0.001

model = Network(
    n_inputs=features.shape[1],
    n_outputs=2,
    hidden_size=256,
    n_layers=3
).to(device=device)

lossfn = nn.CrossEntropyLoss()
optimiser = th.optim.Adam(model.parameters(), lr=learning_rate)

In [9]:
model.layers

Sequential(
  (0): Linear(in_features=120, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): ReLU()
  (4): Linear(in_features=256, out_features=256, bias=True)
  (5): ReLU()
  (6): Linear(in_features=256, out_features=256, bias=True)
  (7): ReLU()
  (8): Linear(in_features=256, out_features=2, bias=True)
)

In [12]:
confidence_threshold = 0.80

In [13]:
model.train()
for epoch in range(epochs):
    y_pred = model(X_train)
    optimiser.zero_grad()
    loss = lossfn(y_pred, y_train)
    loss.backward()
    th.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
    optimiser.step()
    
    
    if epoch % 100 == 0:
        model.eval()
        with th.inference_mode():
            y_pred = model(X_test)
        confident_indices = np.where(y_pred.cpu() >= confidence_threshold)[0]   
        acc = sum(y_test.argmax(dim=1).unsqueeze(dim=1).cpu()[confident_indices] == th.argmax(y_pred, dim=1).unsqueeze(dim=1).cpu()[confident_indices]) / max(len(y_pred[confident_indices]), 1)
        loss = lossfn(y_pred, y_test)
        
        print(f"Epoch: {epoch} | Loss: {loss} | Accuracy: {acc}")
        
    model.train()

Epoch: 0 | Loss: 0.6931703686714172 | Accuracy: 0.0
Epoch: 100 | Loss: 0.6865331530570984 | Accuracy: tensor([0.6667])
Epoch: 200 | Loss: 0.6460449695587158 | Accuracy: tensor([0.7320])
Epoch: 300 | Loss: 0.6311432719230652 | Accuracy: tensor([0.7615])
Epoch: 400 | Loss: 0.6689441204071045 | Accuracy: tensor([0.6564])
Epoch: 500 | Loss: 0.6112592816352844 | Accuracy: tensor([0.7882])
Epoch: 600 | Loss: 0.7039888501167297 | Accuracy: tensor([0.5896])
Epoch: 700 | Loss: 0.6104170680046082 | Accuracy: tensor([0.7575])
Epoch: 800 | Loss: 0.5899274945259094 | Accuracy: tensor([0.7892])
Epoch: 900 | Loss: 0.583843469619751 | Accuracy: tensor([0.7915])
Epoch: 1000 | Loss: 0.6301195621490479 | Accuracy: tensor([0.6944])
Epoch: 1100 | Loss: 0.5973305702209473 | Accuracy: tensor([0.7518])
Epoch: 1200 | Loss: 0.5814186930656433 | Accuracy: tensor([0.7874])
Epoch: 1300 | Loss: 0.6247776746749878 | Accuracy: tensor([0.6984])
Epoch: 1400 | Loss: 0.5965507626533508 | Accuracy: tensor([0.7426])
Epoch:

KeyboardInterrupt: 

In [33]:
columns = df.drop(["Date", "Trading_Price", "Lable"], axis=1).columns.to_numpy()

In [43]:
model.layers[0].weight.shape

torch.Size([256, 120])

In [51]:
columns[50]

'15m_x1_2'

In [18]:
th.save(model, "models/supervised_01.pt")