# Neural Network Classifier

In this notebook I explore building a neural network classifier from lagged returns data. As with the SVM, this model is fundamentally flawed due to the efficient markets hypothesis, but again it provides good practice for building and backtesting models.

I use PyTorch and Lightning here. This is obviously overkill for such a simple NN classifier; however, it is once again a good learning experience.

In [40]:
import pandas as pd
import lightning as L
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, random_split

In [41]:
class Classifier(nn.Module):
    """Fully connected network: Linear -> ReLU -> Linear -> ReLU -> Linear.

    No activation is applied after the final layer, so the output is a
    tensor of raw logits.

    Args:
        input_size: Width of each input feature vector.
        output_size: Number of logits produced per sample.
        hidden_l1: Width of the first hidden layer.
        hidden_l2: Width of the second hidden layer.
    """

    def __init__(self, input_size, output_size, hidden_l1, hidden_l2):
        super().__init__()

        # Layers are instantiated in forward order, first to last.
        layers = [
            nn.Linear(input_size, hidden_l1),
            nn.ReLU(),
            nn.Linear(hidden_l1, hidden_l2),
            nn.ReLU(),
            nn.Linear(hidden_l2, output_size),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the logits."""
        return self.linear_relu_stack(x)

In [42]:
class LitClassifier(L.LightningModule):
    """Lightning module that fits a wrapped network with BCE-with-logits loss.

    Args:
        Classifier: Network instance; its output is passed to the loss as logits.
        learning_rate: Learning rate handed to the Adam optimizer.
    """

    def __init__(self, Classifier, learning_rate):
        super().__init__()
        self.Classifier = Classifier
        self.learning_rate = learning_rate
        self.BCE = torch.nn.BCEWithLogitsLoss()

    def _batch_loss(self, batch):
        """Unpack an ``(inputs, targets)`` batch and return its loss."""
        inputs, targets = batch
        logits = self.Classifier(inputs)
        return self.BCE(logits, targets)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters of this module."""
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

    def training_step(self, batch, batch_idx):
        """Return the loss for one training batch."""
        return self._batch_loss(batch)

    def validation_step(self, batch, batch_idx):
        """Log the loss for one validation batch as ``val_loss``."""
        self.log("val_loss", self._batch_loss(batch))

    def test_step(self, batch, batch_idx):
        """Log the loss for one test batch as ``test_loss``."""
        self.log("test_loss", self._batch_loss(batch))

In [43]:
class MarketDataset(Dataset):
    """Per-row ``(features, label)`` samples read from a CSV file.

    The first CSV column becomes the DataFrame index. Of the remaining
    columns, positions 8 through 13 are the features and position 7 is the
    label.

    The labels in this notebook are used as targets for
    ``BCEWithLogitsLoss``, which expects values in [0, 1]. When the label
    column contains any negative value (e.g. a -1/+1 direction encoding) it
    is remapped to 0/1 via ``label > 0``. Label columns without negative
    values are left exactly as read.

    Args:
        csv_file: Path (or buffer) accepted by ``pandas.read_csv``.
    """

    def __init__(self, csv_file):
        self.data = pd.read_csv(csv_file, index_col=0)
        # Positional slice: up to six feature columns (8..13). iloc slices
        # clamp at the frame width, so a narrower file yields fewer columns.
        self.features = self.data.iloc[:, 8:14].values
        labels = self.data.iloc[:, 7].values
        # Only touch the labels when they cannot be valid BCE targets.
        if len(labels) and labels.min() < 0:
            labels = (labels > 0).astype("float32")
        self.labels = labels

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return row ``idx`` as a float feature tensor and a 1-element label tensor."""
        features = torch.FloatTensor(self.features[idx])
        # Wrapped in a list so the label has shape (1,).
        label = torch.FloatTensor([self.labels[idx]])

        return features, label

In [44]:
class MarketDataModule(L.LightningDataModule):
    """Lightning data module serving train/validation/test loaders from CSV files.

    Args:
        train_dir: Path to the CSV that is split into training and validation sets.
        test_dir: Path to the CSV used for testing.
        batch_size: Batch size used by every DataLoader.
    """

    def __init__(
        self,
        train_dir: str = "./EURUSD_train.csv",
        test_dir: str = "./EURUSD_test.csv",
        batch_size: int = 32,
    ):
        super().__init__()
        self.train_dir = train_dir
        self.test_dir = test_dir
        self.batch_size = batch_size

    def setup(self, stage: str):
        """Build the datasets required for ``stage``.

        ``"fit"`` and ``"validate"`` load ``train_dir`` and split it 80/20
        into training and validation subsets using a fixed seed. ``"test"``
        loads ``test_dir``. Other stages create nothing.
        """
        # "validate" needs markets_val too, so it shares the fit branch.
        if stage in ("fit", "validate"):
            markets_full = MarketDataset(self.train_dir)
            total = len(markets_full)
            train_val = round(total * 0.8)
            lengths = [train_val, total - train_val]
            # Fixed generator seed keeps the split identical between runs.
            self.markets_train, self.markets_val = random_split(
                markets_full, lengths, generator=torch.Generator().manual_seed(42)
            )

        elif stage == "test":
            # Must be `markets_test`: that is the name test_dataloader reads.
            self.markets_test = MarketDataset(self.test_dir)

    def train_dataloader(self):
        """Return the training loader (fixed order, trailing partial batch dropped)."""
        return DataLoader(
            self.markets_train,
            batch_size=self.batch_size,
            drop_last=True,
            shuffle=False,
        )

    def val_dataloader(self):
        """Return the validation loader (fixed order, trailing partial batch dropped)."""
        return DataLoader(
            self.markets_val,
            batch_size=self.batch_size,
            drop_last=True,
            shuffle=False,
        )

    def test_dataloader(self):
        """Return the test loader over the dataset built by ``setup("test")``."""
        return DataLoader(self.markets_test, batch_size=self.batch_size)

    def predict_dataloader(self):
        """Return the prediction loader.

        NOTE(review): ``markets_predict`` is never assigned in this class, so
        calling this raises ``AttributeError`` until a prediction dataset is
        wired into ``setup``.
        """
        return DataLoader(self.markets_predict, batch_size=self.batch_size)


In [46]:
# Seed the Python, NumPy and PyTorch RNGs so weight initialisation is
# repeatable after Restart & Run All.
L.seed_everything(42)

marketsDataset = MarketDataModule()
# Classifier(input_size=5, output_size=1, hidden_l1=10, hidden_l2=10)
# NOTE(review): MarketDataset slices up to six feature columns
# (iloc[:, 8:14]) while the model expects 5 inputs — confirm the CSV width.
model = LitClassifier(Classifier(5, 1, 10, 10), learning_rate=1e-3)

# train model
trainer = L.Trainer(max_epochs=10,
                    default_root_dir="./checkpoints/")
# Pass the data module through the dedicated `datamodule` argument rather
# than `train_dataloaders`.
trainer.fit(model=model, datamodule=marketsDataset)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ./checkpoints/lightning_logs

  | Name       | Type              | Params
-------------------------------------------------
0 | Classifier | Classifier        | 181   
1 | BCE        | BCEWithLogitsLoss | 0     
-------------------------------------------------
181       Trainable params
0         Non-trainable params
181       Total params
0.001     Total estimated model params size (MB)


                                                                           

/Users/edroberts/opt/anaconda3/envs/DNN_trading/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/Users/edroberts/opt/anaconda3/envs/DNN_trading/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 100/100 [00:00<00:00, 165.26it/s, v_num=0]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 100/100 [00:00<00:00, 163.78it/s, v_num=0]
