### AI Thursdays Session 9 - Lab 2
- S & P index direction classifier

References: ["Python for Algorithmic Trading", Yves Hilpisch, Chapter 5, O'Reilly](https://github.com/yhilpisch/py4at)

#### Imports

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
import time
import random
import os
from typing import List, Tuple, Dict, Any


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.4 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\adeid\miniconda3\envs\ai_thursdays_env_312\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "c:\Users\adeid\miniconda3\envs\ai_thursdays_env_312\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "c:\Users\adeid\miniconda3\envs\ai_thursdays_env_312\Lib\site-packages\ipykernel\kernelapp.py", line 758,

#### Define global constants/variables

In [None]:
# Seed every random number generator used here so that runs are repeatable
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
if torch.cuda.is_available():
    # Only reachable when CUDA is present; seeds the GPU generators as well
    torch.cuda.manual_seed_all(SEED)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", DEVICE)

# Location of the S & P price data (CSV)
S_P_DATA_PATH = './data/sp500_stocks.csv'

# Fractions used when partitioning the data
TEST_SIZE = 0.15
# Taken from the 0.85 left after the test split: 0.1765 * 0.85 ~= 0.15 of the total
VAL_SIZE = 0.1765


Device: cpu


#### Get the S & P data

In [10]:
def get_sp_data(
    file_path: str = S_P_DATA_PATH,
    symbol: str = "MSFT"
    ) -> pd.DataFrame:
    """
    Read the S & P CSV file and return the rows of one symbol in date order.
    :param file_path: Path to the CSV file; it must provide 'Symbol' and 'Date' columns.
    :param symbol: Stock symbol whose rows are kept.
    :return: DataFrame with only the rows matching the symbol, sorted by 'Date',
             with a fresh 0-based index.
    """
    all_rows = pd.read_csv(file_path, index_col=False)
    # Boolean mask selecting the rows of the requested symbol
    is_symbol = all_rows['Symbol'] == symbol
    return (
        all_rows.loc[is_symbol]
        .sort_values(by='Date')
        .reset_index(drop=True)
    )

# Load the data with the default path and symbol, then inspect it
sp_data = get_sp_data()
print(sp_data.tail())
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a trailing "None".
sp_data.info()

            Date Symbol   Adj Close       Close        High         Low  \
3763  2024-12-16   MSFT  451.589996  451.589996  452.179993  445.279999   
3764  2024-12-17   MSFT  454.459991  454.459991  455.290009  449.570007   
3765  2024-12-18   MSFT  437.390015  437.390015  452.649994  437.019989   
3766  2024-12-19   MSFT  437.029999  437.029999  443.179993  436.320007   
3767  2024-12-20   MSFT  436.600006  436.600006  443.739990  428.630005   

            Open      Volume  
3763  447.269989  23598800.0  
3764  451.010010  22733500.0  
3765  451.320007  24444500.0  
3766  441.619995  22963700.0  
3767  433.109985  64235200.0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3768 entries, 0 to 3767
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       3768 non-null   object 
 1   Symbol     3768 non-null   object 
 2   Adj Close  3768 non-null   float64
 3   Close      3768 non-null   float64
 4   High       3

#### DNN classifier class to classify market direction

In [None]:
class DNNMarketDirectionClassifier(nn.Module):
    """
    Feed-forward network used to classify market direction.

    Every hidden layer is a Linear -> ReLU -> Dropout group; a final Linear
    layer maps to the output size. No activation is applied to the output
    inside this class.

    The class derives from nn.Module so that instances are callable and expose
    parameters(), to(), train()/eval() and state_dict(). The layer stack is
    still available as the ``model`` attribute.
    """
    def __init__(
        self, 
        input_size: int, 
        hidden_sizes: List[int], 
        output_size: int, 
        dropout: float = 0.5
        ) -> None:
        """
        Initialize the DNN classifier.
        :param input_size: Size of the input features.
        :param hidden_sizes: List of sizes for hidden layers; an empty list
                             yields a single linear layer.
        :param output_size: Size of the output layer.
        :param dropout: Dropout rate for regularization.        
        """
        # Must run before any sub-module is assigned so nn.Module can register it
        super().__init__()
        layers: List[nn.Module] = []
        in_size = input_size
        for h_size in hidden_sizes:
            layers.append(nn.Linear(in_size, h_size))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            # The next layer consumes what this one produces
            in_size = h_size
        layers.append(nn.Linear(in_size, output_size))
        self.model = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Pass the input through the layer stack.
        :param x: Input tensor whose last dimension equals input_size.
        :return: Output of the final linear layer (last dimension output_size).
        """
        return self.model(x)