In [75]:
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split as tts
from scipy.ndimage import uniform_filter1d
from torch.utils.data import DataLoader
import methods_NN
import torch
from time import sleep
from tqdm import tqdm

# Plot setup: select the matplotlib backend, register pandas' matplotlib
# converters and set the default figure size and font size for this notebook.
from pandas.plotting import register_matplotlib_converters
import matplotlib
# The backend is chosen before pyplot is imported.
# NOTE(review): "TkAgg" draws in a separate Tk window and needs a display;
# confirm this is intended for a notebook rather than the inline backend.
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
register_matplotlib_converters()
plt.rc("figure", figsize=(12, 8))  # default figure size
plt.rc("font", size=13)  # default font size

In [76]:
def load_data(dim_type: str) -> list[np.ndarray]:
    """Load the train/test features and labels for one feature representation.

    Args:
        dim_type: Which feature files to read (case-insensitive):
            "full" reads ``data/trn_all.csv`` and ``data/tst_all.csv``,
            "pca2" reads ``data/trn_pca2.csv`` and ``data/tst_pca2.csv``,
            "pca10" reads ``data/trn_pca10.csv`` and ``data/tst_pca10.csv``.

    Returns:
        A list of four NumPy arrays, one per CSV file, in the order
        ``[train features, test features, train labels, test labels]``.
        The labels always come from ``data/trn_labs.csv`` and
        ``data/tst_labs.csv``. Paths are relative to the working directory.

    Raises:
        KeyError: If ``dim_type`` is not one of the supported values.
    """
    # Maps the public option name to the suffix used in the feature file names.
    suffixes = {"full": "all", "pca2": "pca2", "pca10": "pca10"}
    try:
        suffix = suffixes[dim_type.lower()]
    except KeyError:
        raise KeyError("dim_type must be: 'full', 'pca2', or 'pca10'") from None

    file_names = [
        f"data/trn_{suffix}.csv",
        f"data/tst_{suffix}.csv",
        "data/trn_labs.csv",
        "data/tst_labs.csv",
    ]

    # Return a real list (as annotated) rather than a one-shot generator, so
    # the result can be indexed, measured with len() and iterated repeatedly.
    return [pd.read_csv(name).to_numpy() for name in file_names]

In [77]:
# Data prep: load the arrays, one-hot encode the labels and build the loaders.
# NOTE(review): the two lines below read like prime factorisations of the
# dataset lengths (both contain 151, the batch size) - confirm.
# data_train.__len__() -> 2, 2, 2, 3, 3, 3, 7, 151
# data_test.__len__() -> 2, 2, 2, 3, 7, 151
batch_size = 151

data_train, data_test, label_train, label_test = load_data("full")

# One-hot encode the labels into two columns. Each split is encoded on its own
# so neither is limited by the other's length (a joint zip() would stop at the
# shorter array and leave the remaining rows all-zero).
label_train_reshaped = np.eye(2)[label_train.astype(int).ravel()]
label_test_reshaped = np.eye(2)[label_test.astype(int).ravel()]

data_train = methods_NN.NumbersDataset(data_train, label_train_reshaped)
# The test features must be paired with the test labels.
data_test = methods_NN.NumbersDataset(data_test, label_test_reshaped)

loader_train = DataLoader(data_train, batch_size=batch_size, shuffle=True, pin_memory=True)
# Evaluation does not need shuffling; keeping dataset order makes the collected
# outputs line up with the rows of the test file.
loader_test = DataLoader(data_test, batch_size=batch_size, shuffle=False, pin_memory=True)

In [78]:
class Network(nn.Module):
    """Fully connected network: ReLU-activated hidden layers and a 2-unit linear output.

    Attributes:
        device: The device the layers were moved to at construction time.
        relu: The activation applied after every layer except the last.
        l1: First linear layer (``input_size`` -> ``hidden_layer_dims[0]``).
        ln: Output linear layer (``hidden_layer_dims[-1]`` -> 2).
        ls: All linear layers in forward order, ``l1`` first and ``ln`` last.
    """

    def __init__(self, hidden_layer_dims: list[int], input_size: int, device: torch.device) -> None:
        """Build the layer stack.

        Args:
            hidden_layer_dims: Width of each hidden layer, in order. Must hold
                at least one entry (an empty list raises ``IndexError``).
            input_size: Number of input features.
            device: Device to place every layer on.
        """
        super().__init__()
        self.device = device

        self.relu = nn.ReLU()
        self.l1 = nn.Linear(input_size, hidden_layer_dims[0])
        self.ln = nn.Linear(hidden_layer_dims[-1], 2)
        hidden = [
            nn.Linear(n_in, n_out)
            for n_in, n_out in zip(hidden_layer_dims, hidden_layer_dims[1:])
        ]

        # nn.ModuleList registers every layer as a submodule. A plain Python
        # list would hide the intermediate layers from parameters(), so an
        # optimizer built from model.parameters() would never update them.
        self.ls = nn.ModuleList([self.l1, *hidden, self.ln])
        self.to(device)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through all layers and return the raw outputs (no softmax).

        Args:
            x: Input tensor whose last dimension is ``input_size``.

        Returns:
            Tensor with a last dimension of size 2.
        """
        out = x
        # ReLU follows every layer except the final one.
        for layer in self.ls[:-1]:
            out = self.relu(layer(out))
        return self.ls[-1](out)

In [79]:
# Hyperparams, device and RNG seed
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
epochs = 20
learning_rate = 0.01

# Seed torch's random number generators so that the weight initialisation and
# the DataLoader shuffling that follow are repeatable from run to run.
seed = 0
torch.manual_seed(seed)

In [80]:
# Model prep: build the network, the loss function and the optimiser.
# NOTE(review): input_size=21 presumably matches the number of feature columns
# loaded by the data-prep cell - confirm.
model = Network(
    hidden_layer_dims=[100, 250, 500, 250, 100, 10],
    input_size=21,
    device=device,
)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

# Plain SGD over whatever parameters the model exposes.
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [82]:
# Train the model
num_steps = len(loader_train)  # number of batches per epoch
loss_list = []  # one loss value per optimisation step, across all epochs

model.train()
for epoch in tqdm(range(epochs)):
    for value, label in loader_train:
        # Flatten using the batch's own size rather than the configured
        # batch_size, so a smaller final batch does not break the reshape.
        sample = value.reshape(value.shape[0], -1).to(device)
        label = label.view(label.shape[0], 2).to(device)

        # forward
        output = model(sample)
        loss = criterion(output, label)
        loss_list.append(loss.item())

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

100%|██████████| 20/20 [00:42<00:00,  2.13s/it]


In [84]:
# Evaluate the model on the test loader
was_training = model.training
model.eval()
with torch.no_grad():
    out_list = []    # raw model outputs, one row per sample
    label_list = []  # the matching label rows
    n_samples = 0    # number of samples evaluated
    n_diff = 0       # number of samples whose predicted class differs from the label

    for value, label in loader_test:
        # Same flattening as in the training loop.
        sample = value.reshape(value.shape[0], -1).to(device)
        label = label.view(label.shape[0], 2).to(device)
        label_list += label.tolist()

        output = model(sample)
        out_list += output.tolist()

        # Compare predicted and labelled classes (index of the largest entry in
        # each row) instead of subtracting one-hot labels from raw outputs.
        predicted = output.argmax(dim=1)
        expected = label.argmax(dim=1)
        n_diff += (predicted != expected).sum().item()
        n_samples += label.shape[0]

    # Fraction of correctly classified samples; NaN when the loader is empty.
    acc = 1 - n_diff / n_samples if n_samples else float("nan")
# Leave the model in whichever mode it was in before evaluation.
model.train(was_training)

In [85]:
# Plot the collected model outputs against the collected labels.
fig, ax = plt.subplots()
ax.plot(out_list, '*', label='Guess')
ax.plot(label_list, '*', label='Truth')
# x is the position of each sample in out_list / label_list.
ax.set_xlabel("Sample index")
ax.set_ylabel("Value")
ax.set_title("Model output vs. label")
ax.legend()
plt.show()

In [None]:
# Summary