In [1]:
import pandas as pd
import numpy as np
import sklearn
from sklearn import model_selection
import matplotlib.pyplot as plt
import torch
import scipy.stats as stats
import torch.nn as nn

In [2]:
# Lets grab our data from the pca transformation
X_numpy = np.load('pca_transformed_data_v1.npy', allow_pickle=True)
y_numpy = np.load('label.npy', allow_pickle=True)

In [3]:
X_numpy.shape

(1494, 18)

In [4]:
X = torch.from_numpy(X_numpy.astype(np.float32)).squeeze()
y = torch.from_numpy(y_numpy.astype(np.float32))

In [5]:
#lets split our data
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.2,
                                                                           random_state=52)

In [6]:
#send the data into gpu
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
X_train,X_test, y_train,y_test = X_train.to(device), X_test.to(device), y_train.to(device), y_test.to(device)

In [7]:
# lets create a model for hyperparameter tuning
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(18, 34)
        self.layer2 = nn.Linear(34, 1)
        self.relu = nn.ReLU()
        
        #add forward function
    def forward(self, x):
        return self.layer2(self.relu(self.layer1(x)))

In [8]:
model = NeuralNetwork().to(device)

In [9]:
#loss function & optimizer
loss_fn = torch.nn.L1Loss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.0009289305355089368, weight_decay=0.0007777563951641034)

In [10]:
#train and test loop
torch.mps.manual_seed(34)
torch.manual_seed(110)

epochs = 9308
subset = int(epochs * 0.05)
epoch_count = []
training_loss = []
testing_loss = []
for epoch in range(epochs):
    #turn on training mode
    model.train()
    
    #fit data
    train_pred = model(X_train)
    
    #calculate loss
    train_loss = loss_fn(train_pred, y_train)
    
    #zero gradients
    optimizer.zero_grad()
    
    #back propagation
    train_loss.backward()
    
    #gradient descent
    optimizer.step()
    
    #testing
    model.eval()
    with torch.inference_mode():
        test_pred = model(X_test)
        test_loss = loss_fn(test_pred, y_test)
    
    if epoch % subset == 0:
        training_loss.append(train_loss)
        epoch_count.append(epoch)
        testing_loss.append(test_loss)
        pred = test_pred.to('cpu')
        accuracy = stats.spearmanr(y_test.to('cpu'),pred)
        print(f'Epoch: {epoch} | train_loss: {train_loss} | test_loss: {test_loss} | test_spearman: {accuracy.statistic}')

  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch: 0 | train_loss: 0.6128159165382385 | test_loss: 0.6664394736289978 | test_spearman: -0.099776524575603
Epoch: 465 | train_loss: 0.5653883218765259 | test_loss: 0.6251594424247742 | test_spearman: -0.03449278979753924
Epoch: 930 | train_loss: 0.550709068775177 | test_loss: 0.6148455142974854 | test_spearman: 0.02469587499742863
Epoch: 1395 | train_loss: 0.5447897911071777 | test_loss: 0.6096615791320801 | test_spearman: 0.06663051314952195
Epoch: 1860 | train_loss: 0.5416805148124695 | test_loss: 0.6074155569076538 | test_spearman: 0.08413863197847828
Epoch: 2325 | train_loss: 0.539467453956604 | test_loss: 0.6059355139732361 | test_spearman: 0.09103997327995379
Epoch: 2790 | train_loss: 0.5379705429077148 | test_loss: 0.6045100688934326 | test_spearman: 0.09416449910173677
Epoch: 3255 | train_loss: 0.536628782749176 | test_loss: 0.6032556295394897 | test_spearman: 0.09749822449721958
Epoch: 3720 | train_loss: 0.5354858040809631 | test_loss: 0.6023893356323242 | test_spearman: 0.