In [1]:
import pandas as pd
import numpy as np
import sklearn
from sklearn import model_selection
import matplotlib.pyplot as plt
import torch
import scipy.stats as stats
import torch.nn as nn

In [2]:
# Load the PCA-transformed feature matrix and the matching labels from disk.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only keep it if these files are trusted and really
# were saved as object arrays; confirm and drop the flag otherwise.
X_numpy, y_numpy = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in ('pca_transformed_data_v1.npy', 'label.npy')
)

In [3]:
# Convert both arrays to float32 tensors (astype makes a float32 copy first).
# NOTE(review): squeeze() with no dim drops *every* size-1 axis of X, so it would
# also collapse a single-sample or single-feature array -- confirm the raw shape.
X = torch.squeeze(torch.from_numpy(X_numpy.astype('float32')))
y = torch.from_numpy(y_numpy.astype('float32'))

In [4]:
# Hold out 20% of the samples as a test set; random_state pins the shuffle so
# the same rows land in each split on every run.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=51,
)

In [5]:
# Pick the Apple-silicon GPU backend (MPS) when it is available, otherwise stay
# on the CPU, then move all four splits to that device.
device = 'cpu'
if torch.backends.mps.is_available():
    device = 'mps'

X_train, X_test, y_train, y_test = (
    split.to(device) for split in (X_train, X_test, y_train, y_test)
)

In [6]:
# Model used for hyperparameter tuning: the layer widths are constructor
# arguments so different sizes can be tried without editing the class.
class NeuralNetwork(nn.Module):
    """One-hidden-layer regressor: Linear -> ReLU -> Linear.

    Args:
        in_features: width of each input row (default 18).
        hidden_units: width of the hidden layer (default 34).
        out_features: number of values predicted per row (default 1).

    ``NeuralNetwork()`` with no arguments builds the default 18 -> 34 -> 1 network.
    """

    def __init__(self, in_features=18, hidden_units=34, out_features=1):
        super().__init__()
        # Attribute names (and therefore state_dict keys) and the order in which
        # the layers are created are part of the class's contract; keep them.
        self.layer1 = nn.Linear(in_features, hidden_units)
        self.layer2 = nn.Linear(hidden_units, out_features)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map input of shape (..., in_features) to output of shape (..., out_features)."""
        return self.layer2(self.relu(self.layer1(x)))

In [7]:
# Seed right before building the network: nn.Linear draws its initial weights
# from the global RNG at construction time, so a seed set in any later cell
# cannot make the initialisation (and hence the training run) reproducible.
torch.manual_seed(4)
model = NeuralNetwork().to(device)

In [8]:
# Objective: mean absolute error. Optimiser: plain SGD with L2 weight decay.
# The learning rate and weight decay are fixed literals here.
loss_fn = nn.L1Loss()
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.0009289305355089368,
    weight_decay=0.0007777563951641034,
)

In [9]:
# Full-batch train / evaluate loop.
# NOTE: `model` already exists when this cell runs, so the seeds below do not
# influence its initial weights. Per the PyTorch docs torch.manual_seed seeds
# all devices, so it also supersedes the MPS-specific seed set just before it.
torch.mps.manual_seed(23)
torch.manual_seed(4)

epochs = 9308
# Log roughly every 5% of the run. Clamp to 1 so `epoch % subset` cannot divide
# by zero when epochs < 20.
subset = max(1, int(epochs * 0.05))
epoch_count = []
training_loss = []
testing_loss = []
for epoch in range(epochs):
    # --- training step on the whole training set ---
    model.train()
    train_pred = model(X_train)

    # Give the target the prediction's shape before computing the loss. If the
    # labels were 1-D while the model emits (N, 1), L1Loss would broadcast to an
    # (N, N) matrix and average the wrong quantity; when the shapes already
    # match this reshape is a no-op. (Label shape comes from the loaded file and
    # is not visible here.)
    train_loss = loss_fn(train_pred, y_train.reshape(train_pred.shape))

    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    # --- evaluation ---
    # Run every epoch (not only on logging epochs) so test_pred / test_loss hold
    # the last epoch's values once the loop ends.
    model.eval()
    with torch.inference_mode():
        test_pred = model(X_test)
        test_loss = loss_fn(test_pred, y_test.reshape(test_pred.shape))

    if epoch % subset == 0:
        # Store graph-free CPU copies: appending train_loss itself would keep
        # its autograd graph alive and leave the value on the accelerator,
        # where it cannot be converted to NumPy for plotting.
        training_loss.append(train_loss.detach().cpu())
        epoch_count.append(epoch)
        testing_loss.append(test_loss.cpu())
        pred = test_pred.to('cpu')
        # `accuracy` is a Spearman rank-correlation result, not a classification
        # accuracy; the name is kept because it is a notebook-level variable.
        accuracy = stats.spearmanr(y_test.to('cpu'),pred)
        print(f'Epoch: {epoch} | train_loss: {train_loss} | test_loss: {test_loss} | test_spearman: {accuracy.statistic}')

  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch: 0 | train_loss: 0.7189545035362244 | test_loss: 0.6860600709915161 | test_spearman: 0.002595686186852043
Epoch: 465 | train_loss: 0.579241931438446 | test_loss: 0.5426278710365295 | test_spearman: 0.0999170834943905
Epoch: 930 | train_loss: 0.5695499777793884 | test_loss: 0.5350172519683838 | test_spearman: 0.14629353790045133
Epoch: 1395 | train_loss: 0.5646200776100159 | test_loss: 0.5303875207901001 | test_spearman: 0.1678125485410012
Epoch: 1860 | train_loss: 0.5612270832061768 | test_loss: 0.527455747127533 | test_spearman: 0.18814033585107512
Epoch: 2325 | train_loss: 0.5584163069725037 | test_loss: 0.5253605246543884 | test_spearman: 0.19837313954237665
Epoch: 2790 | train_loss: 0.556374728679657 | test_loss: 0.523381769657135 | test_spearman: 0.21088019889306808
Epoch: 3255 | train_loss: 0.5548725724220276 | test_loss: 0.5219380259513855 | test_spearman: 0.22477937565389927
Epoch: 3720 | train_loss: 0.5536481142044067 | test_loss: 0.5208061337471008 | test_spearman: 0.23