In [29]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import torch.utils.data
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pytorchtools import EarlyStopping

In [30]:
# load train data and train labels
X_train = pd.read_csv('data/EstrogenReceptorStatus_Train.csv',index_col=0)
y_train = pd.read_csv('data/EstrogenReceptorStatus_Train_labels.txt',header=None)

# convert them to numpy arrays
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)

In [31]:
# load test data and test labels
X_test = pd.read_csv('data/EstrogenReceptorStatus_Test.csv',index_col=0)
y_test = pd.read_csv('data/EstrogenReceptorStatus_Test_labels.txt',header=None)

# convert them to numpy arrays
X_test = np.asarray(X_test)
y_test = np.asarray(y_test)

In [32]:
# split training data and labels into training and validation
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=14)
# check training = 60%, validation = 20% and test = 20%
print(len(y_train), len(y_val), len(y_test))

162 54 55


In [33]:
# convert features (data) to tensors
X_train = torch.FloatTensor(X_train)
X_val = torch.FloatTensor(X_val)
X_test = torch.FloatTensor(X_test)

# convert labels to tensors
y_train = torch.LongTensor(y_train)[:,0]
y_val = torch.FloatTensor(y_val)[:,0]
y_test = torch.LongTensor(y_test)[:,0]


In [34]:
epochs = 100
batch_size = 10
n_features = 162
patience = 20
n_examples_train = len(y_train)
n_examples_test = len(y_test)

In [35]:
def step(weighted_sum):
    most_probable = weighted_sum.argmax().item()
    output = 0 if most_probable < 0 else 1
    output = torch.tensor(output)
    return output

In [36]:
# create the Neural Network, it is a FeedForward
class Network(nn.Module):
    # create the layers of the Network: input layer, two hidden layers, output layer.
    def __init__(self, in_features=n_features, h1=30, h2=30, out_features=1):
        super().__init__() # instantiate the model
        torch.manual_seed(14)
        self.fc1 = nn.Linear(in_features, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.out = nn.Linear(h2, out_features)

    # set the activations functions that will be used in every layer
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.sigmoid(self.out(x))
        return x

In [37]:
network = Network()
# set the criterion model to measure the loss/error
loss_criterion = nn.BCELoss()
# set the optimizer and learning rate
optimizer = torch.optim.Adam(network.parameters(), lr=0.01)
# define early stopping function
early_stopping = EarlyStopping(patience=patience, verbose=True)

In [38]:
# train the model
losses_train = []
stop = (n_examples_train//batch_size)*batch_size

# convert the output of the softmax to 0 or 1, taking 0.5 as the thresholds
def toBinary(prediction):
    output = []
    for pred in prediction:
        if pred < 0.5: output.append(0)
        else: output.append(1)
    return output

def training(a, b):
    # go for a prediction
    y_pred_train = network.forward(X_train[a:b,:])
    # measure the loss/error
    loss = loss_criterion(y_pred_train, torch.unsqueeze(y_train[a:b], 1).float())
    # keep track of the losses
    losses_train.append(loss.detach().numpy()) # we dont want it to save it as a tensor

    # back propagation
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

# function for the test/evaluation in which we dont want back propagation, it takes into acount mini batches
# variable that counts how many correct predictions we've got in the evaluation
correct = 0
def evaluation(a,b):
    with torch.no_grad():
        for i, data in enumerate(X_test[a:b,]):
            y_eval = network.forward(data)
            y_eval = toBinary(y_eval)
            # get the number of correct predicitions
            print(y_eval,y_test[i].item())
            '''if y_eval.argmax().item() == y_test[i].item(): 
                global correct
                correct += 1'''
            if y_eval[0] == y_test[i].item():
                global correct
                correct += 1

# in this function we get the indexes for the batches                
def get_batches(n_examples, train=1): # train is used to decide if the we are on training or in testing
    for begin in range(0, n_examples, batch_size):
        # indexes for all the batches but the last one
        if begin != stop:
            # define the index for the last example that will be taken into acount in the current batch
            final = begin+batch_size
            # decide if it is training or testing
            if train: training(begin, final)
            else: evaluation(begin, final)
        # indexes fot the last batch
        else: 
            # decide if it is training or testing
            if train: training(begin, X_train.shape[0])
            else: evaluation(begin, X_test.shape[0])

In [39]:
# train the model
losses_train = []
losses_val = []

for epoch in range(epochs):
    get_batches(n_examples_train)
    # VALIDATE THE MODEL
    y_pred_val = network.forward(X_val)
    # measure the loss/error
    loss = loss_criterion(y_pred_val, torch.unsqueeze(y_val, 1).float())
    #keep track of the losses
    losses_val.append(loss.detach().numpy())
    early_stopping(np.average(losses_val), network)
    if early_stopping.early_stop:
        print("Early stopping")
        break
print(losses_train[0].item(), losses_train[-1].item())
print(losses_val[0].item(), losses_val[-1].item())

Validation loss decreased (inf --> 0.646033).  Saving model ...
Validation loss decreased (0.646033 --> 0.627268).  Saving model ...
Validation loss decreased (0.627268 --> 0.613320).  Saving model ...
Validation loss decreased (0.613320 --> 0.608634).  Saving model ...
Validation loss decreased (0.608634 --> 0.608564).  Saving model ...
Validation loss decreased (0.608564 --> 0.606676).  Saving model ...
Validation loss decreased (0.606676 --> 0.604908).  Saving model ...
Validation loss decreased (0.604908 --> 0.603716).  Saving model ...
Validation loss decreased (0.603716 --> 0.602676).  Saving model ...
Validation loss decreased (0.602676 --> 0.601615).  Saving model ...
Validation loss decreased (0.601615 --> 0.600601).  Saving model ...
Validation loss decreased (0.600601 --> 0.599660).  Saving model ...
Validation loss decreased (0.599660 --> 0.598878).  Saving model ...
Validation loss decreased (0.598878 --> 0.597975).  Saving model ...
Validation loss decreased (0.597975 -->

In [40]:
# test the network
get_batches(n_examples_test, 0)
print(f'Accuracy: {correct/len(y_test)}')

[1] 1
[1] 1
[1] 1
[1] 0
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 0
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 0
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 0
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 0
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[1] 1
[0] 0
[1] 1
Accuracy: 0.9090909090909091


In [41]:
'''
# weight and biases run
import wandb
# start a new wandb run to track this script
wandb.init(
    # set the wandb project where this run will be logged
    project="EstrogenReceptor_FeedForwardNN",
    
    # track hyperparameters and run metadata
    config={
    "learning_rate": 0.02,
    "architecture": "CNN",
    "dataset": "CIFAR-100",
    "epochs": 10,
    }
)

losses = []
for epoch in range(epochs):
    for batch in range(0, )
    # go for a prediction
    y_pred = network.forward(X_train)
    # measure the loss/error
    loss = loss_criterion(y_pred, y_train)
    # back propagation
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()    
# [optional] finish the wandb run, necessary in notebooks
wandb.finish()
'''

'\n# weight and biases run\nimport wandb\n# start a new wandb run to track this script\nwandb.init(\n    # set the wandb project where this run will be logged\n    project="EstrogenReceptor_FeedForwardNN",\n    \n    # track hyperparameters and run metadata\n    config={\n    "learning_rate": 0.02,\n    "architecture": "CNN",\n    "dataset": "CIFAR-100",\n    "epochs": 10,\n    }\n)\n\nlosses = []\nfor epoch in range(epochs):\n    for batch in range(0, )\n    # go for a prediction\n    y_pred = network.forward(X_train)\n    # measure the loss/error\n    loss = loss_criterion(y_pred, y_train)\n    # back propagation\n    loss.backward()\n    optimizer.step()\n    optimizer.zero_grad()    \n# [optional] finish the wandb run, necessary in notebooks\nwandb.finish()\n'