In [1]:
import torch
from torch import nn, Tensor
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset, Dataset, DataLoader
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import time
import copy
from imblearn.over_sampling import SMOTE
torch.set_num_threads(4)
torch.set_num_interop_threads(4)



In [2]:
with open('train_df_std') as f:
    X_train = pd.read_csv(f)

with open('val_df_std') as f:
    X_val = pd.read_csv(f)

with open('test_df_std') as f:
    X_test = pd.read_csv(f)

In [3]:
y_train=X_train.Cancer
y_test=X_test.Cancer
y_val=X_val.Cancer
X_train=X_train.drop("Cancer",axis=1)
X_val=X_val.drop("Cancer",axis=1)
X_test=X_test.drop("Cancer",axis=1)


In [4]:
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

In [5]:
dataset = TensorDataset( Tensor(X_train.values), Tensor(y_train.values.astype(int)))
train_loader = DataLoader(dataset, batch_size= 50)

In [6]:
dataset = TensorDataset( Tensor(X_val.values), Tensor(y_val.values.astype(int)) )
val_loader = DataLoader(dataset, batch_size= 50)

In [7]:
dataset = TensorDataset( Tensor(X_test.values), Tensor(y_test.values.astype(int)) )
test_loader = DataLoader(dataset, batch_size= 50)

https://discuss.pytorch.org/t/load-dataframe-in-torch/47436/3
https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
https://datascience.stackexchange.com/questions/45916/loading-own-train-data-and-labels-in-dataloader-using-pytorch

In [8]:
class NeuralNetwork(nn.Module):
    def __init__(self,layer_1,layer_2,layer_3):
        super(NeuralNetwork,self).__init__()
        #self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.BatchNorm1d(layer_1),
            nn.Linear(layer_1, layer_2),
            #nn.ReLU(),
            #nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(layer_2, layer_3),
            nn.Sigmoid()
        )

    def forward(self, x):
        #x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [9]:
# hyperparameter values to consider.
hiddens=[64]  #256
lrs=[1e-6] #1e-4
layer_1= X_train.shape[1]
layer_3=1
loss_function = nn.BCELoss()

In [10]:
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [11]:
def eval_model(model,data_loader):
    model.eval()
    y_true_list=[]
    y_pred_list=[]
    model.eval()
    for x,y in data_loader:
        outputs=model(x)
        y_pred = torch.where(outputs >.45,1,0)   #.49
        y_pred_list.extend(y_pred.clone().detach().tolist())
        y_true_list.extend(y.clone().detach().tolist())
    return classification_report(y_true_list, y_pred_list)

best_model=None
acclist=[]

for h in hiddens:
    for lr in lrs:
        net = NeuralNetwork(layer_1, h, layer_3)
        optimizer = torch.optim.SGD(net.parameters(), lr=lr)
        epoch=0
        for i in range(30):
            epoch+=1
            net.train()
            for x,y in train_loader:
                optimizer.zero_grad()
                outputs = net(x)
                loss= loss_function(outputs,y.unsqueeze(1))
                loss.backward()
                torch.nn.utils.clip_grad_value_(net.parameters(), .005)  #.005
                optimizer.step()
            if epoch%1 ==0:
                #acclist.append(eval_model(net,val_loader))
                print(eval_model(net,val_loader))
         
            
            

              precision    recall  f1-score   support

         0.0       0.90      0.62      0.73     39244
         1.0       0.14      0.49      0.22      5027

    accuracy                           0.60     44271
   macro avg       0.52      0.55      0.48     44271
weighted avg       0.82      0.60      0.68     44271

              precision    recall  f1-score   support

         0.0       0.90      0.62      0.73     39244
         1.0       0.14      0.49      0.22      5027

    accuracy                           0.60     44271
   macro avg       0.52      0.55      0.48     44271
weighted avg       0.82      0.60      0.68     44271

              precision    recall  f1-score   support

         0.0       0.90      0.62      0.73     39244
         1.0       0.14      0.49      0.22      5027

    accuracy                           0.60     44271
   macro avg       0.52      0.55      0.48     44271
weighted avg       0.82      0.60      0.68     44271

              preci

KeyboardInterrupt: 

In [13]:
print(eval_model(net,test_loader))

              precision    recall  f1-score   support

         0.0       0.91      0.60      0.72     39245
         1.0       0.14      0.52      0.22      5027

    accuracy                           0.59     44272
   macro avg       0.52      0.56      0.47     44272
weighted avg       0.82      0.59      0.66     44272

