In [8]:
#import statements
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import StandardScaler    
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

In [9]:
#Data class
class testData(Dataset):
    
    def __init__(self, X_data):
        self.X_data = X_data
        
    def __getitem__(self, index):
        return self.X_data[index]
        
    def __len__ (self):
        return len(self.X_data)


In [10]:
#model class
class binaryClassification(nn.Module):
    def __init__(self):
        super(binaryClassification, self).__init__()
        # Number of input features is 12.
        self.layer_1 = nn.Linear(12, 64) #layer 1
        self.layer_2 = nn.Linear(64, 64) #layer 2 input matches output of prev layer
        self.layer_out = nn.Linear(64, 1) #output layer
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.1)
        self.batchnorm1 = nn.BatchNorm1d(64)
        self.batchnorm2 = nn.BatchNorm1d(64)
        
    def forward(self, inputs):
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.relu(self.layer_2(x))
        x = self.batchnorm2(x)
        x = self.dropout(x)
        x = self.layer_out(x)
        
        return x

In [11]:
#Data loaders (to look at data) and deal with data (training and testing)
class trainData(Dataset):
    
    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data
        
    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]
        
    def __len__ (self):
        return len(self.X_data)



In [17]:
#accuracy calculatory rounds output to 0 or 1
def binary_acc(y_pred, y_test): 
    y_pred_tag = torch.round(torch.sigmoid(y_pred))

    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum/y_test.shape[0]
    acc = torch.round(acc * 100)
    
    return acc

In [18]:
#groundtruth model
df = pd.read_csv("Dataset_spine.csv")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")




X = df.iloc[:, 0:-1] #input columns
scaler = StandardScaler()
X_allscaled = scaler.fit_transform(X) #scales data
X_formatted = testData(torch.FloatTensor(X_allscaled)) #format data for input
Truth_loader = DataLoader(dataset=X_formatted, batch_size=1) #format data for input




GroundTruth = binaryClassification()


truth_list = []
GroundTruth.eval()
with torch.no_grad():
    for X_batch in Truth_loader:
        X_batch = X_batch.to(device)
        y_truth = GroundTruth(X_batch)
        y_truth = torch.sigmoid(y_truth)
        y_truthtag = torch.round(y_truth)
        truth_list.append(y_truthtag.cpu().numpy())

y = [a.squeeze().tolist() for a in truth_list] #new truth values


#split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=69)
#print(y_train)
#rescale data 
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

EPOCHS = 50 #number of passes of whole data
BATCH_SIZE = 64 #size of data going through at once
LEARNING_RATE = 0.001



train_data = trainData(torch.FloatTensor(X_train), 
                       torch.FloatTensor(y_train))
## test data    ()

test_data = testData(torch.FloatTensor(X_test))

#data loader initiation
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=1)




device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")



In [19]:
#train trained model
trained = binaryClassification()
trained.to(device)
#print(model)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(trained.parameters(), lr=LEARNING_RATE)



#print(list(model.parameters())[0])

#train trained model
trained.train()
for e in range(1, EPOCHS+1):
    epoch_loss = 0
    epoch_acc = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        
        y_pred = trained(X_batch)
        
        loss = criterion(y_pred, y_batch.unsqueeze(1))
        acc = binary_acc(y_pred, y_batch.unsqueeze(1))
        
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        

    #print(f'Epoch {e+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}')
    
    
untrained=binaryClassification()


In [20]:
#prints results
y_trainedpred_list = []
trained.eval()
with torch.no_grad():
    for X_batch in test_loader:
        X_batch = X_batch.to(device)
        y_test_pred = trained(X_batch)
        y_test_pred = torch.sigmoid(y_test_pred)
        y_pred_tag = torch.round(y_test_pred)
        y_trainedpred_list.append(y_pred_tag.cpu().numpy())

y_trainedpred_list = [a.squeeze().tolist() for a in y_trainedpred_list]
confusion_matrix(y_test, y_trainedpred_list)
print(classification_report(y_test, y_trainedpred_list))





#untrained results
y_untrainedpred_list = []
untrained.eval()
with torch.no_grad():
    for X_batch in test_loader:
        X_batch = X_batch.to(device)
        y_test_pred = untrained(X_batch)
        y_test_pred = torch.sigmoid(y_test_pred)
        y_pred_tag = torch.round(y_test_pred)
        y_untrainedpred_list.append(y_pred_tag.cpu().numpy())

y_untrainedpred_list = [a.squeeze().tolist() for a in y_untrainedpred_list]
confusion_matrix(y_test, y_untrainedpred_list)
print(classification_report(y_test, y_untrainedpred_list))

trainedcounter=0
untrainedcounter=0
for i in range(len(y_trainedpred_list)):
    if y_trainedpred_list[i]==y_test[i]:
        trainedcounter=trainedcounter+1       
    if y_untrainedpred_list[i]==y_test[i]:
        untrainedcounter=untrainedcounter+1
        
print("trained correct: ",trainedcounter)
print("untrained correct: ",untrainedcounter)

              precision    recall  f1-score   support

         0.0       0.45      0.56      0.50         9
         1.0       0.96      0.94      0.95        94

    accuracy                           0.90       103
   macro avg       0.71      0.75      0.72       103
weighted avg       0.91      0.90      0.91       103

              precision    recall  f1-score   support

         0.0       0.06      0.33      0.10         9
         1.0       0.88      0.47      0.61        94

    accuracy                           0.46       103
   macro avg       0.47      0.40      0.35       103
weighted avg       0.81      0.46      0.57       103

trained correct:  93
untrained correct:  47
