In [25]:
# Import the libraries
import os
import torch 
import torchvision
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.utils import save_image
from PIL import Image
import numpy as np
import pandas as pd
import scipy.stats as st
from sklearn.model_selection import cross_val_score, KFold, train_test_split, StratifiedKFold
from torch.utils.data import Dataset, DataLoader
from imblearn.over_sampling import SMOTENC
import imblearn
from sklearn.metrics import accuracy_score 
from sklearn.preprocessing import MinMaxScaler

In [26]:
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from cross_validation import validation_pipeline

In [27]:
# setup_seed
def setup_seed(seed):
     torch.manual_seed(seed)
     torch.cuda.manual_seed_all(seed)
     np.random.seed(seed)
     random.seed(seed)
     torch.backends.cudnn.deterministic = True
setup_seed(20)

In [28]:
# parameter for the NN
Epochs = 20
Lr_Rate = 1e-2
Batch_Size = 1024

In [29]:
# load the data
file_name='../../Dataset_2.csv' 
np.set_printoptions(suppress=True)

# read data from csv file
df = pd.read_csv(file_name)
data=np.asarray(df.values.tolist())
Y= np.asarray(data[:,0], dtype=np.float64)
X= np.asarray(data[:,1:], dtype=np.float64)
X=X.astype(np.float32)
X_train, X_test, y_train,y_test= train_test_split(X, Y, test_size= 0.2,random_state=0)

In [30]:
# Construct the dataset
class MyDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, X, Y, transform=None):
        """
        Args: The data X and the label Y
        """
        self.X= X
        self.Y = Y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            print('yes')
            idx = idx.tolist()

        return  self.X[idx,:], self.Y[idx]

In [31]:
# construct the vanilla dataset
train_set = MyDataset(X_train,y_train)
test_set = MyDataset(X_test,y_test)
train_loader = DataLoader(train_set, batch_size=Batch_Size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=Batch_Size, shuffle=True)

In [32]:
# The neural network architecture
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        # 5 linear layers
        self.fc1 = nn.Linear(45, 30)
        self.fc2 = nn.Linear(30, 20)
        self.fc3 = nn.Linear(20, 10)
        self.fc4 = nn.Linear(10, 5)
        self.fc5 = nn.Linear(5, 2)

    def forward(self, x):
        # a combination of linear layers and relu activation function
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return x

In [33]:
model = MLP()
print(model)

MLP(
  (fc1): Linear(in_features=45, out_features=30, bias=True)
  (fc2): Linear(in_features=30, out_features=20, bias=True)
  (fc3): Linear(in_features=20, out_features=10, bias=True)
  (fc4): Linear(in_features=10, out_features=5, bias=True)
  (fc5): Linear(in_features=5, out_features=2, bias=True)
)


In [34]:
# loss function and optimizor
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=Lr_Rate)

In [35]:
# decide to use gpu or cpu
def get_device():
    if torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'
    return device

In [36]:
# training function
def training(model, train_loader, Epochs, optimizer_s=optimizer, test_loader = None):
    train_loss = []
    for epoch in range(Epochs):
        running_loss = 0.0
        for data in train_loader:
            # load data and label
            img, label = data
            img = img.to(device)
            label = label.to(device)
            img = img.view(img.size(0), -1)
            optimizer_s.zero_grad()
            outputs = model(img)
            loss = F.cross_entropy(outputs, label.long()) # nll_loss
            # loss  backward
            loss.backward()
            optimizer_s.step()
            running_loss += loss.item()
        # compute the overall loss
        loss = running_loss / len(train_loader)
        train_loss.append(loss)
        print('Epoch {} of {}, Train Loss: {:.3f}'.format(
            epoch+1, Epochs, loss))
        
        # compute the test accuracy at last
        if (epoch+1) % Epochs == 0:
            score, _, _ = test_data(model, test_loader)
            print(score)

    return model, train_loss

In [37]:
# test data
def test_data(model, test_loader):
     running_loss = 0.0
     correct=0
     len_test = 0
     pred_result = []
     label_org = []
     ii = 0
     for data in test_loader:
        # load the data
        img, label = data
        label=label.long()
        img = img.to(device)
        label = label.to(device)
        img = img.view(img.size(0), -1)
        optimizer.zero_grad()
        outputs = model(img)
        loss = F.cross_entropy(outputs, label.long()) # nll_loss
        outputs = F.softmax(outputs)
        outputs = outputs.argmax(dim=1)
        # save the results 
        pred_result.extend(outputs.cpu())
        label_org.extend(label.cpu())
        correct += balanced_accuracy_score(label.cpu(), outputs.cpu()) 
        running_loss += loss.item()
        len_test += len(img)
        ii += 1
     loss = running_loss / len(test_loader)
     # compute the balanced_accuracy_score 
     acc = balanced_accuracy_score(label_org, pred_result) 
     print(accuracy_score(label_org, pred_result), acc)
     return acc, label_org, pred_result

In [38]:
# get the device
device = get_device()
model.to(device)

MLP(
  (fc1): Linear(in_features=45, out_features=30, bias=True)
  (fc2): Linear(in_features=30, out_features=20, bias=True)
  (fc3): Linear(in_features=20, out_features=10, bias=True)
  (fc4): Linear(in_features=10, out_features=5, bias=True)
  (fc5): Linear(in_features=5, out_features=2, bias=True)
)

In [39]:
# oversample the data using SMOTENC
oversample = SMOTENC (categorical_features=[2,4,7,8,9,10,11,12,13,14,15,16,17,18,19,20,
                                                21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,
                                                38,39,40,41,42,43,44], random_state=42)
X_train_fold_upsample, y_train_fold_upsample = oversample.fit_resample(X_train,y_train)
# Normalize the data
scaler = MinMaxScaler()
#Fit on training set
scaler.fit(X_train_fold_upsample)
#scale on training set
X_train_fold_upsample = scaler.transform(X_train_fold_upsample)
#scale the validation dataset
X_test = scaler.transform(X_test)

In [40]:
# Build the dataset and the dataloader
train_set_upsample = MyDataset(X_train_fold_upsample, y_train_fold_upsample)
train_loader_upsample = DataLoader(train_set_upsample, batch_size=Batch_Size, shuffle=True)
test_set = MyDataset(X_test,y_test)
test_loader = DataLoader(test_set, batch_size=Batch_Size, shuffle=True)

In [41]:
# training the model
model, train_loss = training(model, train_loader_upsample, Epochs = Epochs,  optimizer_s=optimizer, test_loader = test_loader)

Epoch 1 of 20, Train Loss: 0.447
Epoch 2 of 20, Train Loss: 0.392
Epoch 3 of 20, Train Loss: 0.385
Epoch 4 of 20, Train Loss: 0.381
Epoch 5 of 20, Train Loss: 0.379
Epoch 6 of 20, Train Loss: 0.378
Epoch 7 of 20, Train Loss: 0.375
Epoch 8 of 20, Train Loss: 0.375
Epoch 9 of 20, Train Loss: 0.373
Epoch 10 of 20, Train Loss: 0.372
Epoch 11 of 20, Train Loss: 0.371
Epoch 12 of 20, Train Loss: 0.371
Epoch 13 of 20, Train Loss: 0.370
Epoch 14 of 20, Train Loss: 0.371
Epoch 15 of 20, Train Loss: 0.370
Epoch 16 of 20, Train Loss: 0.368
Epoch 17 of 20, Train Loss: 0.368
Epoch 18 of 20, Train Loss: 0.369
Epoch 19 of 20, Train Loss: 0.367
Epoch 20 of 20, Train Loss: 0.366




0.7931684487633657 0.7172816248557129
0.7172816248557129


In [42]:
# compute the score (vanilla accuracy and balanced accuracy on test)
score, y_test, pred = test_data(model, test_loader)



0.7931684487633657 0.7172816248557129


In [43]:
score

0.7172816248557129

In [44]:
# compute Recall, Precision and F1 for class 1
recall_open = recall_score(y_test, pred, pos_label=1)
precision_open = precision_score(y_test, pred, pos_label=1)
f1_open = f1_score(y_test, pred, pos_label=1)

print("Recall: " + str(recall_open))
print("Precision: " + str(precision_open))
print("F1: " + str(f1_open))

Recall: 0.8367720135161996
Precision: 0.9031192345647231
F1: 0.8686806157401676


In [45]:
# compute Recall, Precision and F1 for class 0
recall_closed = recall_score(y_test, pred, pos_label=0)
precision_closed = precision_score(y_test, pred, pos_label=0)
f1_closed = f1_score(y_test, pred, pos_label=0)

print("Recall: " + str(recall_closed))
print("Precision: " + str(precision_closed))
print("F1: " + str(f1_closed))

Recall: 0.5977912361952262
Precision: 0.44974537657464486
F1: 0.5133068216579993


In [46]:
from numpy import *

# use SMOTE to conduct the cross validation
def score_model(model, cv, X_train, y_train, Epochs = 100):
    """
    Creates folds manually, and upsamples within each fold.
    Returns an array of validation (recall) scores
    """
    oversample = SMOTENC (categorical_features=[2,4,7,8,9,10,11,12,13,14,15,16,17,18,19,20,
                                                21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,
                                                38,39,40,41,42,43,44], random_state=42)

    if cv is None:
        cv = KFold(n_splits=5, random_state=42)
    
    scores = []

    for train_fold_index, val_fold_index in cv.split(X_train, y_train):
        # Get the training data
        print('start...')
        X_train_fold, y_train_fold = X_train[train_fold_index], y_train[train_fold_index]
        # Get the validation data
        X_val_fold, y_val_fold = X_train[val_fold_index], y_train[val_fold_index]

        # Upsample only the data in the training section
        X_train_fold_upsample, y_train_fold_upsample = oversample.fit_resample(X_train_fold,y_train_fold)
        
        scaler = MinMaxScaler()

        #Fit on training set
        scaler.fit(X_train_fold_upsample)
        #scale on training set
        X_train_fold_upsample = scaler.transform(X_train_fold_upsample)
        #scale the validation dataset
        X_val_fold = scaler.transform(X_val_fold)
        # Build the datset and dataloader
        train_set = MyDataset(X_train_fold_upsample,y_train_fold_upsample)
        test_set = MyDataset(X_val_fold,y_val_fold)
        train_loader = DataLoader(train_set, batch_size=Batch_Size, shuffle=True)
        test_loader = DataLoader(test_set, batch_size=Batch_Size, shuffle=True)
        print('training...')
        # Build the model and optimizor
        model = MLP()
        device = get_device()
        model.to(device)
        optimizer = optim.Adam(model.parameters(), lr=Lr_Rate)
        # training 
        model_save, _ = training(model, train_loader, Epochs,  optimizer_s=optimizer, test_loader = test_loader)
        # Score the model on the (non-upsampled) validation data
        # test the score
        score, label_org, pred_result = test_data(model_save, test_loader)
        scores.append(score)
    return np.array(scores)

In [48]:
# do cross validation
cv=StratifiedKFold(n_splits=10, shuffle=True, random_state=20)
# get the cross validation results
scores = score_model(model, cv, X_train, y_train, Epochs = 20)

start...
training...
Epoch 1 of 20, Train Loss: 0.439
Epoch 2 of 20, Train Loss: 0.391
Epoch 3 of 20, Train Loss: 0.384
Epoch 4 of 20, Train Loss: 0.380
Epoch 5 of 20, Train Loss: 0.378
Epoch 6 of 20, Train Loss: 0.375
Epoch 7 of 20, Train Loss: 0.373
Epoch 8 of 20, Train Loss: 0.372
Epoch 9 of 20, Train Loss: 0.371
Epoch 10 of 20, Train Loss: 0.370
Epoch 11 of 20, Train Loss: 0.369
Epoch 12 of 20, Train Loss: 0.368
Epoch 13 of 20, Train Loss: 0.368
Epoch 14 of 20, Train Loss: 0.366
Epoch 15 of 20, Train Loss: 0.366
Epoch 16 of 20, Train Loss: 0.365
Epoch 17 of 20, Train Loss: 0.364
Epoch 18 of 20, Train Loss: 0.364
Epoch 19 of 20, Train Loss: 0.363
Epoch 20 of 20, Train Loss: 0.364




0.7789242768930776 0.7166998237200165
0.7166998237200165
0.7789242768930776 0.7166998237200165
start...
training...
Epoch 1 of 20, Train Loss: 0.453
Epoch 2 of 20, Train Loss: 0.389
Epoch 3 of 20, Train Loss: 0.381
Epoch 4 of 20, Train Loss: 0.377
Epoch 5 of 20, Train Loss: 0.374
Epoch 6 of 20, Train Loss: 0.373
Epoch 7 of 20, Train Loss: 0.373
Epoch 8 of 20, Train Loss: 0.370
Epoch 9 of 20, Train Loss: 0.369
Epoch 10 of 20, Train Loss: 0.368
Epoch 11 of 20, Train Loss: 0.366
Epoch 12 of 20, Train Loss: 0.366
Epoch 13 of 20, Train Loss: 0.365
Epoch 14 of 20, Train Loss: 0.364
Epoch 15 of 20, Train Loss: 0.363
Epoch 16 of 20, Train Loss: 0.362
Epoch 17 of 20, Train Loss: 0.363
Epoch 18 of 20, Train Loss: 0.361
Epoch 19 of 20, Train Loss: 0.360
Epoch 20 of 20, Train Loss: 0.360




0.7799805004874878 0.6976160090218675
0.6976160090218675
0.7799805004874878 0.6976160090218675
start...
training...
Epoch 1 of 20, Train Loss: 0.429
Epoch 2 of 20, Train Loss: 0.384
Epoch 3 of 20, Train Loss: 0.377
Epoch 4 of 20, Train Loss: 0.373
Epoch 5 of 20, Train Loss: 0.371
Epoch 6 of 20, Train Loss: 0.370
Epoch 7 of 20, Train Loss: 0.369
Epoch 8 of 20, Train Loss: 0.367
Epoch 9 of 20, Train Loss: 0.367
Epoch 10 of 20, Train Loss: 0.366
Epoch 11 of 20, Train Loss: 0.365
Epoch 12 of 20, Train Loss: 0.365
Epoch 13 of 20, Train Loss: 0.364
Epoch 14 of 20, Train Loss: 0.363
Epoch 15 of 20, Train Loss: 0.362
Epoch 16 of 20, Train Loss: 0.361
Epoch 17 of 20, Train Loss: 0.361
Epoch 18 of 20, Train Loss: 0.360
Epoch 19 of 20, Train Loss: 0.360
Epoch 20 of 20, Train Loss: 0.358




0.8065485862853429 0.704925654511724
0.704925654511724
0.8065485862853429 0.704925654511724
start...
training...
Epoch 1 of 20, Train Loss: 0.432
Epoch 2 of 20, Train Loss: 0.387
Epoch 3 of 20, Train Loss: 0.382
Epoch 4 of 20, Train Loss: 0.378
Epoch 5 of 20, Train Loss: 0.376
Epoch 6 of 20, Train Loss: 0.374
Epoch 7 of 20, Train Loss: 0.373
Epoch 8 of 20, Train Loss: 0.372
Epoch 9 of 20, Train Loss: 0.371
Epoch 10 of 20, Train Loss: 0.370
Epoch 11 of 20, Train Loss: 0.369
Epoch 12 of 20, Train Loss: 0.369
Epoch 13 of 20, Train Loss: 0.368
Epoch 14 of 20, Train Loss: 0.367
Epoch 15 of 20, Train Loss: 0.366
Epoch 16 of 20, Train Loss: 0.367
Epoch 17 of 20, Train Loss: 0.366
Epoch 18 of 20, Train Loss: 0.365
Epoch 19 of 20, Train Loss: 0.365
Epoch 20 of 20, Train Loss: 0.364




0.8170295742606435 0.7102011599295852
0.7102011599295852
0.8170295742606435 0.7102011599295852
start...
training...
Epoch 1 of 20, Train Loss: 0.448
Epoch 2 of 20, Train Loss: 0.390
Epoch 3 of 20, Train Loss: 0.381
Epoch 4 of 20, Train Loss: 0.379
Epoch 5 of 20, Train Loss: 0.374
Epoch 6 of 20, Train Loss: 0.373
Epoch 7 of 20, Train Loss: 0.372
Epoch 8 of 20, Train Loss: 0.372
Epoch 9 of 20, Train Loss: 0.369
Epoch 10 of 20, Train Loss: 0.370
Epoch 11 of 20, Train Loss: 0.369
Epoch 12 of 20, Train Loss: 0.368
Epoch 13 of 20, Train Loss: 0.367
Epoch 14 of 20, Train Loss: 0.367
Epoch 15 of 20, Train Loss: 0.366
Epoch 16 of 20, Train Loss: 0.366
Epoch 17 of 20, Train Loss: 0.365
Epoch 18 of 20, Train Loss: 0.364
Epoch 19 of 20, Train Loss: 0.363
Epoch 20 of 20, Train Loss: 0.364




0.788981880230763 0.7149485840872705
0.7149485840872705
0.788981880230763 0.7149485840872705
start...
training...
Epoch 1 of 20, Train Loss: 0.427
Epoch 2 of 20, Train Loss: 0.388
Epoch 3 of 20, Train Loss: 0.382
Epoch 4 of 20, Train Loss: 0.379
Epoch 5 of 20, Train Loss: 0.377
Epoch 6 of 20, Train Loss: 0.374
Epoch 7 of 20, Train Loss: 0.373
Epoch 8 of 20, Train Loss: 0.371
Epoch 9 of 20, Train Loss: 0.370
Epoch 10 of 20, Train Loss: 0.369
Epoch 11 of 20, Train Loss: 0.368
Epoch 12 of 20, Train Loss: 0.368
Epoch 13 of 20, Train Loss: 0.367
Epoch 14 of 20, Train Loss: 0.367
Epoch 15 of 20, Train Loss: 0.366
Epoch 16 of 20, Train Loss: 0.366
Epoch 17 of 20, Train Loss: 0.365
Epoch 18 of 20, Train Loss: 0.365
Epoch 19 of 20, Train Loss: 0.364
Epoch 20 of 20, Train Loss: 0.363




0.801413829527911 0.7191622477206228
0.7191622477206228
0.801413829527911 0.7191622477206228
start...
training...
Epoch 1 of 20, Train Loss: 0.423
Epoch 2 of 20, Train Loss: 0.387
Epoch 3 of 20, Train Loss: 0.382
Epoch 4 of 20, Train Loss: 0.377
Epoch 5 of 20, Train Loss: 0.374
Epoch 6 of 20, Train Loss: 0.372
Epoch 7 of 20, Train Loss: 0.370
Epoch 8 of 20, Train Loss: 0.368
Epoch 9 of 20, Train Loss: 0.367
Epoch 10 of 20, Train Loss: 0.366
Epoch 11 of 20, Train Loss: 0.365
Epoch 12 of 20, Train Loss: 0.365
Epoch 13 of 20, Train Loss: 0.364
Epoch 14 of 20, Train Loss: 0.363
Epoch 15 of 20, Train Loss: 0.362
Epoch 16 of 20, Train Loss: 0.361
Epoch 17 of 20, Train Loss: 0.361
Epoch 18 of 20, Train Loss: 0.361
Epoch 19 of 20, Train Loss: 0.359
Epoch 20 of 20, Train Loss: 0.359




0.7972698464288617 0.7013958268476534
0.7013958268476534
0.7972698464288617 0.7013958268476534
start...
training...
Epoch 1 of 20, Train Loss: 0.467
Epoch 2 of 20, Train Loss: 0.388
Epoch 3 of 20, Train Loss: 0.380
Epoch 4 of 20, Train Loss: 0.376
Epoch 5 of 20, Train Loss: 0.375
Epoch 6 of 20, Train Loss: 0.372
Epoch 7 of 20, Train Loss: 0.372
Epoch 8 of 20, Train Loss: 0.371
Epoch 9 of 20, Train Loss: 0.369
Epoch 10 of 20, Train Loss: 0.368
Epoch 11 of 20, Train Loss: 0.368
Epoch 12 of 20, Train Loss: 0.367
Epoch 13 of 20, Train Loss: 0.366
Epoch 14 of 20, Train Loss: 0.365
Epoch 15 of 20, Train Loss: 0.365
Epoch 16 of 20, Train Loss: 0.364
Epoch 17 of 20, Train Loss: 0.365
Epoch 18 of 20, Train Loss: 0.364
Epoch 19 of 20, Train Loss: 0.364
Epoch 20 of 20, Train Loss: 0.363




0.797188591858292 0.7163963196507741
0.7163963196507741
0.797188591858292 0.7163963196507741
start...
training...
Epoch 1 of 20, Train Loss: 0.440
Epoch 2 of 20, Train Loss: 0.388
Epoch 3 of 20, Train Loss: 0.380
Epoch 4 of 20, Train Loss: 0.376
Epoch 5 of 20, Train Loss: 0.375
Epoch 6 of 20, Train Loss: 0.371
Epoch 7 of 20, Train Loss: 0.370
Epoch 8 of 20, Train Loss: 0.370
Epoch 9 of 20, Train Loss: 0.367
Epoch 10 of 20, Train Loss: 0.367
Epoch 11 of 20, Train Loss: 0.367
Epoch 12 of 20, Train Loss: 0.365
Epoch 13 of 20, Train Loss: 0.364
Epoch 14 of 20, Train Loss: 0.364
Epoch 15 of 20, Train Loss: 0.363
Epoch 16 of 20, Train Loss: 0.363
Epoch 17 of 20, Train Loss: 0.363
Epoch 18 of 20, Train Loss: 0.362
Epoch 19 of 20, Train Loss: 0.360
Epoch 20 of 20, Train Loss: 0.361




0.8026326480864548 0.7222530567904579
0.7222530567904579
0.8026326480864548 0.7222530567904579
start...
training...
Epoch 1 of 20, Train Loss: 0.425
Epoch 2 of 20, Train Loss: 0.385
Epoch 3 of 20, Train Loss: 0.379
Epoch 4 of 20, Train Loss: 0.375
Epoch 5 of 20, Train Loss: 0.374
Epoch 6 of 20, Train Loss: 0.372
Epoch 7 of 20, Train Loss: 0.371
Epoch 8 of 20, Train Loss: 0.370
Epoch 9 of 20, Train Loss: 0.369
Epoch 10 of 20, Train Loss: 0.368
Epoch 11 of 20, Train Loss: 0.368
Epoch 12 of 20, Train Loss: 0.368
Epoch 13 of 20, Train Loss: 0.366
Epoch 14 of 20, Train Loss: 0.365
Epoch 15 of 20, Train Loss: 0.366
Epoch 16 of 20, Train Loss: 0.364
Epoch 17 of 20, Train Loss: 0.364
Epoch 18 of 20, Train Loss: 0.364
Epoch 19 of 20, Train Loss: 0.362
Epoch 20 of 20, Train Loss: 0.362




0.7948322093117738 0.7284924653225082
0.7284924653225082
0.7948322093117738 0.7284924653225082


In [50]:
# print the cross validation results
scores
mean(scores)

0.7132091147602481

In [23]:
len(scores)

10