In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import warnings
warnings.filterwarnings('ignore')

from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from torch.optim.lr_scheduler import CosineAnnealingLR

import torch
import torch.nn as nn

In [None]:
# Load the dataset from a CSV file in the working directory. Later cells
# read a `fetal_health` column from it as the class label and use every
# other column as a feature.
data = pd.read_csv('Data.csv')

In [None]:
# Single source of truth for every seed in the notebook, defined before use
# so the seeding calls below and train_test_split all share one value.
RANDOM_SEED = 42

EPOCHS = 50   # number of passes over the training split
CLASSES = 3   # width of the network's output layer (one logit per class)

# Seed every RNG the imported libraries may draw from. NumPy is included
# because pandas and scikit-learn fall back to its global generator whenever
# no explicit random_state is supplied.
torch.manual_seed(RANDOM_SEED)
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

In [None]:
# Model and optimisation hyperparameters consumed by get_model, get_data,
# get_optimizer and train.
# NOTE(review): the unrounded values look like the output of an automated
# hyperparameter search - confirm their provenance.
NN_ARCHITECTURE = [113, 17, 85, 110]  # output width of each hidden Linear layer, in order
BATCH_SIZE = 11  # mini-batch size for both the training and testing DataLoader
OPTIMIZER = 'Adam'  # one of 'SGD', 'Adam', 'RMSprop' (the names get_optimizer checks for)
LEARNING_RATE = 0.0007041741173037534  # passed to the optimizer as `lr`
WEIGHT_DECAY = 5.830095893789865e-05  # passed to the optimizer as `weight_decay`
SCHEDULER = True  # when truthy, train() builds a CosineAnnealingLR schedule

In [None]:
# Preview the first rows to sanity-check the column names and values.
data.head()

In [None]:
# Number of rows (samples) in the dataset.
len(data)

In [None]:
# True if at least one cell anywhere in the frame is missing: the first
# .any() reduces per column, the second reduces across columns.
data.isnull().any().any()

In [None]:
# Class balance of the target. The counts are computed once and reused for
# both the bar chart and the table displayed as the cell output.
class_counts = data.fetal_health.value_counts()
ax = class_counts.plot(kind='bar')
# Label the chart so it can be read on its own when the notebook is skimmed.
ax.set(xlabel='fetal_health', ylabel='number of samples', title='Class distribution')
class_counts

In [None]:
# Null accuracy: the accuracy obtained by always predicting the most frequent
# class. Taking the largest count, instead of indexing the hard-coded label
# 1.0, keeps the value correct if this cell is re-run after the labels have
# been re-encoded further down, or if a different class is the majority.
null_accuracy = data.fetal_health.value_counts().max() / len(data)
null_accuracy

In [None]:
# Replace the raw `fetal_health` labels with ordinal codes, overwriting the
# column in `data`. The fitted encoder is kept in `encoder` so the original
# labels can be recovered later.
oe_columns = ['fetal_health']
encoder = OrdinalEncoder()
data[oe_columns] = encoder.fit_transform(data[oe_columns])

In [None]:
# Re-check the class counts after encoding: the counts should be unchanged,
# only the label values differ.
data.fetal_health.value_counts()

In [None]:
class FetalHealthData(torch.utils.data.Dataset):
    """Map-style dataset over a fetal-health DataFrame.

    The ``fetal_health`` column supplies the label of each row; every other
    column is treated as a feature.
    """

    def __init__(self, data):
        """Split ``data`` into a list of labels and a list of feature rows."""
        self.labels = data.fetal_health.tolist()
        feature_frame = data.drop(columns=['fetal_health'])
        self.features = feature_frame.values.tolist()

    def __len__(self):
        """Return the number of rows held by the dataset."""
        return len(self.labels)

    def __getitem__(self, index):
        """Return ``(features, label)`` for row ``index`` as NumPy arrays."""
        feature_row = self.features[index]
        label_value = self.labels[index]
        return np.array(feature_row), np.array(label_value)

In [None]:
def get_model():
    """Build the feed-forward classifier described by ``NN_ARCHITECTURE``.

    The input width is the number of columns in the global ``data`` frame
    once ``fetal_health`` is removed. Each entry of ``NN_ARCHITECTURE`` adds
    a ``Linear`` layer followed by ``LeakyReLU``; a ``Dropout`` layer and a
    final ``Linear`` layer with ``CLASSES`` outputs complete the network.

    Returns:
        nn.Sequential: the assembled, freshly initialised network.
    """
    n_inputs = data.drop(columns=['fetal_health']).shape[1]
    widths = [n_inputs, *NN_ARCHITECTURE]

    modules = []
    # Walk consecutive (input width, output width) pairs of the hidden stack.
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
        modules += [nn.Linear(fan_in, fan_out), nn.LeakyReLU()]

    # Classification head: dropout, then one logit per class.
    modules += [nn.Dropout(), nn.Linear(widths[-1], CLASSES)]
    return nn.Sequential(*modules)

In [None]:
def get_data():
    """Split the global ``data`` frame and wrap both parts in DataLoaders.

    The split keeps 20% of the rows for testing, is stratified on
    ``fetal_health`` and is seeded with ``RANDOM_SEED``. Only the training
    loader shuffles its samples.

    Returns:
        tuple: ``(training_loader, testing_loader)``, both batching with
        ``BATCH_SIZE``.
    """
    train_frame, test_frame = train_test_split(
        data,
        test_size=0.2,
        random_state=RANDOM_SEED,
        stratify=data.fetal_health,
    )
    train_set = FetalHealthData(train_frame)
    test_set = FetalHealthData(test_frame)

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)
    return train_loader, test_loader

In [None]:
def get_optimizer(model):
    """Create the optimizer selected by the global ``OPTIMIZER`` setting.

    Args:
        model: module whose ``parameters()`` will be optimised.

    Returns:
        A ``torch.optim`` optimizer configured with ``LEARNING_RATE`` and
        ``WEIGHT_DECAY``.

    Raises:
        ValueError: if ``OPTIMIZER`` is not 'Adam', 'SGD' or 'RMSprop'.
            Failing here avoids handing ``None`` to the training loop, where
            it would only surface later as an unrelated AttributeError.
    """
    optimizer_classes = {
        'Adam': torch.optim.Adam,
        'SGD': torch.optim.SGD,
        'RMSprop': torch.optim.RMSprop,
    }
    if OPTIMIZER not in optimizer_classes:
        raise ValueError(
            f"Unknown OPTIMIZER {OPTIMIZER!r}; expected one of {sorted(optimizer_classes)}"
        )
    optimizer_cls = optimizer_classes[OPTIMIZER]
    return optimizer_cls(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

In [None]:
def train(model, training_batches, testing_batches):
    """Train ``model`` and record its accuracy on ``testing_batches`` per epoch.

    Runs ``EPOCHS`` epochs of cross-entropy training with the optimizer from
    ``get_optimizer``. When ``SCHEDULER`` is truthy the learning rate follows
    a cosine annealing schedule that is advanced once per epoch.

    Args:
        model: network to optimise; it is updated in place.
        training_batches: iterable of ``(samples, labels)`` batches used for
            the gradient updates.
        testing_batches: iterable of ``(samples, labels)`` batches used only
            for evaluation.

    Returns:
        list[float]: accuracy in percent on ``testing_batches`` after each
        epoch, in epoch order.
    """
    accuracy = []
    criterion = nn.CrossEntropyLoss()
    optimizer = get_optimizer(model)

    scheduler = None
    if SCHEDULER:
        # T_max = EPOCHS - 1 makes the rate reach its minimum in the final
        # epoch. It is clamped to 1 because T_max = 0 divides by zero as soon
        # as the scheduler is stepped.
        scheduler = CosineAnnealingLR(optimizer, max(EPOCHS - 1, 1))

    for _ in range(EPOCHS):
        ### Training
        model.train()
        for samples, labels in training_batches:
            optimizer.zero_grad()
            outputs = model(samples.float())
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()

        # Advance the schedule once per epoch, after that epoch's optimizer
        # steps; without this call the learning rate would never change.
        if scheduler is not None:
            scheduler.step()

        ### Testing
        num_samples = 0
        correct_predictions = 0
        model.eval()  # disables dropout while measuring accuracy
        with torch.no_grad():
            for samples, labels in testing_batches:
                output = model(samples.float())
                correct_predictions += (output.argmax(dim=1) == labels).sum().item()
                num_samples += labels.size(0)

        accuracy.append(100.0 * correct_predictions / num_samples)

    return accuracy

In [None]:
# Build a freshly initialised network and the train/test loaders, then run
# the full training loop. `history` receives one accuracy value (in percent,
# measured on `testing_batches`) per epoch.
model = get_model()
training_batches, testing_batches = get_data()
history = train(model, training_batches, testing_batches)

In [None]:
# Accuracy per epoch as returned by train().
# NOTE(review): the axis says "validation", but the values are measured on
# the test split from get_data - the notebook has no separate validation set.
plt.plot(history)
plt.ylabel('validation accuracy')
plt.xlabel('epoch')
plt.grid()

In [None]:
# Accuracy (in percent) recorded after the final epoch.
history[-1]