In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix , ConfusionMatrixDisplay , classification_report
from sklearn.preprocessing  import StandardScaler, LabelEncoder

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

In [3]:
RANDOM_STATE = 24 # REMEMBER: to remove at the time of promotion to production
np.random.seed(RANDOM_STATE) # Set Random Seed for reproducible  results
torch.manual_seed(RANDOM_STATE)

WEIGHT_DECAY = 0.001
EPOCHS = 51 # number of epochs
ALPHA = 0.001 # learning rate
BATCH_SIZE = 16

TRAIN_SIZE = int(BATCH_SIZE*18)
PATIENCE = 20          # for early stopping
LR_PATIENCE = 10
LR_FACTOR = 0.1       # by what factor we are reducing the lr

# parameters for Matplotlib
params = {'legend.fontsize': 'medium',
          'figure.figsize': (15, 6),
          'axes.labelsize': 'large',
          'axes.titlesize':'large',
          'xtick.labelsize':'medium',
          'ytick.labelsize':'medium'
         }

CMAP = plt.cm.jet

plt.rcParams.update(params)

In [9]:
# Basic Hyper-parameter

inpDir = os.path.join('..','..','input')
outDir = os.path.join('..','output')
modelDir = os.path.join('..','models')
subDir = 'fifa 2019'

if not os.path.exists(outDir):
  os.makedirs(outDir)

if not os.path.exists(modelDir):
  os.makedirs(modelDir)

if not os.path.exists(os.path.join(modelDir, subDir)):
  os.makedirs(os.path.join(modelDir, subDir))

In [10]:
# # for, Physical device :
# physical_device = tf.config.list_physical_devices('GPU')
# if len(physical_device) > 0:
#    tf.config.experimental.set_memory_growth(physical_device[0], True)

In [4]:
data_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Datasets/ionosphere.data')
data_df.head()

Unnamed: 0,1,0,0.99539,-0.05889,0.85243,0.02306,0.83398,-0.37708,1.1,0.03760,...,-0.51171,0.41078,-0.46168,0.21266,-0.34090,0.42267,-0.54487,0.18641,-0.45300,g
0,1,0,1.0,-0.18829,0.93035,-0.36156,-0.10868,-0.93597,1.0,-0.04549,...,-0.26569,-0.20468,-0.18401,-0.1904,-0.11593,-0.16626,-0.06288,-0.13738,-0.02447,b
1,1,0,1.0,-0.03365,1.0,0.00485,1.0,-0.12062,0.88965,0.01198,...,-0.4022,0.58984,-0.22145,0.431,-0.17365,0.60436,-0.2418,0.56045,-0.38238,g
2,1,0,1.0,-0.45161,1.0,1.0,0.71216,-1.0,0.0,0.0,...,0.90695,0.51613,1.0,1.0,-0.20099,0.25682,1.0,-0.32382,1.0,b
3,1,0,1.0,-0.02401,0.9414,0.06531,0.92106,-0.23255,0.77152,-0.16399,...,-0.65158,0.1329,-0.53206,0.02431,-0.62197,-0.05707,-0.59573,-0.04608,-0.65697,g
4,1,0,0.02337,-0.00592,-0.09924,-0.11949,-0.00763,-0.11824,0.14706,0.06637,...,-0.01535,-0.0324,0.09223,-0.07859,0.00732,0.0,0.0,-0.00039,0.12011,b


In [14]:
train_df, test_df = train_test_split(data_df, test_size=TRAIN_SIZE, random_state=RANDOM_STATE)

train_df.shape, test_df.shape

((62, 35), (288, 35))

In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} for execution')

Using cpu for execution


In [6]:
''' Using Singleton Class - Singleton Design Pattern '''

class Transformer:
  _instance = None     # Instance None

  def __init__(self):
     if Transformer._instance is not None:
       raise Exception('Transformer is a singleton class')
     self.scaler = StandardScaler()     # instantiate a scaler
     self.encoder = LabelEncoder()      # instantiate a label encoder

  @classmethod
  def get_instance(cls):
    if cls._instance is None:
      cls._instance = Transformer()
    return cls._instance

In [7]:
class IonoDS(Dataset):                             # This class loads our datasets in the class : It is an optimal way of loading
  transformers = Transformer.get_instance()

  def __init__(self, dataframe, device, is_train, label_col):
    super(IonoDS, self).__init__()
    self.df = dataframe
    self.device = device
    self.is_train = is_train
    self.label_col = label_col
    self.scaler = self.transformers.scaler
    self.encoder = self.transformers.encoder

    # Split in Features and Labels, and scale
    X = self.df.drop(label_col, axis=1)
    y = self.df[label_col].to_numpy()

    # Split in Features and Labels, and scale

    if self.is_train:
      self.label = self.encoder.fit_transform(y)   # Train Data
      self.features = self.scaler.fit_transform(X)  # Train Data
    else:
      self.label = self.encoder.transform(y)      # Other Data
      self.features = self.scaler.transform(X)    # Other Data

  def __len__ (self):
    return len(self.features)

  def __getitem__(self, index):
    # Extract features and labels from dataframe
    features = self.features[index]
    label = self.label[index]

    # convert to PyTorch Tensors
    features = torch.tensor(features, dtype=torch.float32, device=self.device)
    label = torch.tensor(label, dtype=torch.int64, device=self.device)
    return features, label

### Model

In [8]:
input_dim = 34
dor1 = 0.15
dor2 = 0.15
dor3 = 0.15

model = nn.Sequential(
    ## -----
    ## Set 1
    ## -----
    nn.Linear(input_dim, 26),        # First Hidden layer
    nn.BatchNorm1d(26),
    nn.ReLU(),
    nn.Dropout(dor1),

    ## -----
    ## Set 2
    ## -----
    nn.Linear(26,18),
    nn.BatchNorm1d(18),
    nn.ReLU(),
    nn.Dropout(dor2),

    ## -----
    ## Set 3
    ## -----
    nn.Linear(18,10),
    nn.BatchNorm1d(10),
    nn.ReLU(),
    nn.Dropout(dor3),

    ## -----
    ## Set 4
    ## -----
    nn.Linear(10,2),
    nn.LogSoftmax(dim=1)).to(device=device)    # Output Layer

print(model)

Sequential(
  (0): Linear(in_features=34, out_features=26, bias=True)
  (1): BatchNorm1d(26, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): Dropout(p=0.15, inplace=False)
  (4): Linear(in_features=26, out_features=18, bias=True)
  (5): BatchNorm1d(18, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
  (7): Dropout(p=0.15, inplace=False)
  (8): Linear(in_features=18, out_features=10, bias=True)
  (9): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU()
  (11): Dropout(p=0.15, inplace=False)
  (12): Linear(in_features=10, out_features=2, bias=True)
  (13): LogSoftmax(dim=1)
)


In [9]:
# dataframe, device, is_train, label_col
label_col = 34
train_ds = IonoDS(train_df, device=device, is_train = True, label_col = label_col )
test_ds = IonoDS(test_df, device=device, is_train = True, label_col = label_col )

NameError: name 'train_df' is not defined

In [10]:
train_loader = DataLoader(train_ds, batch_size = BATCH_SIZE, shuffle = True)
test_loader = DataLoader(test_ds, batch_size = BATCH_SIZE, shuffle = True)
next(iter(train_loader))

NameError: name 'train_ds' is not defined

In [None]:
# Loss Function
loss_fn = nn.CrossEntropyLoss()

#optimizers
optimizer = torch.optim.Adam(model.parameters(), lr = ALPHA)
loss, tloss, n_epoch, acc, tacc = [], [], [], [], []

# Iteration
for epoch in range(EPOCHS):
  # Training Loop
  train_loss, train_acc = 0.0, 0.0
  model.train()   # set mode to training mode
  for i, data in enumerate(train_loader):
    inputs, labels = data
    predict_prob = model(inputs)
    preds = predict_prob.argmax(dim=1)
    batch_loss = loss_fn(predict_prob, labels)
    batch_acc = accuracy_score(labels.cpu().numpy(), preds.cpu().numpy())

    # Back propagation
    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()

    # Assemble Train loss
    train_loss += batch_loss.item()*inputs.size(0)
    train_acc += batch_acc*inputs.size(0)
  train_loss /= len(train_ds)
  train_acc /= len(train_ds)
  loss.append(train_loss)
  acc.append(train_acc)

  # Testing Loop
  with torch.inference_mode():
    test_loss, test_acc = 0.0, 0.0
    model.eval()    # Evaluation of Model
    for data in test_loader:
      inputs, labels = data
      predict_prob = model(inputs)
      preds = predict_prob.argmax(dim=1)
      batch_loss = loss_fn(predict_prob, labels)
      batch_acc = accuracy_score(labels.cpu().numpy(), preds.cpu().numpy())
      # Assemble test loss
      test_loss += batch_loss.item()*inputs.size(0)
      test_acc += batch_acc* inputs.size(0)
    test_loss /= len(test_ds)
    test_acc /= len(test_ds)
    tloss.append(test_loss)
    tacc.append(test_acc)

  n_epoch.append(epoch)
  if epoch%5 == 0:
    fmtStr = 'Epoch: {:05d}/{:05d}  --- Loss : {:.5f}/{:.5f} | Acc: {:.5f}/{:.5f}'
    print(fmtStr.format(epoch, EPOCHS, train_loss, test_loss,train_acc, test_acc))