In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [2]:
# Load the tab-separated seeds dataset into a DataFrame.
df = pd.read_csv('./modified_seeds_dataset.txt', sep="\t")

# Names of the seven numeric columns used as model inputs.
inp_features = [
    "area_A",
    "perimeter_P",
    "compactness(4*pi*A/P^2)",
    "kernel_length",
    "kernel_width",
    "asymmetry_coefficient",
    "kernel_groove_length",
]
df.head()

Unnamed: 0,area_A,perimeter_P,compactness(4*pi*A/P^2),kernel_length,kernel_width,asymmetry_coefficient,kernel_groove_length,type
0,15.26,14.84,0.871,5.763,3.312,2.221,5.22,1
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1
2,14.29,14.09,0.905,5.291,3.337,2.699,4.825,1
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,1
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1


In [3]:
# Per-feature value range (max - min), keyed by column name.
# Each column is selected as a Series (df[name]) so max()/min() return scalars;
# calling float() on the one-element Series produced by df[[name]].max() is
# deprecated in recent pandas releases.
inp_features_dict = {
    name: {"range": float(df[name].max() - df[name].min())}
    for name in inp_features
}

print(inp_features_dict)

{'area_A': {'range': 10.59}, 'perimeter_P': {'range': 4.84}, 'compactness(4*pi*A/P^2)': {'range': 0.11019999999999996}, 'kernel_length': {'range': 1.7759999999999998}, 'kernel_width': {'range': 1.4030000000000005}, 'asymmetry_coefficient': {'range': 7.690899999999999}, 'kernel_groove_length': {'range': 2.0309999999999997}}


In [4]:
# Divide every input column by its own range (max - min) and re-attach the
# unscaled "type" label column.
# NOTE(review): the column minimum is not subtracted, so the scaled values are
# not confined to [0, 1] -- confirm this is the intended scaling.
raw_inputs = df[inp_features]
scaled_df = pd.concat(
    [raw_inputs / (raw_inputs.max() - raw_inputs.min()), df[["type"]]],
    axis=1,
)
scaled_df.head()

Unnamed: 0,area_A,perimeter_P,compactness(4*pi*A/P^2),kernel_length,kernel_width,asymmetry_coefficient,kernel_groove_length,type
0,1.440982,3.066116,7.903811,3.244932,2.360656,0.288783,2.570162,1
1,1.405099,3.010331,7.995463,3.127252,2.375624,0.132364,2.440177,1
2,1.349386,2.911157,8.212341,2.979167,2.378475,0.350934,2.375677,1
3,1.306893,2.880165,8.126134,2.997748,2.408411,0.293724,2.36583,1
4,1.524079,3.097107,8.197822,3.185811,2.538845,0.176182,2.548006,1


In [5]:
from sklearn.utils import shuffle

# Shuffle the rows once with a fixed seed. The train/test/val split that follows
# uses shuffle=False, so this row order alone decides which samples land in each
# split; seeding it makes the split reproducible across kernel restarts.
scaled_df = shuffle(scaled_df, random_state=42)
scaled_df.head()

Unnamed: 0,area_A,perimeter_P,compactness(4*pi*A/P^2),kernel_length,kernel_width,asymmetry_coefficient,kernel_groove_length,type
82,1.90746,3.489669,8.07078,3.538851,2.754098,0.672613,3.046283,2
71,1.590179,3.237603,7.824864,3.377252,2.48325,0.607861,2.893648,2
97,1.792257,3.423554,7.88294,3.631194,2.531718,0.278771,3.177253,2
75,1.583569,3.227273,7.838475,3.337275,2.450463,0.639717,2.853274,2
51,1.490085,3.080579,8.097096,3.19482,2.447612,0.727223,2.528804,1


In [6]:
from sklearn.model_selection import train_test_split

# First cut: leading 60% of the (already shuffled) rows for training, the rest held out.
all_inputs = scaled_df[inp_features]
all_labels = scaled_df[["type"]]
x_train, x_test_val, y_train, y_test_val = train_test_split(
    all_inputs, all_labels, train_size=0.6, shuffle=False
)

# Second cut: split the held-out rows in half -- first half is test, second half is validation.
x_test, x_val, y_test, y_val = train_test_split(
    x_test_val, y_test_val, test_size=0.5, shuffle=False
)

# Sizes of each split, features and labels side by side.
print(*(len(part) for part in (x_train, y_train, x_test, y_test, x_val, y_val)))

126 126 42 42 42 42


In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

In [8]:
class my_model(nn.Module):
    '''A simple fully connected network with one hidden layer.

    With the default arguments it has 7 input, 10 hidden and 3 output neurons.
    The output is the raw result of the last linear layer (no activation applied).

    :param in_features: size of each input sample
    :param hidden_features: number of neurons in the hidden layer
    :param out_features: number of output neurons
    '''
    def __init__(self, in_features=7, hidden_features=10, out_features=3):
        super().__init__()
        self.input_layer = nn.Linear(in_features=in_features, out_features=hidden_features)
        self.output_layer = nn.Linear(in_features=hidden_features, out_features=out_features)

    def forward(self, x1):
        '''Apply input layer -> ReLU -> output layer to ``x1`` and return the result.'''
        hidden = F.relu(self.input_layer(x1))
        return self.output_layer(hidden)
    
# Instantiate the network and print a layer-by-layer summary for an input of shape (1, 7).
model = my_model()  
summary(model, (1,7))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 1, 10]              80
            Linear-2                 [-1, 1, 3]              33
Total params: 113
Trainable params: 113
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [9]:
from torch.utils.data import Dataset

class MyDataFetcher(Dataset):
    """Dataset that serves one row of features plus its one-hot encoded class label.

    :param feature_cols: pandas object holding the input features, indexed
           positionally via ``.iloc``
    :param target_col: pandas Series or single-column DataFrame holding the
           class labels (1, 2 or 3)
    """
    # Maps a class label to its one-hot encoding.
    targets_dict = {1:[1,0,0], 2:[0,1,0], 3:[0,0,1]}

    def __init__(self, feature_cols, target_col):
        self.feature_cols = feature_cols
        self.target_col = target_col

    def __len__(self):
        return len(self.target_col)

    def __getitem__(self, idx):
        """Return ``{'features': float32 tensor, 'targets': float32 one-hot tensor}`` for row ``idx``.

        Raises ``KeyError`` if the label is not a key of ``targets_dict``.
        """
        label = self.target_col.iloc[idx]
        # A single-column DataFrame yields a one-element Series here; unwrap it
        # explicitly, because int() on a one-element Series is deprecated in
        # recent pandas releases.
        if hasattr(label, "iloc"):
            label = label.iloc[0]
        target_list = torch.tensor(self.targets_dict[int(label)], dtype=torch.float32)

        # Go through a NumPy array rather than handing the pandas object to the
        # legacy FloatTensor constructor.
        row = self.feature_cols.iloc[idx]
        features = torch.tensor(row.to_numpy(dtype="float32"))
        return {'features': features, 'targets': target_list}

In [10]:
from torch.utils.data import DataLoader

batch_size = 21

# One dataset per split, each wrapping the matching feature/label frames.
train_dataset, val_dataset, test_dataset = (
    MyDataFetcher(feature_cols=split_x, target_col=split_y)
    for split_x, split_y in ((x_train, y_train), (x_val, y_val), (x_test, y_test))
)

# One loader per dataset; batches are served in stored order (no reshuffling).
train_dataloader, val_dataloader, test_dataloader = (
    DataLoader(split_dataset, batch_size=batch_size, shuffle=False)
    for split_dataset in (train_dataset, val_dataset, test_dataset)
)

In [11]:
def visualize_sample_batch(data, n=3):
    """Print the first ``n`` (features, target) pairs of a batch.

    :param data: mapping with the keys ``'features'`` and ``'targets'``, each an
           indexable sequence with one entry per sample
    :param n: maximum number of samples to print; fewer are printed when the
           batch is smaller, instead of raising ``IndexError``
    """
    features, target = data['features'], data['targets']
    for i in range(min(n, len(features), len(target))):
        print(features[i], target[i])

In [12]:
# Show a few samples from the first training batch only (nothing is shown if the loader is empty).
first_batch = next(iter(train_dataloader), None)
if first_batch is not None:
    visualize_sample_batch(first_batch)

tensor([1.9075, 3.4897, 8.0708, 3.5389, 2.7541, 0.6726, 3.0463]) tensor([0., 1., 0.])
tensor([1.5902, 3.2376, 7.8249, 3.3773, 2.4833, 0.6079, 2.8936]) tensor([0., 1., 0.])
tensor([1.7923, 3.4236, 7.8829, 3.6312, 2.5317, 0.2788, 3.1773]) tensor([0., 1., 0.])


In [13]:
class EarlyStopping():
    """
    Early stopping to stop the training when the validation loss does not improve after
    certain epochs.

    Call the instance once per epoch with the validation loss; ``early_stop`` becomes
    True once ``patience`` consecutive calls have failed to beat ``best_loss`` by more
    than ``min_delta``.
    """
    def __init__(self, patience=5, min_delta=0):
        """
        :param patience: how many epochs to wait before stopping when loss is
               not improving
        :param min_delta: minimum difference between new loss and old loss for
               new loss to be considered as an improvement
        """
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        """
        Record one validation loss and update ``counter`` / ``early_stop``.

        :param val_loss: validation loss of the epoch that just finished
        """
        if self.best_loss is None:
            # first observation just establishes the baseline
            self.best_loss = val_loss
        elif self.best_loss - val_loss > self.min_delta:
            self.best_loss = val_loss
            # reset counter if validation loss improves
            self.counter = 0
        else:
            # Anything that is not a strict improvement counts against patience.
            # This includes a difference exactly equal to min_delta (e.g. a flat
            # loss with min_delta=0), which previously matched no branch at all.
            self.counter += 1
            print(f"INFO: Early stopping counter {self.counter} of {self.patience}")
            if self.counter >= self.patience:
                print('INFO: Early stopping')
                self.early_stop = True

early_stopping = EarlyStopping(patience = 7, min_delta=0.05)

In [14]:
import torch.optim as optim
from torch.optim import lr_scheduler

# Summed (not averaged) squared error between the network output and the one-hot target.
criterion = torch.nn.MSELoss(reduction='sum')

# RMSprop hyper-parameters, spelled out explicitly.
rmsprop_settings = dict(
    lr=0.001,
    alpha=0.99,
    eps=1e-08,
    weight_decay=0,
    momentum=0,
    centered=False,
)
optimizer = optim.RMSprop(model.parameters(), **rmsprop_settings)

# Halve the learning rate every 50 scheduler steps.
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

In [15]:
# Loaders used by the training loop, keyed by phase name.
xdataloaders = dict(train=train_dataloader, val=val_dataloader)

# len() of a DataLoader is its number of batches.
# NOTE(review): despite the name, these are batch counts rather than sample
# counts -- confirm that is the intended denominator for the epoch loss.
xdataset_sizes = {phase: len(loader) for phase, loader in xdataloaders.items()}
def get_lr(optimizer):
    '''Return the learning rate of the optimizer's first parameter group (None if it has no groups).'''
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']

In [16]:
import copy, time
from torch.utils.tensorboard import SummaryWriter

def train_model(model, criterion, optimizer, scheduler, num_epochs=5):
    """Train ``model`` with a validation pass per epoch, TensorBoard logging and early stopping.

    Every epoch runs a 'train' phase (gradients enabled, optimizer stepped per batch,
    scheduler stepped once at the end of the phase) followed by a 'val' phase
    (gradients disabled). Batches are read from the module-level ``xdataloaders``,
    losses are normalised with the module-level ``xdataset_sizes``, and the stop
    decision is delegated to the module-level ``early_stopping`` object.

    :param model: network to optimise; it is updated in place
    :param criterion: loss callable invoked as ``criterion(outputs, targets)``
    :param optimizer: optimizer holding ``model``'s parameters
    :param scheduler: learning-rate scheduler, stepped once per epoch after the train phase
    :param num_epochs: maximum number of epochs to run
    :return: the same ``model`` object after training
    """
    writer = SummaryWriter("./tensorboard_events")
    # try/finally guarantees the event file is flushed and closed even when the
    # run is interrupted (e.g. kernel interrupt) or a batch raises.
    try:
        for epoch in range(num_epochs):
            t1 = time.time()
            flag_early_stop = False
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))
            print('-' * 40)
            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()  # Set model to training mode
                else:
                    model.eval()   # Set model to evaluate mode
                running_loss = 0.0
                # Iterate over data.
                for xdata in xdataloaders[phase]:
                    inputs = xdata['features']
                    targets = xdata['targets']
                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward propagate; track history only in the train phase
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        loss = criterion(outputs, targets)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                if phase == 'train':
                    scheduler.step()

                # NOTE(review): each batch loss is multiplied by the batch size and the
                # total is divided by xdataset_sizes[phase]. As configured in this
                # notebook (sum-reduced criterion, sizes = number of batches) that is
                # not a per-sample mean. The scale is kept unchanged because the
                # early-stopping min_delta is compared against it; rescaling would
                # change when training stops.
                epoch_loss = running_loss / xdataset_sizes[phase]
                if phase == 'train':
                    print('TRAIN PHASE: Loss: {:.4f}'.format(epoch_loss))
                    writer.add_scalar("Loss/train", epoch_loss, epoch)
                elif phase == 'val':
                    print('VALIDATION PHASE: Loss: {:.4f}'.format(epoch_loss))
                    early_stopping(epoch_loss)
                    writer.add_scalar("Loss/val", epoch_loss, epoch)
                    if early_stopping.early_stop:
                        print('EARLY STOPPING')
                        flag_early_stop = True
                        break
            t2 = time.time()
            print('EPOCH time taken =', t2 - t1, 'sec')
            print('Learning Rate', get_lr(optimizer))
            if flag_early_stop:
                print('*******EARLY STOPPING*******')
                break

        print('Finished Training')
    finally:
        writer.flush()
        writer.close()
    return model

In [17]:
# Train for at most 1000 epochs; the early-stopping check inside train_model may end the run sooner.
model_ft = train_model(model, criterion, optimizer, step_lr_scheduler, num_epochs=1000)

Epoch 0/999
----------------------------------------
TRAIN PHASE: Loss: 1287.1936
VALIDATION PHASE: Loss: 574.9231
EPOCH time taken = 0.027959346771240234 sec
Learning Rate 0.001
Epoch 1/999
----------------------------------------
TRAIN PHASE: Loss: 531.8331
VALIDATION PHASE: Loss: 386.0952
EPOCH time taken = 0.024903059005737305 sec
Learning Rate 0.001
Epoch 2/999
----------------------------------------
TRAIN PHASE: Loss: 406.2894
VALIDATION PHASE: Loss: 330.5419
EPOCH time taken = 0.025929689407348633 sec
Learning Rate 0.001
Epoch 3/999
----------------------------------------
TRAIN PHASE: Loss: 355.5838
VALIDATION PHASE: Loss: 308.0973
EPOCH time taken = 0.024933338165283203 sec
Learning Rate 0.001
Epoch 4/999
----------------------------------------
TRAIN PHASE: Loss: 330.6321
VALIDATION PHASE: Loss: 299.3188
EPOCH time taken = 0.023936033248901367 sec
Learning Rate 0.001
Epoch 5/999
----------------------------------------
TRAIN PHASE: Loss: 317.8460
VALIDATION PHASE: Loss: 296.

VALIDATION PHASE: Loss: 254.2591
EPOCH time taken = 0.02393651008605957 sec
Learning Rate 0.001
Epoch 48/999
----------------------------------------
TRAIN PHASE: Loss: 259.7804
VALIDATION PHASE: Loss: 253.0363
EPOCH time taken = 0.023932695388793945 sec
Learning Rate 0.001
Epoch 49/999
----------------------------------------
TRAIN PHASE: Loss: 258.5738
VALIDATION PHASE: Loss: 251.7923
EPOCH time taken = 0.023931264877319336 sec
Learning Rate 0.0005
Epoch 50/999
----------------------------------------
TRAIN PHASE: Loss: 255.6797
VALIDATION PHASE: Loss: 250.5577
EPOCH time taken = 0.02394270896911621 sec
Learning Rate 0.0005
Epoch 51/999
----------------------------------------
TRAIN PHASE: Loss: 254.8930
VALIDATION PHASE: Loss: 249.6964
EPOCH time taken = 0.023935794830322266 sec
Learning Rate 0.0005
Epoch 52/999
----------------------------------------
TRAIN PHASE: Loss: 254.2475
VALIDATION PHASE: Loss: 248.9487
EPOCH time taken = 0.023936986923217773 sec
Learning Rate 0.0005
Epoch 

VALIDATION PHASE: Loss: 221.8181
EPOCH time taken = 0.02492690086364746 sec
Learning Rate 0.0005
Epoch 96/999
----------------------------------------
TRAIN PHASE: Loss: 227.7494
VALIDATION PHASE: Loss: 221.2024
EPOCH time taken = 0.023940324783325195 sec
Learning Rate 0.0005
Epoch 97/999
----------------------------------------
TRAIN PHASE: Loss: 227.1659
VALIDATION PHASE: Loss: 220.5882
EPOCH time taken = 0.023935794830322266 sec
Learning Rate 0.0005
Epoch 98/999
----------------------------------------
TRAIN PHASE: Loss: 226.5840
VALIDATION PHASE: Loss: 219.9757
EPOCH time taken = 0.023941516876220703 sec
Learning Rate 0.0005
Epoch 99/999
----------------------------------------
TRAIN PHASE: Loss: 226.0037
VALIDATION PHASE: Loss: 219.3648
EPOCH time taken = 0.02393031120300293 sec
Learning Rate 0.00025
Epoch 100/999
----------------------------------------
TRAIN PHASE: Loss: 224.5956
VALIDATION PHASE: Loss: 218.8914
EPOCH time taken = 0.022939205169677734 sec
Learning Rate 0.00025
E

VALIDATION PHASE: Loss: 206.1332
EPOCH time taken = 0.023967981338500977 sec
Learning Rate 0.00025
Epoch 143/999
----------------------------------------
TRAIN PHASE: Loss: 212.4061
VALIDATION PHASE: Loss: 205.8427
EPOCH time taken = 0.022938013076782227 sec
Learning Rate 0.00025
Epoch 144/999
----------------------------------------
TRAIN PHASE: Loss: 212.1393
VALIDATION PHASE: Loss: 205.5508
EPOCH time taken = 0.02293562889099121 sec
Learning Rate 0.00025
Epoch 145/999
----------------------------------------
TRAIN PHASE: Loss: 211.8609
VALIDATION PHASE: Loss: 205.2635
EPOCH time taken = 0.02393507957458496 sec
Learning Rate 0.00025
Epoch 146/999
----------------------------------------
TRAIN PHASE: Loss: 211.5909
VALIDATION PHASE: Loss: 204.9756
EPOCH time taken = 0.022939682006835938 sec
Learning Rate 0.00025
Epoch 147/999
----------------------------------------
TRAIN PHASE: Loss: 211.3210
VALIDATION PHASE: Loss: 204.6877
EPOCH time taken = 0.02293086051940918 sec
Learning Rate 0.

VALIDATION PHASE: Loss: 198.1804
EPOCH time taken = 0.023935556411743164 sec
Learning Rate 0.000125
Epoch 191/999
----------------------------------------
TRAIN PHASE: Loss: 204.6617
VALIDATION PHASE: Loss: 198.0418
EPOCH time taken = 0.02496337890625 sec
Learning Rate 0.000125
Epoch 192/999
----------------------------------------
TRAIN PHASE: Loss: 204.5318
VALIDATION PHASE: Loss: 197.9032
EPOCH time taken = 0.02390575408935547 sec
Learning Rate 0.000125
Epoch 193/999
----------------------------------------
TRAIN PHASE: Loss: 204.4021
VALIDATION PHASE: Loss: 197.7648
EPOCH time taken = 0.023965835571289062 sec
Learning Rate 0.000125
Epoch 194/999
----------------------------------------
TRAIN PHASE: Loss: 204.2725
VALIDATION PHASE: Loss: 197.6265
EPOCH time taken = 0.023935794830322266 sec
Learning Rate 0.000125
Epoch 195/999
----------------------------------------
TRAIN PHASE: Loss: 204.1430
VALIDATION PHASE: Loss: 197.4884
EPOCH time taken = 0.022943735122680664 sec
Learning Rate

VALIDATION PHASE: Loss: 194.1492
EPOCH time taken = 0.022933483123779297 sec
Learning Rate 6.25e-05
Epoch 240/999
----------------------------------------
TRAIN PHASE: Loss: 200.7347
VALIDATION PHASE: Loss: 194.0809
EPOCH time taken = 0.0249326229095459 sec
Learning Rate 6.25e-05
Epoch 241/999
----------------------------------------
TRAIN PHASE: Loss: 200.6714
VALIDATION PHASE: Loss: 194.0126
EPOCH time taken = 0.023935317993164062 sec
Learning Rate 6.25e-05
Epoch 242/999
----------------------------------------
TRAIN PHASE: Loss: 200.6082
VALIDATION PHASE: Loss: 193.9444
EPOCH time taken = 0.022941112518310547 sec
Learning Rate 6.25e-05
Epoch 243/999
----------------------------------------
TRAIN PHASE: Loss: 200.5450
VALIDATION PHASE: Loss: 193.8761
EPOCH time taken = 0.023934364318847656 sec
Learning Rate 6.25e-05
Epoch 244/999
----------------------------------------
TRAIN PHASE: Loss: 200.4819
VALIDATION PHASE: Loss: 193.8080
EPOCH time taken = 0.022938966751098633 sec
Learning R

VALIDATION PHASE: Loss: 192.1620
EPOCH time taken = 0.024903297424316406 sec
Learning Rate 3.125e-05
Epoch 288/999
----------------------------------------
TRAIN PHASE: Loss: 198.8097
VALIDATION PHASE: Loss: 192.1283
INFO: Early stopping counter 1 of 7
EPOCH time taken = 0.024965524673461914 sec
Learning Rate 3.125e-05
Epoch 289/999
----------------------------------------
TRAIN PHASE: Loss: 198.7785
VALIDATION PHASE: Loss: 192.0946
EPOCH time taken = 0.024927616119384766 sec
Learning Rate 3.125e-05
Epoch 290/999
----------------------------------------
TRAIN PHASE: Loss: 198.7473
VALIDATION PHASE: Loss: 192.0609
INFO: Early stopping counter 1 of 7
EPOCH time taken = 0.023938655853271484 sec
Learning Rate 3.125e-05
Epoch 291/999
----------------------------------------
TRAIN PHASE: Loss: 198.7161
VALIDATION PHASE: Loss: 192.0272
EPOCH time taken = 0.02393651008605957 sec
Learning Rate 3.125e-05
Epoch 292/999
----------------------------------------
TRAIN PHASE: Loss: 198.6848
VALIDATIO

VALIDATION PHASE: Loss: 191.2827
INFO: Early stopping counter 1 of 7
EPOCH time taken = 0.02393627166748047 sec
Learning Rate 1.5625e-05
Epoch 328/999
----------------------------------------
TRAIN PHASE: Loss: 197.9497
VALIDATION PHASE: Loss: 191.2659
INFO: Early stopping counter 2 of 7
EPOCH time taken = 0.025929689407348633 sec
Learning Rate 1.5625e-05
Epoch 329/999
----------------------------------------
TRAIN PHASE: Loss: 197.9342
VALIDATION PHASE: Loss: 191.2491
EPOCH time taken = 0.02493429183959961 sec
Learning Rate 1.5625e-05
Epoch 330/999
----------------------------------------
TRAIN PHASE: Loss: 197.9187
VALIDATION PHASE: Loss: 191.2322
INFO: Early stopping counter 1 of 7
EPOCH time taken = 0.023935556411743164 sec
Learning Rate 1.5625e-05
Epoch 331/999
----------------------------------------
TRAIN PHASE: Loss: 197.9032
VALIDATION PHASE: Loss: 191.2154
INFO: Early stopping counter 2 of 7
EPOCH time taken = 0.023936033248901367 sec
Learning Rate 1.5625e-05
Epoch 332/999
--

TRAIN PHASE: Loss: 197.4598
VALIDATION PHASE: Loss: 190.7702
INFO: Early stopping counter 3 of 7
EPOCH time taken = 0.025901317596435547 sec
Learning Rate 7.8125e-06
Epoch 367/999
----------------------------------------
TRAIN PHASE: Loss: 197.4521
VALIDATION PHASE: Loss: 190.7618
INFO: Early stopping counter 4 of 7
EPOCH time taken = 0.02393651008605957 sec
Learning Rate 7.8125e-06
Epoch 368/999
----------------------------------------
TRAIN PHASE: Loss: 197.4443
VALIDATION PHASE: Loss: 190.7534
INFO: Early stopping counter 5 of 7
EPOCH time taken = 0.024891376495361328 sec
Learning Rate 7.8125e-06
Epoch 369/999
----------------------------------------
TRAIN PHASE: Loss: 197.4366
VALIDATION PHASE: Loss: 190.7450
EPOCH time taken = 0.02493429183959961 sec
Learning Rate 7.8125e-06
Epoch 370/999
----------------------------------------
TRAIN PHASE: Loss: 197.4289
VALIDATION PHASE: Loss: 190.7366
INFO: Early stopping counter 1 of 7
EPOCH time taken = 0.023935317993164062 sec
Learning Rate

VALIDATION PHASE: Loss: 190.4641
INFO: Early stopping counter 7 of 7
INFO: Early stopping
EARLY STOPPING
EPOCH time taken = 0.024906396865844727 sec
Learning Rate 3.90625e-06
*******EARLY STOPPING*******
Finished Training


In [18]:
def evaluator(model, dataloader):
    """Count and print how many samples ``model`` classifies correctly.

    A sample counts as correct when the one-hot encoding of the arg-max of the
    model output equals its target row exactly.

    The model is switched to eval mode and gradients are disabled while scoring;
    the previous train/eval mode is restored afterwards. Batches may have any
    size, so a shorter final batch is handled.

    :param model: network mapping a batch of features to a (batch, classes) output
    :param dataloader: iterable of batches, each a mapping with the keys
           ``'features'`` and ``'targets'`` (one-hot rows)
    """
    correct = 0
    incorrect = 0
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for data in dataloader:
                features, targets = data['features'], data['targets']
                batch_output = model(features)
                # One-hot encode the predicted class of every row in the batch.
                predicted = torch.zeros_like(batch_output)
                predicted.scatter_(1, batch_output.argmax(dim=1, keepdim=True), 1.0)
                hits = int((predicted == targets.to(batch_output.device)).all(dim=1).sum())
                correct += hits
                # Use the real batch size, not the global batch_size constant.
                incorrect += batch_output.size(0) - hits
    finally:
        model.train(was_training)
    print("correct =", correct, "incorrect =", incorrect, "Total =", correct+incorrect)

In [19]:
# Score the trained model on the batches served by test_dataloader.
evaluator(model_ft, test_dataloader)

correct = 23 incorrect = 19 Total = 42


In [20]:
# Score the trained model on the batches served by val_dataloader.
evaluator(model_ft, val_dataloader)

correct = 33 incorrect = 9 Total = 42


In [21]:
# Score the trained model on the batches served by train_dataloader.
evaluator(model_ft, train_dataloader)

correct = 84 incorrect = 42 Total = 126
