In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [2]:
df = pd.read_csv('./modified_seeds_dataset.txt', sep="\t")
inp_features = ["area_A","perimeter_P","compactness(4*pi*A/P^2)","kernel_length","kernel_width",
                           "asymmetry_coefficient", "kernel_groove_length"] #["f1", "f2", "f3", "f4", "f5", "f6", "f7"]
df.head()

Unnamed: 0,area_A,perimeter_P,compactness(4*pi*A/P^2),kernel_length,kernel_width,asymmetry_coefficient,kernel_groove_length,type
0,15.26,14.84,0.871,5.763,3.312,2.221,5.22,1
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1
2,14.29,14.09,0.905,5.291,3.337,2.699,4.825,1
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,1
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1


In [3]:
inp_features_dict = {}
for i in inp_features:
    inp_features_dict[i] = {"range" : float(df[[i]].max() - df[[i]].min()) }

print(inp_features_dict)

{'area_A': {'range': 10.59}, 'perimeter_P': {'range': 4.84}, 'compactness(4*pi*A/P^2)': {'range': 0.11019999999999996}, 'kernel_length': {'range': 1.7759999999999998}, 'kernel_width': {'range': 1.4030000000000005}, 'asymmetry_coefficient': {'range': 7.690899999999999}, 'kernel_groove_length': {'range': 2.0309999999999997}}


In [4]:
scaled_df = df[inp_features].apply(lambda x:x/(x.max() - x.min()))
scaled_df = pd.concat([scaled_df, df[["type"]]], axis = 1)
scaled_df.head()

Unnamed: 0,area_A,perimeter_P,compactness(4*pi*A/P^2),kernel_length,kernel_width,asymmetry_coefficient,kernel_groove_length,type
0,1.440982,3.066116,7.903811,3.244932,2.360656,0.288783,2.570162,1
1,1.405099,3.010331,7.995463,3.127252,2.375624,0.132364,2.440177,1
2,1.349386,2.911157,8.212341,2.979167,2.378475,0.350934,2.375677,1
3,1.306893,2.880165,8.126134,2.997748,2.408411,0.293724,2.36583,1
4,1.524079,3.097107,8.197822,3.185811,2.538845,0.176182,2.548006,1


In [5]:
from sklearn.utils import shuffle
scaled_df = shuffle(scaled_df)
scaled_df.head()

Unnamed: 0,area_A,perimeter_P,compactness(4*pi*A/P^2),kernel_length,kernel_width,asymmetry_coefficient,kernel_groove_length,type
7,1.332389,2.913223,8.086207,3.051802,2.353528,0.351064,2.461841,1
18,1.388102,2.93595,8.305808,2.930743,2.470421,0.229752,2.28902,1
184,1.217186,2.845041,7.750454,3.094032,2.156807,0.804197,2.61743,3
19,1.201133,2.803719,7.882033,2.942568,2.1732,0.533358,2.419498,1
87,1.792257,3.442149,7.794918,3.6875,2.615823,0.479918,3.199409,2


In [6]:
from sklearn.model_selection import train_test_split
x_train, x_test_val, y_train, y_test_val = train_test_split(scaled_df[inp_features], 
                                                            scaled_df[["type"]], train_size=0.6, shuffle=False)
x_test, x_val, y_test, y_val = train_test_split(x_test_val, y_test_val, test_size=0.5, shuffle=False)
print(len(x_train), len(y_train), len(x_test), len(y_test), len(x_val), len(y_val))

126 126 42 42 42 42


In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
class my_model(nn.Module):
    '''A simple model with 1 hidden layer. 7,10 and 3 neurons in input, hidden and output layer respectively.'''
    def __init__(self): 
        super(my_model, self).__init__()
        self.input_layer = nn.Linear(in_features=7, out_features=10)
        self.output_layer = nn.Linear(in_features=10, out_features=3)
    
    def forward(self, x1):
        output = self.output_layer(F.relu(self.input_layer(x1)))
        return output
    
model = my_model()  
summary(model, (1,7))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 1, 10]              80
            Linear-2                 [-1, 1, 3]              33
Total params: 113
Trainable params: 113
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [9]:
from torch.utils.data import Dataset

class MyDataFetcher(Dataset):    
    targets_dict = {1:[1,0,0], 2:[0,1,0], 3:[0,0,1]}
    def __init__(self, feature_cols, target_col):
        self.feature_cols = feature_cols
        self.target_col = target_col
    def __len__(self):
        return len(self.target_col)
    def __getitem__(self, idx):
        target = int(self.target_col.iloc[idx])
        target_list = torch.Tensor(self.targets_dict[target])
        features = self.feature_cols.iloc[idx]
        features = torch.FloatTensor(features)
        sample = {'features':features, 'targets':target_list}
        #print(sample)
        return sample

In [10]:
train_dataset = MyDataFetcher(feature_cols=x_train, target_col=y_train)
val_dataset = MyDataFetcher(feature_cols=x_val, target_col=y_val)
test_dataset = MyDataFetcher(feature_cols=x_test, target_col=y_test)
batch_size = 21

from torch.utils.data import DataLoader
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [11]:
def visualize_sample_batch(data):
    features, target = data['features'], data['targets']
    for i in range(3):
        print(features[i], target[i])

In [12]:
for i_batch, data in enumerate(train_dataloader):
    visualize_sample_batch(data)
    break

tensor([1.3324, 2.9132, 8.0862, 3.0518, 2.3535, 0.3511, 2.4618]) tensor([1., 0., 0.])
tensor([1.3881, 2.9360, 8.3058, 2.9307, 2.4704, 0.2298, 2.2890]) tensor([1., 0., 0.])
tensor([1.2172, 2.8450, 7.7505, 3.0940, 2.1568, 0.8042, 2.6174]) tensor([0., 0., 1.])


In [13]:
class EarlyStopping():
    """
    Early stopping to stop the training when the validation loss does not improve after
    certain epochs.
    """
    def __init__(self, patience=5, min_delta=0):
        """
        :param patience: how many epochs to wait before stopping when loss is
               not improving
        :param min_delta: minimum difference between new loss and old loss for
               new loss to be considered as an improvement
        """
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
    def __call__(self, val_loss):
        if self.best_loss == None:
            self.best_loss = val_loss
        elif self.best_loss - val_loss > self.min_delta:
            self.best_loss = val_loss
            # reset counter if validation loss improves
            self.counter = 0
        elif self.best_loss - val_loss < self.min_delta:
            self.counter += 1
            print(f"INFO: Early stopping counter {self.counter} of {self.patience}")
            if self.counter >= self.patience:
                print('INFO: Early stopping')
                self.early_stop = True



In [14]:
import torch.optim as optim
from torch.optim import lr_scheduler

criterion = torch.nn.MSELoss(reduction='sum')
#optimizer = optim.RMSprop(model.parameters(), lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, 
#                          centered=False)

optimizer = optim.Adam(model.parameters())
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

In [15]:
xdataloaders = {'train': train_dataloader,
                'val' : val_dataloader}
xdataset_sizes = {'train':len(train_dataloader),
                 'val': len(val_dataloader)}
def get_lr(optimizer):
    '''Get Current Learning Rate'''
    for param_group in optimizer.param_groups:
        return param_group['lr']

In [16]:
import copy, time
from torch.utils.tensorboard import SummaryWriter

early_stopping = EarlyStopping(patience = 7, min_delta=0.05)
def train_model(model, criterion, optimizer, scheduler, num_epochs=5):
    writer = SummaryWriter("./tensorboard_events")
    best_acc = 0.0
    flag_early_stop = False
    for epoch in range(num_epochs):
        t1 = time.time()
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 40)
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode
            running_loss = 0.0
            running_corrects = 0
            # Iterate over data.
            for idx, xdata in enumerate(xdataloaders[phase], 0):
                inputs = xdata['features'] 
                targets = xdata['targets']
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward propagate
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, targets)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                #running_corrects += torch.sum(outputs == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / xdataset_sizes[phase]
            #epoch_acc = running_corrects.double() / xdataset_sizes[phase]
            if phase == 'train':
                print('TRAIN PHASE: Loss: {:.4f}'.format(epoch_loss))
                writer.add_scalar("Loss/train", epoch_loss, epoch)
            elif phase == 'val':
                print('VALIDATION PHASE: Loss: {:.4f}'.format(epoch_loss))
                early_stopping(epoch_loss)
                writer.add_scalar("Loss/val", epoch_loss, epoch)
                if early_stopping.early_stop:
                    print('EARLY STOPPING')
                    flag_early_stop = True
                    break
            # deep copy the model
            # if phase == 'val' and epoch_acc > best_acc:
            #     best_acc = epoch_acc
            #     best_model_wts = copy.deepcopy(wpod_model.state_dict())
        t2 = time.time()
        print('EPOCH time taken =', t2 - t1, 'sec')
        print('Learning Rate', get_lr(optimizer))
        if flag_early_stop:
            print('*******EARLY STOPPING*******')
            break
        #fil1.writelines(str(epoch)+','+str(epoch_loss)+','+'\n')
        #writer.add_scalar("Loss/train", epoch_loss, epoch)
        #writer.add_scalar("Accuracy/train", epoch_acc, epoch)

    #print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    #wpod_model.load_state_dict(best_model_wts)
    print('Finished Training')
    writer.flush()
    writer.close()
    return model

In [17]:
model_ft = train_model(model, criterion, optimizer, step_lr_scheduler, num_epochs=600)

Epoch 0/599
----------------------------------------
TRAIN PHASE: Loss: 931.1829
VALIDATION PHASE: Loss: 790.7601
EPOCH time taken = 0.02193164825439453 sec
Learning Rate 0.001
Epoch 1/599
----------------------------------------
TRAIN PHASE: Loss: 670.6293
VALIDATION PHASE: Loss: 578.1988
EPOCH time taken = 0.019947052001953125 sec
Learning Rate 0.001
Epoch 2/599
----------------------------------------
TRAIN PHASE: Loss: 507.0470
VALIDATION PHASE: Loss: 446.6767
EPOCH time taken = 0.018949031829833984 sec
Learning Rate 0.001
Epoch 3/599
----------------------------------------
TRAIN PHASE: Loss: 400.6880
VALIDATION PHASE: Loss: 374.4233
EPOCH time taken = 0.020946741104125977 sec
Learning Rate 0.001
Epoch 4/599
----------------------------------------
TRAIN PHASE: Loss: 343.2882
VALIDATION PHASE: Loss: 344.5700
EPOCH time taken = 0.02094125747680664 sec
Learning Rate 0.001
Epoch 5/599
----------------------------------------
TRAIN PHASE: Loss: 320.9739
VALIDATION PHASE: Loss: 334.796

TRAIN PHASE: Loss: 217.2606
VALIDATION PHASE: Loss: 227.8514
EPOCH time taken = 0.021942615509033203 sec
Learning Rate 0.0005
Epoch 55/599
----------------------------------------
TRAIN PHASE: Loss: 216.3383
VALIDATION PHASE: Loss: 226.8834
EPOCH time taken = 0.019945144653320312 sec
Learning Rate 0.0005
Epoch 56/599
----------------------------------------
TRAIN PHASE: Loss: 215.4228
VALIDATION PHASE: Loss: 225.9290
EPOCH time taken = 0.018949270248413086 sec
Learning Rate 0.0005
Epoch 57/599
----------------------------------------
TRAIN PHASE: Loss: 214.5068
VALIDATION PHASE: Loss: 224.9793
EPOCH time taken = 0.019946813583374023 sec
Learning Rate 0.0005
Epoch 58/599
----------------------------------------
TRAIN PHASE: Loss: 213.5904
VALIDATION PHASE: Loss: 224.0287
EPOCH time taken = 0.018949031829833984 sec
Learning Rate 0.0005
Epoch 59/599
----------------------------------------
TRAIN PHASE: Loss: 212.6775
VALIDATION PHASE: Loss: 223.0776
EPOCH time taken = 0.01894974708557129 

TRAIN PHASE: Loss: 175.8762
VALIDATION PHASE: Loss: 185.0127
EPOCH time taken = 0.020943880081176758 sec
Learning Rate 0.00025
Epoch 110/599
----------------------------------------
TRAIN PHASE: Loss: 175.5229
VALIDATION PHASE: Loss: 184.6439
EPOCH time taken = 0.018949508666992188 sec
Learning Rate 0.00025
Epoch 111/599
----------------------------------------
TRAIN PHASE: Loss: 175.1700
VALIDATION PHASE: Loss: 184.2742
EPOCH time taken = 0.019946575164794922 sec
Learning Rate 0.00025
Epoch 112/599
----------------------------------------
TRAIN PHASE: Loss: 174.8177
VALIDATION PHASE: Loss: 183.9042
EPOCH time taken = 0.018949508666992188 sec
Learning Rate 0.00025
Epoch 113/599
----------------------------------------
TRAIN PHASE: Loss: 174.4664
VALIDATION PHASE: Loss: 183.5349
EPOCH time taken = 0.018949031829833984 sec
Learning Rate 0.00025
Epoch 114/599
----------------------------------------
TRAIN PHASE: Loss: 174.1162
VALIDATION PHASE: Loss: 183.1672
EPOCH time taken = 0.01894903

TRAIN PHASE: Loss: 160.0137
VALIDATION PHASE: Loss: 168.5248
EPOCH time taken = 0.01994013786315918 sec
Learning Rate 0.000125
Epoch 165/599
----------------------------------------
TRAIN PHASE: Loss: 159.8599
VALIDATION PHASE: Loss: 168.3577
EPOCH time taken = 0.019954919815063477 sec
Learning Rate 0.000125
Epoch 166/599
----------------------------------------
TRAIN PHASE: Loss: 159.7064
VALIDATION PHASE: Loss: 168.1934
EPOCH time taken = 0.01994609832763672 sec
Learning Rate 0.000125
Epoch 167/599
----------------------------------------
TRAIN PHASE: Loss: 159.5533
VALIDATION PHASE: Loss: 168.0314
EPOCH time taken = 0.01894974708557129 sec
Learning Rate 0.000125
Epoch 168/599
----------------------------------------
TRAIN PHASE: Loss: 159.4006
VALIDATION PHASE: Loss: 167.8708
EPOCH time taken = 0.019947528839111328 sec
Learning Rate 0.000125
Epoch 169/599
----------------------------------------
TRAIN PHASE: Loss: 159.2482
VALIDATION PHASE: Loss: 167.7113
EPOCH time taken = 0.018945

TRAIN PHASE: Loss: 153.2401
VALIDATION PHASE: Loss: 161.4685
EPOCH time taken = 0.020946979522705078 sec
Learning Rate 6.25e-05
Epoch 220/599
----------------------------------------
TRAIN PHASE: Loss: 153.1680
VALIDATION PHASE: Loss: 161.3920
EPOCH time taken = 0.018952608108520508 sec
Learning Rate 6.25e-05
Epoch 221/599
----------------------------------------
TRAIN PHASE: Loss: 153.0960
VALIDATION PHASE: Loss: 161.3156
EPOCH time taken = 0.019947052001953125 sec
Learning Rate 6.25e-05
Epoch 222/599
----------------------------------------
TRAIN PHASE: Loss: 153.0241
VALIDATION PHASE: Loss: 161.2392
EPOCH time taken = 0.018944978713989258 sec
Learning Rate 6.25e-05
Epoch 223/599
----------------------------------------
TRAIN PHASE: Loss: 152.9521
VALIDATION PHASE: Loss: 161.1629
EPOCH time taken = 0.01994490623474121 sec
Learning Rate 6.25e-05
Epoch 224/599
----------------------------------------
TRAIN PHASE: Loss: 152.8803
VALIDATION PHASE: Loss: 161.0867
EPOCH time taken = 0.0189

TRAIN PHASE: Loss: 150.5558
VALIDATION PHASE: Loss: 158.6694
EPOCH time taken = 0.020942211151123047 sec
Learning Rate 3.125e-05
Epoch 264/599
----------------------------------------
TRAIN PHASE: Loss: 150.5204
VALIDATION PHASE: Loss: 158.6317
INFO: Early stopping counter 1 of 7
EPOCH time taken = 0.018950700759887695 sec
Learning Rate 3.125e-05
Epoch 265/599
----------------------------------------
TRAIN PHASE: Loss: 150.4851
VALIDATION PHASE: Loss: 158.5941
EPOCH time taken = 0.019945859909057617 sec
Learning Rate 3.125e-05
Epoch 266/599
----------------------------------------
TRAIN PHASE: Loss: 150.4497
VALIDATION PHASE: Loss: 158.5564
INFO: Early stopping counter 1 of 7
EPOCH time taken = 0.018954992294311523 sec
Learning Rate 3.125e-05
Epoch 267/599
----------------------------------------
TRAIN PHASE: Loss: 150.4143
VALIDATION PHASE: Loss: 158.5187
EPOCH time taken = 0.019941329956054688 sec
Learning Rate 3.125e-05
Epoch 268/599
----------------------------------------
TRAIN PH

TRAIN PHASE: Loss: 149.1186
VALIDATION PHASE: Loss: 157.1788
INFO: Early stopping counter 2 of 7
EPOCH time taken = 0.01993584632873535 sec
Learning Rate 1.5625e-05
Epoch 308/599
----------------------------------------
TRAIN PHASE: Loss: 149.1009
VALIDATION PHASE: Loss: 157.1601
EPOCH time taken = 0.01995372772216797 sec
Learning Rate 1.5625e-05
Epoch 309/599
----------------------------------------
TRAIN PHASE: Loss: 149.0832
VALIDATION PHASE: Loss: 157.1414
INFO: Early stopping counter 1 of 7
EPOCH time taken = 0.019953489303588867 sec
Learning Rate 1.5625e-05
Epoch 310/599
----------------------------------------
TRAIN PHASE: Loss: 149.0656
VALIDATION PHASE: Loss: 157.1226
INFO: Early stopping counter 2 of 7
EPOCH time taken = 0.018941640853881836 sec
Learning Rate 1.5625e-05
Epoch 311/599
----------------------------------------
TRAIN PHASE: Loss: 149.0479
VALIDATION PHASE: Loss: 157.1038
EPOCH time taken = 0.01895284652709961 sec
Learning Rate 1.5625e-05
Epoch 312/599
-----------

TRAIN PHASE: Loss: 148.3454
VALIDATION PHASE: Loss: 156.3720
EPOCH time taken = 0.020938634872436523 sec
Learning Rate 7.8125e-06
Epoch 352/599
----------------------------------------
TRAIN PHASE: Loss: 148.3365
VALIDATION PHASE: Loss: 156.3625
INFO: Early stopping counter 1 of 7
EPOCH time taken = 0.018954992294311523 sec
Learning Rate 7.8125e-06
Epoch 353/599
----------------------------------------
TRAIN PHASE: Loss: 148.3276
VALIDATION PHASE: Loss: 156.3532
INFO: Early stopping counter 2 of 7
EPOCH time taken = 0.019946575164794922 sec
Learning Rate 7.8125e-06
Epoch 354/599
----------------------------------------
TRAIN PHASE: Loss: 148.3187
VALIDATION PHASE: Loss: 156.3438
INFO: Early stopping counter 3 of 7
EPOCH time taken = 0.01995086669921875 sec
Learning Rate 7.8125e-06
Epoch 355/599
----------------------------------------
TRAIN PHASE: Loss: 148.3098
VALIDATION PHASE: Loss: 156.3344
INFO: Early stopping counter 4 of 7
EPOCH time taken = 0.018944263458251953 sec
Learning Rat

TRAIN PHASE: Loss: 147.9533
VALIDATION PHASE: Loss: 155.9551
INFO: Early stopping counter 2 of 7
EPOCH time taken = 0.02094268798828125 sec
Learning Rate 7.8125e-06
Epoch 396/599
----------------------------------------
TRAIN PHASE: Loss: 147.9444
VALIDATION PHASE: Loss: 155.9455
INFO: Early stopping counter 3 of 7
EPOCH time taken = 0.018947601318359375 sec
Learning Rate 7.8125e-06
Epoch 397/599
----------------------------------------
TRAIN PHASE: Loss: 147.9354
VALIDATION PHASE: Loss: 155.9360
INFO: Early stopping counter 4 of 7
EPOCH time taken = 0.019947528839111328 sec
Learning Rate 7.8125e-06
Epoch 398/599
----------------------------------------
TRAIN PHASE: Loss: 147.9265
VALIDATION PHASE: Loss: 155.9264
INFO: Early stopping counter 5 of 7
EPOCH time taken = 0.018949508666992188 sec
Learning Rate 7.8125e-06
Epoch 399/599
----------------------------------------
TRAIN PHASE: Loss: 147.9175
VALIDATION PHASE: Loss: 155.9169
EPOCH time taken = 0.019947290420532227 sec
Learning Rat

In [18]:
def evaluator(model, dataloader):
    correct = 0
    incorrect = 0
    for i_batch, data in enumerate(dataloader):
        features, target = data['features'], data['targets'].detach().numpy()
        batch_output = model(features)
        for idx in range(batch_size):
            output = batch_output[idx].cpu().detach().numpy()
            op = np.zeros(3, dtype=np.float32)
            op[np.argmax(output)] = 1.0
            #print(output, op, target[idx])
            #print(print(type(target[idx])))
            if np.array_equal(target[idx], op):
                correct += 1
            else:
                incorrect += 1
    print("correct =", correct, "incorrect =", incorrect, "Total =", correct+incorrect)

In [19]:
evaluator(model_ft, test_dataloader)

correct = 35 incorrect = 7 Total = 42


In [20]:
evaluator(model_ft, val_dataloader)

correct = 39 incorrect = 3 Total = 42


In [21]:
evaluator(model_ft, train_dataloader)

correct = 113 incorrect = 13 Total = 126


In [23]:
evaluate_df = pd.concat([x_test, y_test], axis = 1)
evaluate_df.head()

Unnamed: 0,area_A,perimeter_P,compactness(4*pi*A/P^2),kernel_length,kernel_width,asymmetry_coefficient,kernel_groove_length,type
31,1.462701,3.086777,7.916515,3.241554,2.402708,0.443641,2.574101,1
155,1.056657,2.696281,7.489111,2.956081,1.906629,0.755828,2.56967,3
101,1.698772,3.27686,8.15971,3.316441,2.632929,0.268889,2.873954,2
165,1.142587,2.716942,7.979129,2.874437,2.096222,0.286182,2.489414,3
24,1.417375,3.049587,7.855717,3.259572,2.312901,0.232873,2.462334,1


In [24]:
evaluate_df.to_csv('test.csv')

In [25]:
torch.save(model_ft.state_dict(), "./wheat_model_mse.pth")