In [1]:
import torch
import numpy as np
from random import shuffle
from sklearn import datasets
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score, f1_score

## Exercise 2: End-to-End Self-Driving via Convolutional Neural Net- works 

In [7]:
import os
import zipfile
from skimage import io

#uploading data
data_name = 'driving_dataset'
data_dir = os.listdir(data_name)
#data description is in txt file so read it
txt_file = [i for i in data_dir if i.find('.txt') >= 0]
txt_file = txt_file[0]

with open(os.path.join(data_name, txt_file), 'r') as f1:
    markimg = f1.readlines()
mark = [i[:-2] for i in mark]
#In each line there are two words separated by blank. First word is img name and second is target variable value
dataset = [i.split(' ') for i in mark]

data_train_val = dataset[:-10000]
data_train = data_train_val[:int(len(data_train_val)*0.8)]
data_val = data_train_val[int(len(data_train_val)*0.8):]
data_test = dataset[-10000:]

learning on full dataset is very very very very very long (especially random search later), so it would be better to reduce data size

In [8]:
data_train = data_train[:min(len(data_train), 3000)]
data_val = data_val[:min(len(data_val), 1000)]
data_test = data_test[:min(len(data_test), 1000)]

In [9]:
#If cuda is not available, training on full data might be vary long process
#So if cuda is not available, we reduce size of datasets to reduce time of learning
if not(torch.cuda.is_available()):
    print('Cuda is not available so it would be better to reduce size of datasets')
    data_train = data_train[:min(len(data_train), 1000)]
    data_val = data_val[:min(len(data_val), 1000)]
    data_test = data_test[:min(len(data_test), 1000)]

In [10]:
torch.cuda.is_available()

True

4.	Implement the Convolutional Neural Network Architecture proposed in the paper titled, "End to End Learning for Self-Driving Cars". The paper can be accessed here: https://arxiv.org/abs/1604.07316

In [11]:
import torch.nn as nn
#creating neural network class
#Data in dataset and data in the paper has different img sizes so last two convolutions were made with stride 2 to reduce size
class NetRegressor(nn.Module):
    def __init__(self):
        super(NetRegressor, self).__init__()
        #self.input_norm = nn.LayerNorm()
        #convolutional layers
        self.layer1 = nn.Conv2d(in_channels = 3, out_channels = 24, kernel_size = 5, stride = 2)
        self.layer2 = nn.Conv2d(in_channels = 24, out_channels = 36, kernel_size = 5, stride = 2) 
        self.layer3 = nn.Conv2d(in_channels = 36, out_channels = 48, kernel_size = 5, stride = 2)
        self.layer4 = nn.Conv2d(in_channels = 48, out_channels = 64, kernel_size = 3, stride = 2)
        self.layer5 = nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = 3, stride = 2)
        
        #self.flat = nn.Flatten()
        #linear layers
        self.lin1 = nn.Linear(4608, 100)
        self.lin2 = nn.Linear(100, 50)
        self.lin3 = nn.Linear(50, 10)
        self.lin4 = nn.Linear(10, 1)
    
    def forward(self, x):
        #x = self.input_norm(x)
        x = (x-x.mean())/x.std() #Do normalization
        #apply convolutional layers
        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)
        x5 = self.layer5(x4)
        #x5 = self.flat(x5)
        #x5 = torch.flatten(x5, start_dim = 2)
        x5 = x5.view(x5.size(0), -1) #flatten
        #Then apply linear layers
        x6 = self.lin1(x5) 
        x7 = self.lin2(x6)
        x8 = self.lin3(x7)
        out = self.lin4(x8)
        return out

In [12]:
import torch.utils.data
batch_size = 10
n_epoch = 5
train_err_list = []
test_err_list = []
#Make dataloaders that will split data to batches for learning
trainLoader = torch.utils.data.DataLoader(data_train, batch_size=batch_size,
                                          shuffle=True, num_workers=1)
valLoader = torch.utils.data.DataLoader(data_val, batch_size=batch_size,
                                         shuffle=False, num_workers=1)
#Check if cuda is available. If it is, then we will do all learning on gpu to make it much faster
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

RegNet = NetRegressor()
RegNet.to(device)
lossMSE = nn.MSELoss(reduce = True, reduction = 'mean')
optimizer = torch.optim.Adam(RegNet.parameters(), lr=0.01, weight_decay = 0.1)

for epoch in range(n_epoch):
    print('Epoch number {}'.format(epoch))
    print('Number of batches is {}'.format(int(len(data_val)/batch_size)))
    train_losses = []
    RegNet.train() #Change neural network's mode to training mode
    for i, d in enumerate(trainLoader):
        #load data and make it right shape (batch size, number of channels, shape0, shape1)
        in_data = np.array([io.imread(os.path.join(data_name, name)) for name in d[0]])
        in_data = np.swapaxes(in_data, 1, 3)
        #if gpu is available, learning will be on gpu
        if device.type == 'cuda':
            in_data = torch.cuda.FloatTensor(in_data)
            target_data = np.array([float(i) for i in d[1]])
            target_data = torch.cuda.FloatTensor(target_data.reshape((target_data.shape[0], 1)))
        else:
            in_data = torch.cuda.FloatTensor(in_data)
            target_data = np.array([float(i) for i in d[1]])
            target_data = torch.Tensor(target_data.reshape((target_data.shape[0], 1)))
        optimizer.zero_grad()
        outputs = RegNet.forward(in_data)
        loss = lossMSE(outputs, target_data)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.cpu().item())
    train_losses = np.mean(train_losses)
    train_err_list.append(train_losses)
    print('Loss_train: {}'.format(train_losses))
    
    test_losses = []
    RegNet.eval()
    for i, d in enumerate(valLoader):
        in_data = np.array([io.imread(os.path.join(data_name, name)) for name in d[0]])
        in_data = np.swapaxes(in_data, 1, 3)
        if device.type == 'cuda':
            in_data = torch.cuda.FloatTensor(in_data)
            target_data = np.array([float(i) for i in d[1]])
            target_data = torch.cuda.FloatTensor(target_data.reshape((target_data.shape[0], 1)))
        else:
            in_data = torch.cuda.FloatTensor(in_data)
            target_data = np.array([float(i) for i in d[1]])
            target_data = torch.Tensor(target_data.reshape((target_data.shape[0], 1)))
        outputs = RegNet.forward(in_data)
        loss = lossMSE(outputs, target_data)
        test_losses.append(loss.cpu().item())
    
    test_losses = np.mean(test_losses)
    test_err_list.append(test_losses)
    print('Loss_test: {}'.format(test_losses))

print('Finished Training')



Epoch number 0
Number of batches is 100
Loss_train: 4149020.5038834633
Loss_test: 3118.5548056650164
Epoch number 1
Number of batches is 100
Loss_train: 4190.006430358887
Loss_test: 2178.7175324058535
Epoch number 2
Number of batches is 100
Loss_train: 3724.19593943278
Loss_test: 1432.921285095215
Epoch number 3
Number of batches is 100
Loss_train: 3463.870445696513
Loss_test: 3035.1163006591796
Epoch number 4
Number of batches is 100
Loss_train: 3225.475763804118
Loss_test: 3866.173816833496
Finished Training


5.	Report one test RMSE for the test set of images.

In [13]:
final_losses = []
for img, target in data_test:
    in_data = np.array([io.imread(os.path.join(data_name, img))])
    in_data = np.swapaxes(in_data, 1, 3)
    if device.type == 'cuda':
        in_data = torch.cuda.FloatTensor(in_data)
        target_data = torch.cuda.FloatTensor(np.array([[float(target)]]))
    else:
        in_data = torch.Tensor(in_data)
        target_data = torch.FloatTensor(np.array([[float(target)]]))
    outputs = RegNet.forward(in_data)
    loss = lossMSE(outputs, target_data)
    final_losses.append(np.sqrt(loss.cpu().item()))

print('Final RMSE loss is {}'.format(np.mean(final_losses)))

Final RMSE loss is 39.39970518967743


Hyperparameter Tuning, Regularization with Image Transformations 
The aim of this exercise is to further develop modifications on top, that can hopefully lead to performance gains over the architecture from the previous exercise.
1.	Tune the associated hyperparameters such as batch_size, number_of_layers, kernel_sizes, learn- ing_rate, l1_regularization, l2_regularization coefficients etc. Either implement Random Search or Hyperband.

In [14]:
import torch.nn as nn
#creating neural network class
#Data in dataset and data in the paper has different img sizes so last two convolutions were made with stride 2 to reduce size
class VariousNet(nn.Module):
    def __init__(self, n_layers = 5, kernel_sizes = 5, input_ch = 3, img_size = (256, 455)):
        super(VariousNet, self).__init__()
        
        if n_layers <= 0:
            raise "Number of layers must be positive integer!"
        
        n_lin_neurons1 = img_size[0]
        n_lin_neurons2 = img_size[1]
        n_lin_neurons3 = input_ch
        
        self.hidden = nn.ModuleList([])
        
        
        if isinstance(kernel_sizes, int):
            for i in range(n_layers):
                if (int((n_lin_neurons1 - kernel_sizes)/2 + 1) <= 0) or (int((n_lin_neurons2 - kernel_sizes)/2 + 1) <= 0):
                    print("You have chosen too much layers. Set number of layers to maximum equal " + str(i))
                    break
                
                if i == 0:
                    layer_to_add = nn.Conv2d(in_channels = input_ch, out_channels = 24,
                                             kernel_size = int(kernel_sizes), stride = 2)
                    n_lin_neurons3 = 24
                elif i == 1:
                    layer_to_add = nn.Conv2d(in_channels = 24, out_channels = 36,
                                             kernel_size = int(kernel_sizes), stride = 2)
                    n_lin_neurons3 = 36
                elif i == 2:
                    layer_to_add = nn.Conv2d(in_channels = 36, out_channels = 48,
                                             kernel_size = int(kernel_sizes), stride = 2)
                    n_lin_neurons3 = 48
                elif i == 3:
                    layer_to_add = nn.Conv2d(in_channels = 48, out_channels = 64,
                                             kernel_size = int(kernel_sizes), stride = 2)
                    n_lin_neurons3 = 64
                else:
                    layer_to_add = nn.Conv2d(in_channels = 64, out_channels = 64,
                                             kernel_size = int(kernel_sizes), stride = 2)
                    n_lin_neurons3 = 64
                
                n_lin_neurons1 = int((n_lin_neurons1 - kernel_sizes)/2 + 1)
                n_lin_neurons2 = int((n_lin_neurons2 - kernel_sizes)/2 + 1)
                
                self.hidden.append(layer_to_add)
                
        elif isinstance(kernel_sizes, list):
            self.hidden = nn.ModuleList()
            for i in range(n_layers):
                
                if (int((n_lin_neurons1 - kernel_sizes[i])/2 + 1) <= 0) or (int((n_lin_neurons2 - kernel_sizes[i])/2 + 1) <= 0):
                    print("You have chosen too much layers. Set number of layers to maximum equal " + str(i))
                    break
                
                
                if i == 0:
                    layer_to_add = nn.Conv2d(in_channels = input_ch, out_channels = 24,
                                             kernel_size = int(kernel_sizes[i]), stride = 2)
                    n_lin_neurons3 = 24
                elif i == 1:
                    layer_to_add = nn.Conv2d(in_channels = 24, out_channels = 36,
                                             kernel_size = int(kernel_sizes[i]), stride = 2)
                    n_lin_neurons3 = 36
                elif i == 2:
                    layer_to_add = nn.Conv2d(in_channels = 36, out_channels = 48,
                                             kernel_size = int(kernel_sizes[i]), stride = 2)
                    n_lin_neurons3 = 48
                elif i == 3:
                    layer_to_add = nn.Conv2d(in_channels = 48, out_channels = 64,
                                             kernel_size = int(kernel_sizes[i]), stride = 2)
                    n_lin_neurons3 = 64
                else:
                    layer_to_add = nn.Conv2d(in_channels = 64, out_channels = 64,
                                             kernel_size = int(kernel_sizes[i]), stride = 2)
                    n_lin_neurons3 = 64
                
                n_lin_neurons1 = int((n_lin_neurons1 - kernel_sizes[i])/2 + 1)
                n_lin_neurons2 = int((n_lin_neurons2 - kernel_sizes[i])/2 + 1)
                    
                
                self.hidden.append(layer_to_add)
        
        #self.flat = nn.Flatten()
        #linear layers
        self.lin1 = nn.Linear(n_lin_neurons1*n_lin_neurons2*n_lin_neurons3, 100)
        self.lin2 = nn.Linear(100, 50)
        self.lin3 = nn.Linear(50, 10)
        self.lin4 = nn.Linear(10, 1)
        
    def forward(self, x):
        #x = self.input_norm(x)
        x = (x-x.mean())/x.std() #Do normalization
        #apply convolutional layers
        out_conv = x
        for i, conv1 in enumerate(self.hidden):
            out_conv = self.hidden[i](out_conv)
        out_conv = out_conv.view(out_conv.size(0), -1) #flatten
        #Then apply linear layers
        x1 = self.lin1(out_conv) 
        x2 = self.lin2(x1)
        x3 = self.lin3(x2)
        out = self.lin4(x3)
        return out


In [15]:
def learning_process(params, data_train, data_val, n_epoch = 5):
    #Make dataloaders that will split data to batches for learning
    trainLoader = torch.utils.data.DataLoader(data_train, batch_size= int(params.get('batch_size', 5)),
                                              shuffle=True, num_workers=1)
    valLoader = torch.utils.data.DataLoader(data_val, batch_size=int(params.get('batch_size', 5)),
                                             shuffle=False, num_workers=1)
    #Check if cuda is available. If it is, then we will do all learning on gpu to make it much faster
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    n_layers = int(params.get('n_layers', 5))
    kernel_sizes = params.get('kernel_sizes', 3)
    if not isinstance(kernel_sizes, list):
        kernel_sizes = int(kernel_sizes)
    
    RegNet = VariousNet(n_layers = n_layers, kernel_sizes = kernel_sizes)
    RegNet.to(device)
   
    lossMSE = nn.MSELoss(reduce = True, reduction = 'mean')
    optimizer = torch.optim.Adam(RegNet.parameters(), lr = params.get('learning_rate', 0.01),
                                 weight_decay = params.get('l2_regularization', 0.1))

    for epoch in range(n_epoch):
        print('Epoch number {}'.format(epoch))
        #print('Number of batches is {}'.format(int(len(data_val)/batch_size)))
        train_losses = []
        RegNet.train() #Change neural network's mode to training mode
        for i, d in enumerate(trainLoader):
            #load data and make it right shape (batch size, number of channels, shape0, shape1)
            in_data = np.array([io.imread(os.path.join(data_name, name)) for name in d[0]])
            in_data = np.swapaxes(in_data, 1, 3)
            #if gpu is available, learning will be on gpu
            if device.type == 'cuda':
                in_data = torch.cuda.FloatTensor(in_data)
                target_data = np.array([float(i) for i in d[1]])
                target_data = torch.cuda.FloatTensor(target_data.reshape((target_data.shape[0], 1)))
            else:
                in_data = torch.cuda.FloatTensor(in_data)
                target_data = np.array([float(i) for i in d[1]])
                target_data = torch.Tensor(target_data.reshape((target_data.shape[0], 1)))
            optimizer.zero_grad()
            outputs = RegNet.forward(in_data)
            loss = lossMSE(outputs, target_data)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.cpu().item())
        train_losses = np.mean(train_losses)
        train_err_list.append(train_losses)
        print('Loss_train: {}'.format(train_losses))
        
        test_losses = []
        RegNet.eval()
        for i, d in enumerate(valLoader):
            in_data = np.array([io.imread(os.path.join(data_name, name)) for name in d[0]])
            in_data = np.swapaxes(in_data, 1, 3)
            if device.type == 'cuda':
                in_data = torch.cuda.FloatTensor(in_data)
                target_data = np.array([float(i) for i in d[1]])
                target_data = torch.cuda.FloatTensor(target_data.reshape((target_data.shape[0], 1)))
            else:
                in_data = torch.cuda.FloatTensor(in_data)
                target_data = np.array([float(i) for i in d[1]])
                target_data = torch.Tensor(target_data.reshape((target_data.shape[0], 1)))
            outputs = RegNet.forward(in_data)
            loss = lossMSE(outputs, target_data)
            test_losses.append(loss.cpu().item())
    
        test_losses = np.mean(test_losses)
        test_err_list.append(test_losses)
        print('Loss_test: {}'.format(test_losses))

    print('Finished Training')
   
    return RegNet, test_err_list[-1]

In [16]:
def RandomizedSearchArchitecture(grid_of_params, data_train, data_test, n_iter = 3):
    grid_seaved = {key: np.random.choice(grid_of_params[key], n_iter, replace = True) for key in grid_of_params.keys()}
    search_combinations = [{key: grid_seaved[key][i] for key in grid_seaved.keys()} for i in range(n_iter)]
    best_model = VariousNet()
    best_result = np.inf
    
    for i, comb in enumerate(search_combinations):
        print("Combination number {}".format(i))
        net, err = learning_process(comb, data_train, data_test)
        if err < best_result:
            best_result = err
            best_model = net
    print('Search is finished!')
    return best_model, best_result

In [17]:
grid_of_params = {'batch_size': [3, 5, 7], 'kernel_sizes': [2, 3, 5], 'n_layers': [3, 4, 5], 
                  'learning_rate': [0.05, 0.01, 0.1], 'l2_regularization': [0.1, 0.05, 0.01]}

good_model, res = RandomizedSearchArchitecture(grid_of_params, data_train, data_test, n_iter = 3)

Combination number 0
Epoch number 0
Loss_train: 6.669118993123595e+18
Loss_test: 3365753343611596.0
Epoch number 1
Loss_train: 1291188053516269.0
Loss_test: 1558556580684062.5
Epoch number 2
Loss_train: 497056757585886.56
Loss_test: 716181173179771.5
Epoch number 3
Loss_train: 253119420216680.44
Loss_test: 575486993042568.0
Epoch number 4
Loss_train: 158732122908552.66
Loss_test: 424747524508006.06
Finished Training
Combination number 1
Epoch number 0
Loss_train: 4772694656908.317
Loss_test: 370849591.4446108
Epoch number 1
Loss_train: 213493608.6145625
Loss_test: 93760578.44535927
Epoch number 2
Loss_train: 74615164.96530664
Loss_test: 40981787.99038057
Epoch number 3
Loss_train: 34072581.944832034
Loss_test: 20726352.956972774
Epoch number 4
Loss_train: 12914517.424693359
Loss_test: 13511501.735790139
Finished Training
Combination number 2
Epoch number 0
Loss_train: 5960860639982616.0
Loss_test: 56467907033.01
Epoch number 1
Loss_train: 52251756429.22667
Loss_test: 18515321632.72
Epo

In [18]:
good_model

VariousNet(
  (hidden): ModuleList(
    (0): Conv2d(3, 24, kernel_size=(2, 2), stride=(2, 2))
    (1): Conv2d(24, 36, kernel_size=(2, 2), stride=(2, 2))
    (2): Conv2d(36, 48, kernel_size=(2, 2), stride=(2, 2))
  )
  (lin1): Linear(in_features=86016, out_features=100, bias=True)
  (lin2): Linear(in_features=100, out_features=50, bias=True)
  (lin3): Linear(in_features=50, out_features=10, bias=True)
  (lin4): Linear(in_features=10, out_features=1, bias=True)
)

2.	Implementing the regularization scheme named "Cutout" as proposed in the paper titled, "Improved Regularization of Convolutional Neural Networks with Cutout" (landing page here: https:// arxiv.org/abs/1708.04552).

In [19]:
def cutout(img, cutout_prob = 0.2):
    #img must have shape (n_channels, shape1, shape2)
    ToBeOrNotToBe = np.random.binomial(1, cutout_prob)
    if ToBeOrNotToBe == 1:
        shape1 = img.shape[0]
        shape2 = img.shape[1]
        x1 = np.random.randint(0, int(shape1*0.8))
        y1 = np.random.randint(0, int(shape2*0.8))
        x_shape = np.random.randint(1, int(shape1*0.2))
        y_shape = np.random.randint(1, int(shape2*0.2))
        img[x1:(x1+x_shape), y1:(y1+y_shape), :] = np.zeros((x_shape, y_shape, img.shape[2]))
    return img

def learning_process_with_cutout(params, data_train, data_val):
    #Make dataloaders that will split data to batches for learning
    trainLoader = torch.utils.data.DataLoader(data_train, batch_size=params.get('batch_size', 5),
                                              shuffle=True, num_workers=1)
    valLoader = torch.utils.data.DataLoader(data_val, batch_size=params.get('batch_size', 5),
                                             shuffle=False, num_workers=1)
    #Check if cuda is available. If it is, then we will do all learning on gpu to make it much faster
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    RegNet = VariousNet(n_layers = params.get('n_layers', 5), kernel_sizes = params.get('kernel_sizes', 3))
    RegNet.to(device)
    lossMSE = nn.MSELoss(reduce = True, reduction = 'mean')
    optimizer = torch.optim.Adam(RegNet.parameters(), lr = params.get('learning_rate', 0.01),
                                 weight_decay = params.get('l2_regularization', 0.1))

    for epoch in range(n_epoch):
        print('Epoch number {}'.format(epoch))
        #print('Number of batches is {}'.format(int(len(data_val)/batch_size)))
        train_losses = []
        RegNet.train() #Change neural network's mode to training mode
        for i, d in enumerate(trainLoader):
            #load data and make it right shape (batch size, number of channels, shape0, shape1)
            in_data = np.array([cutout(io.imread(os.path.join(data_name, name))) for name in d[0]])
            in_data = np.swapaxes(in_data, 1, 3)
            #if gpu is available, learning will be on gpu
            if device.type == 'cuda':
                in_data = torch.cuda.FloatTensor(in_data)
                target_data = np.array([float(i) for i in d[1]])
                target_data = torch.cuda.FloatTensor(target_data.reshape((target_data.shape[0], 1)))
            else:
                in_data = torch.cuda.FloatTensor(in_data)
                target_data = np.array([float(i) for i in d[1]])
                target_data = torch.Tensor(target_data.reshape((target_data.shape[0], 1)))
            optimizer.zero_grad()
            outputs = RegNet.forward(in_data)
            loss = lossMSE(outputs, target_data)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.cpu().item())
        train_losses = np.mean(train_losses)
        train_err_list.append(train_losses)
        print('Loss_train: {}'.format(train_losses))
        
        test_losses = []
        RegNet.eval()
        for i, d in enumerate(valLoader):
            in_data = np.array([cutout(io.imread(os.path.join(data_name, name))) for name in d[0]])
            in_data = np.swapaxes(in_data, 1, 3)
            if device.type == 'cuda':
                in_data = torch.cuda.FloatTensor(in_data)
                target_data = np.array([float(i) for i in d[1]])
                target_data = torch.cuda.FloatTensor(target_data.reshape((target_data.shape[0], 1)))
            else:
                in_data = torch.cuda.FloatTensor(in_data)
                target_data = np.array([float(i) for i in d[1]])
                target_data = torch.Tensor(target_data.reshape((target_data.shape[0], 1)))
            outputs = RegNet.forward(in_data)
            loss = lossMSE(outputs, target_data)
            test_losses.append(loss.cpu().item())
    
        test_losses = np.mean(test_losses)
        test_err_list.append(test_losses)
        print('Loss_test: {}'.format(test_losses))

    print('Finished Training')
    return RegNet, test_err_list[-1]

In [20]:
params = {'n_layers': 5, 'kernel_sizes': 3}
cutoutNet, cutoutErr = learning_process_with_cutout(params, data_train, data_val)

Epoch number 0
Loss_train: 7160.256997307539
Loss_test: 1713.0972942848502
Epoch number 1
Loss_train: 4162.889698725541
Loss_test: 3782.00986618042
Epoch number 2
Loss_train: 4591.108309500218
Loss_test: 1230.48572381258
Epoch number 3
Loss_train: 1.1843448315799107e+17
Loss_test: 12458505958778.88
Epoch number 4
Loss_train: 6246338604400.64
Loss_test: 9968887390146.56
Finished Training


3.	Implementing the regularization scheme titled, "MixUp", as proposed in the paper titled, "mixup: Be- yond Empirical Risk Minimization" (landing page here: https://arxiv.org/abs/1710. 09412).


In [21]:
def mixup(imgs, targets):
    coef = np.random.uniform(0, 1)
    img_res = imgs[0]*coef + imgs[1]*(1-coef)
    target_res = targets[0]*coef + targets[1]*(1-coef)
    return img_res, target_res

def learning_process_with_mixup(params, data_train, data_val):
    #Make dataloaders that will split data to batches for learning
    trainLoader = torch.utils.data.DataLoader(data_train, batch_size=params.get('batch_size', 5),
                                              shuffle=True, num_workers=1)
    valLoader = torch.utils.data.DataLoader(data_val, batch_size=params.get('batch_size', 5),
                                             shuffle=False, num_workers=1)
    #Check if cuda is available. If it is, then we will do all learning on gpu to make it much faster
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    RegNet = VariousNet(n_layers = params.get('n_layers', 5), kernel_sizes = params.get('kernel_sizes', 3))
    RegNet.to(device)
    lossMSE = nn.MSELoss(reduce = True, reduction = 'mean')
    optimizer = torch.optim.Adam(RegNet.parameters(), lr = params.get('learning_rate', 0.01),
                                 weight_decay = params.get('l2_regularization', 0.1))

    for epoch in range(n_epoch):
        print('Epoch number {}'.format(epoch))
        #print('Number of batches is {}'.format(int(len(data_val)/batch_size)))
        train_losses = []
        RegNet.train() #Change neural network's mode to training mode
        for i, d in enumerate(trainLoader):
            #load data and make it right shape (batch size, number of channels, shape0, shape1)
            in_data = np.array([cutout(io.imread(os.path.join(data_name, name))) for name in d[0]])
            in_data = np.swapaxes(in_data, 1, 3)
            
            target_data = np.array([float(i) for i in d[1]])
            
            mix1_ind = np.random.randint(0, len(target_data), int(len(target_data)*0.3))
            to_mixup1 = in_data[mix1_ind, :, :, :]
            to_mixup11 = target_data[mix1_ind]
            mix2_ind = np.random.randint(0, len(target_data), int(len(target_data)*0.3))
            to_mixup2 = in_data[mix2_ind, :, :, :]
            to_mixup21 = target_data[mix2_ind]
            
            new_data, new_target = mixup((to_mixup1, to_mixup2), (to_mixup11, to_mixup21))
            
            in_data = np.concatenate((in_data, new_data), axis = 0)
            target_data = np.concatenate((target_data, new_target), axis = 0)
            #if gpu is available, learning will be on gpu
            if device.type == 'cuda':
                in_data = torch.cuda.FloatTensor(in_data)
                target_data = torch.cuda.FloatTensor(target_data.reshape((target_data.shape[0], 1)))
            else:
                in_data = torch.cuda.FloatTensor(in_data)
                target_data = torch.Tensor(target_data.reshape((target_data.shape[0], 1)))
            optimizer.zero_grad()
            outputs = RegNet.forward(in_data)
            loss = lossMSE(outputs, target_data)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.cpu().item())
        train_losses = np.mean(train_losses)
        train_err_list.append(train_losses)
        print('Loss_train: {}'.format(train_losses))
        
        test_losses = []
        RegNet.eval()
        for i, d in enumerate(valLoader):
            in_data = np.array([io.imread(os.path.join(data_name, name)) for name in d[0]])
            in_data = np.swapaxes(in_data, 1, 3)
            if device.type == 'cuda':
                in_data = torch.cuda.FloatTensor(in_data)
                target_data = np.array([float(i) for i in d[1]])
                target_data = torch.cuda.FloatTensor(target_data.reshape((target_data.shape[0], 1)))
            else:
                in_data = torch.cuda.FloatTensor(in_data)
                target_data = np.array([float(i) for i in d[1]])
                target_data = torch.Tensor(target_data.reshape((target_data.shape[0], 1)))
            outputs = RegNet.forward(in_data)
            loss = lossMSE(outputs, target_data)
            test_losses.append(loss.cpu().item())
    
        test_losses = np.mean(test_losses)
        test_err_list.append(test_losses)
        print('Loss_test: {}'.format(test_losses))

    print('Finished Training')
    return RegNet, test_err_list[-1]

In [22]:
mixNet, mixErr = learning_process_with_mixup(params, data_train, data_val)

Epoch number 0
Loss_train: 25331464.0863622
Loss_test: 5313.5547183990475
Epoch number 1
Loss_train: 6525.405307235717
Loss_test: 1958.419469306469
Epoch number 2
Loss_train: 4931.019961036046
Loss_test: 1846.9464272904397
Epoch number 3
Loss_train: 5091.423900222779
Loss_test: 1472.0443218493463
Epoch number 4
Loss_train: 3893.8353823216758
Loss_test: 2309.1153281229736
Finished Training
