In [14]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import random as rand
%matplotlib inline
import matplotlib.pyplot as plt 
import numpy as np
import seaborn as sns
from sklearn.metrics import confusion_matrix

In [15]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.model_selection import cross_val_score

In [16]:
trainset = dsets.CIFAR10('./data', train=True, download=True, transform=transforms.ToTensor())
testset = dsets.CIFAR10('./data', train=False, download=True, transform=transforms.ToTensor())

Files already downloaded and verified
Files already downloaded and verified


In [17]:
if torch.cuda.is_available():
    avDev = torch.device("cuda")
else:
    avDev = torch.device("cpu")

print(avDev)

cuda


In [18]:
torch.manual_seed(1)
np.random.seed(1)

In [19]:
len(trainset)

50000

In [20]:
# STEP 2: MAKING DATASET ITERABLE
 
batch_size = 500
n_iters = 5000
num_epochs = n_iters / (len(trainset) / batch_size)
num_epochs = int(num_epochs)
 
train_loader = torch.utils.data.DataLoader(dataset=trainset, 
                                           batch_size=batch_size, 
                                           shuffle=True)
 
test_loader = torch.utils.data.DataLoader(dataset=testset, 
                                          batch_size=len(testset), 
                                          shuffle=False)
 

In [21]:
'''
STEP 3: CREATE MODEL CLASS
'''
class LogisticRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim, activation_fn):
        super(LogisticRegressionModel, self).__init__()
        self.linear1 = nn.Linear(input_dim, 3000)
        self.linear1_drop = nn.Dropout(0.2)
        self.linear2 = nn.Linear(3000, 3000)
        self.linear2_drop = nn.Dropout(0.2)
        self.linear3 = nn.Linear(3000, output_dim)
        self.activation_fn = activation_fn
     
    def forward(self, x):
        out = self.activation_fn(self.linear1(x))
        layer1_out = self.linear1_drop(out)
        out = self.activation_fn(self.linear2(layer1_out))
        layer2_out = self.linear2_drop(out)
        out = self.linear3(layer2_out)
        return out
    

In [22]:
'''
STEP 5: INSTANTIATE LOSS CLASS
'''
criterion = nn.CrossEntropyLoss().to(avDev)


In [29]:
# optimizers = [torch.optim.SGD, optimizer_Adam, optimizer_Adagrad, optimizer_Adadelta, optimizer_RMSprop]

optimizers = ['SGD','Adam','Adagrad', 'Adadelta', 'RMSprop']
activation_fns = [nn.ReLU(), nn.Tanh(), nn.Sigmoid()]

In [24]:
#L1, and L2 regularization parms

lambda1 = 0.001
lambda2 = 0.1

In [30]:
'''
STEP 7: TRAIN THE MODEL
'''
def train_model():
    for activation_fn in activation_fns:
        
        input_dim = 3*32*32
        output_dim = 10
        
        model = None
        model = LogisticRegressionModel(input_dim, output_dim, activation_fn)
        model.to(avDev)
        
        for optimizer in optimizers:
            
            print('Activation Function: {}. Optimizer: {}'.format(activation_fn,optimizer))
                  
            if optimizer == 'SGD':
                optimizer = torch.optim.SGD(model.parameters(), lr=0.03)
                
            elif optimizer == 'Adam':
                optimizer = torch.optim.Adam(model.parameters(), lr=0.0003)
                
            elif optimizer == 'Adagrad':
                optimizer = torch.optim.Adagrad(model.parameters(), lr=0.0003)
                
            elif optimizer == 'Adadelta':
                optimizer = torch.optim.Adadelta(model.parameters(), lr=0.0003)
                
            elif optimizer == 'RMSprop':
                optimizer = torch.optim.RMSprop(model.parameters(), lr=0.0003)

            loss_save = np.empty(n_iters-1)
            iter = 0
            for epoch in range(num_epochs):
                for i, (images, labels) in enumerate(train_loader):

                    images = images.view(-1, 3*32*32).to(avDev)
                    labels = labels.to(avDev)

                    # Clear gradients w.r.t. parameters
                    optimizer.zero_grad()


                    # Forward pass to get output/logits
                    outputs = model(images)
            #         outputs = model(images)

                    # Calculate Loss: softmax --> cross entropy loss
                    cross_entropy_loss = criterion(outputs, labels)#


                    all_linear1_params = torch.cat([x.view(-1) for x in model.linear1.parameters()])
                    all_linear2_params = torch.cat([x.view(-1) for x in model.linear2.parameters()])
                    l1_regularization = lambda1 * torch.norm(all_linear1_params, 1)
                    l2_regularization = lambda2 * torch.norm(all_linear2_params, 2)

                    loss = cross_entropy_loss
                    # loss = cross_entropy_loss + l1regularization + l2_regularization
                    loss = cross_entropy_loss + l2_regularization
                    # Getting gradients w.r.t. parameters
                    loss.backward()

                    # Updating parameters
                    optimizer.step()

                    #Save Loss    

                    loss_save[iter-1] = loss.item()
                    iter += 1

                    if iter % 200 == 0:
                        # Calculate Accuracy         
                        correct = 0
                        total = 0
                        # Iterate through test dataset
                        for images, labels in test_loader:
                            #######################
                            #  USE GPU FOR MODEL  #
                            #######################
                            images = images.view(-1, 3*32*32).to(avDev)

                            # Forward pass only to get logits/output
                            outputs = model(images)

                            # Get predictions from the maximum value
                            _, predicted = torch.max(outputs.data, 1)

                            # Total number of labels
                            total += labels.size(0)

                            #######################
                            #  USE GPU FOR MODEL  #
                            #######################
                            # Total correct predictions
                            correct += (predicted.cpu() == labels.cpu()).sum().float()

                        accuracy = 100. * correct / total

                        # Print Loss
                        print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))


In [None]:
train_model()

Activation Function: ReLU(). Optimizer: SGD
Iteration: 200. Loss: 5.036365032196045. Accuracy: 30.81999969482422
Iteration: 400. Loss: 4.907623767852783. Accuracy: 34.81999969482422
Iteration: 600. Loss: 4.817268371582031. Accuracy: 37.209999084472656
Iteration: 800. Loss: 4.669703006744385. Accuracy: 38.68000030517578
Iteration: 1000. Loss: 4.5388007164001465. Accuracy: 40.779998779296875
Iteration: 1200. Loss: 4.395656585693359. Accuracy: 42.029998779296875
Iteration: 1400. Loss: 4.325504779815674. Accuracy: 43.2599983215332
Iteration: 1600. Loss: 4.301024436950684. Accuracy: 42.119998931884766
Iteration: 1800. Loss: 4.226015567779541. Accuracy: 44.83000183105469
Iteration: 2000. Loss: 4.133245944976807. Accuracy: 44.650001525878906
Iteration: 2200. Loss: 4.029412269592285. Accuracy: 44.720001220703125
Iteration: 2400. Loss: 4.002126693725586. Accuracy: 43.97999954223633
Iteration: 2600. Loss: 3.946878433227539. Accuracy: 44.59000015258789
Iteration: 2800. Loss: 3.8098902702331543. A

Iteration: 3000. Loss: 1.5556707382202148. Accuracy: 50.150001525878906
Iteration: 3200. Loss: 1.418527603149414. Accuracy: 49.27000045776367
Iteration: 3400. Loss: 1.5757226943969727. Accuracy: 50.31999969482422
Iteration: 3600. Loss: 1.4734660387039185. Accuracy: 50.849998474121094
Iteration: 3800. Loss: 1.3816049098968506. Accuracy: 49.59000015258789
Iteration: 4000. Loss: 1.4171044826507568. Accuracy: 48.970001220703125
Iteration: 4200. Loss: 1.4374744892120361. Accuracy: 50.63999938964844
Iteration: 4400. Loss: 1.4622917175292969. Accuracy: 46.91999816894531
Iteration: 4600. Loss: 1.3940668106079102. Accuracy: 51.5
Iteration: 4800. Loss: 1.386926531791687. Accuracy: 49.36000061035156
Iteration: 5000. Loss: 1.2961565256118774. Accuracy: 51.15999984741211
Activation Function: Tanh(). Optimizer: SGD
Iteration: 200. Loss: 5.020430088043213. Accuracy: 34.77000045776367
Iteration: 400. Loss: 4.909233093261719. Accuracy: 36.0
Iteration: 600. Loss: 4.730772018432617. Accuracy: 37.72000122

In [None]:
correct = 0
total = 0
model.to(avDev)

with torch.no_grad():
    for data in train_loader:
        images, labels = data
        images = images.view(-1, 3*32*32).to(avDev)
        labels = labels.to(avDev)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the  train images: %d %%' % (
    100 * correct / total))

In [None]:
print(images.size())

In [None]:
outputs.size()

In [None]:
labels.size()

# Learning Curve

In [None]:
ax = sns.lineplot(data = loss_save)
ax.set(xlabel = "Iterations", ylabel="Loss")
plt.show()

# Confusion Matrix

In [None]:
model.to(avDev)

with torch.no_grad():
    for data in test_loader:
        images, labels = data
        images = images.view(-1, 3*32*32).to(avDev)
        labels = labels.to(avDev)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        
cm = confusion_matrix(predicted.cpu(), labels.cpu()).astype(np.int)

In [None]:
import pandas as pd
CLASSES = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
plt.figure(figsize=(12,10))
df_cm = pd.DataFrame(cm,CLASSES,CLASSES)
sns.set(font_scale=1.4)
ax = sns.heatmap(df_cm,annot=True,fmt='d')
bottom, top = ax.get_ylim()
ax.set_ylim(bottom + 0.5, top - 0.5)
plt.show()