## Linear vs Non Linear

Can a feed-forward neural network learn non-linear functions?

In [None]:
import torch
from torch.autograd import Variable
import numpy as np
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torchvision.utils import save_image
import torch.optim as optim
from torch import nn
import matplotlib.pyplot as plt
%matplotlib inline


# Pick the compute device once: CUDA when a GPU is visible, otherwise the CPU.
# `dtype` is the matching legacy float tensor type for that device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dtype = torch.cuda.FloatTensor if device.type == 'cuda' else torch.FloatTensor

# Report the selection so the notebook output shows where the model will run.
print(device)
print(dtype)

def save_everything(model_object,optimizer_object,filepath):
    """
    Write a single checkpoint holding the model state and the optimizer state.

    The dictionary returned by ``model_object.save_model()`` is extended in
    place with an ``'optimizer'`` entry (``optimizer_object.state_dict()``)
    and then serialized to ``filepath`` with ``torch.save``.
    """
    checkpoint = model_object.save_model()
    checkpoint.update(optimizer=optimizer_object.state_dict())
    torch.save(checkpoint, filepath)
        
def load_everything(model_object,optimizer_object,filepath):
    """
    Restore the model and the optimizer from a checkpoint file.

    The checkpoint is read with ``torch.load``; the whole dictionary is handed
    to ``model_object.load_model`` and its ``'optimizer'`` entry is handed to
    ``optimizer_object.load_state_dict``.
    """
    checkpoint = torch.load(filepath)
    model_object.load_model(checkpoint)
    optimizer_state = checkpoint['optimizer']
    optimizer_object.load_state_dict(optimizer_state)

def show_batch(images):
    """
    Display a batch of images as one tiled picture.

    The batch is arranged into a grid with ``torchvision.utils.make_grid`` and
    the axes are reordered from (channel, row, column) to (row, column,
    channel) before the result is passed to ``plt.imshow``.
    """
    grid = torchvision.utils.make_grid(images)
    channels_last = grid.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)
    
def show_names(labels):
    """
    Print the clothing-category names for a sequence of class indices.

    Each element of ``labels`` must provide ``.item()`` returning an integer
    in 0..9; the corresponding names are printed as one list.
    """
    labels_dict = {
        0: "Tshirt",
        1: "Trouser",
        2: "Pullover",
        3: "Dress",
        4: "Coat",
        5: "Sandal",
        6: "Shirt",
        7: "Sneaker",
        8: "Bag",
        9: "Ankle boot",
    }
    print([labels_dict[label.item()] for label in labels])

In [None]:
class CNNModule(nn.Module):
    """
    Small convolutional classifier for 1x28x28 images with 10 output classes.

    Layers: Conv(1->16, 5x5) -> ELU -> MaxPool(2) -> Conv(16->32, 5x5) -> ELU
    -> MaxPool(2) -> Linear(32*7*7 -> 10).

    Besides the network, the object keeps training bookkeeping
    (``loss_history``, ``epoch``, ``training_time``) that ``save_model`` and
    ``load_model`` store and restore together with the weights.
    """

    def __init__(self, criterion):
        """
        Build the layers and reset the bookkeeping.

        Args:
            criterion: callable ``criterion(output, target)`` returning the
                loss tensor used by ``compute_loss``.
        """
        super().__init__()

        # Auxiliary variables
        self.loss_history = []   # one average loss per trained epoch
        self.epoch = 0           # number of epochs trained so far
        self.training_time = 0   # accumulated seconds spent in train()
        self.criterion = criterion

        # First convolution layer. The image size remains unchanged:
        # Floor[ (28 + 2*padding - kernel_size)/stride + 1 ] = Floor[28+4-5+1] = 28x28
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2)
        # The attribute is called "relu" but the activation actually used is ELU.
        self.relu1 = nn.ELU()
        nn.init.xavier_uniform_(self.cnn1.weight)

        # The max pool with kernel_size 2 reduces the image to 14x14
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)

        # Second convolution layer. The image size remains unchanged:
        # Floor[ (14 + 2*padding - kernel_size)/stride + 1 ] = Floor[14+4-5+1] = 14x14
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ELU()
        nn.init.xavier_uniform_(self.cnn2.weight)

        # The max pool with kernel_size 2 reduces the image to 7x7
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        # The last layer is fully connected: 7x7x32 features -> 10 classes
        self.fcl = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return the (batch, 10) class scores for a (batch, 1, 28, 28) input."""
        # First convolution
        out = self.cnn1(x)
        out = self.relu1(out)
        # First max pool
        out = self.maxpool1(out)
        # Second convolution
        out = self.cnn2(out)
        out = self.relu2(out)
        # Second max pool
        out = self.maxpool2(out)
        # Flattening: keep the batch dimension, merge everything else
        out = out.view(out.size(0), -1)
        # Dense layer
        out = self.fcl(out)
        return out

    def save_model(self):
        """Return a dict with the weights and the training bookkeeping."""
        state = {
            'state_dict': self.state_dict(),
            # Snapshot, so later epochs do not alter an already exported state.
            'loss_history': list(self.loss_history),
            'epoch': self.epoch,
            'training_time': self.training_time,
        }
        return state

    def load_model(self, state):
        """Restore weights and bookkeeping from a dict made by ``save_model``."""
        self.load_state_dict(state['state_dict'])
        self.loss_history = list(state['loss_history'])
        self.epoch = state['epoch']
        self.training_time = state['training_time']

    def compute_loss(self, output, target):
        """Apply the criterion given at construction to ``output`` and ``target``."""
        loss = self.criterion(output, target)
        return loss

    def train_one_epoch(self, trainloader, optimizer=None):
        """
        Run one pass over ``trainloader`` and update the parameters.

        Appends the mean mini-batch loss to ``loss_history`` and increments
        ``epoch``.

        Args:
            trainloader: iterable yielding ``(images, labels)`` mini-batches.
            optimizer: optimizer that updates this model's parameters. When
                omitted, a module-level variable named ``optimizer`` is used,
                which is how this method was originally called.

        Raises:
            NameError: if no optimizer is passed and no module-level
                ``optimizer`` exists.
        """
        if optimizer is None:
            # Backward compatibility with callers relying on a global optimizer.
            optimizer = globals().get('optimizer')
            if optimizer is None:
                raise NameError(
                    "no optimizer available: pass optimizer=... or define a "
                    "module-level 'optimizer'"
                )

        # Inputs AND targets must live where the parameters live; otherwise
        # the loss computation fails on GPU with a device mismatch.
        model_device = next(self.parameters()).device

        batch_losses = []
        for img, labels in trainloader:  # loop over mini-batches
            images_in = img.to(model_device)
            targets = labels.to(model_device)

            # Run the CNN forward and compute the loss
            output = self(images_in)
            loss = self.compute_loss(output, targets)
            batch_losses.append(loss.item())

            # For each mini-batch reset the gradients, backpropagate, update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Save the average loss of the epoch as a plain float, so checkpoints
        # contain no numpy scalars (torch.load with weights_only=True rejects them).
        self.loss_history.append(float(np.mean(batch_losses)))
        self.epoch += 1

    def train(self, trainloader=True, DELTA_EPOCHS=None, optimizer=None):
        """
        Train for ``DELTA_EPOCHS`` epochs, or switch the module's mode.

        This method shadows ``nn.Module.train(mode)``, which PyTorch itself
        calls from ``eval()``. To keep that API working, a boolean first
        argument is forwarded to ``nn.Module.train`` and its result returned.

        Args:
            trainloader: iterable yielding ``(images, labels)`` mini-batches,
                or a bool to set training/evaluation mode.
            DELTA_EPOCHS: number of epochs to run; required with a loader.
            optimizer: forwarded to ``train_one_epoch``.

        Raises:
            TypeError: if a loader is given without ``DELTA_EPOCHS``.
        """
        if isinstance(trainloader, bool):
            return super().train(trainloader)
        if DELTA_EPOCHS is None:
            raise TypeError("train() needs DELTA_EPOCHS when given a data loader")

        import timeit
        tic = timeit.default_timer()

        for _ in range(DELTA_EPOCHS):
            self.train_one_epoch(trainloader, optimizer)
            print("EPOCH loss=",self.epoch,self.loss_history[-1])

        toc = timeit.default_timer()
        self.training_time += toc - tic

   

In [None]:
if __name__ == '__main__':
    # Train the CNN on FashionMNIST in rounds of 10 epochs, writing a
    # checkpoint and reporting the test accuracy after every round.

    BATCH_SIZE = 64

    transform = transforms.ToTensor()
    # Alternative with normalization:
    # transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

    # Load and transform data
    trainset = torchvision.datasets.FashionMNIST('./data', train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)

    testset = torchvision.datasets.FashionMNIST('./data', train=False, download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)

    # Extract the image size from one training batch
    images, labels = next(iter(trainloader))
    _, n_channel, height, width = images.size()
    print(n_channel, height, width)

    # Take the first test batch (always the same one, since shuffle=False)
    images_test_in, labels_test = next(iter(testloader))

    # Look at the data
    show_batch(images_test_in)
    show_names(labels_test)

    # Instantiate the CNN on either GPU or CPU
    criterion = nn.CrossEntropyLoss()
    cnn = CNNModule(criterion).to(device)

    # Select the optimizer. Alternatives:
    # optimizer = optim.Adam(cnn.parameters(), lr=0.0001)
    # optimizer = optim.RMSprop(cnn.parameters())
    optimizer = torch.optim.SGD(cnn.parameters(), lr=0.015)

    for k in range(0, 11):
        if k > 0:
            # Continue from the checkpoint written at the end of the previous round
            load_everything(cnn, optimizer, './cnn_' + str(k - 1) + '.pth')

        cnn.train(trainloader, 10)  # train the model for 10 EPOCHS
        save_everything(cnn, optimizer, './cnn_' + str(k) + '.pth')

        # Evaluate the accuracy on the test set
        correct = 0
        total = 0
        with torch.no_grad():
            for img, labels in testloader:
                # Both inputs and labels go to the model's device, so the
                # comparison below does not mix CPU and GPU tensors.
                images_in = img.to(device)
                labels = labels.to(device)
                output = cnn(images_in)
                _, predicted = torch.max(output, 1)
                total += labels.size(0)
                # .item() keeps the counter a plain int instead of a tensor
                correct += (predicted == labels).sum().item()
        accuracy = 100.0 * correct / total

        # Report the round index and the latest epoch loss recorded by the model
        # (the original referenced the builtin `iter` and an undefined `loss`).
        print("Iteration:"+str(k)+"  Loss:"+str(cnn.loss_history[-1])+"  Accuracy:"+str(accuracy))