In [1]:
import numpy as np
import pandas as pd
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.autograd import Function
from torch.nn import Parameter
from torchvision import datasets,transforms
import torch.optim as optim
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from collections import OrderedDict 
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import time

In [2]:
def softplus_activation(Z):
    """
    computes the softplus_activation of the scalar or vector Z
    
    arguments:
    Z --- a scalar or vector of any size
    
    returns:
    sofplus --- softplus of Z
    """
    softplus=torch.log(1+torch.exp(Z))
    
    return softplus

class Softplus(nn.Module):
    def __init__(self):
        super().__init__()
        
    def forward(self,Z):
        return softplus_activation(Z)

In [3]:
def tanh_activation(Z):
    """
    computes the tanh_activation of the scalar or vector Z
    
    arguments:
    Z --- a scalar or vector of any size
    
    returns:
    tanh --- tanh of Z
    """
    tanh=torch.tanh(Z)
    
    return tanh

class Tanh(nn.Module):
    def __init__(self):
        super().__init__()
        
    def forward(self,Z):
        return tanh_activation(Z)

In [4]:
def swish_activation(Z):
    """
    computes the swish_activation of the scalar or vector Z
    
    arguments:
    Z --- a scalar or vector of any size
    
    returns:
    swish --- swish of Z
    """
    return Z*torch.sigmoid(Z)

class Swish(nn.Module):
    def __init__(self):
        super().__init__()
        
    def forward(self,Z):
        return swish_activation(Z)
    

In [5]:
def mish_activation(Z):
    """
    computes the mish_activation of the scalar or vector Z
    
    arguments:
    Z --- a scalar or vector of any size
    
    returns:
    mish --- mish of Z
    """
    mish=Z*(torch.tanh(torch.log(1+torch.exp(Z))))
    
    return mish

class Mish(nn.Module):
    def __init__(self):
        super().__init__()
        
    def forward(self,Z):
        return mish_activation(Z)


In [6]:
train=datasets.MNIST("",train=True,download=True, transform=transforms.Compose([transforms.ToTensor()]))
test=datasets.MNIST("",train=False,download=True,transform=transforms.Compose([transforms.ToTensor()]))
testset=torch.utils.data.DataLoader(test,batch_size=256, shuffle=True)
train_size = int(0.9 * len(train))
val_size = len(train) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(train, [train_size, val_size])
train_dataset=torch.utils.data.DataLoader(train_dataset,batch_size=256,shuffle=True)
val_dataset=torch.utils.data.DataLoader(val_dataset,batch_size=256,shuffle=True)

In [13]:
class net(nn.Module):
    def __init__(self,activation):
        super().__init__()
        #self.activation=activation()
        self.fc1=nn.Linear(256,256)
        self.fc2=nn.Linear(256,128)
        self.fc3=nn.Linear(128,10)
        self.conv1=nn.Conv2d(1,32,kernel_size=2)
        self.conv2=nn.Conv2d(32,64,kernel_size=2)
        self.conv3=nn.Conv2d(64,128,kernel_size=2)
        self.conv4=nn.Conv2d(128,256,kernel_size=2)

    def forward(self,x,activation):
        
        #print(x.size())
        
        x=F.max_pool2d(activation(self.conv1(x)),(2,2))
        #print(x.size())
        
        x=F.max_pool2d(activation(self.conv2(x)),(2,2))
        #print(x.size())
        x=F.max_pool2d(activation(self.conv3(x)),(2,2))
        
        x=F.max_pool2d(activation(self.conv4(x)),(1,1))
        #print(x.size())
        x=x.view(x.size(0),-1)
        #print(x.size())
        x=activation(self.fc1(x))
        #print(x.size())
        x=activation(self.fc2(x))
        #print(x.size())
        x=self.fc3(x)
        #print(x.size())
        return x       
        
Net=net(activation)
print(Net)
    

net(
  (fc1): Linear(in_features=256, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=10, bias=True)
  (conv1): Conv2d(1, 32, kernel_size=(2, 2), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(2, 2), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(2, 2), stride=(1, 1))
  (conv4): Conv2d(128, 256, kernel_size=(2, 2), stride=(1, 1))
)


In [14]:
optimizer =optim.Adam(Net.parameters())
loss_function=nn.CrossEntropyLoss()

In [15]:

activation_functions={ 'softplus':Softplus() , 'tanh':Tanh(),
                     'swish':Swish() , 'mish':Mish()}

for activation in activation_functions:
    print('\n',activation,'\n')
    #training
    epochs=5
    for epoch in range(epochs):
        start=time.time()
        loss_final=0
        loss_train=0
        for data_train in train_dataset:
        
            x,y=data_train
            #print(y)
        
            output=Net(x,activation_functions[activation])
            #print(output)
            Net.zero_grad()
            loss=loss_function(output,y)
            loss.backward()
            optimizer.step()
            loss_train+=loss.item()
        end=time.time()
        loss_final=loss_train/len(train_dataset)
        
        print("epoch:", {epoch} , "loss:" ,{loss_final},'time:' ,{end-start} ,'\n')
        
        #validation
        
    correct = 0
    total = 0
    total_val_loss=0
    with torch.no_grad():
         for data_val in val_dataset:
            image,label=data_val
            real_class = torch.argmax(image)
            net_out = Net(image,activation_functions[activation])  
            val_loss=loss_function(net_out,label)
            total_val_loss+=val_loss.item()
            for i,p in enumerate(net_out):
                if label[i]==torch.max(p,0)[1]:  # torch.max(p,0) calculates the maximum value along with its indices and returms a 2d tensor and by using [1] we take its indices
                    correct+=1
                    total+=1
    print("validation loss:",{total_val_loss/len(val_dataset)},"Accuracy: ", round(correct/total, 3),'\n')
    
    #Accuracy is in ratio and not percentage


 softplus 

epoch: {0} loss: {1.6385494544042796} time: {171.85923409461975} 

epoch: {1} loss: {0.3513472583056626} time: {173.3243043422699} 

epoch: {2} loss: {0.19736817927580874} time: {175.6757025718689} 

epoch: {3} loss: {0.13560946096811816} time: {175.922602891922} 

epoch: {4} loss: {0.09753877623668779} time: {175.08676838874817} 

validation loss: {0.11911824407676856} Accuracy:  1.0 


 tanh 

epoch: {0} loss: {0.2126905798770805} time: {159.35404706001282} 

epoch: {1} loss: {0.07592511978618341} time: {158.2499134540558} 

epoch: {2} loss: {0.054935668325890294} time: {157.91201257705688} 

epoch: {3} loss: {0.043647996714930117} time: {158.48081755638123} 

epoch: {4} loss: {0.03278982912199086} time: {160.1762056350708} 

validation loss: {0.07177762365123878} Accuracy:  1.0 


 swish 

epoch: {0} loss: {0.1456856356218669} time: {208.79032611846924} 

epoch: {1} loss: {0.045578881678882084} time: {206.36493635177612} 

epoch: {2} loss: {0.03239683791447731} time: {2