<a href="https://colab.research.google.com/github/itissandeep98/ML-Assignments/blob/master/Assignment3/ML_Assignment3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Imports 

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
import pickle
import random
import seaborn as sns
from PIL import Image
from copy import deepcopy
from sklearn.manifold import TSNE
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torchvision import models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn.functional as F

# Pre Processing

In [None]:
class MyPreProcessor():
    """
    My steps for pre-processing for the All datasets.

    Every dataset is read from a fixed location under
    /content/drive/MyDrive/ML_Assignment3/.
    """

    def __init__(self):
        pass

    def pre_process(self, dataset):
        """
        Reading the file and preprocessing the input and output.
        Note that you will encode any string value and/or remove empty entries in this function only.
        Further any pre processing steps have to be performed in this function too.

        Parameters
        ----------

        dataset : integer with acceptable values 0, 1, 2, 3 or 4
        0 -> mnist_train.csv.zip (first column is the label; labels are one-hot encoded)
        1 -> largeTrain.csv (first column is the label)
        2 -> largeValidation.csv (first column is the label)
        3 -> train_CIFAR.pickle (dict with keys 'X' and 'Y')
        4 -> test_CIFAR.pickle (dict with keys 'X' and 'Y')

        Returns
        -------
        X : 2-dimensional numpy array of shape (n_samples, n_features) for the
            CSV datasets; for the pickles, whatever is stored under 'X'
        y : labels. For dataset 0 a one-hot array of shape (n_samples, n_classes);
            for datasets 1 and 2 a 1-dimensional array of shape (n_samples,);
            for the pickles, whatever is stored under 'Y'

        Raises
        ------
        ValueError : if dataset is not one of the supported ids
        """
        if dataset == 0:
            df = pd.read_csv("/content/drive/MyDrive/ML_Assignment3/mnist_train.csv.zip")
            # Return numpy (as documented) rather than a DataFrame.
            X = df.iloc[:, 1:].to_numpy()
            labels = df.iloc[:, 0].to_numpy()
            # One-hot encode: row i gets a 1 in column labels[i].
            y = np.zeros((labels.size, labels.max() + 1))
            y[np.arange(labels.size), labels] = 1

        elif dataset == 1:
            df = pd.read_csv("/content/drive/MyDrive/ML_Assignment3/largeTrain.csv", header=None)
            X = df.iloc[:, 1:].to_numpy()
            y = df[0].to_numpy()

        elif dataset == 2:
            df = pd.read_csv("/content/drive/MyDrive/ML_Assignment3/largeValidation.csv", header=None)
            X = df.iloc[:, 1:].to_numpy()
            y = df[0].to_numpy()

        elif dataset == 3:
            # NOTE: pickle must only be used on trusted files.
            with open("/content/drive/MyDrive/ML_Assignment3/train_CIFAR.pickle", "rb") as handle:
                df = pickle.load(handle)
            X = df['X']
            y = df['Y']

        elif dataset == 4:
            with open("/content/drive/MyDrive/ML_Assignment3/test_CIFAR.pickle", "rb") as handle:
                df = pickle.load(handle)
            X = df['X']
            y = df['Y']

        else:
            # Previously this fell through to an UnboundLocalError on `return X, y`.
            raise ValueError("Unknown dataset id: {!r} (expected 0, 1, 2, 3 or 4)".format(dataset))

        return X, y


preprocessor = MyPreProcessor()


#My Neural Network

In [None]:
class MyNeuralNetwork():
    """
    My implementation of a Neural Network Classifier.

    A fully connected feed-forward network. Hidden layers use the configured
    activation, the output layer always uses softmax, and training is
    mini-batch gradient descent on the cross-entropy loss. Labels passed to
    `fit` and `score` must be one-hot encoded (2-dimensional).
    """

    acti_fns = ['relu', 'sigmoid', 'linear', 'tanh']
    weight_inits = ['zero', 'random', 'normal']

    def __init__(self, n_layers, layer_sizes, activation, learning_rate, weight_init, batch_size, num_epochs):
        """
        Initializing a new MyNeuralNetwork object

        Parameters
        ----------
        n_layers : int value specifying the number of layers

        layer_sizes : integer array of size n_layers specifying the number of nodes in each layer

        activation : string specifying the activation function to be used
                     possible inputs: relu, sigmoid, linear, tanh

        learning_rate : float value specifying the learning rate to be used

        weight_init : string specifying the weight initialization function to be used
                      possible inputs: zero, random, normal

        batch_size : int value specifying the batch size to be used

        num_epochs : int value specifying the number of epochs to be used
        """

        if activation not in self.acti_fns:
            raise Exception('Incorrect Activation Function')

        if weight_init not in self.weight_inits:
            raise Exception('Incorrect Weight Initialization Function')

        self.n_layers = n_layers
        self.layer_sizes = layer_sizes
        self.activation = activation
        self.learning_rate = learning_rate
        self.weight_init = weight_init
        self.batch_size = batch_size
        self.num_epochs = num_epochs

        # weights[i] / bias[i] connect layer i to layer i+1, so there are
        # n_layers-1 of each. Biases always start at zero.
        self.weights = {}
        self.bias = {}
        for i in range(self.n_layers - 1):
            shape = (self.layer_sizes[i], self.layer_sizes[i + 1])
            self.weights[i] = np.array(self.weight_func(shape))
            self.bias[i] = np.zeros(self.layer_sizes[i + 1])

    def activation_func(self, X):
        """
        Calculating the configured hidden-layer activation and its derivative

        Parameters
        ----------
        X : numpy array of pre-activation values

        Returns
        -------
        x_calc : numpy array after applying the activation function over X
        x_derv : numpy array after applying the derivative of the activation function over X
        """
        # The old fallback branch called a non-existent `softmax_grad`; fail
        # with the same error the constructor uses instead.
        if self.activation not in self.acti_fns:
            raise Exception('Incorrect Activation Function')
        # Methods are named `<activation>` and `<activation>_grad`.
        forward = getattr(self, self.activation)
        gradient = getattr(self, self.activation + '_grad')
        return forward(X), gradient(X)

    def relu(self, X):
        """
        Calculating the ReLU activation for a particular layer

        Parameters
        ----------
        X : numpy array

        Returns
        -------
        x_calc : numpy array after calculating the necessary function over X
        """
        return X * (X >= 0)

    def relu_grad(self, X):
        """
        Calculating the gradient of ReLU activation for a particular layer

        Parameters
        ----------
        X : numpy array

        Returns
        -------
        x_calc : numpy array after calculating the necessary function over X
                 (1 where X >= 0, else 0)
        """
        return 1 * (X >= 0)

    def sigmoid(self, X):
        """
        Calculating the Sigmoid activation for a particular layer

        Parameters
        ----------
        X : numpy array

        Returns
        -------
        x_calc : numpy array after calculating the necessary function over X
        """
        return 1 / (1 + np.exp(-X))

    def sigmoid_grad(self, X):
        """
        Calculating the gradient of Sigmoid activation for a particular layer

        Parameters
        ----------
        X : numpy array

        Returns
        -------
        x_calc : numpy array after calculating the necessary function over X
        """
        sig = self.sigmoid(X)
        return sig * (1 - sig)

    def linear(self, X):
        """
        Calculating the Linear activation for a particular layer

        Parameters
        ----------
        X : numpy array

        Returns
        -------
        x_calc : numpy array after calculating the necessary function over X
        """
        return X

    def linear_grad(self, X):
        """
        Calculating the gradient of Linear activation for a particular layer

        Parameters
        ----------
        X : numpy array

        Returns
        -------
        x_calc : numpy array of ones with the same shape as X
        """
        return np.ones(X.shape)

    def tanh(self, X):
        """
        Calculating the Tanh activation for a particular layer

        Parameters
        ----------
        X : numpy array

        Returns
        -------
        x_calc : numpy array after calculating the necessary function over X
        """
        return np.tanh(X)

    def tanh_grad(self, X):
        """
        Calculating the gradient of Tanh activation for a particular layer

        Parameters
        ----------
        X : numpy array

        Returns
        -------
        x_calc : numpy array after calculating the necessary function over X
        """
        return 1 - self.tanh(X) ** 2

    def softmax(self, X):
        """
        Calculating the row-wise Softmax activation for the output layer

        Parameters
        ----------
        X : 2-dimensional numpy array of shape (n_samples, n_classes)

        Returns
        -------
        x_calc : 2-dimensional numpy array of the same shape whose rows sum to 1
        """
        # Subtracting the row maximum leaves the result unchanged
        # mathematically but keeps np.exp from overflowing to inf (which
        # previously produced NaN probabilities for large logits).
        shifted = X - X.max(axis=1, keepdims=True)
        expo = np.exp(shifted)
        return expo / expo.sum(axis=1, keepdims=True)

    def weight_func(self, shape):
        """
        Creating the initial weights for one layer using the configured scheme

        Parameters
        ----------
        shape : tuple specifying the shape of the layer for which weights have to be generated

        Returns
        -------
        weight : 2-dimensional numpy array which contains the initial weights for the requested layer
        """
        if self.weight_init == "zero":
            return self.zero_init(shape)
        elif self.weight_init == "random":
            return self.random_init(shape)
        else:
            return self.normal_init(shape)

    def zero_init(self, shape):
        """
        Calculating the initial weights after Zero Activation for a particular layer

        Parameters
        ----------
        shape : tuple specifying the shape of the layer for which weights have to be generated

        Returns
        -------
        weight : 2-dimensional numpy array which contains the initial weights for the requested layer
        """
        return np.zeros(shape)

    def random_init(self, shape):
        """
        Calculating the initial weights after Random Activation for a particular layer

        Parameters
        ----------
        shape : tuple specifying the shape of the layer for which weights have to be generated

        Returns
        -------
        weight : 2-dimensional numpy array of uniform samples from [0, 1) scaled by 0.01
        """
        return np.random.rand(shape[0], shape[1]) * 0.01

    def normal_init(self, shape):
        """
        Calculating the initial weights after Normal(0,1) Activation for a particular layer

        Parameters
        ----------
        shape : tuple specifying the shape of the layer for which weights have to be generated

        Returns
        -------
        weight : 2-dimensional numpy array which contains the initial weights for the requested layer
        """
        return np.random.normal(size=shape)

    def cross_entropy(self, y_hat, y):
        """
        Calculating the output-layer error term

        Parameters
        ----------
        y_hat : numpy array of predicted probabilities

        y : numpy array of one-hot labels with the same shape as y_hat

        Returns
        -------
        error : numpy array y_hat - y
        """
        return y_hat - y

    def cross_entropy_loss(self, A, y):
        """
        Calculating the mean cross-entropy loss

        Parameters
        ----------
        A : 2-dimensional numpy array of shape (n_samples, n_classes) with predicted probabilities

        y : 2-dimensional numpy array of shape (n_samples, n_classes) with one-hot labels

        Returns
        -------
        loss : float value, mean negative log-probability assigned to the true class
        """
        n = len(y)
        true_class_prob = A[np.arange(n), y.argmax(axis=1)]
        # Clip away exact zeros so the loss stays finite instead of inf.
        logp = -np.log(np.clip(true_class_prob, 1e-12, None))
        return np.sum(logp) / n

    def fit(self, X, y, X_test=None, y_test=None):
        """
        Fitting (training) the network with mini-batch gradient descent.

        Parameters
        ----------
        X : 2-dimensional numpy array of shape (n_samples, n_features) which acts as training data.

        y : 2-dimensional numpy array of shape (n_samples, n_classes) which acts as
            one-hot encoded training labels.

        X_test : optional 2-dimensional numpy array used to track the test loss per epoch.

        y_test : optional one-hot labels for X_test.

        Returns
        -------
        self : an instance of self

        Notes
        -----
        After fitting, `train_error` holds one loss per epoch, measured on the
        last mini-batch of that epoch (using the forward pass made before the
        batch's weight update). `test_error` holds one loss per epoch only when
        X_test is given; otherwise it is empty.
        """
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty training set")

        train_error = []
        test_error = []
        output_layer = self.n_layers - 2

        for epoch in range(self.num_epochs):
            for start in range(0, n_samples, self.batch_size):
                # Slices are only read, never modified, so no copies are needed.
                X_sample = X[start:start + self.batch_size, :]
                y_sample = y[start:start + self.batch_size, :]
                batch_len = len(X_sample)

                activations, preactivations = self.feed_forward(X_sample)
                dervs = self.backward_prop(y_sample, activations, preactivations)

                # Gradient update: the input to layer 0 is the batch itself,
                # the input to layer k > 0 is the activation of layer k-1.
                for layer in range(self.n_layers - 1):
                    layer_input = X_sample if layer == 0 else activations[layer - 1]
                    grad = layer_input.T.dot(dervs[layer]) / batch_len
                    self.weights[layer] = self.weights[layer] - self.learning_rate * grad
                    self.bias[layer] = (self.bias[layer]
                                        - self.learning_rate * np.sum(dervs[layer], axis=0) / batch_len)

            train_cost = self.cross_entropy_loss(activations[output_layer], y_sample)
            train_error.append(train_cost)
            if (epoch + 1) % 5 == 0:
                print("epoch", epoch, "\t", train_cost)

            if X_test is not None:
                y_test_pred = self.predict_proba(X_test)
                test_error.append(self.cross_entropy_loss(y_test_pred, y_test))

            # Keep the last batch's intermediate values for later inspection.
            self.activations = activations
            self.preactivations = preactivations

        self.train_error = np.array(train_error)
        self.test_error = np.array(test_error)

        return self

    def feed_forward(self, input):
        """
        Running a forward pass through every layer.

        Parameters
        ----------
        input : 2-dimensional numpy array of shape (n_samples, n_features)

        Returns
        -------
        activations : dictionary of value of each layer after activation
        preactivations : dictionary of value of each layer before activation
        """
        preactivations = {}
        activations = {}
        current = input
        last = self.n_layers - 2

        # Hidden layers use the configured activation function.
        for layer in range(last):
            hidden_output = current.dot(self.weights[layer]) + self.bias[layer]
            hidden_output_A, _ = self.activation_func(hidden_output)
            preactivations[layer] = hidden_output
            activations[layer] = hidden_output_A
            current = hidden_output_A

        # The output layer always uses softmax.
        hidden_output = current.dot(self.weights[last]) + self.bias[last]
        preactivations[last] = hidden_output
        activations[last] = self.softmax(hidden_output)
        return activations, preactivations

    def backward_prop(self, y, activations, preactivations):
        """
        Back-propagating the output error through the network.

        Parameters
        ----------
        y : 2-dimensional numpy array of one-hot labels for the batch

        activations : dictionary of value of each layer after activation

        preactivations : dictionary of value of each layer before activation

        Returns
        -------
        dervs: gradients that will be used to update weights and biases
        """
        dervs = {}
        last = self.n_layers - 2
        # For softmax combined with cross-entropy the output delta is
        # simply prediction minus target.
        delta = activations[last] - y
        dervs[last] = delta
        for layer in range(last - 1, -1, -1):
            error = delta.dot(self.weights[layer + 1].T)
            _, derv = self.activation_func(preactivations[layer])
            delta = error * derv
            dervs[layer] = delta

        return dervs

    def predict_proba(self, X):
        """
        Predicting probabilities using the trained model.

        Parameters
        ----------
        X : 2-dimensional numpy array of shape (n_samples, n_features) which acts as testing data.

        Returns
        -------
        y : 2-dimensional numpy array of shape (n_samples, n_classes) which contains the
            class wise prediction probabilities.
        """
        activations, _ = self.feed_forward(X)
        return activations[self.n_layers - 2]

    def predict(self, X):
        """
        Predicting values using the trained model.

        Parameters
        ----------
        X : 2-dimensional numpy array of shape (n_samples, n_features) which acts as testing data.

        Returns
        -------
        y : 1-dimensional numpy array of shape (n_samples,) which contains the predicted values.
        """
        return self.predict_proba(X).argmax(axis=1)

    def score(self, X, y):
        """
        Calculating the accuracy of the trained model.

        Parameters
        ----------
        X : 2-dimensional numpy array of shape (n_samples, n_features) which acts as testing data.

        y : 2-dimensional numpy array of shape (n_samples, n_classes) which acts as
            one-hot encoded testing labels.

        Returns
        -------
        acc : float value specifying the accuracy of the model on the provided testing set
        """
        y_pred = self.predict(X)
        y_true = y.argmax(axis=1)
        return metrics.accuracy_score(y_true, y_pred)

    def asses(self):
        """
        Plotting the per-epoch training error (and testing error when it was
        recorded) collected by the last call to `fit`.
        """
        # Use the recorded lengths so the x axis always matches the data.
        plt.plot(range(len(self.train_error)), self.train_error, label="Training error")
        if len(self.test_error):
            plt.plot(range(len(self.test_error)), self.test_error, label="Testing error")
        plt.legend()
        plt.xlabel("Epochs")
        plt.ylabel("Error")



## Testing

In [None]:
# Load MNIST (one-hot labels), hold out 10% for testing, and standardise
# the features using statistics computed on the training split only.
X, y = preprocessor.pre_process(0)
print(X.shape, y.shape)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.10
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# 784 -> 256 -> 128 -> 64 -> 10 network, trained with test-loss tracking
# so that asses() can plot both error curves.
classifier = MyNeuralNetwork(
    n_layers=5,
    layer_sizes=[784, 256, 128, 64, 10],
    activation='relu',
    learning_rate=0.1,
    weight_init='random',
    batch_size=3000,
    num_epochs=100,
)
classifier.fit(X_train, y_train, X_test, y_test)
classifier.asses()

In [None]:
# Run the training set through the trained network and keep the activations
# stored under index 2 (with the layer sizes configured above, the 64-unit
# last hidden layer) as features for the t-SNE plot.
act,pre=classifier.feed_forward(X_train)
a=act[2]
a.shape

In [None]:
# Project the hidden-layer activations `a` down to two dimensions with t-SNE.
tsne = TSNE(n_components=2, verbose=2, n_iter=1000)
tsne_results = tsne.fit_transform(a)

# NOTE(review): this figure is created here, but the scatter plot is drawn in
# the next cell — confirm the figsize actually applies to that plot.
plt.figure(figsize=(16,10))


In [None]:
# Create the figure in the same cell as the plot: with the notebook inline
# backend a figure opened in an earlier cell is not reused here, so its
# figsize would otherwise not apply to this scatter plot.
plt.figure(figsize=(16, 10))
# Colour each embedded point by its true digit (argmax of the one-hot label).
sns.scatterplot(
    x=tsne_results[:, 0],
    y=tsne_results[:, 1],
    hue=y_train.argmax(axis=1),
    palette=sns.color_palette("hls", 10),
    legend="full",
)

In [None]:
# Persist the trained network; the context manager closes the file handle
# even if pickling fails.
# NOTE(review): the file is named "linear_random" while the classifier above
# is built with activation 'relu' — confirm the name matches the model saved.
with open("linear_random.pkl", "wb") as model_file:
    pickle.dump(classifier, model_file)

## sklearn

In [None]:
# scikit-learn baseline with the same hidden layer sizes (256, 128, 64).
# Alternative configuration that was tried earlier:
# clf = MLPClassifier(activation="identity", hidden_layer_sizes=(256, 128, 64),learning_rate_init=0.1,batch_size=3000, max_iter=100)
clf = MLPClassifier(solver='lbfgs', alpha=0.1, hidden_layer_sizes=(256,128,64), random_state=1, activation = 'logistic')
# MLPClassifier is given integer class labels here, so undo the one-hot encoding.
clf.fit(X_train,y_train.argmax(axis=1))

In [None]:
# Accuracy of the scikit-learn baseline on the held-out split.
clf.score(X_test,y_test.argmax(axis=1))

# Q3

In [None]:
# Q3 data: datasets 1 and 2 are the largeTrain / largeValidation CSV files.
X_train,y_train= preprocessor.pre_process(1)
X_val,y_val= preprocessor.pre_process(2)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
X_train.shape,y_train.shape,X_val.shape,y_val.shape

In [None]:
class MLP(nn.Module):
    """Single-hidden-layer perceptron: Linear -> ReLU -> Linear.

    The forward pass returns the raw output of the last linear layer
    (no softmax is applied).
    """

    def __init__(self, input_dim, output_dim, hidden_dim):
        super().__init__()
        self.input_fc = nn.Linear(input_dim, hidden_dim)
        self.output_fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the output-layer values for the batch `x`."""
        hidden = self.input_fc(x)
        return self.output_fc(F.relu(hidden))

In [None]:
class MyDataset(data.Dataset):
    """Dataset pairing row i of X with entry i of y."""

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        # Number of samples is the first dimension of X.
        return self.X.shape[0]

    def __getitem__(self, i):
        features = self.X[i]
        target = self.y[i]
        return features, target

In [None]:
def train(model, train_iterator,val_iterator, optimizer, criterion, device,epochs,flag=True):
    """
    Train `model` and track the training and validation loss per epoch.

    Each epoch first runs one optimisation step per batch of
    `train_iterator`, then evaluates every batch of `val_iterator` without
    tracking gradients. (The two loaders used to be zipped together, which
    silently stopped training at the length of the shorter loader while the
    mean was still taken over the full length.)

    Parameters
    ----------
    model : torch module to train
    train_iterator : iterable of (x, y) training batches; must support len()
    val_iterator : iterable of (x, y) validation batches; must support len()
    optimizer : torch optimizer built over the model's parameters
    criterion : loss function called as criterion(prediction, target)
    device : torch device the batches are moved to
    epochs : number of passes over the training data
    flag : when True return the losses averaged over all epochs,
           when False return the per-epoch lists

    Returns
    -------
    (train_loss, val_loss) : two floats if flag is True, otherwise two lists
                             with one mean batch loss per epoch
    """
    ce_loss=[]
    val_loss=[]
    for epoch in range(epochs):
        epoch_loss = 0
        epoch_loss_val = 0

        # training
        for x, y in train_iterator:
            x = x.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            y_pred = model(x.float())
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        # validation: no gradients are needed, so skip building the graph
        with torch.no_grad():
            for x_val, y_val in val_iterator:
                x_val = x_val.to(device)
                y_val = y_val.to(device)
                y_val_pred = model(x_val.float())
                epoch_loss_val += criterion(y_val_pred, y_val).item()

        mean_train_loss = epoch_loss / len(train_iterator)
        mean_val_loss = epoch_loss_val / len(val_iterator)
        ce_loss.append(mean_train_loss)
        val_loss.append(mean_val_loss)

        if (epoch + 1) % 100 == 0:
            print("epoch:", epoch + 1, "\t", mean_train_loss, mean_val_loss)

    if not flag:
        return ce_loss, val_loss
    return np.mean(ce_loss), np.mean(val_loss)

## 1)

### a)

In [None]:
# Compare the average training / validation loss for different hidden sizes.
hidden_units = [5, 20, 50, 100, 200]
input_size = 128
output_size = 10

ce_loss = []
val_loss = []

criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

train_data = MyDataset(X_train, y_train)
val_data = MyDataset(X_val, y_val)

for h_unit in hidden_units:
    print(h_unit)
    train_iterator = data.DataLoader(train_data, shuffle=True, batch_size=1024)
    val_iterator = data.DataLoader(val_data, batch_size=126)
    # No Softmax layer: nn.CrossEntropyLoss expects raw scores and applies
    # log-softmax itself, so an explicit Softmax would be applied twice.
    # The model is moved to `device` because train() moves every batch there.
    model = nn.Sequential(nn.Linear(input_size, h_unit),
                          nn.ReLU(),
                          nn.Linear(h_unit, output_size)).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    t_loss, v_loss = train(model,
                           train_iterator,
                           val_iterator,
                           optimizer,
                           criterion,
                           device,
                           500)
    ce_loss.append(t_loss)
    val_loss.append(v_loss)

plt.plot(hidden_units, ce_loss, label="Average Training Loss")
plt.plot(hidden_units, val_loss, label="Average Validation Loss")
plt.ylabel("Cross Entropy Loss")
plt.xlabel("Number of Hidden Units")
plt.legend()

## 2)

### a)

In [None]:
# Plot the per-epoch training loss for several learning rates.
learning_rates = [0.1, 0.01, 0.001]
input_size = 128
hidden_size = 4
output_size = 10

criterion = nn.CrossEntropyLoss()

train_data = MyDataset(X_train, y_train)
val_data = MyDataset(X_val, y_val)

train_iterator = data.DataLoader(train_data, shuffle=True, batch_size=1024)
val_iterator = data.DataLoader(val_data, shuffle=True, batch_size=126)

for lr in learning_rates:
    print(lr)
    # Build a fresh model for every learning rate; reusing one model would
    # start each run from the weights left behind by the previous run, so
    # the curves would not be comparable.
    # No Softmax layer: nn.CrossEntropyLoss expects raw scores.
    model = nn.Sequential(nn.Linear(input_size, hidden_size),
                          nn.ReLU(),
                          nn.Linear(hidden_size, output_size)).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    t_loss, v_loss = train(model,
                           train_iterator,
                           val_iterator,
                           optimizer,
                           criterion,
                           device,
                           100,
                           False)

    plt.figure()
    plt.title("Learning Rate: " + str(lr))
    plt.plot(range(len(t_loss)), t_loss, label="Average Training Loss")
    # plt.plot(range(len(v_loss)), v_loss, label="Average Validation Loss")
    plt.ylabel("Cross Entropy Loss")
    plt.xlabel("Epochs")
    plt.legend()
    plt.show()

# Q4

In [None]:
# Q4 data: datasets 3 and 4 are the CIFAR train / test pickles.
X_train,y_train=preprocessor.pre_process(3)
X_test,y_test=preprocessor.pre_process(4)

X_train.shape,X_test.shape,y_train.shape

## 1) EDA

In [None]:
# Tabular view of the raw pixel values, plus names used for plot titles
# (class_names[label] is looked up in display_color_hists).
df=pd.DataFrame(X_train)
class_names=["airplane","automobile","bird","cat","deer","dog","frog","horse","ship","truck"]

In [None]:
# Show the first 16 training images in a 2x8 grid.
for i in range(16):
    plt.subplot(2, 8, i + 1)
    # Deliberately not named `data`: that name is the torch.utils.data alias
    # which later cells still need (data.Dataset, data.DataLoader).
    img = X_train[i, :]
    # NOTE(review): order='F' is kept from the original; confirm the images
    # come out in the intended orientation.
    img = np.reshape(img, (32, 32, 3), order='F')
    plt.imshow(img)

In [None]:
def display_color_hists(images, labels, indices, class_names=class_names):
    """
    Plot one row of per-channel pixel histograms for each selected image.

    Each image is treated as a flat array whose first 1024 values are drawn
    in red, the next 1024 in green and the remainder in sky blue.

    Parameters
    ----------
    images : indexable collection of flat image arrays
    labels : indexable collection; labels[i] indexes into class_names
    indices : iterable of positions in `images` / `labels` to plot
    class_names : list mapping a label to the title shown above each histogram
    """
    indices = list(indices)
    plt.figure(figsize=(10, 6))
    channels = (
        (slice(0, 1024), "red"),
        (slice(1024, 2048), "green"),
        (slice(2048, None), "skyblue"),
    )
    # One row per image (the grid used to be hard-coded to two rows, which
    # failed for more than two indices).
    n_rows = len(indices)
    for row, i in enumerate(indices):
        for col, (span, color) in enumerate(channels):
            plt.subplot(n_rows, len(channels), row * len(channels) + col + 1)
            plt.hist(images[i][span], color=color)
            plt.title(class_names[labels[i]])
    plt.show()


display_color_hists(X_train, y_train, [0, 2])

In [None]:
# Distinct training labels and how many samples each one has.
np.unique(y_train, return_counts=True)

## 2) AlexNet

In [None]:
class MyDataset1(data.Dataset):
    """Dataset that turns flat image rows into PIL images before transforming.

    Each row is reshaped to (3, 32, 32) and its axes are reversed with
    np.transpose, giving a (32, 32, 3) array for PIL. When no transform is
    supplied the image is converted with transforms.ToTensor().
    """

    def __init__(self, data, label, transform=None):
        self.data = data
        self.label = label
        self.transform = transform
        self.img_shape = data.shape

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, index):
        channels_first = np.reshape(self.data[index], (3, 32, 32))
        img = Image.fromarray(np.transpose(channels_first))
        label = self.label[index]
        if self.transform is None:
            convert = transforms.ToTensor()
        else:
            convert = self.transform
        return convert(img), label

In [None]:
# Load torchvision's AlexNet with pretrained weights and switch it to
# evaluation mode; it is used below as a fixed feature extractor.
alexnet = models.alexnet(pretrained=True)
alexnet.eval()
alexnet

In [None]:
# Resize to 40x40, convert to a tensor, pad 16 pixels on every side and
# normalise with the given per-channel mean / std.
train_transform_aug = transforms.Compose([
    transforms.Resize((40, 40)),
    transforms.ToTensor(),
    transforms.Pad(16),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Each loader yields the whole split as a single batch. shuffle must stay
# off: the extracted features are later paired with y_train / y_test by
# position, so shuffling here would mismatch features and labels.
train_data = MyDataset1(X_train, y_train, train_transform_aug)
train_loader = data.DataLoader(dataset=train_data,
                               batch_size=X_train.shape[0],
                               shuffle=False)

test_data = MyDataset1(X_test, y_test, train_transform_aug)
test_loader = data.DataLoader(dataset=test_data,
                              batch_size=X_test.shape[0],
                              shuffle=False)

len(train_loader)

In [None]:
# Run both splits through AlexNet. Gradients are not needed for feature
# extraction, and tracking them over a whole-dataset batch wastes memory.
with torch.no_grad():
    for x, y in train_loader:
        output = alexnet(x)
        print(output.size())

    for x, y in test_loader:
        output_test = alexnet(x)
        print(output_test.size())

In [None]:
# Convert the AlexNet outputs to numpy arrays for the classifiers below.
X_new=output.detach().numpy()
X_test_new=output_test.detach().numpy()

## 3)

In [None]:
class MyDataset2(data.Dataset):
    """Dataset pairing row i of the feature array X with label y[i]."""

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        # One sample per row of X.
        return self.X.shape[0]

    def __getitem__(self, i):
        sample = self.X[i]
        target = self.y[i]
        return sample, target

In [None]:
def train(model, iterator, optimizer, criterion, device,epochs):
    """
    Train `model` for a number of epochs, printing the mean batch loss of
    each epoch.

    Parameters
    ----------
    model : torch module to train
    iterator : iterable of (x, y) batches; must support len()
    optimizer : torch optimizer built over the model's parameters
    criterion : loss function called as criterion(prediction, target)
    device : torch device the batches are moved to
    epochs : number of passes over the data

    Returns
    -------
    epoch_losses : list with the mean batch loss of every epoch
    """
    epoch_losses = []
    for epoch in range(epochs):
        epoch_loss = 0
        for x, y in iterator:
            x = x.to(device)
            y = y.to(device)

            # optimizer.zero_grad() already clears the gradients; the extra
            # model.zero_grad() after the forward pass was redundant.
            optimizer.zero_grad()
            y_pred = model(x.float())
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        mean_loss = epoch_loss / len(iterator)
        print(mean_loss)
        epoch_losses.append(mean_loss)
    return epoch_losses
      


In [None]:
# Select the device before building the model so that `.to(device)` does not
# depend on a `device` left over from an earlier cell.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Classifier head on top of the 1000-dimensional AlexNet outputs.
model = nn.Sequential(
    torch.nn.Linear(1000, 512),
    torch.nn.ReLU(),
    torch.nn.Linear(512, 256),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 2),
).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.1)
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
train_data = MyDataset2(X_new, y_train)
train_iterator = data.DataLoader(train_data, shuffle=True, batch_size=1024)

train(model, train_iterator, optimizer, criterion, device, 50)

## 4)

In [None]:
# Do not shuffle at evaluation time: the predictions are compared with
# y_test by position, so the loader must keep the dataset order.
test_data = MyDataset2(X_test_new, y_test)
test_iterator = data.DataLoader(test_data, shuffle=False, batch_size=2000)

In [None]:
model.eval()
batch_outputs = []
# Inference only: skip gradient tracking, feed the model inputs on its own
# device, and bring the results back to the CPU so they can be converted to
# numpy afterwards.
with torch.no_grad():
    for x, y in test_iterator:
        batch_output = model(x.to(device).float()).cpu()
        print(batch_output.size())
        batch_outputs.append(batch_output)
# Keep the predictions of every batch, not just the last one.
output = torch.cat(batch_outputs)

In [None]:
# Convert the classifier outputs to a numpy array and check its shape.
ypred=output.detach().numpy()
ypred.shape

In [None]:
# Display the raw output values produced by the classifier.
ypred

In [None]:
# Distinct test labels and how many samples each one has.
np.unique(y_test,return_counts=True)

### sklearn

In [None]:
# scikit-learn MLP with hidden layers (512, 256), trained with SGD on the
# AlexNet features and scored on the test features.
mlp = MLPClassifier(hidden_layer_sizes=(512,256), max_iter=200, solver='sgd')
mlp.fit(X_new, y_train)
mlp.score(X_test_new,y_test)

In [None]:
y_pred = mlp.predict(X_test_new)
# Compare against the true test labels. The previous `y` was whatever an
# earlier `for x, y in ...` loop happened to leave behind, not y_test.
metrics.confusion_matrix(y_test, y_pred)

In [None]:
scores = mlp.predict_proba(X_test_new)
# predict_proba columns follow mlp.classes_, so column 1 holds the score of
# mlp.classes_[1]. The score column and pos_label must refer to the same
# class; the previous pos_label=2 with column 0 did not.
# NOTE(review): assumes a two-class problem (the torch head above has two
# outputs) — confirm against the labels.
positive_class = mlp.classes_[1]
fpr, tpr, thresholds = metrics.roc_curve(y_test, scores[:, 1], pos_label=positive_class)
scores[:, 1].shape

In [None]:
# ROC curve of the scikit-learn MLP on the test features.
# NOTE(review): plot_roc_curve is not available in newer scikit-learn
# releases (RocCurveDisplay.from_estimator replaces it) — confirm against the
# installed version.
metrics.plot_roc_curve(mlp, X_test_new, y_test)

# Extra

In [None]:
# Reload a saved network; the context manager closes the file handle.
# NOTE: only unpickle files you created yourself — pickle can run arbitrary code.
with open("/content/sigmoid_normal.pkl", "rb") as model_file:
    model = pickle.load(model_file)
model.asses()
# NOTE(review): X_test / y_test must be the data this model was trained for
# (score() takes the argmax of y along axis 1, so y must be one-hot encoded) —
# confirm they have not been rebound by a later section.
model.score(X_test, y_test)

In [None]:
# Saved model files:
# /content/linear_random.pkl
# /content/relu_random.pkl
# /content/sigmoid_normal.pkl
# /content/sigmoid_random.pkl
# /content/tanh_normal.pkl
# /content/tanh_random.pkl