# Deep Learning
## Practical Deep Learning Tutorial with PyTorch - Tutorial N° 3

### 2020-2021



# Importing necessary libraries

In [1]:
import torch
from torch.autograd import grad
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_blobs

# Adaline

1. Built ADALINE model using the nn.Module class 

In [2]:
class Adaline(torch.nn.Module):
    def __init__(self, num_features):
        super(Adaline, self).__init__()
        self.linear =                                   # une transformation lineare de la forme X.(W.transposé)
                                                        #X matrice de donnes qui a (num_features) variables
                                                        # chaque donne sera multiplie par un vecteur de poids 
                                                        # de taille (1,num_features), ca donne un veteur de taille
                                                        # (num_features,1)
        self.linear.weight.detach().zero_()  # changer les poids aleatoire à zero (pour l'initialisation)
        self.linear.bias.detach().zero_()  #meme chose pour le bias
    def forward(self, x):
        activations = self.linear(x)
        return activations.view(-1)

2. Using 'iris.txt', create a binary datasets in 2-D : The last 100 instances of iris described only by the 2nd and 3rd features
    
    Split the dataset into traing and test sets (70%,30%) 

    Normalize the dataset

In [3]:
import pandas as pd
df = pd.read_csv('iris.txt', index_col=None, header=None)    #lire le fichier iris
df.columns = ['x1', 'x2', 'x3', 'x4', 'y'] #renommer les colonnes
df = df.iloc[50:150]   #prendre que la 100 dernière données de la base, donc que les donnees des 2 dernieres classes
df['y'] = df['y'].apply(lambda x: 0 if x == 'Iris-versicolor' else 1) #coder la 2e classe par 0 la 3e par 1


# Assign features and target

X = torch.tensor(df[['x2', 'x3']].values, dtype=torch.float) #ne considerer que la 2e et 3e variables, c'est notre tensor X
y = torch.tensor(df['y'].values, dtype=torch.int) #le tensor y correeponds aux labels y

# Shuffling & train/test split

torch.manual_seed(123)
shuffle_idx = torch.randperm(y.size(0), dtype=torch.long)  #melanger les indices 
X, y = X[shuffle_idx], y[shuffle_idx]  #melanger les donnes, ca garde la corrspondance entre une donnée et son label
percent70 = int(shuffle_idx.size(0)*0.7)
X_train, X_test = X[shuffle_idx[:percent70]], X[shuffle_idx[percent70:]]  #70 premiers points pour training 
y_train, y_test = y[shuffle_idx[:percent70]], y[shuffle_idx[percent70:]]  # 30 dernieres données pour le test

# Normalize (mean zero, unit variance)

mu, sigma = X_train.mean(dim=0), X_train.std(dim=0) #normalization, soustraire la moyenne diviser par lecarte type
X_train =   
X_test = 

In [4]:
model = Adaline(num_features=X_train.size(1))
model

Adaline(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)

3. Train the model : we will use MSELoss (mean squared error (squared L2 norm)) as loss function. The optimizer is SGD (Stochastic Gradient Descent) with learning rate 0.01.

In [5]:
def train(model, x, y, num_epochs, learning_rate, seed):
    cost = []
    torch.manual_seed(seed)
    optimizer =  #use a SGD optimizer
    for e in range(num_epochs):
        yhat =                   #calcul yhat
        loss =                   #calcul the loss function using MSE
        optimizer.zero_grad()    # set the gradients to zero
                                 # calculer le gradients
                                 # mise a jour des poids ####

In [6]:
train(model, X_train, y_train.float(),num_epochs=142,learning_rate=0.01,seed=123)

4. Compute the model accuracy 

In [7]:
def custom_where(cond, x_1, x_2):
    return (cond * x_1) + (torch.logical_not(cond) * x_2)
train_pred = model.forward(X_train)
train_acc = torch.mean(
    (custom_where(train_pred > 0.5, 1, 0).int() == y_train).float())
test_pred = model.forward(X_test)
test_acc = torch.mean((custom_where(test_pred > 0.5, 1, 0).int() == y_test).float())
print('Training Accuracy: %.2f' % (train_acc*100))
print('Test Accuracy: %.2f' % (test_acc*100))
print('Weights', model.linear.weight)
print('Bias', model.linear.bias)

Training Accuracy: 92.86
Test Accuracy: 93.33
Weights Parameter containing:
tensor([[-0.0195,  0.3655]], requires_grad=True)
Bias Parameter containing:
tensor([0.4581], requires_grad=True)


# Perceptron

5. Built a Perceptron model using nn.Module class

In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class Perceptron():
    def __init__(self, num_features):
        self.num_features = num_features
        self.weights = torch.zeros(num_features, 1, 
                                   dtype=torch.float32, device=device)
        self.bias = torch.zeros(1, dtype=torch.float32, device=device)
        self.ones = torch.ones(1)
        self.zeros = torch.zeros(1)

    def forward(self, x):
        
        return predictions
        
    def backward(self, x, y):  

        return errors
        
    def train(self, x, y, epochs):
        for e in range(epochs):
            for i in range(y.shape[0]):
                errors = 
                self.weights = 
                self.bias = 
                
    def evaluate(self, x, y):
        predictions = self.forward(x).reshape(-1)
        accuracy = torch.sum(predictions == y).float() / y.shape[0]
        return accuracy

6. Load the 'perceptron_toydata' dataset

    Split the dataset into train and test sets
    
    Normalize the data

In [1]:
df = pd.read_csv('perceptron_toydata.txt', index_col=None, header=None , delimiter='\t')
df.columns = ['x1', 'x2', 'y']
X = torch.tensor(df[['x1', 'x2']].values, dtype=torch.float) 
y = torch.tensor(df['y'].values, dtype=torch.int) 
print('Class label counts:', torch.bincount(y))
print('X.shape:', X.shape)
print('y.shape:', y.shape)

# Shuffling & train/test split
shuffle_idx = torch.randperm(y.size(0), dtype=torch.long)
X, y = X[shuffle_idx], y[shuffle_idx]
percent70 = int(shuffle_idx.size(0)*0.7)
X_train, X_test = X[shuffle_idx[:percent70]], X[shuffle_idx[percent70:]]
y_train, y_test = y[shuffle_idx[:percent70]], y[shuffle_idx[percent70:]]
# Normalize (mean zero, unit variance)
mu, sigma = X_train.mean(axis=0), X_train.std(axis=0)
X_train = (X_train - mu) / sigma
X_test = (X_test - mu) / sigma
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

NameError: name 'pd' is not defined

7. Train the perceptron

In [10]:
model = Perceptron(num_features=2)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32, device=device)

model.train(X_train_tensor, y_train_tensor, epochs=5)

print('Model parameters:')
print('  Weights: %s' % model.weights)
print('  Bias: %s' % model.bias)

Model parameters:
  Weights: tensor([[2.4768],
        [0.8986]])
  Bias: tensor([-1.])


  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


8. evaluate the model (accuracy)

In [11]:
X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32, device=device)

test_acc = model.evaluate(X_test_tensor, y_test_tensor)
print('Test set accuracy: %.2f%%' % (test_acc*100))

Test set accuracy: 100.00%


  """Entry point for launching an IPython kernel.
  


# Multi Layer Perceptron

Unlike the single-layer perceptron, the Multi Layer Perceptron models have hidden layers
between the input and the output layers. After every hidden layer, an activation function 
is applied to introduce non-linearity. 

9. Built a simple Multi Layer Perceptron model withe one hidden layer. 
After the hidden layer, we will use ReLU as activation before the information is sent to the output layer.
As an output activation function, we will use Sigmoid. 

In [12]:
class MultilayerPerceptron(torch.nn.Module):
    def __init__(self, num_features,num_hidden_1):
        super(MultilayerPerceptron, self).__init__()

        
        
        
        
        
        

    def forward(self, x):

        
        
        
        
        
        
        
        return out

10. Create a random datasets and assign binary labels {0,1}

In [2]:
def blob_label(y, label, loc): # assign labels
    target = np.copy(y)
    for l in loc:
        target[y == l] = label
    return target
x_train, y_train = make_blobs(n_samples=40, n_features=2, cluster_std=1.5, shuffle=True)
x_train = torch.FloatTensor(x_train)
y_train = torch.FloatTensor(blob_label(y_train, 0, [0]))
y_train = torch.FloatTensor(blob_label(y_train, 1, [1,2,3]))
x_test, y_test = make_blobs(n_samples=10, n_features=2, cluster_std=1.5, shuffle=True)
x_test = torch.FloatTensor(x_test)
y_test = torch.FloatTensor(blob_label(y_test, 0, [0]))
y_test = torch.FloatTensor(blob_label(y_test, 1, [1,2,3]))
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

NameError: name 'make_blobs' is not defined

11. Define the model with input dimension 2 and hidden dimension 10. 
Since the task is to classify binary labels, we can use as criterion BCELoss (Binary Cross Entropy Loss) : loss function.
The optimizer is SGD (Stochastic Gradient Descent) with learning rate 0.01.

In [14]:
model = MultilayerPerceptron(2, 10)
criterion = 
optimizer = 

12. Check the test loss before the model training and compare it with the test loss after the training.

In [15]:
model.eval()
y_pred = model(x_test)
before_train = criterion(y_pred.squeeze(), y_test)
print('Test loss before training' , before_train.item())

Test loss before training 0.7917705774307251




In [3]:
model.train()
epoch = 20
for epoch in range(epoch):
    optimizer.zero_grad()
    y_pred = 
    loss = 
    print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
    # Backward pass
    loss.backward()
    optimizer.step()

SyntaxError: invalid syntax (<ipython-input-3-1d4a560dc198>, line 5)

In [17]:
model.eval()
y_pred = model(x_test)
after_train = criterion(y_pred.squeeze(), y_test) 
print('Test loss after Training' , after_train.item())

Test loss after Training 0.710935115814209


13. In order to improve the model, you can try out different parameter values for your
hyperparameters(ie. hidden dimension size, epoch size, learning rates). You can also 
try changing the structure of your model (ie. adding more hidden layers) to see if your
mode improves. 