# Gradient descent with pytorch

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import math

Let us consider the Rosenbrock function $f(x,y) = 100(y-x^2)^2 + (1-x)^2$, whose global minimum is $f(1,1) = 0$.

In this practical session we will see what automatic differentiation is and how to optimize a function with PyTorch.


In [None]:
def rosenbrok(x):
    """Evaluate the Rosenbrock function f(x, y) = 100 (y - x^2)^2 + (1 - x)^2.

    Args:
        x: indexable pair ``(x, y)``. Passing a 1-D torch tensor of length 2
            keeps the result differentiable by autograd.

    Returns:
        The function value, built only from arithmetic on ``x[0]`` and
        ``x[1]``. It is 0 at the global minimum (1, 1).
    """
    ## YOUR CODE HERE
    f = 100 * (x[1] - x[0] ** 2) ** 2 + (1 - x[0]) ** 2
    return f

In [None]:
# Imported here because, when the notebook is run top to bottom, this cell
# executes before the first `import torch` cell.
import torch

# Starting point of the descent. requires_grad=True makes x a leaf tensor
# that autograd tracks and that the optimizer updates in place.
x = torch.tensor([.5, .5], requires_grad=True, dtype=torch.float64)

n_iter = 10000
lr = 0.001  # step size as a plain float; no need to wrap it in a tensor

# define optimizer: SGD without momentum, i.e. plain gradient descent on x
optimizer = torch.optim.SGD([x], lr=lr)

# range(n_iter) performs exactly n_iter updates (range(1, n_iter) did one fewer)
for _ in range(n_iter):
    # zero out the gradient (by default PyTorch accumulates gradients)
    optimizer.zero_grad()
    y = rosenbrok(x)
    # compute the gradient; the graph is rebuilt by each forward pass, so it
    # does not have to be retained after backward()
    y.backward()
    # apply GD step
    optimizer.step()
print("Final value: ",x)

### TODO:
- try different initial conditions and learning rates
- see whether the scheme is robust to these changes, then try a different optimizer

# Automatic differentiation in pytorch


In [None]:
!pip install torch
import torch

In [None]:
# Two scalar leaf tensors tracked by autograd
x = torch.tensor(1.0, requires_grad=True)
y = torch.tensor(3.0, requires_grad=True)
# z = x^2 + ln(y), recorded in the computational graph
z = x.pow(2) + y.log()

In [None]:
# Show the forward value of z, then back-propagate from it so that the
# gradients of z are stored in the .grad attribute of the tracked leaves.
print("Value of z: ",z)
z.backward()

In [None]:
# Read back the gradients populated by the backward pass
dx, dy = x.grad, y.grad
for label, grad in (("x", dx), ("y", dy)):
    print("Gradient w.r.t. {} : ".format(label), grad)

### TODO: plug in the computational graph studied in the course and verify the manual computation

# Train classification network with pytorch

This dataset is originally from the National Institute of Diabetes and Digestive and Kidney Diseases. The objective of the dataset is to diagnostically predict whether or not a patient has diabetes, based on certain diagnostic measurements included in the dataset. Several constraints were placed on the selection of these instances from a larger database. In particular, all patients here are females at least 21 years old of Pima Indian heritage. The dataset consists of several medical predictor variables and one target variable, Outcome. Predictor variables include the number of pregnancies the patient has had, their BMI, insulin level, age, and so on.

In [None]:
import numpy as np
import pandas as pd 
#importing Libraries
import seaborn as sns
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

In [None]:
# Load the diabetes data from the working directory and preview the first rows
df = pd.read_csv(filepath_or_buffer='diabetes.csv')
df.head(5)

In [None]:
# Number of missing entries in each column
df.isna().sum()

In [None]:
# Number of patients in each outcome class
patient_count = df['Outcome'].value_counts()
plt.figure(figsize=(7,5))
# x and y are passed as keywords: recent seaborn releases no longer accept
# them positionally.
sns.barplot(x=patient_count.index, y=patient_count.values, alpha=0.8)
plt.xlabel('Outcome')
plt.ylabel('Number of patients')

In [None]:
# plot the distribution of glucose features
# histplot replaces the deprecated distplot; kde=True and stat='density'
# reproduce distplot's default histogram-plus-density-curve rendering.
sns.histplot(df['Glucose'], kde=True, stat='density')

In [None]:
# plot the distribution of body mass index
# histplot replaces the deprecated distplot; kde=True and stat='density'
# reproduce distplot's default histogram-plus-density-curve rendering.
sns.histplot(df['BMI'], kde=True, stat='density')

In [None]:
# plot the distribution of blood pressure
# histplot replaces the deprecated distplot; kde=True and stat='density'
# reproduce distplot's default histogram-plus-density-curve rendering.
sns.histplot(df['BloodPressure'], kde=True, stat='density')

In [None]:
# Separate the predictors from the column to predict
X = df.drop(columns=['Outcome'])  # independent features
y = df['Outcome']                 # dependent feature (target)

In [None]:
# scikit-learn helper that splits the data into a training and a test set
from sklearn.model_selection import train_test_split

# 20% of the rows are held out for testing; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Convert the pandas objects to tensors: float32 features, int64 class labels
X_train = torch.tensor(X_train.values, dtype=torch.float32)
X_test = torch.tensor(X_test.values, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.int64)
y_test = torch.tensor(y_test.values, dtype=torch.int64)

In [None]:
# Number of features = size of the last axis of the training matrix
input_dim = X_train.shape[-1]
print("Number of input features:", input_dim)

In [None]:
#Creating the Model
class ANN_model(nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    The network maps ``input_features`` values to ``out_features`` raw scores;
    no softmax is applied inside ``forward``.
    """

    def __init__(self, input_features=input_dim, hidden1=20, hidden2=10, out_features=2):
        """Build the three linear layers.

        Args:
            input_features: size of each input sample (defaults to the
                module-level ``input_dim`` at class-definition time).
            hidden1: width of the first hidden layer.
            hidden2: width of the second hidden layer.
            out_features: number of output scores.
        """
        super().__init__()
        self.f_connected1 = nn.Linear(in_features=input_features, out_features=hidden1)
        self.f_connected2 = nn.Linear(in_features=hidden1, out_features=hidden2)
        self.out = nn.Linear(in_features=hidden2, out_features=out_features)

    def forward(self, x):
        """Return the output-layer scores for the input ``x``."""
        hidden = F.relu(self.f_connected1(x))
        hidden = F.relu(self.f_connected2(hidden))
        return self.out(hidden)

In [None]:
# declare an instance of the model (all constructor defaults) and display it
# Uncomment the seed below to make the random weight initialisation repeatable.
#torch.manual_seed(20)
model = ANN_model()
# NOTE: `parameters` is referenced without being called, so the cell output is
# the bound method itself, not the parameter tensors.
model.parameters

In [None]:
# Training objective and optimizer used for back-propagation:
# cross-entropy on the raw model scores, minimised with Adam.
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-2)

In [None]:
# Full-batch training: every epoch uses the whole training set once.
epochs = 5000
final_losses = []  # loss value recorded at each epoch, plotted afterwards
for epoch in range(1, epochs + 1):
    # Call the module itself rather than .forward() so that registered hooks run.
    y_pred = model(X_train)
    loss = loss_function(y_pred, y_train)
    final_losses.append(loss.item())
    if epoch % 100 == 0:
        print("Epoch number: {} and the loss : {}".format(epoch, loss.item()))
    # Gradients accumulate by default, so clear them before back-propagating.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [None]:
# Plot the training loss recorded at each epoch.
plt.plot(range(epochs), final_losses)
# Epochs go on the x axis; the original called ylabel twice, which overwrote
# 'Loss' and left the x axis unlabelled.
plt.xlabel('Epochs')
plt.ylabel('Loss')

In [None]:
# Predicted class (index of the largest output score) for every test sample.
predictions = []
# No gradients are needed for inference.
with torch.no_grad():
    for sample in X_test:
        y_pred = model(sample)
        predictions.append(y_pred.argmax().item())

In [None]:
# confusion_matrix(y_true, y_pred): rows are the true classes and columns the
# predicted classes.
cm = confusion_matrix(y_test, predictions)
# fmt='d' prints the integer counts without scientific notation.
sns.heatmap(cm, annot=True, fmt='d')
# The heatmap puts matrix columns on the x axis and rows on the y axis.
plt.xlabel('Predicted')
plt.ylabel('Actual')

### TODO:
- try to improve and play with different parameters (optimizer, hyperparameters, network structure, etc)
- train a classifier on the customer dataset (seen during the logistic regression practical session): can we improve over logistic regression?
- train a NN for prediction, compare with results obtained during last sessions (TP-Regression)

## Learn MNIST classifier (lecun net)

In [None]:
%matplotlib inline
import numpy as np
import torch,torchvision
import matplotlib.pyplot as plt
import plotly_express as px
import matplotlib.pyplot as plt
import os
from mpl_toolkits import mplot3d

from plotly.offline import download_plotlyjs, init_notebook_mode
from plotly.offline import plot, iplot

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

In [None]:
class MnistModel(nn.Module):
    """Small convolutional network for 28x28 single-channel images.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed a
    1024-unit hidden layer with dropout and a 10-way output layer.
    """

    def __init__(self):
        super(MnistModel, self).__init__()
        # input is 28x28
        # padding=2 for same padding
        self.conv1 = nn.Conv2d(1, 32, 5, padding=2)
        # feature map size is 14*14 by pooling
        # padding=2 for same padding
        self.conv2 = nn.Conv2d(32, 64, 5, padding=2)
        # feature map size is 7*7 by pooling
        self.fc1 = nn.Linear(64*7*7, 1024)
        self.fc2 = nn.Linear(1024, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: tensor whose trailing dimensions are (1, 28, 28), so that the
                flattened feature map has 64*7*7 values per sample.

        Returns:
            Tensor with one row per sample and 10 columns; each row is a
            log-softmax over the classes.
        """
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, 64*7*7)   # flatten each sample's feature maps
        x = F.relu(self.fc1(x))
        # Dropout is active only while the module is in training mode.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # Normalise over the class axis explicitly; relying on the implicit
        # dimension is deprecated.
        return F.log_softmax(x, dim=1)

model = MnistModel()
model

In [None]:
# Training split of MNIST stored under 'data' (downloaded when requested);
# images are converted to tensors and served in shuffled mini-batches.
batch_size = 50
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='data',
        train=True,
        transform=transforms.ToTensor(),
        download=True,
    ),
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Test split of MNIST read from 'data', served in batches of 1000 images
test_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='data',
        train=False,
        transform=transforms.ToTensor(),
    ),
    batch_size=1000,
)

In [None]:
# Print the shape of every parameter tensor of the model
for param in model.parameters():
    print(param.shape)

In [None]:
# Adam over all model parameters with a small step size
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [None]:
# Switch to training mode so that dropout is active.
model.train()
train_loss = []  # loss of every mini-batch
train_accu = []  # accuracy (in %) of every mini-batch, stored as plain floats
i = 0            # global step counter across epochs
for epoch in range(1):
    for data, target in train_loader:
        # Tensors are used directly; the Variable wrapper is deprecated.
        optimizer.zero_grad()
        output = model(data)
        # nll_loss is used because the model already returns log-probabilities.
        loss = F.nll_loss(output, target)
        loss.backward()    # compute gradients
        train_loss.append(loss.item())
        optimizer.step()   # update the parameters
        # Index of the largest log-probability = predicted class.
        prediction = output.argmax(dim=1)
        # Divide by the actual batch length and convert to a Python float, so
        # the value is right for a short last batch and is not stored as a tensor.
        accuracy = 100.0 * prediction.eq(target).sum().item() / target.size(0)
        train_accu.append(accuracy)
        if i % 100 == 0:
            print('Train Step: {}\tLoss: {:.3f}\tAccuracy: {:.3f}'.format(i, loss.item(), accuracy))
        i += 1

In [None]:
# Mini-batch training loss against the step index
plt.figure(figsize=(10, 8))
plt.plot(range(len(train_loss)), train_loss)

In [None]:
# Mini-batch training accuracy against the step index
plt.figure(figsize=(10, 8))
plt.plot(range(len(train_accu)), train_accu)

In [None]:
# Switch to evaluation mode so that dropout is disabled.
model.eval()
correct = 0  # number of correctly classified test images (Python int)
# torch.no_grad() replaces Variable(..., volatile=True), which no longer has
# any effect; without it autograd would track every test batch.
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        # Index of the largest log-probability = predicted class.
        prediction = output.argmax(dim=1)
        correct += prediction.eq(target).sum().item()

print('\nTest set: Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))