 # Part I: Multi-layer Perceptron with sklearn

## 1 Learning Boolean Operators

### 1. Classifier for AND operator

In [1]:
#import classifier as classifier
import numpy as np
from sklearn.neural_network import MLPClassifier 
# Truth table of logical AND: every input pair with its expected output
X = [
    [0., 0.],
    [0., 1.],
    [1., 0.],
    [1., 1.],
]
y = [0, 0, 0, 1]

# Single query point: AND(1, 1) is expected to be 1
X_test = [[1., 1.]]

# No hidden layers: the network is a plain linear classifier, which is
# enough here because AND is linearly separable.
classifier = MLPClassifier(
    hidden_layer_sizes=(),
    activation="identity",
    solver="lbfgs",
).fit(X, y)  # fit() hands back the fitted estimator itself

classifier.predict(X_test)

array([1])

### 2. Classifier for OR operator

In [6]:
# Truth table of logical OR: every input pair with its expected output
X = [
    [0., 0.],
    [0., 1.],
    [1., 0.],
    [1., 1.],
]
y = [0, 1, 1, 1]

# Single query point: OR(1, 0) is expected to be 1
X_test = [[1., 0.]]

# Same linear model as for AND; OR is linearly separable as well
classifier = MLPClassifier(
    hidden_layer_sizes=(),
    activation="identity",
    solver="lbfgs",
).fit(X, y)  # fit() hands back the fitted estimator itself

classifier.predict(X_test)

array([1])

### 3. Classifier for XOR operator

#### (a)

In [10]:
# Truth table of logical XOR: every input pair with its expected output
X = [
    [0., 0.],
    [0., 1.],
    [1., 0.],
    [1., 1.],
]
y = [0, 1, 1, 0]

# Single query point: XOR(1, 1) is expected to be 0
X_test = [[1., 1.]]

# Deliberately the same hidden-layer-free (linear) model as for AND/OR.
# XOR is not linearly separable, so this model cannot fit all four rows.
classifier = MLPClassifier(
    hidden_layer_sizes=(),
    activation="identity",
    solver="lbfgs",
).fit(X, y)  # fit() hands back the fitted estimator itself

classifier.predict(X_test)

array([1])

#### (b)

In [14]:
# Truth table of logical XOR: every input pair with its expected output
X = [
    [0., 0.],
    [0., 1.],
    [1., 0.],
    [1., 1.],
]
y = [0, 1, 1, 0]

# Single query point: XOR(1, 1) is expected to be 0
X_test = [[1., 1.]]

# Two hidden layers of 4 units each, but still with the identity activation:
# a composition of linear maps is itself linear, so the extra layers add no
# expressive power for XOR.
classifier = MLPClassifier(
    hidden_layer_sizes=(4, 4),
    activation="identity",
    solver="lbfgs",
).fit(X, y)  # fit() hands back the fitted estimator itself

classifier.predict(X_test)

array([1])

#### (c)

In [28]:
# Truth table of logical XOR: every input pair with its expected output
X = [
    [0., 0.],
    [0., 1.],
    [1., 0.],
    [1., 1.],
]
y = [0, 1, 1, 0]

# Single query point: XOR(1, 1) is expected to be 0
X_test = [[1., 1.]]

# Same (4, 4) architecture, now with a non-linear tanh activation and a
# larger iteration budget for the optimiser.
classifier = MLPClassifier(
    hidden_layer_sizes=(4, 4),
    activation="tanh",
    solver="lbfgs",
    max_iter=1000,
).fit(X, y)  # fit() hands back the fitted estimator itself

classifier.predict(X_test)


array([0])

## 2 Image Classification

In [320]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Load the digits dataset and unpack features / labels in one step
dataset = load_digits()
X, y = dataset.data, dataset.target  # inputs, associated outputs

# Hold out 10% of the samples for testing; the fixed random_state keeps the
# split identical across runs.
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.1, random_state=3
)

# (n_train_samples, n_features)
print(train_X.shape)

(1617, 64)


In [344]:
# Kept inside this cell so it runs on its own; it belongs with the other
# imports at the top of the notebook.
from sklearn.metrics import accuracy_score

# Baseline for the digits task: no hidden layers, trained with SGD.
# tol / max_iter are set so that training is effectively stopped by the
# tolerance criterion rather than by the iteration cap.
# NOTE: no random_state is passed, so the reported accuracy can vary from
# run to run.
classifier = MLPClassifier(hidden_layer_sizes=(),
                           activation="identity",
                           solver='sgd', tol=1e-10, max_iter=1000000)
classifier.fit(train_X, train_y)

# Predict once on the held-out split and score those predictions against
# the true labels.
test_y_pred = classifier.predict(test_X) # Predicted results
print(" Accuracy: ", accuracy_score(test_y, test_y_pred ))

 Accuracy:  0.9666666666666667


| Experiment No. | Hidden Layers | Activation | Solver | Iterations |   Accuracy   |
|:--------------:|:-------------:|:----------:|:------:|:----------:|:------------:|
|        1       |       0       |  identity  |  lbfgs |    1000    |  [0.93, 1.0] |
|        2       |       0       |    tanh    |  lbfgs |    1000    | [0.94, 0.97] |
|        3       |       0       |  logistic  |  lbfgs |    1000    |     0.94     |
|        4       |       0       |    relu    |  lbfgs |    1000    | [0.93, 0.97] |


---

# Part II: PyTorch and CNN

## 1. PyTorch Tensor Basics

In [4]:
import torch
# One nested list, turned into a tensor by two different routes
data = [[1, 2], [3, 4]]
np_array = np.array(data)

# Route 1: straight from the Python list
x_data = torch.tensor(data)
# Route 2: via the intermediate NumPy array
x_np = torch.from_numpy(np_array)

print(x_data)

tensor([[1, 2],
        [3, 4]])


In [12]:
# 4x4 matrix of ones, with 2 added to every entry (so every entry is 3)
tensor = torch.ones(4, 4) + 2

# Scalar addition applies to each element and produces a new tensor;
# `tensor` itself is left unchanged.
tensor2 = 1 + tensor

print(tensor, tensor2, sep="\n")

tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])
tensor([[4., 4., 4., 4.],
        [4., 4., 4., 4.],
        [4., 4., 4., 4.],
        [4., 4., 4., 4.]])


In [13]:
# `*` multiplies the two tensors element by element (it is not a matrix
# product); the result is displayed as the cell output.
elementwise_product = tensor * tensor2
elementwise_product

tensor([[12., 12., 12., 12.],
        [12., 12., 12., 12.],
        [12., 12., 12., 12.],
        [12., 12., 12., 12.]])

## 2. Torch Autograd

Say, we want to compute the derivative of the following equation:

$Q = 3a^3 - b^2$

We know that

$$\frac{\partial Q}{\partial a} = 9a^2$$

and

$$\frac{\partial Q}{\partial b} = -2b$$

In [27]:
# Input tensors; requires_grad=True asks autograd to track operations on
# them so gradients with respect to a and b can be computed later.
a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)

print(a, b, sep="\n")

# Q = 3a^3 - b^2, evaluated element by element
Q = 3 * a ** 3 - b ** 2

tensor([2., 3.], requires_grad=True)
tensor([6., 4.], requires_grad=True)


In [31]:
# Q is a vector, so backward() needs the gradient of Q with respect to
# itself as the starting point: 1 for every element.
external_grad = torch.tensor([1., 1.])

# backward() *adds* to whatever is already stored in .grad, and
# retain_graph=True allows this cell to be executed repeatedly. Without a
# reset, each re-run would stack another copy of the gradient on top of the
# previous one (2x, 3x, 4x ... the true value). Clearing first makes the
# cell idempotent.
a.grad = None
b.grad = None

Q.backward(gradient=external_grad, retain_graph=True)

# Expected: dQ/da = 9a^2 and dQ/db = -2b
print("Gradient of Q wrt a:", a.grad)
print("Gradient of Q wrt b:", b.grad)

Gradient of Q wrt a: tensor([144., 324.])
Gradient of Q wrt a: tensor([-48., -32.])


In [None]:
import torch, torchvision
# ResNet-18 from torchvision, requested with pretrained weights.
# NOTE(review): newer torchvision releases replace `pretrained=` with a
# `weights=` argument — confirm against the installed version.
model = torchvision.models.resnet18(pretrained=True)
# Random input of shape (1, 3, 64, 64) — presumably one 3-channel 64x64 image.
data = torch.rand(1, 3, 64, 64)
# Random target of shape (1, 1000); assumed to match the model's output
# shape so that `prediction - labels` is well-defined — TODO confirm.
labels = torch.rand(1, 1000)

In [None]:
# Forward pass: run the input batch through the model
prediction = model(data)

# Collapse the element-wise difference to a single scalar so that
# backward() can be called without an explicit gradient argument.
residual = prediction - labels
loss = residual.sum()

# Backward pass: autograd computes the gradient of `loss` for the model's
# parameters.
loss.backward()

In [None]:
# Load SGD optimizer
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

# Initiate gradient descent
optim.step() #gradient descent

## 3. Neural Networks

We take the following module from the PyTorch docs

In [32]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small convolutional network: two conv + max-pool stages, then three
    fully connected layers producing 10 output values per sample.

    ``fc1`` expects 16 * 6 * 6 flattened features, which is what a
    single-channel 32x32 input yields after the two conv/pool stages.
    """

    def __init__(self):
        super().__init__()
        # Convolutional stages: 1 -> 6 -> 16 channels, 3x3 square kernels
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # Fully connected stages, each an affine map y = Wx + b.
        # 16 * 6 * 6 = channels * height * width after the second pooling.
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run ``x`` through the network and return the fc3 output.

        No activation is applied after the last linear layer.
        """
        # Stage 1: convolution, ReLU, then max pooling over a (2, 2) window
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, (2, 2))
        # Stage 2: same pattern; a square window can be given as one number
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        # Flatten everything except the batch dimension
        x = x.view(-1, self.num_flat_features(x))
        # Two hidden linear layers with ReLU, then the plain output layer
        for hidden_layer in (self.fc1, self.fc2):
            x = F.relu(hidden_layer(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the number of elements per sample in ``x``, i.e. the
        product of all dimensions except the first (batch) one."""
        per_sample_shape = x.size()[1:]
        count = 1
        for extent in per_sample_shape:
            count = count * extent
        return count


# Instantiate the network and print its layer summary
net = Net()
print(net)



Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [37]:
# Parameters
params = list(net.parameters())
print(len(params))
print(params[0].size())  # conv1's .weight
print(params[1].size())  # conv2's .weight
print(params[2].size())  # conv2's .weight

10
torch.Size([6, 1, 3, 3])
torch.Size([6])
torch.Size([16, 6, 3, 3])


In [38]:
# Random input of shape (1, 1, 32, 32): a batch of one single-channel 32x32
# image, the size the network's fc1 layer is dimensioned for.
# NOTE(review): the name `input` shadows Python's built-in input(); it is
# kept because other cells may refer to it.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[ 0.0712,  0.0681,  0.0193, -0.1568, -0.0419, -0.1266, -0.1221,  0.0602,
          0.0865, -0.0258]], grad_fn=<AddmmBackward>)


In [39]:
# Reset the gradients of all network parameters before back-propagating
net.zero_grad()
# `out` has shape (1, 10) rather than being a scalar, so backward() is given
# a gradient tensor of the same shape; random values are used here.
out.backward(torch.randn(1, 10))
