# Toy example

In this homework exercise we give two toy classification examples. The first implements the classification problem for the Iris dataset explained in the lectures; in the second, students are expected to implement a neural network classifier for the MNIST dataset, labeling handwritten digits.

# Iris dataset

See lecture 1 for details.
We use PyTorch. Additionally, we rely on the following libraries.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

torch.set_default_dtype(torch.float64)

import numpy as np

import matplotlib.pyplot as plt

# for loading the dataset only
from sklearn.datasets import load_iris

from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

We first load the dataset.

In [None]:
# load_iris() returns a dictionary-like container; pull out the pieces we need.
dataset = load_iris()
X, y = dataset['data'], dataset['target']
names, feature_names = dataset['target_names'], dataset['feature_names']

In [None]:
# Display the shape of the data array X.
X.shape

In [None]:
# Display the shape of the target array y.
y.shape

In [None]:
# Display the target (class) names shipped with the dataset.
names

In [None]:
# Display the feature names shipped with the dataset.
feature_names

# Logistic Regression

Split the dataset into disjoint training and test sets. The split is random, with a 4:1 ratio between the training and test sample sizes.

In [None]:
# Shuffle the sample indices once; the first 80% become the training set and
# the remaining 20% the test set, so the two sets are disjoint.
M = len(X)
random_idx_ord = np.random.permutation(M)
train_indices, test_indices = np.split(random_idx_ord, [int(0.8 * M)])

X_train, y_train = X[train_indices], y[train_indices]
X_test, y_test = X[test_indices], y[test_indices]

print('Number of training samples: %d'%len(X_train))
print('Number of test samples: %d'%len(X_test))

Build the logistic regression model and fit it to the training data.

In [None]:
# Logistic regression without a penalty term, fitted with the Newton-CG solver.
model = LogisticRegression(
    penalty=None,
    fit_intercept=True,
    solver='newton-cg',
    max_iter=10000,
    verbose=0,
)
# fit() hands back the fitted estimator, which we keep under the name `clf`.
clf = model.fit(X_train, y_train)

The optimal coefficients

In [None]:
# Fitted coefficients of the logistic regression (scikit-learn's `coef_`
# attribute); the bare name on the last line displays them in the notebook.
beta = clf.coef_
beta

The resulting predictions are as follows.

In [None]:
# Predict the class label of every held-out test sample.
y_pred = clf.predict(X_test)

# Print predictions and ground truth on aligned lines for a visual comparison.
print("Estimated test labels: ", y_pred)
print("True test labels:      ", y_test)

The accuracy over the test sample

In [None]:
# Score of the fitted classifier on the held-out test set
# (for scikit-learn classifiers this is the mean accuracy).
clf.score(X_test, y_test)

# Neural network regressions

At this point, we define a fully-connected, feedforward neural network with $L$ hidden layers, $p_n$ neurons in the $n$-th hidden layer, and a given activation $\varphi: \mathbb{R}\to \mathbb{R}$.

In [None]:
# NumPy arrays default to float64 whereas PyTorch defaults to float32; switch
# PyTorch to float64 so tensors built from NumPy data match the layer weights.
torch.set_default_dtype(torch.float64)

## Shallow neural nets

In [None]:
class ShallowNet(nn.Module):
    """Fully-connected classifier with a single hidden layer, trained by SGD.

    The network computes
    ``output_activation(W2 @ activation(W1 @ x + b1) + b2)``.

    Args:
        input_dimension: number of input features.
        output_dimension: number of classes (size of the output layer).
        num_neurons: width of the hidden layer.
        activation: callable applied element-wise after the hidden layer.
        output_activation: callable applied to the output layer, e.g. a
            softmax turning the class scores into probabilities.

    Attributes:
        optimizer: plain SGD over all parameters (learning rate 0.1).
        loss: cross-entropy loss. It is always evaluated on the raw class
            scores (logits), because ``CrossEntropyLoss`` applies the
            log-softmax itself; feeding it softmax outputs would normalise
            twice and flatten the gradients.
        loss_history: losses recorded by the most recent training call.
    """

    def __init__(self, input_dimension, output_dimension, num_neurons,
                 activation, output_activation):
        super().__init__()

        # the two affine maps: input -> hidden and hidden -> output
        self.hidden_layer = nn.Linear(input_dimension, num_neurons)
        self.output_layer = nn.Linear(num_neurons, output_dimension)

        self.activation = activation
        self.output_activation = output_activation

        self.optimizer = torch.optim.SGD(self.parameters(), lr=0.1)
        self.loss = torch.nn.CrossEntropyLoss()
        self.loss_history = []

    def _logits(self, x):
        """Return the class scores before the output activation."""
        return self.output_layer(self.activation(self.hidden_layer(x)))

    def forward(self, x):
        """Return the network output (output activation applied to the logits)."""
        return self.output_activation(self._logits(x))

    def train_minibatch(self, x_train, y_train, epochs=200, log_freq=10,
                        batch_size=4):
        """Train with mini-batch SGD.

        Args:
            x_train: tensor of training inputs, one sample per row.
            y_train: training targets aligned with ``x_train`` (anything
                ``CrossEntropyLoss`` accepts as a target).
            epochs: number of passes over the training data.
            log_freq: print the loss every ``log_freq`` epochs.
            batch_size: number of samples per gradient step.

        Returns:
            None. The loss of the last mini-batch of every epoch is stored
            in ``self.loss_history``.
        """
        num_samples = x_train.size(0)
        losses = []

        for epoch in range(epochs):
            # Draw a fresh permutation every epoch so that the composition
            # of the mini-batches changes from one pass to the next.
            permutation = torch.randperm(num_samples)

            for start in range(0, num_samples, batch_size):
                indices = permutation[start: start + batch_size]
                batch_x, batch_y = x_train[indices], y_train[indices]

                current_loss = self.loss(self._logits(batch_x), batch_y)

                self.optimizer.zero_grad()
                current_loss.backward()
                self.optimizer.step()

            losses.append(current_loss.item())
            if epoch % log_freq == 0:
                print(f'epoch: {epoch:2}  training loss: {current_loss.item():10.8f}')

        self.loss_history = losses
        return None

    def train(self, x_train=True, y_train=None, epochs=10**5, log_freq=1000):
        """Train with full-batch gradient descent, or switch the module mode.

        This method has the same name as ``nn.Module.train(mode)``, which
        PyTorch itself calls (for example from ``eval()``). To keep those
        calls working, a call without ``y_train`` is forwarded to
        ``nn.Module.train`` with ``x_train`` interpreted as the boolean mode.

        Args:
            x_train: tensor of training inputs, one sample per row (or the
                boolean training mode when ``y_train`` is omitted).
            y_train: training targets aligned with ``x_train``.
            epochs: number of gradient steps.
            log_freq: print the loss every ``log_freq`` steps.

        Returns:
            None after training (the per-step losses are stored in
            ``self.loss_history``); the module itself when used as the
            ``nn.Module.train(mode)`` switch.
        """
        if y_train is None:
            return super().train(x_train)

        losses = []

        for i in range(epochs):
            current_loss = self.loss(self._logits(x_train), y_train)
            losses.append(current_loss.item())
            if i % log_freq == 0:
                print(f'epoch: {i:2}  training loss: {current_loss.item():10.8f}')

            self.optimizer.zero_grad()
            current_loss.backward()
            self.optimizer.step()

        self.loss_history = losses
        return None



For reasons which will become clear in Lecture 3, PyTorch does not "like" NumPy inputs. Therefore, we convert the corresponding datasets to so-called "tensors" and pass those on to the model.

In [None]:
model = ShallowNet(len(feature_names), len(names), 10, torch.relu, torch.nn.Softmax(dim=-1))

# One-hot encode the integer labels: indexing the identity matrix with
# y_train picks, for every sample, the row with a single 1 in the column of
# that sample's class.
y_train_pt = np.eye(len(names))[y_train]

# Convert the NumPy arrays to float64 tensors for PyTorch.
X_train_pt = torch.from_numpy(X_train).to(torch.float64)
y_train_pt = torch.from_numpy(y_train_pt).to(torch.float64)

X_test_pt = torch.from_numpy(X_test).to(torch.float64)


# Sanity check: display the shape of the network output on the training set.
model(X_train_pt).shape

In [None]:
# Full-batch training with the method's default number of epochs and logging frequency.
model.train(X_train_pt, y_train_pt)

In [None]:
# Predicted class = index of the largest network output for each test sample.
# Gradients are not needed for inference, so autograd is switched off.
with torch.no_grad():
    y_pred = np.argmax(model(X_test_pt).numpy(), axis=-1)

num_misclassified = (y_pred != y_test).sum()

print("Estimated test labels: ", y_pred)
print("True test labels:      ", y_test)

print("Test classification accuracy: %.2f"%(1 - num_misclassified / len(y_test)))

## Deep neural networks

In [None]:
class DeepNet(nn.Module):
    """Fully-connected classifier with several hidden layers, trained by SGD.

    Every hidden layer is an affine map followed by ``activation``; the
    output layer is an affine map followed by ``output_activation``.

    Args:
        input_dimension: number of input features.
        output_dimension: number of classes (size of the output layer).
        num_neurons: sequence (list or tuple) with the width of each hidden
            layer, in order.
        activation: callable applied element-wise after every hidden layer.
        output_activation: callable applied to the output layer, e.g. a
            softmax turning the class scores into probabilities.

    Attributes:
        optimizer: plain SGD over all parameters (learning rate 0.01).
        loss: cross-entropy loss. It is always evaluated on the raw class
            scores (logits), because ``CrossEntropyLoss`` applies the
            log-softmax itself; feeding it softmax outputs would normalise
            twice and flatten the gradients.
        loss_history: losses recorded by the most recent training call.
    """

    def __init__(self, input_dimension, output_dimension, num_neurons,
                 activation, output_activation):
        super().__init__()

        # Consecutive entries of `widths` are the (in, out) sizes of the
        # hidden layers; unpacking accepts lists and tuples alike.
        widths = [input_dimension, *num_neurons]

        # The layers must live in an nn.ModuleList (not a plain Python list),
        # otherwise their weights are not registered in self.parameters()
        # and the optimizer would never update them. See also
        # https://discuss.pytorch.org/t/function-class-for-generating-variable-depth-networks/59223
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out)
             for fan_in, fan_out in zip(widths[:-1], widths[1:])]
        )

        self.output_layer = nn.Linear(widths[-1], output_dimension)

        self.activation = activation
        self.output_activation = output_activation

        self.optimizer = torch.optim.SGD(self.parameters(), lr=0.01)
        self.loss = torch.nn.CrossEntropyLoss()
        self.loss_history = []

    def _logits(self, x):
        """Return the class scores before the output activation."""
        for layer in self.hidden_layers:
            x = self.activation(layer(x))
        return self.output_layer(x)

    def forward(self, x):
        """Return the network output (output activation applied to the logits)."""
        return self.output_activation(self._logits(x))

    def train_minibatch(self, x_train, y_train, epochs=200, log_freq=10,
                        batch_size=4):
        """Train with mini-batch SGD.

        Args:
            x_train: tensor of training inputs, one sample per row.
            y_train: training targets aligned with ``x_train`` (anything
                ``CrossEntropyLoss`` accepts as a target).
            epochs: number of passes over the training data.
            log_freq: print the loss every ``log_freq`` epochs.
            batch_size: number of samples per gradient step.

        Returns:
            None. The loss of the last mini-batch of every epoch is stored
            in ``self.loss_history``.
        """
        num_samples = x_train.size(0)
        losses = []

        for epoch in range(epochs):
            # Draw a fresh permutation every epoch so that the composition
            # of the mini-batches changes from one pass to the next.
            permutation = torch.randperm(num_samples)

            for start in range(0, num_samples, batch_size):
                indices = permutation[start: start + batch_size]
                batch_x, batch_y = x_train[indices], y_train[indices]

                current_loss = self.loss(self._logits(batch_x), batch_y)

                self.optimizer.zero_grad()
                current_loss.backward()
                self.optimizer.step()

            losses.append(current_loss.item())
            if epoch % log_freq == 0:
                print(f'epoch: {epoch:2}  training loss: {current_loss.item():10.8f}')

        self.loss_history = losses
        return None

    def train(self, x_train=True, y_train=None, epochs=10**5, log_freq=1000):
        """Train with full-batch gradient descent, or switch the module mode.

        This method has the same name as ``nn.Module.train(mode)``, which
        PyTorch itself calls (for example from ``eval()``). To keep those
        calls working, a call without ``y_train`` is forwarded to
        ``nn.Module.train`` with ``x_train`` interpreted as the boolean mode.

        Args:
            x_train: tensor of training inputs, one sample per row (or the
                boolean training mode when ``y_train`` is omitted).
            y_train: training targets aligned with ``x_train``.
            epochs: number of gradient steps.
            log_freq: print the loss every ``log_freq`` steps.

        Returns:
            None after training (the per-step losses are stored in
            ``self.loss_history``); the module itself when used as the
            ``nn.Module.train(mode)`` switch.
        """
        if y_train is None:
            return super().train(x_train)

        losses = []

        for i in range(epochs):
            current_loss = self.loss(self._logits(x_train), y_train)
            losses.append(current_loss.item())
            if i % log_freq == 0:
                print(f'epoch: {i:2}  training loss: {current_loss.item():10.8f}')

            self.optimizer.zero_grad()
            current_loss.backward()
            self.optimizer.step()

        self.loss_history = losses
        return None

Choose network architecture (width, activation functions)

In [None]:
# Two hidden layers of width 10 with ReLU activations and a softmax output.
deep_model = DeepNet(
    input_dimension=len(feature_names),
    output_dimension=len(names),
    num_neurons=[10, 10],
    activation=torch.relu,
    output_activation=torch.nn.Softmax(dim=-1),
)

# Full-batch training with the method's default settings.
deep_model.train(x_train=X_train_pt, y_train=y_train_pt)

In [None]:
    y_pred = deep_model(X_test_pt).detach().numpy().argmax(axis=-1)

num_misclassified = np.sum(y_pred != y_test)

print("Estimated test labels: ", y_pred)
print("True test labels:      ", y_test)

print("Test classification accuracy: %.2f"%(1 - num_misclassified / len(y_test)))

# Comparison

In [None]:
# Test-set predictions of all three models, collected for the comparison.
y_pred_logit = clf.predict(X_test)
with torch.no_grad():  # inference only, no autograd graph needed
    y_pred_shallow = np.argmax(model(X_test_pt).numpy(), axis=-1)
    y_pred_deep = np.argmax(deep_model(X_test_pt).numpy(), axis=-1)

In [None]:
# One square count matrix per model, with one row and one column per class,
# initialised to zero.
confusion_logit, confusion_shallow, confusion_deep = (
    np.zeros((len(names), len(names))) for _ in range(3)
)

In [None]:
# Fill the confusion matrices: entry [i, j] counts the test samples whose true
# class is i and whose predicted class is j. The class count is taken from
# `names` (rather than a hard-coded 3) so it always matches the matrix size.
for true_class in range(len(names)):
    is_true_class = y_test == true_class  # mask shared by all three models
    for predicted_class in range(len(names)):
        confusion_logit[true_class, predicted_class] = np.sum(y_pred_logit[is_true_class] == predicted_class)
        confusion_shallow[true_class, predicted_class] = np.sum(y_pred_shallow[is_true_class] == predicted_class)
        confusion_deep[true_class, predicted_class] = np.sum(y_pred_deep[is_true_class] == predicted_class)



In [None]:
# Confusion matrix of the logistic regression (rows: true class, columns: predicted class).
confusion_logit

In [None]:
# Confusion matrix of the shallow network (rows: true class, columns: predicted class).
confusion_shallow

In [None]:
# Confusion matrix of the deep network (rows: true class, columns: predicted class).
confusion_deep

And the corresponding accuracies

In [None]:
# The diagonal of a confusion matrix holds the correctly classified samples,
# so accuracy = trace / number of test samples.
acc_logit = np.trace(confusion_logit) / len(y_test)
acc_shallow = np.trace(confusion_shallow) / len(y_test)
acc_deep = np.trace(confusion_deep) / len(y_test)


print('Accuracy logistic regression: %.2f'%acc_logit)
print('Accuracy shallow neural network: %.2f'%acc_shallow)
print('Accuracy deep neural network: %.2f'%acc_deep)