# HW 4 - Developing Neural Network Architectures

**Author**: Ewan Lister
Completed 05/08/2023

In this notebook we will use neural networks create fits for numeric data.

## I Reconsider the data from homework one:

    X=np.arange(0,31)
    Y=np.array([30, 35, 33, 32, 34, 37, 39, 38, 36, 36, 37, 39, 42, 45, 45, 41,
    40, 39, 42, 44, 47, 49, 50, 49, 46, 48, 50, 53, 55, 54, 53])

In [1]:
# imports
import torch
import torch.nn as nn
from torchviz import make_dot
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Lasso
from sklearn.decomposition import PCA
from scipy.io import loadmat
from sklearn.datasets import fetch_openml

X = torch.arange(0, 31, dtype=torch.float32).reshape(-1, 1)
Y = torch.tensor([30, 35, 33, 32, 34, 37, 39, 38, 36, 36, 37, 39, 42, 45, 45, 41,
                  40, 39, 42, 44, 47, 49, 50, 49, 46, 48, 50, 53, 55, 54, 53],
                 dtype=torch.float32).reshape(-1, 1)

data = dict(zip(X, Y))

ModuleNotFoundError: No module named 'torch'

### (i) Fit the data to a three layer feed forward neural network.

In [None]:
# Define the neural network architecture
class ThreeLayerNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(1, 20)  # input layer -> hidden layer
        self.fc2 = nn.Linear(20, 10) # hidden layer -> hidden layer
        self.fc3 = nn.Linear(10, 1)  # hidden layer -> output layer
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# initialize network
net = ThreeLayerNet()

# make a forward pass with dummy data
x = torch.randn(1)
output = net(x)

# create a visualization
vis_graph = make_dot(output, params=dict(net.named_parameters()))
vis_graph.view()

# use SGD for fitting
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

# loss function
criterion = nn.MSELoss()

# create training data loader
train_loader = torch.utils.data.DataLoader(dataset=data, batch_size=1, shuffle=True)

# Train the neural network using gradient descent
num_epochs = 15
for epoch in range(num_epochs):
    for i, (x) in enumerate(X):
        optimizer.zero_grad()
        outputs = net(x)
        loss = criterion(outputs, Y[i])
        loss.backward()
        optimizer.step()
        
        if (i + 1) % 31 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, 31, loss.item()))

Epoch [1/15], Step [31/31], Loss: 1379.1917
Epoch [2/15], Step [31/31], Loss: 589.0872
Epoch [3/15], Step [31/31], Loss: 302.5200
Epoch [4/15], Step [31/31], Loss: 188.1364
Epoch [5/15], Step [31/31], Loss: 138.0802
Epoch [6/15], Step [31/31], Loss: 114.4907
Epoch [7/15], Step [31/31], Loss: 102.7859
Epoch [8/15], Step [31/31], Loss: 96.7878
Epoch [9/15], Step [31/31], Loss: 93.6552
Epoch [10/15], Step [31/31], Loss: 92.0017
Epoch [11/15], Step [31/31], Loss: 91.1240
Epoch [12/15], Step [31/31], Loss: 90.6564
Epoch [13/15], Step [31/31], Loss: 90.4069
Epoch [14/15], Step [31/31], Loss: 90.2738
Epoch [15/15], Step [31/31], Loss: 90.2027


### (ii) Using the first 20 data points as training data, fit the neural network. Compute the least-square error for each of these over the training points. Then compute the least square error of these models on the test data which are the remaining 10 data points.

In [None]:
def check_train_test_error(x_train, y_train, x_test, y_test):
    for i, (x) in enumerate(x_train):
        outputs = net(x)
        error = criterion(outputs, y_train[i])
        print('Train error for x = {}, y = {} : {:.4f}'.format(x, y_train[i], error))
    print('\n')
    for i, (x) in enumerate(x_test):
        outputs = net(x)
        error = criterion(outputs, y_test[i])
        print('Test error for x = {}, y = {} : {:.4f}'.format(x, y_test[i], error))

# isolate first 20 data points
x_train = X[0:20]
y_train = Y[0:20]
x_test = X[20:31]
y_test = Y[20:31]

# train network on first 20 data points, examine progress of SGD via print statements
num_epochs = 15
for epoch in range(num_epochs):
    for i, (x) in enumerate(x_train):
        optimizer.zero_grad()
        outputs = net(x)
        loss = criterion(outputs, y_train[i])
        loss.backward()
        optimizer.step()
        
        if (i + 1) % 20 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, 20, loss.item()))

check_train_test_error(x_train, y_train, x_test, y_test)

Epoch [1/15], Step [20/20], Loss: 4.1679
Epoch [2/15], Step [20/20], Loss: 10.2078
Epoch [3/15], Step [20/20], Loss: 15.7213
Epoch [4/15], Step [20/20], Loss: 20.0622
Epoch [5/15], Step [20/20], Loss: 23.2546
Epoch [6/15], Step [20/20], Loss: 25.5169
Epoch [7/15], Step [20/20], Loss: 27.0857
Epoch [8/15], Step [20/20], Loss: 28.1591
Epoch [9/15], Step [20/20], Loss: 28.8874
Epoch [10/15], Step [20/20], Loss: 29.3787
Epoch [11/15], Step [20/20], Loss: 29.7090
Epoch [12/15], Step [20/20], Loss: 29.9306
Epoch [13/15], Step [20/20], Loss: 30.0789
Epoch [14/15], Step [20/20], Loss: 30.1782
Epoch [15/15], Step [20/20], Loss: 30.2446
Train error for x = tensor([0.]), y = tensor([30.]) : 74.1404
Train error for x = tensor([1.]), y = tensor([35.]) : 13.0356
Train error for x = tensor([2.]), y = tensor([33.]) : 31.4775
Train error for x = tensor([3.]), y = tensor([32.]) : 43.6985
Train error for x = tensor([4.]), y = tensor([34.]) : 21.2565
Train error for x = tensor([5.]), y = tensor([37.]) : 2

### (iii) Repeat (iii) but use the first 10 and last 10 data points as training data. Then fit the model to the test data (which are the 10 held out middle data points). Compare these results to (iii)

In [None]:
# isolate first and last 10 training points
x_train = torch.cat([X[0:10], X[20:31]])
y_train = torch.cat([Y[0:10], Y[20:31]])
x_test = X[10:20]
y_test = Y[10:20]

optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

# train network on first and last 10 data points, examine progress of SGD via print statements
num_epochs = 50
for epoch in range(num_epochs):
    for i, (x) in enumerate(x_train):
        optimizer.zero_grad()
        outputs = net(x)
        loss = criterion(outputs, y_train[i])
        loss.backward()
        optimizer.step()
        
        if (i + 1) % 20 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, 20, loss.item()))

check_train_test_error(x_train, y_train, x_test, y_test)

Epoch [1/50], Step [20/20], Loss: 197.9372
Epoch [2/50], Step [20/20], Loss: 163.9305
Epoch [3/50], Step [20/20], Loss: 143.4147
Epoch [4/50], Step [20/20], Loss: 130.7341
Epoch [5/50], Step [20/20], Loss: 122.7551
Epoch [6/50], Step [20/20], Loss: 117.6710
Epoch [7/50], Step [20/20], Loss: 114.4027
Epoch [8/50], Step [20/20], Loss: 112.2894
Epoch [9/50], Step [20/20], Loss: 110.9174
Epoch [10/50], Step [20/20], Loss: 110.0242
Epoch [11/50], Step [20/20], Loss: 109.4417
Epoch [12/50], Step [20/20], Loss: 109.0614
Epoch [13/50], Step [20/20], Loss: 108.8132
Epoch [14/50], Step [20/20], Loss: 108.6509
Epoch [15/50], Step [20/20], Loss: 108.5447
Epoch [16/50], Step [20/20], Loss: 108.4753
Epoch [17/50], Step [20/20], Loss: 108.4300
Epoch [18/50], Step [20/20], Loss: 108.4004
Epoch [19/50], Step [20/20], Loss: 108.3809
Epoch [20/50], Step [20/20], Loss: 108.3682
Epoch [21/50], Step [20/20], Loss: 108.3599
Epoch [22/50], Step [20/20], Loss: 108.3545
Epoch [23/50], Step [20/20], Loss: 108.35

### (iv) Compare the models fit in homework one to the neural networks in (ii) and (iii)

Similarly to the curve fittin in homework 1, the neural network does a poor job of making any extrapolations about its test data if the data is outside of the domain of the training data. For example, the network did well when test data contained the 10 points between point 9 and point 20, but poorly when the test data was that from 20 to 31, which is unbounded by any training data. Thus is performs very similarly to the curve fitting in homework 1. However, the loss, for each value is still much greater in the case of the neural network.


## II Now train a feedforward neural network on the MNIST data set. You will start by performing the following analysis:

### (i) Compute the first 20 PCA modes of the digit images.


In [None]:
# fetch MNIST dataset
mnist = fetch_openml('mnist_784', version=1)

# Convert the data and labels into numpy arrays
data = np.array(mnist['data'])
labels = np.array(mnist['target'])

# apply PCA transformation onto the first 20 modes
pca = PCA(n_components=20)

print(np.shape(data))
data_pca_1 = pca.fit_transform(data)

(70000, 784)


In [None]:
from sklearn.model_selection import train_test_split
print(np.shape(data_pca_1))
print(np.shape(labels))


(70000, 20)
(70000,)



### (ii) Build a feed-forward neural network to classify the digits. Compare the results of the neural network against LSTM, SVM (support vector machines) and decision tree classifiers.

In [None]:
# separate training and test data for use in LSTM, SVM, and DTC classifiers
data_train, data_test, label_train, label_test = train_test_split(data_pca_1, labels, test_size=0.3, random_state=42)

# convert labels to ints
label_train = label_train.astype(np.int16)
label_test = label_test.astype(np.int16)

### testing neural network on MNIST data

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

# Define the hyperparameters
batch_size = 128
learning_rate = 0.001
num_epochs = 10

# Download and prepare the MNIST dataset
train_dataset = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor())

# Create data loaders for the training and testing datasets
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define the model architecture
class FeedforwardNN(nn.Module):
    def __init__(self):
        super(FeedforwardNN, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        
    def forward(self, x):
        x = x.view(-1, 784)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model and optimizer
model = FeedforwardNN()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = nn.CrossEntropyLoss()(output, target)
        loss.backward()
        optimizer.step()
        
        if batch_idx % 100 == 0:
            print('Epoch {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

# Evaluate the model on the test set
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        test_loss += nn.CrossEntropyLoss()(output, target).item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)
print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
    test_loss, correct, len(test_loader.dataset), accuracy))

Test set: Average loss: 0.0006, Accuracy: 9775/10000 (98%)


### testing LSTM on MNIST data

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

# Define the hyperparameters
batch_size = 128
learning_rate = 0.001
num_epochs = 10
hidden_size = 128
num_layers = 2

# Download and prepare the MNIST dataset
train_dataset = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor())

# Create data loaders for the training and testing datasets
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define the LSTM architecture
class LSTM(nn.Module):
    def __init__(self):
        super(LSTM, self).__init__()
        self.lstm = nn.LSTM(input_size=28, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 10)
        
    def forward(self, x):
        h0 = torch.zeros(num_layers, x.size(0), hidden_size).to(x.device)
        c0 = torch.zeros(num_layers, x.size(0), hidden_size).to(x.device)
        out, (h_n, c_n) = self.lstm(x, (h0, c0))
        out = self.fc(h_n[-1])
        return out

# Initialize the model and optimizer
model = LSTM()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        data = data.view(batch_size, 28, 28)
        output = model(data)
        loss = nn.CrossEntropyLoss()(output, target)
        loss.backward()
        optimizer.step()
        
        if batch_idx % 100 == 0:
            print('Epoch {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

# Evaluate the model on the test set
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        data = data.view(data.shape[0], 28, 28)
        output = model(data)
        test_loss += nn.CrossEntropyLoss()(output, target).item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)
print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
    test_loss, correct, len(test_loader.dataset), accuracy))




RuntimeError: shape '[128, 28, 28]' is invalid for input of size 75264

### fitting an SVM classifier

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
# Train a linear classifier
clf = SVC()
clf.fit(data_train, label_train)

# Evaluate the performance on the test set
y_pred = clf.predict(data_test)
acc = accuracy_score(label_test, y_pred)
print(f"Accuracy for SVM: {acc:.2f}")

Accuracy for SVM: 0.97


### fitting a DTC classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Train a DTC classifier
clf = DecisionTreeClassifier()
clf.fit(data_train, label_train)

# Evaluate the performance on the test set
y_pred = clf.predict(data_test)
acc = accuracy_score(label_test, y_pred)
print(f"Accuracy for DTC: {acc:.2f}")

Accuracy for DTC: 0.84
