In [137]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from sklearn.decomposition import PCA
import numpy as np

## Problem 1

In [138]:
# Initializing data
X = np.arange(0,31)
Y = np.array([30, 35, 33, 32, 34, 37, 39, 38, 36, 36, 37, 39, 42, 45, 45, 41, 40, 39, 42, 44, 47, 49, 50, 49, 46, 48, 50, 53, 55, 54, 53])

### (i) Fit the data to a three layer feed forward neural network

In [139]:
# Creating the neural network class
class ThreeLayerNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(ThreeLayerNN, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, hidden_size)
        self.layer3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.relu(self.layer1(x))
        x = torch.relu(self.layer2(x))
        x = self.layer3(x)
        return x

def train_network(model, X_train, Y_train, epochs, learning_rate):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        inputs = Variable(X_train)
        targets = Variable(Y_train)

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        if (epoch+1) % 100 == 0:
            print(f'Epoch: {epoch+1}, Loss: {loss.item()}')

def evaluate_least_squares_loss(model, X_train, Y_train, X_test, Y_test):
    criterion = nn.MSELoss()

    with torch.no_grad():
        train_outputs = model(X_train)
        train_mse = criterion(train_outputs, Y_train)
        test_outputs = model(X_test)
        test_mse = criterion(test_outputs, Y_test)

    train_least_squares_error = torch.sqrt(train_mse).item()
    test_least_squares_error = torch.sqrt(test_mse).item()

    return train_least_squares_error, test_least_squares_error

def compute_accuracy(y_true, y_pred):
    mae = torch.mean(torch.abs(y_true - y_pred))
    return 1 - mae.item()


### (ii) Using the first 20 data points as training data, fit the neural network. Compute the least-square error for each of these over the training points. Then compute the least square error of these models on the test data which are the remaining 10 data points.

In [140]:
# Normalizing the data
X = X.reshape(-1, 1)
Y = Y.reshape(-1, 1)

X = X / 31
Y = Y / 55

# Creating the training and test sets
X_train_extrap = X[:20]
Y_train_extrap = Y[:20]
X_test_extrap = X[20:]
Y_test_extrap = Y[20:]

# Converting the data to pytorch tensors
X_train_tensor_extrap = torch.FloatTensor(X_train_extrap)
Y_train_tensor_extrap = torch.FloatTensor(Y_train_extrap)
X_test_tensor_extrap = torch.FloatTensor(X_test_extrap)
Y_test_tensor_extrap = torch.FloatTensor(Y_test_extrap)

In [141]:
# Training the neural network
input_size = 1
hidden_size = 64
output_size = 1
learning_rate = 0.01
epochs = 1000

model = ThreeLayerNN(input_size, hidden_size, output_size)

train_network(model, X_train_tensor_extrap, Y_train_tensor_extrap, epochs, learning_rate)

Epoch: 100, Loss: 0.0016041655326262116
Epoch: 200, Loss: 0.0014134616358205676
Epoch: 300, Loss: 0.0013217905070632696
Epoch: 400, Loss: 0.001248661894351244
Epoch: 500, Loss: 0.0011748403776437044
Epoch: 600, Loss: 0.0010809521190822124
Epoch: 700, Loss: 0.0009588543325662613
Epoch: 800, Loss: 0.0008379902574233711
Epoch: 900, Loss: 0.0007454089354723692
Epoch: 1000, Loss: 0.0006987500237300992


In [142]:
# Evaluate the loss and accuracy
train_loss, test_loss = evaluate_least_squares_loss(model, X_train_tensor_extrap, Y_train_tensor_extrap, X_test_tensor_extrap, Y_test_tensor_extrap)
train_accuracy = compute_accuracy(Y_train_tensor_extrap, model(X_train_tensor_extrap))
test_accuracy = compute_accuracy(Y_test_tensor_extrap, model(X_test_tensor_extrap))

print(f'Training Loss (Extrapolation): {round(train_loss, 4)}')
print(f'Test Loss (Extrapolation): {round(test_loss, 4)}')
print(f'Training Accuracy (Extrapolation): {round(train_accuracy * 100, 4)}%')
print(f'Test Accuracy (Extrapolation): {round(test_accuracy * 100, 4)}%')

Training Loss (Extrapolation): 0.0263
Test Loss (Extrapolation): 0.1827
Training Accuracy (Extrapolation): 97.9064%
Test Accuracy (Extrapolation): 82.5943%


### (iii) Repeat (ii) but use the first 10 and last 10 data points as training data. Then fit the model to the test data (which are the 10 held out middle data points). Compare these results to (ii)

In [143]:
# Creating the training and test sets
X_train_interp = np.concatenate((X[:10], X[-10:]))
Y_train_interp = np.concatenate((Y[:10], Y[-10:]))
X_test_interp = X[10:20]
Y_test_interp = Y[10:20]

# Converting the data to pytorch tensors
X_train_tensor_interp = torch.FloatTensor(X_train_interp)
Y_train_tensor_interp = torch.FloatTensor(Y_train_interp)
X_test_tensor_interp = torch.FloatTensor(X_test_interp)
Y_test_tensor_interp = torch.FloatTensor(Y_test_interp)

In [144]:
# Training the neural network for interpolation
model_interp = ThreeLayerNN(input_size, hidden_size, output_size)

train_network(model_interp, X_train_tensor_interp, Y_train_tensor_interp, epochs, learning_rate)

Epoch: 100, Loss: 0.0011257014703005552
Epoch: 200, Loss: 0.001104111666791141
Epoch: 300, Loss: 0.0010807627113536
Epoch: 400, Loss: 0.0010470146080479026
Epoch: 500, Loss: 0.0010048819240182638
Epoch: 600, Loss: 0.0009732529288157821
Epoch: 700, Loss: 0.0009528251248411834
Epoch: 800, Loss: 0.0009335579234175384
Epoch: 900, Loss: 0.0009110727114602923
Epoch: 1000, Loss: 0.0008885601419024169


In [145]:
# Evaluate the loss and accuracy
train_loss_interp, test_loss_interp = evaluate_least_squares_loss(model_interp, X_train_tensor_interp, Y_train_tensor_interp, X_test_tensor_interp, Y_test_tensor_interp)
train_accuracy_interp = compute_accuracy(Y_train_tensor_interp, model_interp(X_train_tensor_interp))
test_accuracy_interp = compute_accuracy(Y_test_interp_tensor, model_interp(X_test_tensor_interp))

print(f'Training Loss (Interpolation): {round(train_loss_interp, 4)}')
print(f'Test Loss (Interpolation): {round(test_loss_interp, 4)}')
print(f'Training Accuracy (Interpolation): {round(train_accuracy_interp * 100, 4)}%')
print(f'Test Accuracy (Interpolation): {round(test_accuracy_interp * 100, 4)}%')

Training Loss (Interpolation): 0.0298
Test Loss (Interpolation): 0.0553
Training Accuracy (Interpolation): 97.3836%
Test Accuracy (Interpolation): 95.0551%


### (iv) Compare the models fit in homework one to the neural networks in (ii) and (iii)

Comparison can be found in the homework4 report

## Problem 2

In [146]:
# Creating the neural network

class FFNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(FFNN, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.relu(self.layer1(x))
        x = self.layer2(x)
        return x

def accuracy(y_pred, y_true):
    _, predicted = torch.max(y_pred, 1)
    total = y_true.size(0)
    correct = (predicted == y_true).sum().item()
    return correct / total

def train_network(model, train_loader, epochs, learning_rate):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        for i, (inputs, targets) in enumerate(train_loader):
            inputs = Variable(inputs)
            targets = Variable(targets)

            optimizer.zero_grad()
            outputs = model(inputs)

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

        if (epoch+1) % 10 == 0:
            print(f'Epoch: {epoch+1}, Loss: {loss.item()}')

def evaluate_loss_accuracy(model, data_loader):
    criterion = nn.CrossEntropyLoss()
    correct = 0
    total = 0
    total_loss = 0

    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs = Variable(inputs)
            targets = Variable(targets)

            outputs = model(inputs)
            loss = criterion(outputs, targets)

            total_loss += loss.item()
            total += targets.size(0)
            correct += (torch.max(outputs, 1)[1] == targets).sum().item()

    return total_loss / len(data_loader), correct / total

In [147]:
# Loading the MNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False)

X_train = train_dataset.data.numpy()
X_test = test_dataset.data.numpy()

# Flatten the images
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

### (i) Compute the first 20 PCA modes of the digit images

In [148]:
# Compute the first 20 PCA modes
pca = PCA(n_components=20)
pca.fit(X_train_flat)
X_train_pca = pca.transform(X_train_flat)
X_test_pca = pca.transform(X_test_flat)

### (ii) Classify the digits with the neural network

In [149]:
# Convert the PCA data to tensors
X_train_pca_tensor = torch.FloatTensor(X_train_pca)
X_test_pca_tensor = torch.FloatTensor(X_test_pca)
Y_train_tensor = torch.LongTensor(train_dataset.targets)
Y_test_tensor = torch.LongTensor(test_dataset.targets)

train_pca_dataset = torch.utils.data.TensorDataset(X_train_pca_tensor, Y_train_tensor)
test_pca_dataset = torch.utils.data.TensorDataset(X_test_pca_tensor, Y_test_tensor)
train_pca_loader = torch.utils.data.DataLoader(train_pca_dataset, batch_size=100, shuffle=True)
test_pca_loader = torch.utils.data.DataLoader(test_pca_dataset, batch_size=100, shuffle=False)

input_size = 20
hidden_size = 128
output_size = 10
learning_rate = 0.001
epochs = 50

# Create and train the model
model = FFNN(input_size, hidden_size, output_size)

train_network(model, train_pca_loader, epochs, learning_rate)

Epoch: 10, Loss: 0.264059454202652
Epoch: 20, Loss: 0.8404315114021301
Epoch: 30, Loss: 0.23342067003250122
Epoch: 40, Loss: 0.10142076760530472
Epoch: 50, Loss: 0.08207272738218307


In [152]:
# Evaluate the loss and accuracy
train_loss, train_accuracy = evaluate_loss_accuracy(model, train_pca_loader)
test_loss, test_accuracy = evaluate_loss_accuracy(model, test_pca_loader)

print(f'Training Loss: {round(train_loss, 4)}')
print(f'Test Loss: {round(test_loss, 4)}')
print(f'Training Accuracy: {round(train_accuracy * 100, 4)}%')
print(f'Test Accuracy: {round(test_accuracy * 100, 4)}%')

Training Loss: 0.1844
Test Loss: 0.2654
Training Accuracy: 96.775%
Test Accuracy: 96.27%
