In [29]:
import numpy as np
import pandas as pd

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))

In [30]:
train_data = pd.read_csv('bank-note/train.csv', header=None)
test_data = pd.read_csv('bank-note/test.csv', header=None)

X_train = train_data.iloc[:, :-1].values
y_train = train_data.iloc[:, -1].values
X_test = test_data.iloc[:, :-1].values
y_test = test_data.iloc[:, -1].values

class NeuralNetwork:
    def __init__(self, layer_sizes):
        self.layer_sizes = layer_sizes
        self.weights = [np.random.randn(self.layer_sizes[i], self.layer_sizes[i-1]) for i in range(1, len(self.layer_sizes))]
        self.biases = [np.random.randn(y, 1) for y in self.layer_sizes[1:]]

    def forward_pass(self, x):
        activations = [x]
        for w, b in zip(self.weights, self.biases):
            z = np.dot(w, activations[-1]) + b
            if w is not self.weights[-1]:
                activations.append(sigmoid(z))
            else:
                activations.append(z)
        return activations

    def backpropagation(self, x, y):
        activations = self.forward_pass(x)
        weight_gradients = [np.zeros(w.shape) for w in self.weights]
        bias_gradients = [np.zeros(b.shape) for b in self.biases]

        delta = activations[-1] - y 

        weight_gradients[-1] = np.dot(delta, activations[-2].T)
        bias_gradients[-1] = delta

        for l in range(2, len(self.layer_sizes)):
            z = np.dot(self.weights[-l+1].T, delta)
            delta = z * sigmoid_derivative(activations[-l])
            weight_gradients[-l] = np.dot(delta, activations[-l-1].T)
            bias_gradients[-l] = delta

        return weight_gradients, bias_gradients

    def train(self, X, y, epochs, gamma_0, d):
        for epoch in range(epochs):
            permutation = np.random.permutation(X.shape[0])
            X_shuffled = X[permutation]
            y_shuffled = y[permutation]

            for i, (x, y_true) in enumerate(zip(X_shuffled, y_shuffled)):
                x = x.reshape(-1, 1)
                y_true = np.array([[y_true]])

                gamma_t = gamma_0 / (1 + (gamma_0 / d) * i)

                weight_gradients, bias_gradients = self.backpropagation(x, y_true)
                self.weights = [w - gamma_t * dw for w, dw in zip(self.weights, weight_gradients)]
                self.biases = [b - gamma_t * db for b, db in zip(self.biases, bias_gradients)]

    def predict(self, X):
        predictions = []
        for x in X:
            x = x.reshape(-1, 1)
            activations = self.forward_pass(x)
            predictions.append(activations[-1][0, 0] > 0.5)
        return np.array(predictions)

    def evaluate(self, X, y):
        predictions = self.predict(X)
        accuracy = np.mean(predictions == y)
        return accuracy
    
    #initialize weights to 0
    def zero_init(self):
        self.weights = [np.zeros((self.layer_sizes[i], self.layer_sizes[i-1])) for i in range(1, len(self.layer_sizes))]
        self.biases = [np.zeros((y, 1)) for y in self.layer_sizes[1:]]
        return self.weights, self.biases
    
print("Question 2A: Backpropagation")
nn = NeuralNetwork([4, 10, 10, 1])
x_sample = X_train[0].reshape(-1, 1)
y_sample = np.array([[y_train[0]]])
weight_gradients, bias_gradients = nn.backpropagation(x_sample, y_sample)
print("Backpropagation Gradients Example:")
print("Weight Gradients:", weight_gradients)
print("Bias Gradients:", bias_gradients)

Question 2A: Backpropagation
Backpropagation Gradients Example:
Weight Gradients: [array([[-3.51210427, -9.26731517,  3.51940575,  3.85408744],
       [-3.62988658, -9.57810486,  3.63743292,  3.98333854],
       [-0.53829502, -1.42038766,  0.5394141 ,  0.59071027],
       [-1.73313934, -4.5731981 ,  1.73674245,  1.90189985],
       [-0.83426319, -2.20135262,  0.83599758,  0.91549767],
       [ 2.49257621,  6.57710806, -2.49775814, -2.73528515],
       [ 0.77190929,  2.03682071, -0.77351405, -0.84707221],
       [ 0.05224832,  0.13786655, -0.05235695, -0.05733589],
       [-0.08384749, -0.2212466 ,  0.08402181,  0.09201195],
       [ 3.30475842,  8.72019608, -3.31162884, -3.62655177]]), array([[ 5.01193811e-04,  8.02345365e-08,  1.62653771e+00,
         1.62643781e+00,  1.60079468e+00,  6.97652709e-05,
         1.61181089e+00,  1.62590287e+00,  1.62653771e+00,
         8.62127730e-02],
       [-3.73820470e-04, -5.98437400e-08, -1.21316959e+00,
        -1.21309508e+00, -1.19396889e+00, -

In [31]:
print("Question 2B: Stochastic Gradient Descent")
widths = [5, 10, 25, 50, 100]
for width in widths:
    nn = NeuralNetwork([X_train.shape[1], width, width, 1])
    nn.train(X_train, y_train, epochs=100, gamma_0=0.01, d=0.01)
    training_accuracy = nn.evaluate(X_train, y_train)
    test_accuracy = nn.evaluate(X_test, y_test)
    print(f"Width: {width}, Training Accuracy: {training_accuracy}, Test Accuracy: {test_accuracy}")

Question 2B: Stochastic Gradient Descent
Width: 5, Training Accuracy: 0.8692660550458715, Test Accuracy: 0.864
Width: 10, Training Accuracy: 0.8646788990825688, Test Accuracy: 0.854
Width: 25, Training Accuracy: 0.9094036697247706, Test Accuracy: 0.894
Width: 50, Training Accuracy: 0.8107798165137615, Test Accuracy: 0.818
Width: 100, Training Accuracy: 0.698394495412844, Test Accuracy: 0.678


In [27]:
widths = [5, 10, 25, 50, 100]
for width in widths:
    nn_zero_init = NeuralNetwork([X_train.shape[1], width, width, 1])
    nn_zero_init.train(X_train, y_train, epochs=100, gamma_0=0.01, d=0.01)  
    training_accuracy_zero_init = nn_zero_init.evaluate(X_train, y_train)
    test_accuracy_zero_init = nn_zero_init.evaluate(X_test, y_test)
    print(f"Width: {width}, Zero-Initialized Weights, Training Accuracy: {training_accuracy_zero_init}, Test Accuracy: {test_accuracy_zero_init}")

Width: 5, Zero-Initialized Weights, Training Accuracy: 0.9094036697247706, Test Accuracy: 0.894
Width: 10, Zero-Initialized Weights, Training Accuracy: 0.9174311926605505, Test Accuracy: 0.92
Width: 25, Zero-Initialized Weights, Training Accuracy: 0.838302752293578, Test Accuracy: 0.828
Width: 50, Zero-Initialized Weights, Training Accuracy: 0.8635321100917431, Test Accuracy: 0.878
Width: 100, Zero-Initialized Weights, Training Accuracy: 0.768348623853211, Test Accuracy: 0.744


In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd


def create_dataloader(csv_file, batch_size=32):
    data = pd.read_csv(csv_file)
    features = torch.tensor(data.iloc[:, :-1].values, dtype=torch.float32)
    targets = torch.tensor(data.iloc[:, -1].values, dtype=torch.int64)
    dataset = TensorDataset(features, targets)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)


train_loader = create_dataloader('bank-note/train.csv')
test_loader = create_dataloader('bank-note/test.csv')


class PTNeuralNetwork(nn.Module):
    def __init__(self, input_size, output_size, hidden_layers, activation_fn):
        super(PTNeuralNetwork, self).__init__()
        self.layers = nn.ModuleList()
        for i in range(len(hidden_layers)):
            if i == 0:
                self.layers.append(nn.Linear(input_size, hidden_layers[i]))
            else:
                self.layers.append(nn.Linear(hidden_layers[i - 1], hidden_layers[i]))
        self.layers.append(nn.Linear(hidden_layers[-1], output_size))
        self.activation_fn = activation_fn

    def forward(self, x):
        for i, layer in enumerate(self.layers):
            x = layer(x)
            if i < len(self.layers) - 1:
                x = self.activation_fn(x)
        return x


def evaluate_accuracy(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in data_loader:
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    return 100 * correct / total


def train_and_evaluate(depths, widths, activation_fns, init_methods, train_loader, test_loader):
    input_size = 4
    output_size = 2
    num_epochs = 20

    for depth in depths:
        for width in widths:
            for activation_fn, init_method in zip(activation_fns, init_methods):
                hidden_layers = [width] * depth
                model = PTNeuralNetwork(input_size, output_size, hidden_layers, activation_fn)
                for layer in model.layers:
                    if isinstance(layer, nn.Linear):
                        init_method(layer.weight)
                criterion = nn.CrossEntropyLoss()
                optimizer = optim.Adam(model.parameters(), lr=1e-3)

                for epoch in range(num_epochs):
                    model.train()
                    for data, target in train_loader:
                        optimizer.zero_grad()
                        output = model(data)
                        loss = criterion(output, target)
                        loss.backward()
                        optimizer.step()

                train_accuracy = evaluate_accuracy(model, train_loader)
                test_accuracy = evaluate_accuracy(model, test_loader)
                print(f"Depth: {depth}, Width: {width}, Activation: {activation_fn.__name__}, "
                      f"Train Accuracy: {train_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%")


activation_fns = [torch.tanh, torch.relu]
init_methods = [nn.init.xavier_normal_, nn.init.kaiming_normal_]

train_and_evaluate([3, 5, 9], [5, 10, 25, 50, 100], activation_fns, init_methods, train_loader, test_loader)

Depth: 3, Width: 5, Activation: tanh, Train Accuracy: 99.54%, Test Accuracy: 99.40%
Depth: 3, Width: 5, Activation: relu, Train Accuracy: 99.31%, Test Accuracy: 99.20%
Depth: 3, Width: 10, Activation: tanh, Train Accuracy: 100.00%, Test Accuracy: 100.00%
Depth: 3, Width: 10, Activation: relu, Train Accuracy: 99.77%, Test Accuracy: 100.00%
Depth: 3, Width: 25, Activation: tanh, Train Accuracy: 100.00%, Test Accuracy: 100.00%
Depth: 3, Width: 25, Activation: relu, Train Accuracy: 100.00%, Test Accuracy: 100.00%
Depth: 3, Width: 50, Activation: tanh, Train Accuracy: 100.00%, Test Accuracy: 100.00%
Depth: 3, Width: 50, Activation: relu, Train Accuracy: 100.00%, Test Accuracy: 100.00%
Depth: 3, Width: 100, Activation: tanh, Train Accuracy: 100.00%, Test Accuracy: 100.00%
Depth: 3, Width: 100, Activation: relu, Train Accuracy: 100.00%, Test Accuracy: 100.00%
Depth: 5, Width: 5, Activation: tanh, Train Accuracy: 99.89%, Test Accuracy: 99.60%
Depth: 5, Width: 5, Activation: relu, Train Accurac