## 1. Import Libraries

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
import numpy as np

In [None]:
# Assignment marks and their binary labels. In both splits below, every mark
# of 75 or more carries label 1 and every mark under 75 carries label 0.

# Training split: 20 marks with matching labels
train_samples_np = np.array([
    78, 100, 52, 89, 92, 87, 65, 40, 78, 82,
    64, 78, 98, 86, 72, 81, 94, 92, 51, 71,
])
train_labels_np = np.array([
    1, 1, 0, 1, 1, 1, 0, 0, 1, 1,
    0, 1, 1, 1, 0, 1, 1, 1, 0, 0,
])

# Testing split: 20 marks with matching labels
test_samples_np = np.array([
    75, 68, 99, 82, 71, 70, 68, 84, 87, 72,
    61, 92, 93, 54, 63, 45, 74, 76, 83, 91,
])
test_labels_np = np.array([
    1, 0, 1, 1, 0, 0, 0, 1, 1, 0,
    0, 1, 1, 0, 0, 0, 0, 1, 1, 1,
])

In [None]:
# # print(train_samples_np)
# for grade in train_samples_np:
#     if grade >= 75:
#         print(f'{grade}:1')
#     else:
#         print(f'{grade}:0')
        
#     # print(grade)

## 2. Define the Data loader

In [None]:
class SimpleDataset(Dataset):
    """
    A simple in-memory dataset for the assignment marks example.

    Args:
        samples: Array-like of input features; converted to a float32 tensor.
        labels: Array-like of integer class labels; converted to a long
            tensor. Must have the same length as ``samples``.

    Raises:
        ValueError: If ``samples`` and ``labels`` differ in length.
    """
    def __init__(self, samples, labels):
        # torch.tensor copies its input, so the dataset does not alias the
        # caller's arrays.
        self.samples = torch.tensor(samples, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

        # Fail early: a mismatch would otherwise surface as an IndexError in
        # the middle of an epoch, or silently ignore the surplus labels.
        if len(self.samples) != len(self.labels):
            raise ValueError(
                f"samples and labels must have the same length, "
                f"got {len(self.samples)} and {len(self.labels)}"
            )

        # Store the number of samples in self.n_samples
        self.n_samples = len(self.samples)

    def __len__(self):
        """Return the total number of samples."""
        return self.n_samples

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at ``index``."""
        return self.samples[index], self.labels[index]


# loader = SimpleDataset(train_samples_np, train_labels_np)
# print(loader.n_samples)


In [None]:
# Rescale the samples to have a mean of 0 and a variance of 1.
# reshape(-1, 1) turns the 1-D array of marks into a single-feature column,
# which is the 2-D (n_samples, n_features) layout StandardScaler expects.
scaler = StandardScaler()  # Initialize the scaler

# Learn the mean/variance from the training set only, then apply them.
train_samples_scaled = scaler.fit_transform(train_samples_np.reshape(-1, 1))

# Reuse the training statistics for the test set. Calling fit_transform here
# would re-fit the scaler on the test data, so the two splits would be scaled
# inconsistently and test-set statistics would leak into preprocessing.
test_samples_scaled = scaler.transform(test_samples_np.reshape(-1, 1))

# Create PyTorch Datasets from the scaled samples and their labels
train_dataset = SimpleDataset(train_samples_scaled, train_labels_np)
test_dataset = SimpleDataset(test_samples_scaled, test_labels_np)

# Create DataLoaders: training batches are reshuffled on every pass, while the
# test order is kept fixed so predictions line up with test_labels_np.
train_loader = DataLoader(train_dataset, batch_size=6, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=5, shuffle=False)

## 4. Define the PyTorch Model

In [None]:
class SimpleNN(nn.Module):
    """
    A one-hidden-layer classifier: Linear -> Sigmoid -> Linear.

    ``forward`` returns raw, unnormalised class scores (logits). That is the
    input ``nn.CrossEntropyLoss`` expects, because the loss applies
    log-softmax internally; feeding it softmax outputs would normalise twice
    and flatten the gradients. Use ``predict_proba`` when probabilities are
    needed. The predicted class (argmax) is the same for logits and
    probabilities.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of output classes.
    """
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden = nn.Linear(input_size, hidden_size)   # input -> hidden
        self.sigmoid = nn.Sigmoid()                        # hidden activation
        self.output = nn.Linear(hidden_size, output_size)  # hidden -> class scores
        # Kept as an attribute for predict_proba; normalises across classes.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the logits for ``x``, one row per sample."""
        x = self.hidden(x)   # Pass x through the hidden layer
        x = self.sigmoid(x)  # Apply the sigmoid activation
        x = self.output(x)   # Pass through the output layer
        return x

    def predict_proba(self, x):
        """Return class probabilities for ``x`` (each row sums to 1)."""
        return self.softmax(self.forward(x))

# Build the network: one input feature per sample, four hidden units,
# and one output per class (two classes).
input_size, hidden_size, output_size = 1, 4, 2
model = SimpleNN(input_size, hidden_size, output_size)


## 5. Define Loss Function and Optimizer

In [None]:
# Adam updates every parameter of the model with a step size of 1e-3
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# Cross-entropy loss over the class scores, for integer class labels
criterion = nn.CrossEntropyLoss()

## 6. Train the Model

In [None]:
num_epochs = 1000  # Number of full passes over the training set
log_every = 50     # Report the loss once every this many epochs

# Explicitly select training mode: if this cell is re-run after the
# evaluation cell, the model would otherwise still be in eval mode.
model.train()

for epoch in range(num_epochs):
    # Accumulate a sample-weighted loss so the reported value reflects the
    # whole epoch rather than only the last (and possibly smallest) batch.
    running_loss = 0.0
    samples_seen = 0

    for inputs, labels in train_loader:
        # Zero the gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass through the model to get outputs
        outputs = model(inputs)

        # Compute the loss using criterion
        loss = criterion(outputs, labels)

        # Backward pass, then update the parameters
        loss.backward()
        optimizer.step()

        # Weight each batch's loss by its size; the final batch can be
        # smaller than the others.
        batch_count = labels.size(0)
        running_loss += loss.item() * batch_count
        samples_seen += batch_count

    if (epoch + 1) % log_every == 0:
        # Print epoch number and the loss accumulated over this epoch
        # (max(..., 1) guards against an empty loader).
        epoch_loss = running_loss / max(samples_seen, 1)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

print("Finished Training")

## 7. Evaluate the Model

In [None]:
# Set the model to evaluation mode
model.eval()
all_predicted_labels = []
all_test_labels = []

# Gradients are not needed for inference, so skip building the autograd graph
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)  # Perform a forward pass to get outputs

        # Predicted class = index of the largest output in each row
        predicted = outputs.argmax(dim=1)
        all_predicted_labels.extend(predicted.numpy())
        all_test_labels.extend(labels.numpy())

predicted_labels_np = np.array(all_predicted_labels)  # Convert all_predicted_labels to a NumPy array
# Rebuilt from the batches served by test_loader, so the labels are in
# exactly the same order as the predictions.
test_labels_np = np.array(all_test_labels)

# Print predicted and true labels
print("Predicted labels on testing set:", predicted_labels_np)
print("True labels on testing set:", test_labels_np)

# Compute prediction error as the percentage of misclassified samples.
# Counting mismatches (rather than summing |predicted - true|) stays correct
# for any number of classes, not only for 0/1 labels.
prediction_error_test = np.mean(predicted_labels_np != test_labels_np) * 100
print("Prediction error on testing set:", prediction_error_test)