# Introduction to Neural Networks: Perceptron and Multi-Layer Perceptron

In this tutorial, we will learn how to use Perceptron and Multi-Layer Perceptron (MLP) neural networks to solve simple classification tasks. We will walk through the necessary steps to implement and train these networks using PyTorch.

## Index
1. Installs, Packages, and Auxiliary Functions
2. Preparing for the Training
    - Training Parameters
    - Define the Task (Sample Dataset)
    - Define the Network
    - Define the Algorithm to Train the Network
    - Save Config
3. Supervised Training of the Network
4. Run the Trained Network (and Save the Behavioral Data)
5. Network Analysis
    - Behavioral Analysis
    - General Neural Analysis
    - Stimulus and Choice Decoding from Network Activity

## 1. Installs, Packages, and Auxiliary Functions

In [None]:
# Install necessary packages
!pip install torch numpy matplotlib

In [None]:
# Import required packages
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt

## 2. Preparing for the Training

### a. Training Parameters
First, we will set up the parameters that will be used throughout the training process, such as the learning rate, the number of epochs, and the sizes of the input, hidden, and output layers.

In [None]:
# Training parameters shared by the cells below.
learning_rate = 0.01  # step size passed to the SGD optimizers
num_epochs = 1000  # number of training epochs recorded in the config
hidden_size = 2  # number of hidden units in the MLP
input_size = 2  # number of input features per sample (both datasets are 2-D)
output_size = 1  # a single output logit for binary classification

### b. Define the Task (Sample Dataset)
We will define two tasks: a linearly separable task and the XOR task. These tasks will be used to train and evaluate our Perceptron and MLP models.

In [None]:
def generate_linear_data(N=100, D=2):
    """Generate a two-class dataset made of two shifted Gaussian clusters.

    Samples are drawn from a standard normal distribution. The first
    ``N // 2`` samples are shifted by +1 in every dimension and labelled 0;
    the remaining ``N - N // 2`` samples are shifted by -1 and labelled 1.

    Args:
        N: Total number of samples.
        D: Number of features per sample.

    Returns:
        A tuple ``(X, Y)`` of float32 tensors with shapes ``(N, D)`` and
        ``(N,)``.
    """
    n_first = N // 2
    # Size the second class from the remainder so that an odd N still
    # yields exactly one label per row of X.
    n_second = N - n_first
    X = np.random.randn(N, D)
    X[:n_first, :] += 1
    X[n_first:, :] -= 1
    Y = np.concatenate((np.zeros(n_first), np.ones(n_second)))
    X = torch.tensor(X, dtype=torch.float32)
    Y = torch.tensor(Y, dtype=torch.float32)
    return X, Y

def generate_xor_data():
    """Return the four-point XOR dataset as float32 tensors.

    Returns:
        A tuple ``(X, Y)`` where ``X`` has shape ``(4, 2)`` and holds every
        combination of two binary inputs, and ``Y`` has shape ``(4,)`` and
        holds the XOR of each input pair.
    """
    input_pairs = [[0, 0], [0, 1], [1, 0], [1, 1]]
    xor_labels = [first ^ second for first, second in input_pairs]
    features = np.asarray(input_pairs, dtype=np.float32)
    targets = np.asarray(xor_labels, dtype=np.float32)
    return torch.tensor(features), torch.tensor(targets)

### c. Define the Network
Next, we define our Perceptron and MLP models.

In [None]:
class Perceptron(nn.Module):
    """Single affine layer that maps inputs to raw output scores (logits).

    Attributes are the weight matrix ``W`` of shape
    ``(output_size, input_size)``, initialised with small Gaussian noise,
    and the bias vector ``b`` of shape ``(output_size,)``, initialised to
    zero.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        initial_weight = 0.01 * torch.randn(output_size, input_size)
        initial_bias = torch.zeros(output_size)
        self.W = nn.Parameter(initial_weight)
        self.b = nn.Parameter(initial_bias)

    def forward(self, x):
        """Compute ``x @ W^T + b``; no activation is applied."""
        logits = x @ self.W.t() + self.b
        return logits

class MLP(nn.Module):
    """One-hidden-layer network with a ReLU hidden activation.

    ``W_ih``/``b_ih`` map the input to the hidden layer and
    ``W_ho``/``b_ho`` map the hidden layer to the output. Weights start as
    small Gaussian noise and biases start at zero. The output is returned
    as raw scores (no output activation).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Input -> hidden parameters.
        input_to_hidden = 0.01 * torch.randn(hidden_size, input_size)
        self.W_ih = nn.Parameter(input_to_hidden)
        self.b_ih = nn.Parameter(torch.zeros(hidden_size))
        # Hidden -> output parameters.
        hidden_to_output = 0.01 * torch.randn(output_size, hidden_size)
        self.W_ho = nn.Parameter(hidden_to_output)
        self.b_ho = nn.Parameter(torch.zeros(output_size))

    def forward(self, x):
        """Apply the hidden affine map, ReLU, then the output affine map."""
        hidden_pre_activation = x @ self.W_ih.t() + self.b_ih
        hidden = torch.relu(hidden_pre_activation)
        return hidden @ self.W_ho.t() + self.b_ho

### d. Define the Algorithm to Train the Network
We will create functions to train and evaluate our networks.

In [None]:
def train_model(model, criterion, optimizer, X, Y, num_epochs=1000, print_interval=100):
    """Train ``model`` on the full dataset for a fixed number of epochs.

    Every epoch runs one forward pass over all of ``X``, computes the loss
    against ``Y``, backpropagates, and takes one optimizer step.

    Args:
        model: Module called as ``model(X)`` to produce the outputs.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: Optimizer that holds the parameters of ``model``.
        X: Input tensor containing all training samples.
        Y: Target tensor with one entry per model output element.
        num_epochs: Number of full-batch update steps.
        print_interval: Print the loss every this many epochs; a falsy
            value (``0`` or ``None``) disables printing.
    """
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(X)
        # Input and target must have the same shape for element-wise losses
        # such as BCEWithLogitsLoss. Squeezing the outputs to (N,) while
        # viewing the targets as (N, 1) makes that loss raise, and makes
        # broadcasting losses silently compare every output with every
        # target. Reshape the targets to the output shape instead.
        if Y.numel() == outputs.numel():
            targets = Y.reshape(outputs.shape)
        else:
            targets = Y
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # Guard against print_interval == 0, which would divide by zero.
        if print_interval and (epoch + 1) % print_interval == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

def evaluate_model(model, X):
    """Return hard 0/1 predictions of ``model`` for the inputs ``X``.

    The model outputs are squeezed, passed through a sigmoid, and
    thresholded at 0.5 (strictly greater counts as class 1). Gradients are
    not tracked.

    Returns:
        A float tensor of 0.0/1.0 predictions.
    """
    with torch.no_grad():
        logits = model(X).squeeze()
        probabilities = torch.sigmoid(logits)
        predicted = torch.gt(probabilities, 0.5).float()
    return predicted

### e. Save Config
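Collect your configuration settings in a single dictionary for reproducibility; this dictionary can then be written to disk (for example, as JSON) alongside your results.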

In [None]:
# Gather the hyperparameters defined earlier into one dictionary so the run
# settings are kept together.
# NOTE(review): this cell only builds the dictionary in memory; nothing is
# written to disk here.
config = {
    'learning_rate': learning_rate,
    'num_epochs': num_epochs,
    'hidden_size': hidden_size,
    'input_size': input_size,
    'output_size': output_size
}

## 3. Supervised Training of the Network
We will train our Perceptron and MLP on the defined tasks.

In [None]:
# Training Perceptron on linear data
# generate_linear_data() is called with its defaults (100 samples, 2 features).
X_linear, Y_linear = generate_linear_data()
model_perceptron = Perceptron(input_size, output_size)
# BCEWithLogitsLoss applies the sigmoid itself, so the model returns raw logits.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model_perceptron.parameters(), lr=learning_rate)
# Uses train_model's default number of epochs and print interval.
train_model(model_perceptron, criterion, optimizer, X_linear, Y_linear)

In [None]:
# Training MLP on XOR data
X_xor, Y_xor = generate_xor_data()
model_mlp = MLP(input_size, hidden_size, output_size)
# A new optimizer is bound to the MLP's parameters. `criterion` is reused
# from the Perceptron training cell, so that cell must be run first.
optimizer = optim.SGD(model_mlp.parameters(), lr=learning_rate)
# XOR is trained for 10000 epochs instead of train_model's default.
train_model(model_mlp, criterion, optimizer, X_xor, Y_xor, num_epochs=10000)

## 4. Run the Trained Network (and Save the Behavioral Data)
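Run the trained models on the same data they were trained on (no separate test set is generated in this tutorial) and keep the predictions in variables for the analysis below.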

In [None]:
# Evaluate Perceptron
# Predictions are computed on X_linear, the same data used for training.
predictions_perceptron = evaluate_model(model_perceptron, X_linear)
print("Perceptron Predictions on Linear Data:", predictions_perceptron)

In [None]:
# Evaluate MLP
# Predictions are computed on X_xor, the same four points used for training.
predictions_mlp = evaluate_model(model_mlp, X_xor)
print("MLP Predictions on XOR Data:", predictions_mlp)

## 5. Network Analysis

### a. Behavioral Analysis
Analyze the accuracy of the models.

In [None]:
def accuracy(predictions, labels):
    """Return the fraction of predictions that equal their labels.

    Args:
        predictions: Tensor of predicted class values.
        labels: Tensor of true class values, compared element-wise.

    Returns:
        A scalar tensor holding the mean of the element-wise matches.
    """
    is_correct = (predictions == labels)
    return torch.mean(is_correct.float())

# Compare each model's stored predictions with the labels of the data it was
# evaluated on; the result is scaled by 100 below to print a percentage.
acc_perceptron = accuracy(predictions_perceptron, Y_linear)
acc_mlp = accuracy(predictions_mlp, Y_xor)
print(f'Perceptron Accuracy on Linear Data: {acc_perceptron * 100:.2f}%')
print(f'MLP Accuracy on XOR Data: {acc_mlp * 100:.2f}%')

### b. General Neural Analysis
Plot the decision boundaries to visualize how the models are classifying the data.

In [None]:
def plot_decision_boundary(model, X, Y, title, resolution=0.01):
    """Plot the 0/1 decision regions of ``model`` together with the data.

    A regular grid covering the range of the two input features (padded by
    1 on each side) is classified by thresholding the sigmoid of the model
    output at 0.5. The resulting regions are drawn as filled contours with
    the samples scattered on top, and the figure is shown immediately.

    Args:
        model: Module mapping a ``(M, 2)`` float tensor to one score per row.
        X: Samples with two feature columns (tensor or array-like).
        Y: Per-sample values used to colour the scatter points.
        title: Title of the figure.
        resolution: Spacing of the evaluation grid along both axes.
    """
    # Convert explicitly to NumPy before handing data to NumPy/Matplotlib:
    # implicit conversion fails for tensors that require grad or are not on
    # the CPU.
    X_np = X.detach().cpu().numpy() if torch.is_tensor(X) else np.asarray(X)
    Y_np = Y.detach().cpu().numpy() if torch.is_tensor(Y) else np.asarray(Y)

    x_min, x_max = X_np[:, 0].min() - 1, X_np[:, 0].max() + 1
    y_min, y_max = X_np[:, 1].min() - 1, X_np[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, resolution),
                         np.arange(y_min, y_max, resolution))

    # One row per grid point, in the same (feature 0, feature 1) order as X.
    grid = torch.tensor(np.c_[xx.ravel(), yy.ravel()], dtype=torch.float32)
    with torch.no_grad():
        probabilities = torch.sigmoid(model(grid)).cpu().numpy()
    Z = (probabilities > 0.5).astype(int).reshape(xx.shape)

    plt.contourf(xx, yy, Z, alpha=0.8)
    plt.scatter(X_np[:, 0], X_np[:, 1], c=Y_np.ravel(), edgecolors='k', marker='o')
    plt.title(title)
    plt.show()

# Draw one figure per trained model, each over the data that model was trained on.
plot_decision_boundary(model_perceptron, X_linear, Y_linear, 'Perceptron - Linear Data')
plot_decision_boundary(model_mlp, X_xor, Y_xor, 'MLP - XOR Data')