In [None]:
# Deep Learning Day 1: Training a Perceptron on the AND Gate with PyTorch

In [3]:
# Import PyTorch core libraries
import torch            # Main PyTorch library for tensor operations
import torch.nn as nn   # Neural network modules and layers
import torch.optim as optim  # Optimization algorithms like SGD, Adam

In [6]:
# Define a Perceptron model class inheriting from nn.Module
class Perceptron(nn.Module):
    """Single-unit perceptron: one linear layer followed by a sigmoid.

    Args:
        input_size: Number of features in each input row.
    """

    def __init__(self, input_size):
        super().__init__()
        # One output unit; the layer holds a weight per input feature plus a bias.
        self.linear = nn.Linear(input_size, 1)

    def forward(self, x):
        """Return sigmoid(linear(x)), i.e. one value in (0, 1) per input row."""
        return torch.sigmoid(self.linear(x))


In [4]:
# Training set for the two-input AND gate.
# X holds the full truth table: one row per combination of the two binary inputs.
X = torch.tensor(
    [[0, 0], [0, 1], [1, 0], [1, 1]],
    dtype=torch.float32,  # float32 matches the default dtype of nn.Linear parameters
)

# y holds the AND result for each row of X: only [1, 1] yields 1.
# Reshaped into a (4, 1) column so it has the same shape as the model output.
y = torch.tensor([0, 0, 0, 1], dtype=torch.float32).reshape(-1, 1)


In [7]:
# Seed PyTorch's global random number generator before building the model.
# nn.Linear draws its initial weights and bias at random, so without a fixed
# seed each fresh run of the notebook starts from different parameters and
# prints a different loss curve.
torch.manual_seed(42)

# Instantiate the Perceptron model with 2 input features (the two gate inputs)
model = Perceptron(input_size=2)

In [8]:
# Loss function: binary cross-entropy between predicted probabilities and 0/1 labels.
# BCELoss expects probabilities, which is why the model ends with a sigmoid.
# reduction="mean" is the default, written out here: the loss is averaged over the batch.
criterion = nn.BCELoss(reduction="mean")


In [9]:
# Optimizer: plain stochastic gradient descent over all trainable model parameters.
# The learning rate sets the step size of each weight update.
LEARNING_RATE = 0.1
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

In [10]:
# Number of training epochs; one epoch is one full pass over the dataset
epochs = 1_000

In [11]:
# Training loop: every epoch runs one forward/backward pass over the whole
# batch X and applies a single optimizer step.
#
# Training mode does not change between epochs, so it is enabled once here
# instead of on every iteration.
model.train()

for epoch in range(epochs):
    optimizer.zero_grad()         # Clear old gradients; backward() accumulates into them

    outputs = model(X)            # Forward pass: predictions for all inputs in the batch
    loss = criterion(outputs, y)  # How far the predictions are from the true labels

    loss.backward()               # Backpropagation: gradients of the loss w.r.t. the parameters
    optimizer.step()              # Update the parameters using those gradients

    # Report the training loss every 100 epochs to monitor progress
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item():.4f}')

Epoch 0, Loss: 0.6083
Epoch 100, Loss: 0.4411
Epoch 200, Loss: 0.3500
Epoch 300, Loss: 0.2927
Epoch 400, Loss: 0.2529
Epoch 500, Loss: 0.2231
Epoch 600, Loss: 0.1999
Epoch 700, Loss: 0.1811
Epoch 800, Loss: 0.1656
Epoch 900, Loss: 0.1525


In [12]:
# Evaluate the trained model on the same inputs it was trained on.
model.eval()  # Evaluation mode (disables training-only behaviors such as dropout)

with torch.no_grad():  # Gradients are not needed when only making predictions
    print("\nTesting results:")
    for xi in X:
        # Indexing with None prepends a batch dimension (shape [2] -> [1, 2]),
        # since the model is called on batches of rows.
        output = model(xi[None, :])
        # Threshold the sigmoid probability at 0.5 to get a hard 0/1 prediction
        prediction = int(output.item() >= 0.5)
        print(f"Input: {xi.tolist()} => Predicted output: {prediction}")


Testing results:
Input: [0.0, 0.0] => Predicted output: 0
Input: [0.0, 1.0] => Predicted output: 0
Input: [1.0, 0.0] => Predicted output: 0
Input: [1.0, 1.0] => Predicted output: 1
