<a href="https://colab.research.google.com/github/arutraj/ML_Basics/blob/main/Solution_DL_Data_Structures_and_Frameworks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Question 1: Design the neural network

In [10]:
import torch
import torch.nn as nn

# Step 1: Layer Dimensions
input_dim = 5
hidden_dim = 10
output_dim = 3

# Step 2: Activation Function
def activation_function(x):
    return torch.relu(x)  # Using ReLU activation function

# Step 3: Neural Network Architecture
class ThreeLayerNN(nn.Module):
    def __init__(self):
        super(ThreeLayerNN, self).__init__()
        self.input_layer = nn.Linear(input_dim, hidden_dim)
        self.hidden_layer = nn.Linear(hidden_dim, hidden_dim)
        self.output_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        hidden_output = activation_function(self.input_layer(x))
        hidden_output = activation_function(self.hidden_layer(hidden_output))
        output = self.output_layer(hidden_output)
        return output

# Step 4: Initialization and Testing
x = torch.randn(1, input_dim)
model = ThreeLayerNN()
output = model(x)
print("Output tensor:", output)


Output tensor: tensor([[-0.2675, -0.1715,  0.3112]], grad_fn=<AddmmBackward0>)


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Question 2: Autograd

In [11]:
import torch

# Define the real function f(x) = (x^4 - 3x^3 + 2x^2 - 5x + 7) / (2x^3 + 5x^2 - x + 3)
def real_function(x):
    result = (x**4 - 3*x**3 + 2*x**2 - 5*x + 7) / (2*x**3 + 5*x**2 - x + 3)
    return result

# Define a real number for which to compute the gradient
x = torch.tensor(1.0, requires_grad=True, dtype=torch.float32)

# Compute the real function f(x)
result = real_function(x)

# Compute the gradient of f(x) with respect to x
result.backward()

# Obtain the gradient of x
gradient_x = x.grad

print("Gradient of x:", gradient_x)


Gradient of x: tensor(-1.0370)


Optional Question: Logistic Regression

In [12]:
# Assignment Question 1:
import torch
import torch.nn as nn
import torch.optim as optim

# Step 1: Prepare the data
# Define the number of features (input size)
input_size = 2

# Define the number of classes (output size)
output_size = 2

# Create the training dataset
X_train = torch.tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [5.0, 1.0]])
y_train = torch.tensor([0, 0, 1, 1])  # Binary classification labels (0 or 1)

# Step 2: Define the logistic regression model
class LogisticRegressionModel(nn.Module):
    def __init__(self):
        super(LogisticRegressionModel, self).__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))

model = LogisticRegressionModel()

# Step 3: Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()  # CrossEntropyLoss is used for binary classification
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Step 4: Train the model
num_epochs = 1000
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Step 5: Test the model
X_test = torch.tensor([[4.0, 3.0]])
predicted = model(X_test)
predicted_class = torch.argmax(predicted).item()
print("Predicted class:", predicted_class)


Epoch [100/1000], Loss: 0.6747
Epoch [200/1000], Loss: 0.6543
Epoch [300/1000], Loss: 0.6419
Epoch [400/1000], Loss: 0.6333
Epoch [500/1000], Loss: 0.6265
Epoch [600/1000], Loss: 0.6206
Epoch [700/1000], Loss: 0.6152
Epoch [800/1000], Loss: 0.6101
Epoch [900/1000], Loss: 0.6052
Epoch [1000/1000], Loss: 0.6006
Predicted class: 1


Optional Question: Attention with Einsum

In [13]:
import torch
import torch.nn.functional as F

def single_head_attention_with_einsum(query, key, value):
    # Calculate the dot product between query and key using einsum
    attention_scores = torch.einsum('bqd,bkd->bqk', query, key)

    # Scale the dot product by dividing it by the square root of the dimension of the key vector
    scaled_attention_scores = attention_scores / (key.size(-1) ** 0.5)

    # Apply softmax to obtain attention weights along the last dimension (key dimension)
    attention_weights = F.softmax(scaled_attention_scores, dim=-1)

    # Compute the weighted sum of value vectors using einsum
    attended_values = torch.einsum('bqk,bvd->bqd', attention_weights, value)

    return attended_values, attention_weights
