## Import necessary libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from tqdm import tqdm

# Basic Operations

In [2]:
# Build two 1-D integer tensors from plain Python lists
a, b = (torch.tensor(values) for values in ([1, 2, 3, 4], [5, 6, 7, 8]))

In [3]:
# Reductions and element-wise arithmetic on the tensors a and b
print("Sum:", a.sum())  # Total over every element
print("Mean:", a.float().mean())  # Cast to float first so the mean can be computed
print("Element-wise addition:", torch.add(a, b))
print("Element-wise multiplication:", torch.mul(a, b))

Sum: tensor(10)
Mean: tensor(2.5000)
Element-wise addition: tensor([ 6,  8, 10, 12])
Element-wise multiplication: tensor([ 5, 12, 21, 32])


In [4]:
# Matrix operations on two 2x2 integer matrices
matrix1 = torch.tensor([[1, 2], [3, 4]])
matrix2 = torch.tensor([[5, 6], [7, 8]])
# torch.mm and the @ operator both compute the matrix product; * is element-wise
for label, result in (
    ("Matrix multiplication:", torch.mm(matrix1, matrix2)),
    ("Matrix multiplication (similar):", matrix1 @ matrix2),
    ("Hadamard product (element-wise multiplication):", matrix1 * matrix2),
):
    print(label, result)

Matrix multiplication: tensor([[19, 22],
        [43, 50]])
Matrix multiplication (similar): tensor([[19, 22],
        [43, 50]])
Hadamard product (element-wise multiplication): tensor([[ 5, 12],
        [21, 32]])


In [5]:
# Reshaping: both calls keep the 6 elements and only change the shape
c = torch.tensor([1, 2, 3, 4, 5, 6])
# reshape returns a view of c when possible and falls back to a copy otherwise
print("Reshape:", c.reshape(2, 3))
# view never copies: the result always shares memory with c, and the call
# fails if the requested shape is incompatible with c's memory layout
print("View:", c.view(3, 2))

Reshape: tensor([[1, 2, 3],
        [4, 5, 6]])
View: tensor([[1, 2],
        [3, 4],
        [5, 6]])


# Initialize a Simple Neural Network Model

In [6]:

class SimpleNLP(nn.Module):
    """Bag-of-embeddings text classifier.

    Token ids are embedded, averaged over the sequence dimension, and passed
    through three linear layers with ReLU activations between them.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each token embedding.
        hidden_dim: Width of the two hidden linear layers.
        output_dim: Number of values produced per sequence (e.g. class logits).
        padding_idx: Optional id of a padding token. When given, padded
            positions are excluded from the sequence average so that
            variable-length sequences padded to a common length are pooled
            over their real tokens only. Defaults to None, which keeps the
            plain mean over every position.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, padding_idx=None):
        super().__init__()

        # Embedding layer; nn.Embedding also resolves a negative padding_idx
        # to its non-negative equivalent and stores it as .padding_idx
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)

        # Sequential layers
        self.layer1 = nn.Linear(embedding_dim, hidden_dim)
        self.activation1 = nn.ReLU()
        self.layer2 = nn.Linear(hidden_dim, hidden_dim)
        self.activation2 = nn.ReLU()
        self.layer3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map a batch of token ids to one output vector per sequence.

        Args:
            x: Integer tensor of token ids, shape (batch_size, seq_len).

        Returns:
            Tensor of shape (batch_size, output_dim) holding the raw outputs
            of the last linear layer (no softmax is applied).
        """
        # Embed the input
        # Output shape: (batch_size, seq_len, embedding_dim)
        embedded = self.embedding(x)

        # Pool over the sequence length
        # Output shape: (batch_size, embedding_dim)
        pad_id = self.embedding.padding_idx
        if pad_id is None:
            # No padding token configured: plain average over all positions
            pooled = torch.mean(embedded, dim=1)
        else:
            # Masked average: count and sum only the non-padding positions
            mask = (x != pad_id).unsqueeze(-1).to(embedded.dtype)
            # clamp avoids 0/0 for a sequence that consists only of padding
            token_counts = mask.sum(dim=1).clamp(min=1.0)
            pooled = (embedded * mask).sum(dim=1) / token_counts

        # Forward pass through layers
        x = self.layer1(pooled)
        x = self.activation1(x)
        x = self.layer2(x)
        x = self.activation2(x)
        x = self.layer3(x)

        return x

## How to train

In [7]:
# Set random seed for reproducibility
# (seeds torch's random number generator before any random data or weights are created)
torch.manual_seed(42)

# Model parameters
vocab_size = 10000  # number of token ids; also the size of the embedding table
embedding_dim = 128  # length of each token embedding vector
hidden_dim = 256  # width of the two hidden linear layers
output_dim = 2  # e.g., binary classification
batch_size = 32  # number of sequences in the synthetic batch
seq_len = 50  # number of token ids per sequence

In [8]:
# Create random input data (simulating tokenized text)
# torch.randint excludes its upper bound, so ids lie in [0, vocab_size) and labels in [0, output_dim).
# The labels are random, so there is no real signal to learn: any later drop in loss
# on this batch is the model memorising it.
x = torch.randint(0, vocab_size, (batch_size, seq_len))  # Random token ids
y = torch.randint(0, output_dim, (batch_size,))  # Random labels

In [9]:
# Inspect the synthetic batch: token ids x followed by labels y
print(x, y)

tensor([[7542, 6067, 6876,  ..., 5294, 5693, 1677],
        [5070, 7709, 2370,  ..., 4339, 3861, 9564],
        [4270, 5553,  137,  ..., 1618, 9168,  407],
        ...,
        [7071, 9164, 7767,  ..., 6551, 1778,   38],
        [3266, 9237, 8229,  ..., 2014, 7532, 3783],
        [ 436, 9500, 7246,  ..., 7652, 4695, 1316]]) tensor([1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1,
        1, 0, 0, 0, 0, 1, 0, 0])


In [10]:
# Build the model, the loss function and the optimizer used by the training cells
model = SimpleNLP(vocab_size, embedding_dim, hidden_dim, output_dim)
criterion = nn.CrossEntropyLoss()  # Common for classification tasks; compares raw model outputs with integer class labels
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam updates every model parameter with learning rate 0.001

Run one forward pass through the model:

In [11]:
# Run the whole batch through the model; the result has one row per sequence
# and one column per output class
outputs = model(x)
print(outputs.shape)

torch.Size([32, 2])


In [12]:
loss = criterion(outputs, y)  # calculate loss: cross-entropy between the model outputs and the labels

In [13]:
# The printed grad_fn shows the loss is still attached to the autograd graph
print(loss)

tensor(0.6897, grad_fn=<NllLossBackward0>)


In [14]:
# One optimisation step. Gradients accumulate across backward() calls, so they
# must be cleared between steps; here that happens after the update, whereas
# the training loop further down clears them before backward().
loss.backward()  # calculate gradients (backpropagation)
optimizer.step()  # update weights
optimizer.zero_grad()  # clear gradients

In [15]:
# Repeat the forward pass on the same batch, now with the updated weights
outputs = model(x)
loss = criterion(outputs, y)  # calculate loss

In [16]:
loss  # it's decreasing: lower than the value printed before the optimizer step

tensor(0.6780, grad_fn=<NllLossBackward0>)

Now we repeat the same forward pass, loss computation, and weight update multiple times:

In [17]:
num_epochs = 40

# Make sure the model is in training mode; re-running this cell after the
# inference cell (which calls model.eval()) would otherwise train in eval mode.
model.train()

# Each "epoch" here is one full-batch update on the same single batch (x, y).
for epoch in tqdm(range(num_epochs)):
    # Forward pass
    outputs = model(x)

    # Compute loss
    loss = criterion(outputs, y)

    # Backward pass and optimize
    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()
    optimizer.step()

    # tqdm.write prints the message without overlapping the progress bar,
    # unlike a bare print() inside a tqdm loop
    tqdm.write(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

 35%|███▌      | 14/40 [00:00<00:00, 133.42it/s]

Epoch [1/40], Loss: 0.6780
Epoch [2/40], Loss: 0.6669
Epoch [3/40], Loss: 0.6554
Epoch [4/40], Loss: 0.6424
Epoch [5/40], Loss: 0.6276
Epoch [6/40], Loss: 0.6101
Epoch [7/40], Loss: 0.5897
Epoch [8/40], Loss: 0.5661
Epoch [9/40], Loss: 0.5389
Epoch [10/40], Loss: 0.5083
Epoch [11/40], Loss: 0.4742
Epoch [12/40], Loss: 0.4369
Epoch [13/40], Loss: 0.3969
Epoch [14/40], Loss: 0.3551
Epoch [15/40], Loss: 0.3122
Epoch [16/40], Loss: 0.2693
Epoch [17/40], Loss: 0.2276
Epoch [18/40], Loss: 0.1883
Epoch [19/40], Loss: 0.1523
Epoch [20/40], Loss: 0.1204
Epoch [21/40], Loss: 0.0930
Epoch [22/40], Loss: 0.0702
Epoch [23/40], Loss: 0.0520
Epoch [24/40], Loss: 0.0379
Epoch [25/40], Loss: 0.0272
Epoch [26/40], Loss: 0.0194
Epoch [27/40], Loss: 0.0137


100%|██████████| 40/40 [00:00<00:00, 134.15it/s]

Epoch [28/40], Loss: 0.0097
Epoch [29/40], Loss: 0.0069
Epoch [30/40], Loss: 0.0049
Epoch [31/40], Loss: 0.0035
Epoch [32/40], Loss: 0.0025
Epoch [33/40], Loss: 0.0019
Epoch [34/40], Loss: 0.0014
Epoch [35/40], Loss: 0.0010
Epoch [36/40], Loss: 0.0008
Epoch [37/40], Loss: 0.0006
Epoch [38/40], Loss: 0.0005
Epoch [39/40], Loss: 0.0004
Epoch [40/40], Loss: 0.0003





## Inference example

In [18]:
# Put the model in evaluation mode before running inference
model.eval()

# Sample input of shape (2, seq_len) with random token ids
test_input = torch.randint(0, vocab_size, (2, seq_len))

# Skipping gradient tracking saves memory and computation during inference
with torch.no_grad():
    predictions = model(test_input)

print("\nExample predictions for 2 sequences:")
print(predictions)
# The predicted class is the index of the largest output in each row
print("Predicted classes:", predictions.argmax(dim=1))

## Model architecture summary
print("\nModel Architecture:")
print(model)

# Shape summary (x and outputs are the training batch and its last forward pass from earlier cells)
print("\nInput shape:", x.shape)  # (batch_size, seq_len)
print("Output shape:", outputs.shape)  # (batch_size, output_dim)


Example predictions for 2 sequences:
tensor([[-2.7261,  2.7143],
        [ 0.4964, -0.5536]])
Predicted classes: tensor([1, 0])

Model Architecture:
SimpleNLP(
  (embedding): Embedding(10000, 128)
  (layer1): Linear(in_features=128, out_features=256, bias=True)
  (activation1): ReLU()
  (layer2): Linear(in_features=256, out_features=256, bias=True)
  (activation2): ReLU()
  (layer3): Linear(in_features=256, out_features=2, bias=True)
)

Input shape: torch.Size([32, 50])
Output shape: torch.Size([32, 2])
