In [7]:
import sys
sys.path.append('../')

from minitorch import Tensor
from minitorch.nn import Sequential, Linear
from minitorch.nn import ReLU

import torch
import torch.nn as nn

# Fix the global NumPy and PyTorch random state so that a fresh
# "Restart Kernel -> Run All" starts from the same seeds every time.
import numpy as np
np.random.seed(0)
# torch.manual_seed returns a Generator object; the trailing ';' keeps its
# repr from being echoed as this cell's output.
torch.manual_seed(0);

<torch._C.Generator at 0x268b73d3010>

### Test for minitorch.nn.modules.Sequential


In [8]:
# Assemble a small 2 -> 4 -> 1 network out of minitorch layers.
model = Sequential(Linear(2, 4), ReLU(), Linear(4, 1))
print("Sequential model:", model, sep="\n")

# Push a single two-feature sample through the freshly built model.
x = Tensor([[1.0, 2.0]])
out = model(x)
print("Output:", out)

# Walk through the container API (__getitem__, append, insert, pop,
# __setitem__, __delitem__), printing the model after every mutation.
print("First layer:", model[0])

model.append(ReLU())
print("After append:", model)

model.insert(1, Linear(2, 2))
print("After insert:", model)

# pop() is called without an index here; its result is kept in `removed`.
removed = model.pop()
print("After pop:", model)

model[0] = Linear(2, 4)
print("After setitem:", model)

del model[1]
print("After delitem:", model)


Sequential model:
Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True),
  (1): ReLU(),
  (2): Linear(in_features=4, out_features=1, bias=True),
)
Output: tensor([[-1.0889]], requires_grad=True)
First layer: Linear(in_features=2, out_features=4, bias=True)
After append: Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True),
  (1): ReLU(),
  (2): Linear(in_features=4, out_features=1, bias=True),
  (3): ReLU(),
)
After insert: Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True),
  (1): Linear(in_features=2, out_features=2, bias=True),
  (2): ReLU(),
  (3): Linear(in_features=4, out_features=1, bias=True),
  (4): ReLU(),
)
After pop: Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True),
  (1): Linear(in_features=2, out_features=2, bias=True),
  (2): ReLU(),
  (3): Linear(in_features=4, out_features=1, bias=True),
)
After setitem: Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True),
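  (1): Linear(in_features=2, out_features=2, bias=True),
  (2): ReLU(),
  (3): Linear(in_features=4, out_features=1, bias=True),
)
After delitem: Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True),
  (1): ReLU(),
  (2): Linear(in_features=4, out_features=1, bias=True),
)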

In [9]:
# Dump every parameter yielded by model.parameters(), one per line,
# underneath a "Parameters:" header.
print("Parameters:", *model.parameters(), sep="\n")

Parameters:
Parameter containing:
tensor([[ 0.3967, -0.5398],
 [ 0.1979, -0.5044],
 [ 0.6289,  0.0309],
 [-0.1207, -0.333 ]], requires_grad=True)
Parameter containing:
tensor([ 0.3878, -0.062 ,  0.0968, -0.6805], requires_grad=True)
Parameter containing:
tensor([[ 0.068 ,  0.4256, -0.429 , -0.4129]], requires_grad=True)
Parameter containing:
tensor([-0.4798], requires_grad=True)


### Compare with torch.nn.Sequential

In [10]:
# Reference implementation: the same 2 -> 4 -> 1 network built from torch.nn.
model_torch = nn.Sequential(nn.Linear(2, 4), nn.ReLU(), nn.Linear(4, 1))
print("PyTorch Sequential model:", model_torch, sep="\n")

# Push a single two-feature sample through the model.
x_torch = torch.tensor([[1.0, 2.0]])
out_torch = model_torch(x_torch)
print("Output:", out_torch)

# Walk through the container API (__getitem__, append, insert, pop,
# __setitem__, __delitem__), printing the model after every mutation.
print("First layer:", model_torch[0])

model_torch.append(nn.ReLU())
print("After append:", model_torch)

model_torch.insert(1, nn.Linear(2, 2))
print("After insert:", model_torch)

# Index -1 selects the last entry; the popped module is kept in `removed`.
removed = model_torch.pop(-1)
print("After pop:", model_torch)

model_torch[0] = nn.Linear(2, 4)
print("After setitem:", model_torch)

del model_torch[1]
print("After delitem:", model_torch)



PyTorch Sequential model:
Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True)
  (1): ReLU()
  (2): Linear(in_features=4, out_features=1, bias=True)
)
Output: tensor([[-0.1143]], grad_fn=<AddmmBackward0>)
First layer: Linear(in_features=2, out_features=4, bias=True)
After append: Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True)
  (1): ReLU()
  (2): Linear(in_features=4, out_features=1, bias=True)
  (3): ReLU()
)
After insert: Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True)
  (1): Linear(in_features=2, out_features=2, bias=True)
  (2): ReLU()
  (3): Linear(in_features=4, out_features=1, bias=True)
  (4): ReLU()
)
After pop: Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True)
  (1): Linear(in_features=2, out_features=2, bias=True)
  (2): ReLU()
  (3): Linear(in_features=4, out_features=1, bias=True)
)
After setitem: Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True)
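  (1): Linear(in_features=2, out_features=2, bias=True)
  (2): ReLU()
  (3): Linear(in_features=4, out_features=1, bias=True)
)
After delitem: Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True)
  (1): ReLU()
  (2): Linear(in_features=4, out_features=1, bias=True)
)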

In [11]:
# Dump every parameter yielded by model_torch.parameters(), one per line,
# underneath a "Parameters:" header.
print("Parameters:", *model_torch.parameters(), sep="\n")

Parameters:
Parameter containing:
tensor([[ 0.5291, -0.1140],
        [ 0.0748,  0.6403],
        [-0.6560, -0.4452],
        [-0.1790, -0.2756]], requires_grad=True)
Parameter containing:
tensor([ 0.6109, -0.4583, -0.3255, -0.4940], requires_grad=True)
Parameter containing:
tensor([[-0.4777, -0.3311, -0.2061,  0.0185]], requires_grad=True)
Parameter containing:
tensor([0.1977], requires_grad=True)


## Train MLP

In [14]:
# Train a small MLP (built with Sequential) for N_EPOCHS epochs, feeding the
# whole four-sample dataset to the model on every epoch.
from minitorch import Tensor
from minitorch.nn import Sequential, Linear
from minitorch.nn import ReLU, MSELoss
from minitorch.optim import SGD

# Training hyper-parameters, named so they are easy to find and tune.
N_EPOCHS = 100        # number of iterations of the training loop
LEARNING_RATE = 0.01  # value passed to SGD as `lr`
LOG_EVERY = 10        # the loss is printed once every LOG_EVERY epochs

# Dummy data: y = x1 + 2*x2
X = Tensor([[1.0, 2.0], [2.0, 1.0], [3.0, 4.0], [4.0, 3.0]])
y = Tensor([[5.0], [4.0], [11.0], [10.0]])

# Build MLP model: 2 inputs -> 8 hidden units (ReLU) -> 1 output
mlp = Sequential(
    Linear(2, 8),
    ReLU(),
    Linear(8, 1)
)

criterion = MSELoss()
optimizer = SGD(mlp.parameters(), lr=LEARNING_RATE)

# Predictions of the untrained model, for comparison with the final ones.
print("Initial predictions:", mlp(X))

for epoch in range(N_EPOCHS):
    # Forward pass over the full dataset and loss against the targets.
    y_pred = mlp(X)
    loss = criterion(y_pred, y)
    # Epochs are reported 1-based, so lines appear for epochs 1, 11, 21, ...
    if epoch % LOG_EVERY == 0:
        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")
    # Same step order as the usual torch training loop:
    # zero_grad -> backward -> step.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print("Final predictions:", mlp(X))

Initial predictions: tensor([[-0.5466],
 [-0.7812],
 [-1.0724],
 [-1.323 ]], requires_grad=True)
Epoch 1, Loss: 81.8946
Epoch 11, Loss: 0.4743
Epoch 21, Loss: 0.3190
Epoch 31, Loss: 0.2240
Epoch 41, Loss: 0.1646
Epoch 51, Loss: 0.1264
Epoch 61, Loss: 0.1008
Epoch 71, Loss: 0.0819
Epoch 81, Loss: 0.0677
Epoch 91, Loss: 0.0565
Final predictions: tensor([[ 5.2614],
 [ 4.2943],
 [10.8556],
 [ 9.8834]], requires_grad=True)
