In [1]:
import minitorch
import minitorch.nn as nn
from minitorch.nn import Linear, Module
from minitorch import Tensor
from minitorch.nn.parameter import Parameter

class MiniTorchModel(Module):
    def __init__(self, in_feat, out_feat):
        super().__init__()
        self.layer1 = Linear(in_feat, 3)
        self.layer2 = Linear(3, out_feat)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        return x

minitorch_model = MiniTorchModel(2, 1)
print(minitorch_model)

print('-'*50)


input_tensor = Tensor([[1.0, 1.0]])

out_minitorch = minitorch_model(input_tensor)

print("MiniTorch Model Output:")
print(out_minitorch)

print("MiniTorch Model Parameters:")
for param in minitorch_model.parameters():
    print(param)

print('-' * 50)

# Test Tensor
t = Tensor([[1.0, 1.0]])
print(t)

p = Parameter(Tensor([[1.0, 1.0]]))
print(p)

MiniTorchModel(
  (layer1): Linear(in_features=2, out_features=3, bias=True),
  (layer2): Linear(in_features=3, out_features=1, bias=True),
)
--------------------------------------------------
MiniTorch Model Output:
tensor([[0.5692]], requires_grad=True)
MiniTorch Model Parameters:
Parameter containing:
tensor([[ 0.0624 -0.5109  0.3155]
 [ 0.6344  0.3436 -0.4453]], requires_grad=True)
Parameter containing:
tensor([-0.0068  0.2181 -0.1573], requires_grad=True)
Parameter containing:
tensor([[ 0.0887]
 [-0.3915]
 [-0.5667]], requires_grad=True)
Parameter containing:
tensor([0.3652], requires_grad=True)
--------------------------------------------------
tensor([[1. 1.]], requires_grad=True)
Parameter containing:
tensor([[1. 1.]], requires_grad=True)


In [2]:
from minitorch.nn import Linear, Module
from minitorch import Tensor
from minitorch.nn.parameter import Parameter

class MiniTorchModel(Module):
    def __init__(self, in_feat, out_feat):
        super().__init__()
        self.layer1 = Linear(in_feat, 3)
        self.layer2 = Linear(3, out_feat)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        return x

minitorch_model = MiniTorchModel(2, 1)
print(minitorch_model)

print(minitorch_model.layer1.weight)
print(minitorch_model.layer1.bias)

print(minitorch_model.layer2.weight)
print(minitorch_model.layer2.bias)


MiniTorchModel(
  (layer1): Linear(in_features=2, out_features=3, bias=True),
  (layer2): Linear(in_features=3, out_features=1, bias=True),
)
Parameter containing:
tensor([[ 0.4798  0.5338  0.328 ]
 [-0.5145  0.1215 -0.2622]], requires_grad=True)
Parameter containing:
tensor([ 0.2228 -0.2896 -0.3408], requires_grad=True)
Parameter containing:
tensor([[-0.3202]
 [-0.1298]
 [-0.106 ]], requires_grad=True)
Parameter containing:
tensor([-0.2885], requires_grad=True)


In [3]:
print(minitorch_model.parameters())

print('-' * 50)

print(minitorch_model.parameters)
print(minitorch_model.zero_grad())

print(minitorch_model.layer1.weight.grad)

[Parameter containing:
tensor([[ 0.4798  0.5338  0.328 ]
 [-0.5145  0.1215 -0.2622]], requires_grad=True), Parameter containing:
tensor([ 0.2228 -0.2896 -0.3408], requires_grad=True), Parameter containing:
tensor([[-0.3202]
 [-0.1298]
 [-0.106 ]], requires_grad=True), Parameter containing:
tensor([-0.2885], requires_grad=True)]
--------------------------------------------------
<bound method Module.parameters of MiniTorchModel(
  (layer1): Linear(in_features=2, out_features=3, bias=True),
  (layer2): Linear(in_features=3, out_features=1, bias=True),
)>
None
tensor(0.0)


In [4]:
import torch

class TorchModel(torch.nn.Module):
    def __init__(self, in_feat, out_feat):
        super().__init__()
        self.layer1 = torch.nn.Linear(in_feat, 3)
        self.layer2 = torch.nn.Linear(3, out_feat)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        return x

torch_model = TorchModel(2, 1)
out_torch = torch_model(torch.tensor([[1.0] * 2]))


print(torch_model)
print('-' * 50)


print("Torch Model Output:")
print(out_torch)

print("Torch Model Parameters:")
for param in torch_model.parameters():
    print(param)

print('-'*50)

t = torch.tensor([[1.0] * 2])
print(t)
print(t.grad)
print(t.requires_grad)

z = torch.tensor([[1.0] * 2], requires_grad=True)
print(z)

p = torch.nn.Parameter(1.0)
print(p)

TorchModel(
  (layer1): Linear(in_features=2, out_features=3, bias=True)
  (layer2): Linear(in_features=3, out_features=1, bias=True)
)
--------------------------------------------------
Torch Model Output:
tensor([[-0.3047]], grad_fn=<AddmmBackward0>)
Torch Model Parameters:
Parameter containing:
tensor([[ 0.6296,  0.5635],
        [-0.6932, -0.4749],
        [ 0.0302, -0.0009]], requires_grad=True)
Parameter containing:
tensor([ 0.1758,  0.5085, -0.4160], requires_grad=True)
Parameter containing:
tensor([[-0.1787,  0.1831,  0.0106]], requires_grad=True)
Parameter containing:
tensor([0.0648], requires_grad=True)
--------------------------------------------------
tensor([[1., 1.]])
None
False
tensor([[1., 1.]], requires_grad=True)


AttributeError: 'float' object has no attribute 'detach'

In [None]:
from minitorch.nn import Parameter

x = Tensor([[1.0, 2.0]], requires_grad=False)
w = Parameter(Tensor([[3.0], [4.0]]))  # shape: (2,1)
b = Parameter(Tensor([0.1]))  # shape: (1,)

y = x @ w + b
print("Forward output:", y)

y.backward()
print("Gradient of w:\n", w.grad)
print("Gradient of b:\n", b.grad)


Forward output: tensor([[11.1]], requires_grad=True)
Gradient of w:
 [[1.]
 [2.]]
Gradient of b:
 [1.]


In [None]:
from minitorch.nn.parameter import Parameter
from minitorch.optim import SGD


# Dummy example
w = Parameter(1.0)  # Initial value
w.grad = 0.1        # gradient

opt = SGD([w], lr=0.01)
opt.step()
print(w.data)  # Should be 1.0 - 0.01 * 0.1 = 0.999


0.999


In [5]:
from minitorch.utils.data import Dataset, DataLoader

class MyDataset(Dataset):
    def __init__(self, xs, ys):
        self.xs = xs
        self.ys = ys

    def __getitem__(self, idx):
        return self.xs[idx], self.ys[idx]

    def __len__(self):
        return len(self.xs)

# Dummy data
xs = [i for i in range(10)]
ys = [2 * i + 1 for i in range(10)]


dataset = MyDataset(xs, ys)
print("Dataset length:", len(dataset))
print(dataset)

loader = DataLoader(dataset, batch_size=3, shuffle=True)
print("DataLoader length:", len(loader))
print(loader)

for xb, yb in loader:
    print("Batch x:", list(xb))
    print("Batch y:", list(yb))


Dataset length: 10
Dataset(10)
DataLoader length: 4
DataLoader(batch_size=3, shuffle=True, dataset_size=10)
Batch x: [7, 9, 2]
Batch y: [15, 19, 5]
Batch x: [3, 8, 5]
Batch y: [7, 17, 11]
Batch x: [4, 0, 6]
Batch y: [9, 1, 13]
Batch x: [1]
Batch y: [3]


In [7]:
from minitorch.nn.parameter import Parameter
from minitorch.engine import Tensor
from minitorch.optim import SGD

# Single-variable linear regression: y = wx + b
w = Parameter(Tensor([0.0]))
b = Parameter(Tensor([0.0]))

def predict(x):
    return w.data * x + b.data

def compute_loss(y_pred, y_true):
    return (y_pred - y_true) ** 2

optimizer = SGD([w, b], lr=0.01)

for epoch in range(10):
    total_loss = 0.0
    for x_batch, y_batch in loader:
        for x, y in zip(x_batch, y_batch):
            x = Tensor(x)
            y = Tensor(y)
            y_pred = predict(x)
            loss = compute_loss(y_pred, y)

            # Manual "backward"
            grad_y_pred = 2.0 * (y_pred - y)
            w.grad = grad_y_pred * x
            b.grad = grad_y_pred * 1.0

            optimizer.step()
            optimizer.zero_grad()

            total_loss += loss.item()
    total_loss /= len(loader)

    print(f"Epoch {epoch+1}: Loss = {total_loss.item():.4f}")


Epoch 1: Loss = 109.7702
Epoch 2: Loss = 0.6510
Epoch 3: Loss = 0.5246
Epoch 4: Loss = 0.4940
Epoch 5: Loss = 0.4356
Epoch 6: Loss = 0.4369
Epoch 7: Loss = 0.3475
Epoch 8: Loss = 0.3470
Epoch 9: Loss = 0.2195
Epoch 10: Loss = 0.1942
