In [2]:
import minitorch
import minitorch.nn as nn


class Model(nn.Module):
    """Two `nn.Linear` layers applied back to back, with a hidden width of 5.

    No activation is applied between the two layers.
    """

    def __init__(self, in_feat, out_feat):
        """Create the `in_feat -> 5` and `5 -> out_feat` linear layers."""
        super().__init__()
        hidden = 5  # width shared by layer1's output and layer2's input
        self.layer1 = nn.Linear(in_feat, hidden)
        self.layer2 = nn.Linear(hidden, out_feat)

    def forward(self, x):
        """Pass `x` through `layer1`, then pass that result through `layer2`."""
        return self.layer2(self.layer1(x))
    
# Build a model taking 10 input features and producing 1 output feature.
model = Model(10, 1)

# Call the model once on a single row made of ten 1.0 values.
out = model(minitorch.Tensor([[1.0 for _ in range(10)]]))

# Print the result of the call, then the shape of the first layer's weight.
print(out)
print(model.layer1.weight.shape)

Tensor(data=[[0.3282618]], grad=[[0.]])
(10, 5)


In [5]:
from minitorch.nn import Linear, Module
from minitorch import Tensor

class Model(Module):
    """Two `Linear` layers chained directly, joined by a hidden width of 5.

    The output of the first layer is fed to the second with nothing in between.
    """

    def __init__(self, in_feat, out_feat):
        """Set up `layer1` (`in_feat -> 5`) and `layer2` (`5 -> out_feat`)."""
        super().__init__()
        hidden = 5  # layer1's output width and layer2's input width
        self.layer1 = Linear(in_feat, hidden)
        self.layer2 = Linear(hidden, out_feat)

    def forward(self, x):
        """Return `layer2(layer1(x))`."""
        return self.layer2(self.layer1(x))

# One input row of ten 1.0 values, fed through a fresh 10 -> 1 model.
x = Tensor([[1.0] * 10])
model = Model(10, 1)
out = model(x)

# Separator line reused between the printed sections.
rule = "=" * 40

print(rule)
print("Output:\n", out)
print(rule)

# Print every object yielded by model.parameters(), numbered from 1.
print("Model Parameters:\n")
for i, param in enumerate(model.parameters(), start=1):
    print(f"Parameter {i}:\n{param}\n")

# Call zero_grad() and then print the .grad attribute of each parameter.
model.zero_grad()

print(rule)
print("Gradients after zero_grad():\n")
for i, param in enumerate(model.parameters(), start=1):
    print(f"Parameter {i} Gradient:\n{param.grad}\n")
print(rule)


Output:
 Tensor(data=[[0.10897517]], grad=[[0.]])
Model Parameters:

Parameter 1:
Tensor(data=[[ 0.19572176  0.10417269 -0.21652951  0.04283364  0.05585226]
 [-0.10223094  0.11770233 -0.23227375  0.26042445 -0.00633591]
 [-0.05784652 -0.18938134  0.23439864 -0.21872884 -0.27427402]
 [-0.02559256  0.06872185 -0.05929142  0.18299563 -0.26200945]
 [-0.26936871 -0.02597648 -0.13583023  0.24176125  0.0061035 ]
 [-0.29610952  0.20418077  0.05190937  0.05173173 -0.15609623]
 [-0.23919618 -0.23654647  0.27395849  0.24394757  0.14324981]
 [-0.25049732  0.11876332  0.13352296  0.31265075  0.0199595 ]
 [ 0.27038641  0.02763306  0.05718915 -0.27815406 -0.27721306]
 [-0.24326266  0.02555529 -0.04849946  0.27085659 -0.29200439]], grad=[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]])

Parameter 2:
Tensor(data=[ 0.12121884  0.30386682  0.27645743 -0.00312291 -0.15306018]