In [1]:
import torch

In [2]:
print(torch.__version__)

2.1.0+cu121


In [41]:
dtype = torch.float
# device  = torch.device("cpu")
device = torch.device("cuda:0") # For GPU

# Data Generation

In [42]:
N, D_in, H, D_out = 64, 1000, 100, 10 # batch_size, input_dimension, hidden_dimension, output_dimension

In [43]:
x = torch.randn(N, D_in, device = device, dtype = dtype)
y = torch.randn(N, D_out, device = device, dtype = dtype)

# Initialization

In [17]:
w1 = torch.randn(D_in, H, device = device, dtype = dtype)
w2 = torch.randn(H, D_out, device = device, dtype = dtype)

In [14]:
learning_rate = 1e-6

# Neural Netowork in Low-level

In [18]:
for i in range(1000):
    h = x.mm(w1)
    h_relu = h.clamp(min = 0)
    y_pred = h_relu.mm(w2)

    loss = (y_pred - y).pow(2).mean()
    if i % 100 == 99:
        print(f"Epoch: {i}, Train_loss: {loss}")

    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

Epoch: 99, Train_loss: 0.4643843173980713
Epoch: 199, Train_loss: 0.000989199266768992
Epoch: 299, Train_loss: 4.52483982371632e-06
Epoch: 399, Train_loss: 1.7088237314055732e-07
Epoch: 499, Train_loss: 4.0151668656562833e-08
Epoch: 599, Train_loss: 1.8395201806242767e-08
Epoch: 699, Train_loss: 1.1483979101001296e-08
Epoch: 799, Train_loss: 8.430783182689083e-09
Epoch: 899, Train_loss: 6.5176295471758294e-09
Epoch: 999, Train_loss: 5.205746056446969e-09


# Neural Netowork using autograd

In [44]:
w1 = torch.randn(D_in, H, device = device, dtype = dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device = device, dtype = dtype, requires_grad = True)

In [20]:
for i in range(1000):
    y_pred = x.mm(w1).clamp(min = 0).mm(w2)

    loss = (y_pred - y).pow(2).mean()
    if i % 100 == 99:
        print(f"Epoch: {i}, Train_loss: {loss}")

    loss.backward() # This call will compute the gradient of loss with repect to all Tensors with requires_grad=True.

    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        w1.grad.zero_()
        w2.grad.zero_()

Epoch: 99, Train_loss: 39147.33984375
Epoch: 199, Train_loss: 33573.3671875
Epoch: 299, Train_loss: 29918.037109375
Epoch: 399, Train_loss: 27298.453125
Epoch: 499, Train_loss: 25265.076171875
Epoch: 599, Train_loss: 23581.677734375
Epoch: 699, Train_loss: 22124.16015625
Epoch: 799, Train_loss: 20826.115234375
Epoch: 899, Train_loss: 19648.712890625
Epoch: 999, Train_loss: 18569.271484375


# Defining new autograd functions

In [26]:
class MyReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return input.clamp(min = 0)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input

In [27]:
for i in range(1000):
    relu = MyReLU.apply

    y_pred = relu(x.mm(w1)).mm(w2)

    loss = (y_pred - y).pow(2).mean()
    if i % 100 == 99:
        print(f"Epoch: {i}, Train_loss: {loss}")

    loss.backward() # This call will compute the gradient of loss with repect to all Tensors with requires_grad=True.

    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        w1.grad.zero_()
        w2.grad.zero_()

Epoch: 99, Train_loss: 17567.919921875
Epoch: 199, Train_loss: 16644.458984375
Epoch: 299, Train_loss: 15785.390625
Epoch: 399, Train_loss: 14985.8662109375
Epoch: 499, Train_loss: 14239.9345703125
Epoch: 599, Train_loss: 13542.548828125
Epoch: 699, Train_loss: 12890.021484375
Epoch: 799, Train_loss: 12278.498046875
Epoch: 899, Train_loss: 11704.5908203125
Epoch: 999, Train_loss: 11165.5224609375


# Sequential API

In [45]:
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

In [49]:
loss_fn = torch.nn.MSELoss(reduction = "mean")
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [50]:
model = model.to(device)

In [51]:
for i in range(1000):
    y_pred = model(x)

    loss = loss_fn(y_pred, y)
    if i % 100 == 99:
        print(f"Epoch: {i}, Train_loss: {loss.item()}")

    loss.backward() # This call will compute the gradient of loss with repect to all Tensors with requires_grad=True.

    optimizer.step()

    optimizer.zero_grad()

Epoch: 99, Train_loss: 1.0978702306747437
Epoch: 199, Train_loss: 1.0704118013381958
Epoch: 299, Train_loss: 1.0436511039733887
Epoch: 399, Train_loss: 1.0175504684448242
Epoch: 499, Train_loss: 0.9921068549156189
Epoch: 599, Train_loss: 0.9673351645469666
Epoch: 699, Train_loss: 0.9432883262634277
Epoch: 799, Train_loss: 0.9198999404907227
Epoch: 899, Train_loss: 0.8971487283706665
Epoch: 999, Train_loss: 0.8751077055931091


# Cutomize nn Modules using Subclassing API

In [52]:
class TwoLayerNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear1_act = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        x = self.linear1(x)
        x_act = self.linear1_act(x)
        y_pred = self.linear2(x_act)
        return y_pred

In [54]:
model = TwoLayerNet(D_in, H, D_out)

In [56]:
model = model.to(device)

In [59]:
loss_fn = torch.nn.MSELoss(reduction = "mean")
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [60]:
for i in range(1000):
    y_pred = model(x)

    loss = loss_fn(y_pred, y)
    if i % 100 == 99:
        print(f"Epoch: {i}, Train_loss: {loss.item()}")

    loss.backward() # This call will compute the gradient of loss with repect to all Tensors with requires_grad=True.

    optimizer.step()

    optimizer.zero_grad()

Epoch: 99, Train_loss: 1.1699904203414917
Epoch: 199, Train_loss: 1.169812560081482
Epoch: 299, Train_loss: 1.1695799827575684
Epoch: 399, Train_loss: 1.1692966222763062
Epoch: 499, Train_loss: 1.1689658164978027
Epoch: 599, Train_loss: 1.1685903072357178
Epoch: 699, Train_loss: 1.1681727170944214
Epoch: 799, Train_loss: 1.1677093505859375
Epoch: 899, Train_loss: 1.167202115058899
Epoch: 999, Train_loss: 1.1666496992111206
