# pytorch


In [3]:
## imports, device, seed

import numpy as np
import matplotlib.pyplot as plt
import torch
# from torchviz import make_dot

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.manual_seed(100)

<torch._C.Generator at 0x24d48058b30>

In [5]:
# Model Class
class SimpleRGNet(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(1,1,bias=True)
    def forward(self, x):
        return self.linear(x)
    

In [6]:
#Model, optimizer and loss initialization
model = SimpleRGNet().to(device)
paramList = list(model.parameters())
stateDict = model.state_dict()
print(paramList)
print(stateDict)

lr = 0.1
optimizer = torch.optim.SGD(model.parameters(),lr=lr)
lossfnc = torch.nn.MSELoss(reduction='mean')

[Parameter containing:
tensor([[-0.7767]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.6317], device='cuda:0', requires_grad=True)]
OrderedDict([('linear.weight', tensor([[-0.7767]], device='cuda:0')), ('linear.bias', tensor([0.6317], device='cuda:0'))])


In [7]:
# DATA PREPARATION
true_w = 2
true_b = 1
N = 100
#data generation
np.random.seed(100)
x = np.random.rand(N,1)
epsilon = 0.1*np.random.randn(N,1)
y = true_w * x + true_b + epsilon
#data split 
idx = np.arange(N)
np.random.shuffle(idx)
idx_train = idx[:int(0.8*N)]
idx_test  = idx[:int(0.8*N):]
x_train, y_train  = x[idx_train],y[idx_train]
x_val, y_val = x[idx_test], y[idx_test]
x_train_tensor = torch.as_tensor(x_train).float().to(device)
y_train_tensor = torch.as_tensor(y_train).float().to(device)
x_val_tensor = torch.as_tensor(x_val).float().to(device)
y_val_tensor = torch.as_tensor(y_val).float().to(device)


In [9]:
#training loop
trainLosses = []
valLosses = []
for i in range(1000):
    model.train()
    # FORWARD PASS
    yhat = model(x_train_tensor)
    loss = lossfnc(yhat, y_train_tensor)
    trainLosses.append(loss.item())
    #make_dot(loss).view()
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    stateDict=model.state_dict()
    w=stateDict['linear.weight']
    b = stateDict['linear.bias']
    w = w.item()
    b = b.item()

    model.eval()
    with torch.no_grad():
        #val MSE loss
        yhatval = model(x_val_tensor)
        valLoss =  lossfnc(yhatval,y_val_tensor)
        valLosses.append(valLoss.item())
    #stopping condition
    if(valLoss.item()<0.0001):
        break
    print(f'train loss={loss.item()}, val loss={valLoss.item()}, w={w}, b={b}')


train loss=3.449514865875244, val loss=2.09355092048645, w=-0.5703502893447876, b=0.9665979743003845
train loss=2.09355092048645, val loss=1.3245314359664917, w=-0.4088849127292633, b=1.2148568630218506
train loss=1.3245314359664917, val loss=0.8868128061294556, w=-0.2812037467956543, b=1.3980698585510254
train loss=0.8868128061294556, val loss=0.6361399292945862, w=-0.17898887395858765, b=1.532467246055603
train loss=0.6361399292945862, val loss=0.4911121428012848, w=-0.09599084407091141, b=1.6302399635314941
train loss=0.4911121428012848, val loss=0.4057947099208832, w=-0.027513999491930008, b=1.7005451917648315
train loss=0.4057947099208832, val loss=0.354265421628952, w=0.029970109462738037, b=1.750260829925537
train loss=0.354265421628952, val loss=0.32189473509788513, w=0.07911314815282822, b=1.784552812576294
train loss=0.32189473509788513, val loss=0.30042511224746704, w=0.12190814316272736, b=1.8073010444641113
train loss=0.30042511224746704, val loss=0.2851944863796234, w=0.1