# Basic

In [None]:
import torch

# Create a 2x2 tensor filled with uniform random values drawn from [0, 1).
X = torch.rand((2, 2))

In [None]:
# Show the current contents of the tensor X.
print(X)

In [None]:
# By default torch creates floating-point tensors as float32; a different
# data type can always be requested explicitly through `dtype`.
# (Passing the Python builtin `float` selects torch.float64, spelled out here.)
X = torch.rand([2, 3], dtype=torch.float64)

In [None]:
# Show the current contents of the tensor X (the printout includes its dtype when it is not the default).
print(X)

In [None]:
# Tensor operations
# Addition is element-wise: entries at the same index are added together.
X, Y = torch.rand(2, 3), torch.rand(2, 3)
X + Y

In [None]:
# The * operator multiplies element by element (it is not a matrix product),
# so X and Y must have compatible shapes.
X * Y 

In [None]:
X = torch.rand(3, 4)
print(X)
# Slice out one column: every row, column index 0.
first_column = X[:, 0]
print(first_column)
# Select a single element; .item() turns the one-element tensor into a
# plain Python number.
print(X[1][1].item())

In [None]:
# Reshape a tensor: ask for 3 columns and pass -1 so the remaining
# dimension is inferred from the number of elements in X.
Y = X.view(-1,3)
print(Y)

In [None]:
import numpy as np

In [None]:
# Converting between torch and NumPy: the tensor and the array share the same
# memory, so changing the tensor in place changes the NumPy array as well
# (and vice versa).
a = Y.numpy()
Z = torch.from_numpy(a)
Z

In [None]:
# Use the GPU when one is available and fall back to the CPU otherwise, so
# the cell runs on any machine.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Create a tensor directly on the selected device.
X = torch.ones(5, device=device)
y = torch.ones(5)
# .to() returns a tensor on the target device instead of moving `y` in place,
# so the result has to be assigned back.
y = y.to(device)
# Both operands now live on the same device and can be added.
Z = X + y
# Bring the result back to the CPU.
Z = Z.to('cpu')
torch.cuda.is_available()

In [None]:
# requires_grad=True must be set for autograd to compute gradients
# with respect to this tensor.
x = torch.ones(5, requires_grad = True)
print(x)

# Gradient

In [None]:
import torch

# Build a small computation on a tensor that tracks gradients.
X = torch.randn(3, requires_grad=True)
print(X)
Y = X + 2
print(Y)
z = Y * Y * 2
# backward() without arguments needs a scalar, so reduce to the mean first.
z = z.mean()
# Backpropagate from the scalar computed in this cell.
z.backward(retain_graph=True)
# The gradient lands on the leaf tensor X: dz/dX = 4 * (X + 2) / 3.
print(X.grad)

In [None]:
# Three ways to stop tracking gradients for a tensor:
#   1. <x.requires_grad_(False)>
#   2. <x.detach()>
#   3. <with torch.no_grad():>
# This cell demonstrates option 1.
X = torch.randn(3, requires_grad=True)
print(X)
# requires_grad_ flips the flag in place and returns the same tensor.
print(X.requires_grad_(False))
# Results computed from X no longer carry gradient information.
y = 2 + X
print(y)



In [None]:
# Example: calling backward() repeatedly without resetting the gradient.
import torch

w = torch.ones(4, requires_grad=True)
for epoch in range(3):
    model_out = (3 * w).sum()
    model_out.backward()
    # w.grad is never cleared here, so each epoch adds another 3 per entry.
    print(w.grad)

## Important detail
Re-zero the gradient at the end of every epoch to avoid accumulation.

In [None]:
# Without a reset, the gradient accumulates on every iteration.
# Zeroing it at the end of each epoch lets the next epoch start fresh.
import torch

w = torch.ones(4, requires_grad=True)
for epoch in range(3):
    model_out = (3 * w).sum()
    model_out.backward()
    print(w.grad)
    # Clear the gradient in place before the next epoch.
    w.grad.zero_()

In [None]:
# Example: y = w * x
x, y = torch.tensor(1.0), torch.tensor(2.0)
w = torch.tensor(1.0, requires_grad=True)

# Forward pass: prediction followed by the squared error.
y_hat = w * x
error = y_hat - y
loss = error ** 2
print(loss)

# Backward pass: d(loss)/dw = 2 * (w * x - y) * x
loss.backward()
print(w.grad)

# Next: update the weight and iterate.

# Using pytorch API to specify the forward propagation

## NumPy way of doing linear regression

In [None]:
import numpy as np

# Model: f = w * x
# Target function: f = 2 * x
X = np.array([1, 2, 3, 4], dtype=np.float32)
y = np.array([2, 4, 6, 8], dtype=np.float32)

# Initialize the weight.
w = 0.0


def forward(x):
    """Model prediction: return w * x using the current module-level weight w."""
    return w * x


def loss(y, y_pred):
    """Return the mean squared error between targets y and predictions y_pred."""
    return ((y_pred - y) ** 2).mean()


# Gradient
# MSE   = 1/N * sum((w*x - y)**2)
# dJ/dw = 1/N * sum(2*x * (w*x - y))
def gradient(x, y, y_pred):
    """Return dJ/dw of the mean squared error for the model w * x.

    x, y and y_pred are the inputs, targets and current predictions. The
    element-wise products are averaged (not just summed) so the result
    matches the 1/N factor used in loss().
    """
    return (2 * x * (y_pred - y)).mean()


print(f'Predic before training: f(5) = {forward(5):.3f}')

# Training
lr = 0.01
n_iters = 40
for epoch in range(n_iters):
    # Forward pass: predictions and loss for the current weight.
    y_pred = forward(X)
    l = loss(y, y_pred)
    # Gradient of the loss with respect to w, computed on the training inputs X.
    dw = gradient(X, y, y_pred)
    # Gradient-descent step.
    w -= lr * dw
    if epoch % 5 == 0:
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {l:.3f}')

## Using pytorch

### Stage one

In [6]:
import torch
X = torch.tensor([1,2,3,4], dtype = torch.float32)
Y = torch.tensor([2,4,6,8], dtype = torch.float32)
W = torch.tensor(0.0, dtype = torch.float32, requires_grad = True)

def forward(X):
    return W * X

def loss(Y,Y_pred):
    return ((Y_pred - Y)**2).mean()

print(f'Predic before training: f(5) = {forward(5):.3f}')

lr = 0.01 
n_iters = 20

for epoch in range(n_iters):
    y_pred = forward(X)
    l = loss(Y,y_pred)
    
    l.backward() # calculate dl/dw
    # update weights
    '''
    # this is very important
    since we don;t want W to show up in the computational graph we have to run in no grad mode
    '''
    with torch.no_grad():
        W -= lr * W.grad
    
    #zero gradients
    '''
    # this is very important
    avoid the accumulation of gradiant
    '''
    W.grad.zero_()
    
    if epoch %2 == 0:
        print(f'epoch {epoch + 1}: w = {W:.3f}, loss = {l:.3f}')

Predic before training: f(5) = 0.000
epoch 1: w = 0.300, loss = 30.000
epoch 3: w = 0.772, loss = 15.660
epoch 5: w = 1.113, loss = 8.175
epoch 7: w = 1.359, loss = 4.267
epoch 9: w = 1.537, loss = 2.228
epoch 11: w = 1.665, loss = 1.163
epoch 13: w = 1.758, loss = 0.607
epoch 15: w = 1.825, loss = 0.317
epoch 17: w = 1.874, loss = 0.165
epoch 19: w = 1.909, loss = 0.086


### Stage two
    Use built-in functions to build the structure

In [14]:
'''
# Workflow 
# 1) Design model (input, output sizem foward pass)
# 2) Construct loss and optimizer
# 3) Train loop
     - forward pass: compute prediction
     - backward pass: gradients
     - update weights
'''
import torch
import torch.nn as nn

X = torch.tensor([1,2,3,4], dtype = torch.float32)
Y = torch.tensor([2,4,6,8], dtype = torch.float32)
W = torch.tensor(0.0, dtype = torch.float32, requires_grad = True)

def forward(X):
    return W * X

lr = 0.01 
n_iters = 50

loss = nn.MSELoss()
optimizer = torch.optim.SGD([W], lr = lr)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    
    # loss
    l = loss(Y,y_pred)
    
    # gradiant = back propagation, update the computational graph 
    l.backward()
    
    # update weights
    optimizer.step()
    
    #zero gradiants
    optimizer.zero_grad()
    
    if epoch %10 == 0:
        print(f'epoch {epoch + 1}: w = {W:.3f}, loss = {l:.3f}')

epoch 1: w = 0.300, loss = 30.000
epoch 11: w = 1.665, loss = 1.163
epoch 21: w = 1.934, loss = 0.045
epoch 31: w = 1.987, loss = 0.002
epoch 41: w = 1.997, loss = 0.000
