# General Pipeline

1. Design Model: decide on architecture and forward pass.
2. Construct loss and optimizer.
3. Training Loop.
    - feed forward: compute prediction.
    - backpropagation: gradients.
    - update weigths: descend.

In this notebook, we will let PyTorch take care of all 3 steps.

In [29]:
import torch
import numpy as np

In [3]:
X = torch.tensor([i*0.3 for i in range(0,20)], device = 'cuda', dtype=torch.float32)
y = torch.tensor([2.2*i for i in range(0,20)], device = 'cuda', dtype=torch.float32)

In [7]:
w = torch.tensor(0.0, device='cuda', dtype = torch.float32, requires_grad=True)

# feed forward
def forward_pass(x):
    return (w*x)

# training loop
learning_rate = 0.01
epochs = 20

# use pre-constructed loss from PyTorch
loss = torch.nn.MSELoss()

# Use Stochastic Gradient Descent optimizer from PyTorch
# We pass a list of weights for the optimizer to track and
# it will take care of updating the weights for us.
optimizer = torch.optim.SGD([w], lr=learning_rate)

for epoch in range(epochs):
    # forward pass
    y_preds = forward_pass(X)
                            
    # Here is where the PyTorch backend kicks in                        
    ### compute loss
    L = loss(y,y_preds)
    ### compute gradient
    L.backward()
    ### descend!
    optimizer.step()
    ### reset accumlated gradients
    optimizer.zero_grad()
    
    print(f'epoch {epoch+1}: weight = {w:.3f}     ,     loss = {L:.4f}')
    
predictions = forward_pass(X)
    
print(f'Prediction: y = {predictions}')

epoch 1: weight = 1.630     ,     loss = 597.7400
epoch 2: weight = 2.898     ,     loss = 361.5235
epoch 3: weight = 3.884     ,     loss = 218.6557
epoch 4: weight = 4.651     ,     loss = 132.2467
epoch 5: weight = 5.247     ,     loss = 79.9851
epoch 6: weight = 5.711     ,     loss = 48.3764
epoch 7: weight = 6.072     ,     loss = 29.2589
epoch 8: weight = 6.352     ,     loss = 17.6963
epoch 9: weight = 6.570     ,     loss = 10.7030
epoch 10: weight = 6.740     ,     loss = 6.4734
epoch 11: weight = 6.872     ,     loss = 3.9152
epoch 12: weight = 6.974     ,     loss = 2.3680
epoch 13: weight = 7.054     ,     loss = 1.4322
epoch 14: weight = 7.116     ,     loss = 0.8662
epoch 15: weight = 7.164     ,     loss = 0.5239
epoch 16: weight = 7.202     ,     loss = 0.3169
epoch 17: weight = 7.231     ,     loss = 0.1916
epoch 18: weight = 7.254     ,     loss = 0.1159
epoch 19: weight = 7.272     ,     loss = 0.0701
epoch 20: weight = 7.285     ,     loss = 0.0424
Prediction: y = 

---

# A Neural Network "from Scratch"

Ok, so we can now have PyTorch handle most of the actual backend training of our models. This means all we have to do is decide on the achitecture and code the feed-forward computation.

In [8]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [11]:
elec = pd.read_csv('./data/elecdemand.csv')[0:100]

X = elec[['workday', 'temp']]
y = elec['demand']

scale = StandardScaler()

Xs = torch.tensor(scale.fit_transform(X), device='cuda', dtype=torch.float32)
y = torch.tensor(elec['demand'], device='cuda', dtype=torch.float32)

<br>

For this exercise, we'll train a simple neural network to try and predict electricity demand using 2 input features: ```workday``` and ```temp```.

Our network will have 3 layers:
1. The input layer of size 2 units.
2. The hidden layer of size 8 units.
3. The output layer of size 1 unit.

In [40]:
# randomly initialize weights
W0 = torch.rand((2,8), device='cuda', dtype=torch.float32, requires_grad=True)
W1 = torch.rand((8,), device='cuda', dtype=torch.float32, requires_grad=True)

# design model
def forward_pass(X):
    z0 = torch.matmul(X,W0)
    a1 = torch.nn.functional.relu(z0)
    z1 = torch.matmul(a1,W1)
    return z1


learning_rate = 0.1
epochs = 20


loss = torch.nn.MSELoss()
optimizer = torch.optim.SGD([W0, W1], lr=learning_rate)


# training loop
for epoch in range(epochs):
    
    # feed forward
    y_preds = forward_pass(Xs)
    
    # compute loss
    L = loss(y,y_preds)
    
    # backpropagate
    L.backward()
    
    # update weights
    optimizer.step()
    
    # reset accumulation
    optimizer.zero_grad()
    
    print(f'epoch {epoch+1}: loss = {L}')
    
prediction = forward_pass(Xs)
print(f'neural network predicts: {prediction}')

epoch 1: loss = 11.4134521484375
epoch 2: loss = 8.952290534973145
epoch 3: loss = 6.8200812339782715
epoch 4: loss = 5.079548358917236
epoch 5: loss = 3.872300386428833
epoch 6: loss = 3.3251519203186035
epoch 7: loss = 3.15840744972229
epoch 8: loss = 3.0665078163146973
epoch 9: loss = 2.9775564670562744
epoch 10: loss = 2.8906631469726562
epoch 11: loss = 2.800798177719116
epoch 12: loss = 2.699164628982544
epoch 13: loss = 2.5860092639923096
epoch 14: loss = 2.4416728019714355
epoch 15: loss = 2.2844529151916504
epoch 16: loss = 2.1230156421661377
epoch 17: loss = 1.9174059629440308
epoch 18: loss = 1.6568453311920166
epoch 19: loss = 1.3958898782730103
epoch 20: loss = 1.1465448141098022
neural network predicts: tensor([1.6621, 1.5685, 1.4748, 1.2842, 1.1739, 1.2402, 1.2402, 1.2622, 1.1516,
        1.2402, 1.5374, 1.6307, 1.9808, 2.5235, 2.3425, 2.9759, 3.2472, 3.7145,
        4.0915, 4.4667, 4.8437, 5.0938, 5.7209, 5.5941, 5.2823, 5.2823, 4.2782,
        4.2165, 3.2472, 2.1617, 1

<br>

We can even go ahead and see how well the model generalizes to unseen data


In [41]:
elec_test = pd.read_csv('./data/elecdemand.csv').iloc[100:150]

X_test = elec_test[['workday', 'temp']]

scale = StandardScaler()

Xs_test = torch.tensor(scale.fit_transform(X_test), device='cuda', dtype=torch.float32)
y_test = torch.tensor(np.array(elec_test['demand']), device='cuda', dtype=torch.float32)

In [43]:
test_prediction = forward_pass(Xs_test)
print(f'neural network predicts: {test_prediction}')

neural network predicts: tensor([1.7488, 1.7623, 1.7488, 1.7350, 1.7080, 1.7350, 1.7350, 1.7080, 1.7080,
        1.7080, 1.6131, 1.5996, 1.4640, 1.4256, 1.5325, 1.5768, 2.0290, 2.3270,
        2.9368, 2.0290, 1.9643, 1.8664, 1.9967, 2.4925, 2.2711, 2.2159, 2.2711,
        2.0290, 2.2159, 2.2711, 2.3822, 2.8816, 3.3245, 2.9920, 2.9920, 2.3270,
        2.1048, 2.0290, 1.9643, 1.7366, 1.4558, 1.4308, 1.4640, 1.5183, 4.5556,
        4.4848, 4.3082, 4.1667, 4.0963, 3.9548], device='cuda:0',
       grad_fn=<MvBackward0>)


In [44]:
loss(y_test, test_prediction)

tensor(3.7060, device='cuda:0', grad_fn=<MseLossBackward0>)

---

# PyTorch Model

Now that we can build basic neural networks, the next step is to completely outsource even the model building to the PyTorch backend. This will use PyTorch's ```nn``` API, which basically is PyTorch's version of Keras.

In [45]:
import torch.nn as nn

In [48]:
# PyTorch Models accept each training example as a standalone tensor, so we'll have to reshape our training data
X = torch.tensor([[i*0.3] for i in range(0,20)], device = 'cuda', dtype=torch.float32)
y = torch.tensor([[2.2*i] for i in range(0,20)], device = 'cuda', dtype=torch.float32)

n_samples, n_features = X.shape

n_samples, n_features

(20, 1)

In [51]:
input_size = n_features
output_size = n_features

# instantiate a simple linear regression model
model = nn.Linear(input_size, output_size, device='cuda')

# make a prediction for an input
X_test = torch.tensor([4], device='cuda',dtype=torch.float32)

print(f'Prediction (pre-trained): f(4) = {model(X_test).item():.3f}')

Prediction (pre-trained): f(4) = 2.385


- Just like Keras, PyTorch models are functional and can be called on inputs! This makes doing feed-forward passes much simpler to code.

In [55]:
learning_rate = 0.01
epochs = 20

loss = torch.nn.MSELoss()

# since our weights are now instantiated by nn.Linear(), we have to pass those to the optimizer
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

for epoch in range(epochs):
    
    # feed forward, just call the model!
    y_preds = model(X)
    
    # compute loss
    L = loss(y, y_preds)
    
    # backprop
    L.backward()
    
    # descend!
    optimizer.step()
    
    # reset gradients
    optimizer.zero_grad()
    
    print(f'Epoch {epoch+1}: loss = {L}')
    
pred = model(X)
print(f'neural network predicts {pred}')

Epoch 1: loss = 0.175870880484581
Epoch 2: loss = 0.1452292948961258
Epoch 3: loss = 0.13100922107696533
Epoch 4: loss = 0.1197713240981102
Epoch 5: loss = 0.11379070580005646
Epoch 6: loss = 0.10897312313318253
Epoch 7: loss = 0.10674001276493073
Epoch 8: loss = 0.10471751540899277
Epoch 9: loss = 0.1028987392783165
Epoch 10: loss = 0.10127363353967667
Epoch 11: loss = 0.10051329433917999
Epoch 12: loss = 0.09904495626688004
Epoch 13: loss = 0.09822092950344086
Epoch 14: loss = 0.09690400958061218
Epoch 15: loss = 0.09601732343435287
Epoch 16: loss = 0.09516551345586777
Epoch 17: loss = 0.09396514296531677
Epoch 18: loss = 0.09304993599653244
Epoch 19: loss = 0.09217136353254318
Epoch 20: loss = 0.09132790565490723
neural network predicts tensor([[ 0.5783],
        [ 2.7326],
        [ 4.8869],
        [ 7.0394],
        [ 9.1954],
        [11.3479],
        [13.5004],
        [15.6529],
        [17.8124],
        [19.9579],
        [22.1174],
        [24.2769],
        [26.4224],
   

--- 

# Custom Models

We can package all of the steps above into a custom class, we just have to subtype from the ```nn.Module``` class.

In [59]:
class LinearReg(nn.Module):
    def __init__(self, input_dim, output_dim, **kwargs):
        super(LinearReg, self).__init__()
        # define layers here
        self.lin = nn.Linear(input_dim, output_dim, **kwargs)
        
    # we have to build the forward pass so we can call the model
    def forward(self, x):
        return self.lin(x)
    
    

In [60]:
X = torch.tensor([[i*0.3] for i in range(0,20)], device = 'cuda', dtype=torch.float32)
y = torch.tensor([[2.2*i] for i in range(0,20)], device = 'cuda', dtype=torch.float32)

n_samples, n_features = X.shape

n_samples, n_features

(20, 1)

In [61]:
input_size = n_features
output_size = n_features

# instantiate our "custom" model
model = LinearReg(input_size, output_size, device='cuda')


learning_rate = 0.01
epochs = 20

loss = torch.nn.MSELoss()

optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

for epoch in range(epochs):
    
    # feed forward, just call the model!
    y_preds = model(X)
    
    # compute loss
    L = loss(y, y_preds)
    
    # backprop
    L.backward()
    
    # descend!
    optimizer.step()
    
    # reset gradients
    optimizer.zero_grad()
    
    print(f'Epoch {epoch+1}: loss = {L}')
    
pred = model(X)
print(f'neural network predicts {pred}')

Epoch 1: loss = 590.0912475585938
Epoch 2: loss = 343.4002380371094
Epoch 3: loss = 199.92520141601562
Epoch 4: loss = 116.5318603515625
Epoch 5: loss = 67.75375366210938
Epoch 6: loss = 39.60836410522461
Epoch 7: loss = 23.197452545166016
Epoch 8: loss = 13.60979175567627
Epoch 9: loss = 8.003260612487793
Epoch 10: loss = 4.754348278045654
Epoch 11: loss = 2.8956503868103027
Epoch 12: loss = 1.7933361530303955
Epoch 13: loss = 1.1368954181671143
Epoch 14: loss = 0.773699939250946
Epoch 15: loss = 0.5494605302810669
Epoch 16: loss = 0.4306178689002991
Epoch 17: loss = 0.34970998764038086
Epoch 18: loss = 0.3105721175670624
Epoch 19: loss = 0.2798733413219452
Epoch 20: loss = 0.26517897844314575
neural network predicts tensor([[ 0.9373],
        [ 3.0541],
        [ 5.1708],
        [ 7.2858],
        [ 9.4043],
        [11.5193],
        [13.6344],
        [15.7494],
        [17.8713],
        [19.9795],
        [22.1014],
        [24.2233],
        [26.3314],
        [28.4534],
      