In [1]:
import torch

In [2]:
# Bias == default when all featues take on value 0, basically offset/
# Default prediction

In [None]:
# Vectors leads to order of magnitude level improvement in efficiency

## Generating Synthetic Data

In [6]:
def generate_data(w: torch.Tensor, b: torch.Tensor, num_examples: int):
    normal_data = torch.normal(0,1, (num_examples, len(w))) # Generates data of a certain shape, either with a different mean and stddev for each element or a shared one for all elems
    return normal_data

In [9]:
generate_data(torch.rand(5,3), torch.rand(1,3), 15) # Generate a bunch of Gaussian examples with mean 0 and stddev 1
# This could be extremely useful as we could specify a different mean and stddev for each feature

tensor([[-0.8263, -0.3378, -0.7590, -1.3058, -0.0880],
        [ 0.8383, -1.7387, -0.6003,  0.3287,  0.9068],
        [ 1.5445,  0.6526,  0.2522, -0.4596,  0.7441],
        [ 0.6042,  0.8429,  1.4547,  0.6519,  1.7894],
        [ 0.2583, -1.5769,  0.5673, -0.8607, -0.3156],
        [-1.2678,  0.0126, -0.3155,  0.9588, -1.6057],
        [ 0.2643,  0.9876,  0.3120, -0.3836,  1.3716],
        [-0.5624, -0.1780, -0.3913,  1.0896,  0.0967],
        [ 1.6429,  1.4594,  0.5817,  0.0041, -0.1786],
        [ 0.0968, -0.5051, -2.5518, -1.1594,  2.5856],
        [ 0.2620, -0.2029,  2.0110,  1.2230, -2.0159],
        [ 0.3467, -1.5809, -0.0676,  0.7500,  0.8691],
        [ 1.3077,  0.4531,  0.7718,  0.5086,  0.6707],
        [-0.3690,  0.0495,  0.5064, -0.4081, -0.9435],
        [ 0.0292, -0.2160, -0.1685, -1.4968,  0.2747]])

In [33]:
# Different mean and stddev for each feature:
def generate_data_with_custom_dist(mean: torch.Tensor, stddev: torch.Tensor, w: torch.Tensor, b: torch.Tensor, num_examples: int):
    normal_data = [torch.normal(mean,stddev) for i in range(num_examples)] # Generates data of a certain shape, either with a different mean and stddev for each element or a shared one for all elems
    return normal_data

In [34]:
# Generate different features with different means and standard deviations
generate_data_with_custom_dist(torch.Tensor([1,2,3,4,5]), torch.Tensor([1,2,3,4,5]), torch.rand(5,3), torch.rand(1,3), 15)

[tensor([ 2.0262,  5.6668, -0.0356,  5.6359,  2.0021]),
 tensor([ 1.3844,  1.6745, -0.0680,  5.7642, 12.3371]),
 tensor([ 0.8632, -0.1684,  2.3418,  2.9237,  9.4773]),
 tensor([ 0.3060,  0.8906,  1.3358,  0.6612, -6.3763]),
 tensor([0.7846, 3.5069, 2.3805, 7.4205, 5.5446]),
 tensor([ 1.8134,  1.5516,  5.5529,  1.3864, -1.7785]),
 tensor([0.5565, 0.2367, 2.7897, 6.8607, 8.0864]),
 tensor([2.8080, 5.6415, 5.4175, 2.0603, 2.9201]),
 tensor([0.9949, 0.3634, 0.3902, 1.2336, 6.2772]),
 tensor([ 2.0763,  3.6064,  4.3312,  4.7290, 17.3246]),
 tensor([1.4953, 0.1996, 1.7931, 6.3762, 6.0653]),
 tensor([3.1342, 3.6653, 7.9141, 5.7994, 2.7496]),
 tensor([ 0.1325,  2.0292, -1.5745,  3.4429,  5.6390]),
 tensor([0.8177, 2.2541, 7.6830, 9.4916, 3.5884]),
 tensor([2.7237, 2.1966, 6.7975, 2.3307, 3.7033])]

In [103]:
# generating a dataset with Gaussian noise
def generate_dataset(mean: int = 1, 
                     stddev: int = 2, w: 
                     torch.Tensor = torch.rand((5,1)),
                     b: torch.Tensor = torch.rand(1,1), 
                     num_examples: int = 50):
    print(w.shape)
    X = torch.normal(mean, stddev, (num_examples, len(w))) # Generates data of a certain shape, either with a different mean and stddev for each element or a shared one for all elems
    y = torch.mm(X,w)
    y = y + b
    # Adding gaussian noise
    y += torch.normal(0, 0.1, (num_examples,len(w[0])))
    return X, y # Returning feature set, labels

In [104]:
features, labels = generate_dataset(5,7, torch.Tensor([[.1], [.1], [.7], [.05], [.05]]), torch.Tensor([2]), 7)# Labels generating deterministically + some noise with random weights
# for a linearly regressive algorithm with some noise - linear relationship
labels

torch.Size([5, 1])


tensor([[-0.7741],
        [ 6.2064],
        [ 9.5621],
        [ 3.3655],
        [14.0831],
        [ 6.0961],
        [ 9.0736]])

In [105]:
# featues
features # some randomized linear weighting of these + Gaussian noise yields results. To be more definitive, select weights beforehand

tensor([[14.7194, -2.4425, -6.5926, 10.2956, -1.9313],
        [ 4.6789, -2.9116,  5.3034, -1.5714, 10.0874],
        [ 6.8084, 10.4848,  8.4068, -7.6185,  8.0880],
        [ 2.3162, -0.7306,  0.7154, 14.5020, -1.0340],
        [ 9.8748, -1.2867, 15.2288, 10.5760, -1.7192],
        [14.4920,  1.8988,  3.8513, -2.4731, -3.7723],
        [-1.0157,  2.0223,  9.4840,  1.5953,  8.5487]])

The data generated above is Gaussian data generated with a linear relationship

In [112]:
features = features[torch.randperm(features.shape[0])] # Using random permutation we shuffle the numbers up to a certain value, hence shuffling the rows / our training examples

In [114]:
features

tensor([[ 4.6789, -2.9116,  5.3034, -1.5714, 10.0874],
        [ 2.3162, -0.7306,  0.7154, 14.5020, -1.0340],
        [-1.0157,  2.0223,  9.4840,  1.5953,  8.5487],
        [14.4920,  1.8988,  3.8513, -2.4731, -3.7723],
        [14.7194, -2.4425, -6.5926, 10.2956, -1.9313],
        [ 6.8084, 10.4848,  8.4068, -7.6185,  8.0880],
        [ 9.8748, -1.2867, 15.2288, 10.5760, -1.7192]])

In [173]:
# Beginning with a guess
w = torch.normal(0, 1, (5,1), requires_grad = True) # A normal tensor autoinitialized which required a gradient

In [174]:
b = torch.zeros(1, requires_grad = True) # Bias is generally initialized as 0
w, b

(tensor([[ 0.6665],
         [ 0.2049],
         [ 0.6068],
         [-0.6368],
         [ 0.0205]], requires_grad=True),
 tensor([0.], requires_grad=True))

In [175]:
# Definint basic linear regression model
def linear_regression(X: torch.Tensor, w: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    # dimensions: examples * features x feature_weights(same dim as features) * 1 + 1*1 = 
    # examples * 1 (one weighted prediction applying all 
    # weights to each example (across column) to output a single prediction)
    return torch.mm(X,w) + b

In [176]:
abs(linear_regression(features, w, b) - labels) # a set of initial errors for predictions

tensor([[ 7.7213],
        [13.6347],
        [ 4.9106],
        [10.5167],
        [15.3698],
        [10.7086],
        [ 0.2856]], grad_fn=<AbsBackward>)

In [177]:
l = ((linear_regression(features,w, b) - labels)**2)  / len(labels) # A cost function (squared error)

In [178]:
l.sum().backward() # Backward propagating 

In [179]:
# Updating with gradient decent
k = w - (0.005 * w.grad)
w = k

In [180]:
w # updated w

tensor([[ 0.6582],
        [-0.0061],
        [ 0.3037],
        [ 0.0583],
        [-0.1611]], grad_fn=<SubBackward0>)

In [181]:
# Updating b
k = b - (0.005 * b.grad)

In [182]:
b = k
b # adjusted b parameter

tensor([0.0075], grad_fn=<SubBackward0>)

In [183]:
linear_regression(features, w, b) - labels # Overcompensated,but significantly lower cost given small number of training examples

tensor([[ 3.7731],
        [-3.4413],
        [-8.6390],
        [ 7.8025],
        [-5.4641],
        [-0.8644],
        [ 2.9600]], grad_fn=<SubBackward0>)

Note: Similar to d2l, it may be a good idea to build my own synthetic data generator for training models in the future

In [184]:
# Building a neural network to solve this task

In [189]:
neural_network = torch.nn.Sequential(torch.nn.Linear(5,1)) # Define a Sequential neural network model and pass it a linear layer
# Layer maps from 5 features to 1 (weighting labels)


In [191]:
# Overriding weights in first layer of neural network (weights variable called 'weight')
neural_network[0].weight.data.normal_(0, 0.1) # Returns randomly initialized weights

tensor([[-0.1063,  0.0761, -0.0834, -0.0849, -0.0536]])

In [193]:
# We can only fill data of weight/bias
neural_network[0].bias.data.fill_(0) # Underscore functions fill parameter (w/b) with some value

tensor([0.])

In [207]:
# Defining a loss function - Mean Squared Error
cost = torch.nn.MSELoss()
trainer = torch.optim.AdamW(neural_network.parameters(), lr = 0.03)# defining an optimizer: need to specify learning rate and our neural networks parameters to optimize over
# nn.parameters() returns all parameters of neural networks to optimize over. We get back a trainer with an optimizer ready to optimize over params
num_epochs = 50


In [208]:
# Define model as a callable which returns predictions by default
for epoch in range(num_epochs):
    loss = cost(neural_network(features), labels) # Compute cost
    print("cost: ", loss)
    trainer.zero_grad() # Resets previous gradient
    loss.backward() # Computes gradients
    trainer.step()  # Updates parameters by gradient
# Note: trainer keeps updates where it left off, MSE increases from 92 to 8

cost:  tensor(9.3138, grad_fn=<MseLossBackward>)
cost:  tensor(9.0028, grad_fn=<MseLossBackward>)
cost:  tensor(8.7795, grad_fn=<MseLossBackward>)
cost:  tensor(8.7311, grad_fn=<MseLossBackward>)
cost:  tensor(8.6163, grad_fn=<MseLossBackward>)
cost:  tensor(8.4990, grad_fn=<MseLossBackward>)
cost:  tensor(8.4603, grad_fn=<MseLossBackward>)
cost:  tensor(8.4930, grad_fn=<MseLossBackward>)
cost:  tensor(8.5225, grad_fn=<MseLossBackward>)
cost:  tensor(8.5075, grad_fn=<MseLossBackward>)
cost:  tensor(8.4633, grad_fn=<MseLossBackward>)
cost:  tensor(8.4267, grad_fn=<MseLossBackward>)
cost:  tensor(8.4217, grad_fn=<MseLossBackward>)
cost:  tensor(8.4432, grad_fn=<MseLossBackward>)
cost:  tensor(8.4653, grad_fn=<MseLossBackward>)
cost:  tensor(8.4677, grad_fn=<MseLossBackward>)
cost:  tensor(8.4502, grad_fn=<MseLossBackward>)
cost:  tensor(8.4258, grad_fn=<MseLossBackward>)
cost:  tensor(8.4063, grad_fn=<MseLossBackward>)
cost:  tensor(8.3935, grad_fn=<MseLossBackward>)
cost:  tensor(8.3815

In [209]:
trainer.state

defaultdict(dict,
            {Parameter containing:
             tensor([[ 0.4241,  0.7119, -0.0726,  0.7008,  0.3963]], requires_grad=True): {'step': 50,
              'exp_avg': tensor([[-0.2162, -0.0050, -0.0854,  0.0309, -0.0388]]),
              'exp_avg_sq': tensor([[0.2851, 0.0438, 0.2171, 0.1787, 0.0529]])},
             Parameter containing:
             tensor([-0.1588], requires_grad=True): {'step': 50,
              'exp_avg': tensor([0.1990]),
              'exp_avg_sq': tensor([0.0061])}})

In [214]:
neural_network[0].weight.data,neural_network[0].bias.data #final predictions for weights and bias

(tensor([[ 0.4241,  0.7119, -0.0726,  0.7008,  0.3963]]), tensor([-0.1588]))