In [1]:
import torch #pytorch lib
import torch.nn as nn #pt neural net
import torch.optim as optim #optimization
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import pandas as pd

In [2]:
data = fetch_california_housing()
X = data.data
y = data.target

In [3]:
X_df = pd.DataFrame(X, columns=data.feature_names)
X_df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [4]:
y_df = pd.DataFrame(y, columns=['MedHouseVal'])
y_df.head()

Unnamed: 0,MedHouseVal
0,4.526
1,3.585
2,3.521
3,3.413
4,3.422


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print(X_train)

[[-0.326196    0.34849025 -0.17491646 ...  0.05137609 -1.3728112
   1.27258656]
 [-0.03584338  1.61811813 -0.40283542 ... -0.11736222 -0.87669601
   0.70916212]
 [ 0.14470145 -1.95271028  0.08821601 ... -0.03227969 -0.46014647
  -0.44760309]
 ...
 [-0.49697313  0.58654547 -0.60675918 ...  0.02030568 -0.75500738
   0.59946887]
 [ 0.96545045 -1.07984112  0.40217517 ...  0.00707608  0.90651045
  -1.18553953]
 [-0.68544764  1.85617335 -0.85144571 ... -0.08535429  0.99543676
  -1.41489815]]


**Conversion into Tensors**

In [7]:
#converting data into pytorch tensors as they work well with GPU
X_train=torch.FloatTensor(X_train)
X_test=torch.FloatTensor(X_test)
y_train=torch.FloatTensor(y_train)
y_test=torch.FloatTensor(y_test)

print(X_train)

tensor([[-0.3262,  0.3485, -0.1749,  ...,  0.0514, -1.3728,  1.2726],
        [-0.0358,  1.6181, -0.4028,  ..., -0.1174, -0.8767,  0.7092],
        [ 0.1447, -1.9527,  0.0882,  ..., -0.0323, -0.4601, -0.4476],
        ...,
        [-0.4970,  0.5865, -0.6068,  ...,  0.0203, -0.7550,  0.5995],
        [ 0.9655, -1.0798,  0.4022,  ...,  0.0071,  0.9065, -1.1855],
        [-0.6854,  1.8562, -0.8514,  ..., -0.0854,  0.9954, -1.4149]])


In pytorch, all Nets funs are found in .nn module.
Every NN model will extend .nn module, in sense that every model in PT is a class.

In [9]:
#defining a simple reg model
class RegModel(nn.Module):
    def __init__(self, input_size):
        super(RegModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 64) #1st layer input is input_size and 64 is output of this model
        self.relu = nn.ReLU() #active fun for non-linearity
        self.fc2 = nn.Linear(64, 128) #64 of prev layer as input and 128 as output
        self.relu = nn.ReLU()
        self.fc3 = nn.Linear(128,10)
        self.relu = nn.ReLU() #Using Relu in the end instead of sigmoid or softmax bcus
        self.fc4 = nn.Linear(10, 1) #1 is the output we need, a answer in yes or no, or a value as answer

    def forward(self, x): #forward pass in NN
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.fc4(x)
        return x

In [10]:
#creating the model
input_size = X_train.shape[1] #num of feats/cols as our input_size is = number of feats
#here x_train is a tensor and we need its shape to know how many fts we have in the tensor
#bias is true here bcus bias is added into the output in case if our neurons becomes zero so it wont stop and continue training
print(input_size)
model = RegModel(input_size)
print(model)

8
RegModel(
  (fc1): Linear(in_features=8, out_features=64, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=64, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=10, bias=True)
  (fc4): Linear(in_features=10, out_features=1, bias=True)
)


Why bias=True in a Linear Layer?

In a neural network, a linear layer performs a linear transformation on its input, which can be expressed as:

output = input * weight + bias
The bias term is crucial for several reasons:

Shifting the Activation Function:
By adding a bias, you can shift the activation function (like ReLU, sigmoid, etc.) left or right. This allows the model to learn a wider range of functions, increasing its representational power.
Modeling Non-Zero Intercepts:
In many real-world datasets, the optimal solution might not pass through the origin (0,0). The bias term enables the model to capture this non-zero intercept, making it more flexible.
Improving Learning Capacity:
The bias term provides additional flexibility to the model, allowing it to learn more complex patterns and make more accurate predictions.
In essence, the bias=True setting empowers the linear layer to learn a more comprehensive range of functions and better fit the underlying data distribution.

In [11]:
#Loss and optimizer
loss_fn = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

In [13]:
#training loop
num_epochs = 500
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train)
    loss = loss_fn(outputs, y_train.view(-1, 1)) #lossfn compares loss of output and actual y-train value

    # Backward pass and optimization
    #defining order of loop, when to go forward and when to go backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
'''
loss_fn: This refers to a specific loss function, such as cross-entropy loss or mean squared error, depending on the task.
outputs: These are the predicted outputs generated by your model for a given input.
y_train.view(-1, 1): This part reshapes the ground truth labels y_train into a 2D tensor with a single column. The -1 dimension tells PyTorch to infer the size of that dimension based on the number of elements in y_train.
'''

'''
In the context of deep learning and neural networks, optimizer.zero_grad() is a crucial step in the training process.
It ensures that the gradients accumulated during the backpropagation process are reset to zero before the next iteration.
Why is this necessary?
Accumulation of Gradients: During backpropagation, the gradients of the loss function with respect to the model's parameters are calculated.
 These gradients are accumulated in each iteration.
Incorrect Parameter Updates: If the gradients are not reset, the optimizer will use the accumulated gradients from previous iterations,
leading to incorrect parameter updates.
Divergence: This can cause the model to diverge, meaning its performance will deteriorate over time.
The Role of the Optimizer:
The optimizer, such as Adam or SGD, takes the calculated gradients and updates the model's parameters to minimize the loss function.
By resetting the gradients, the optimizer ensures that each parameter update is based on the current gradient,
preventing incorrect updates and promoting efficient learning.
In summary, optimizer.zero_grad() is an essential step to maintain the accuracy and efficiency of the training process in neural networks.
'''

'''
This line of code is crucial in the training process of neural networks.
It triggers the backpropagation algorithm, which is a fundamental technique used to compute gradients of the loss function with respect to the model's parameters.
Here's a breakdown of what happens:

Forward Pass:
The neural network processes the input data through its layers, performing calculations and generating output predictions.
Loss Calculation: The loss function compares the predicted output with the true labels and calculates the error or loss.
Backward Pass (Backpropagation):
The loss.backward() function initiates the backpropagation process.
It calculates the gradients of the loss function with respect to each parameter in the network.
These gradients indicate the direction and magnitude of change needed for the parameters to reduce the loss.
Why is Backpropagation Important?

Parameter Updates: The calculated gradients are used to update the model's parameters using an optimization algorithm like gradient descent.
Minimizing Loss: By adjusting the parameters in the direction of the negative gradient, the model aims to minimize the loss function and improve its accuracy.
Learning from Mistakes: Backpropagation allows the model to learn from its errors and make better predictions in the future.
In essence, loss.backward() is the engine that drives the learning process in neural networks,
 enabling them to adapt to the data and improve their performance over time.
'''


Epoch [10/500], Loss: 1.1252
Epoch [20/500], Loss: 1.1148
Epoch [30/500], Loss: 1.1045
Epoch [40/500], Loss: 1.0943
Epoch [50/500], Loss: 1.0843
Epoch [60/500], Loss: 1.0743
Epoch [70/500], Loss: 1.0645
Epoch [80/500], Loss: 1.0547
Epoch [90/500], Loss: 1.0450
Epoch [100/500], Loss: 1.0353
Epoch [110/500], Loss: 1.0257
Epoch [120/500], Loss: 1.0162
Epoch [130/500], Loss: 1.0068
Epoch [140/500], Loss: 0.9975
Epoch [150/500], Loss: 0.9883
Epoch [160/500], Loss: 0.9792
Epoch [170/500], Loss: 0.9703
Epoch [180/500], Loss: 0.9614
Epoch [190/500], Loss: 0.9526
Epoch [200/500], Loss: 0.9440
Epoch [210/500], Loss: 0.9355
Epoch [220/500], Loss: 0.9271
Epoch [230/500], Loss: 0.9188
Epoch [240/500], Loss: 0.9107
Epoch [250/500], Loss: 0.9027
Epoch [260/500], Loss: 0.8949
Epoch [270/500], Loss: 0.8871
Epoch [280/500], Loss: 0.8796
Epoch [290/500], Loss: 0.8721
Epoch [300/500], Loss: 0.8648
Epoch [310/500], Loss: 0.8576
Epoch [320/500], Loss: 0.8506
Epoch [330/500], Loss: 0.8437
Epoch [340/500], Lo

"\nThis line of code is crucial in the training process of neural networks.\nIt triggers the backpropagation algorithm, which is a fundamental technique used to compute gradients of the loss function with respect to the model's parameters.\nHere's a breakdown of what happens:\n\nForward Pass: \nThe neural network processes the input data through its layers, performing calculations and generating output predictions.\nLoss Calculation: The loss function compares the predicted output with the true labels and calculates the error or loss.\nBackward Pass (Backpropagation):\nThe loss.backward() function initiates the backpropagation process.\nIt calculates the gradients of the loss function with respect to each parameter in the network.\nThese gradients indicate the direction and magnitude of change needed for the parameters to reduce the loss.\nWhy is Backpropagation Important?\n\nParameter Updates: The calculated gradients are used to update the model's parameters using an optimization alg

In [14]:
#saving the trained model weights
torch.save(model.state_dict(), 'reg_model_weights.pth')

In [15]:
#loading the saved weights
loaded_model = RegModel(input_size)
loaded_model.load_state_dict(torch.load('reg_model_weights.pth'))


  loaded_model.load_state_dict(torch.load('reg_model_weights.pth'))


<All keys matched successfully>

In [17]:
#eval the loaded model on test set
with torch.no_grad():
    y_pred = loaded_model(X_test)
    mse=mean_squared_error(y_test.numpy(), y_pred.numpy())
    print(f'Mean Squared Error: {mse}')

#we can tune parameters to make it more close to 0

Mean Squared Error: 0.7557947635650635
