In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# 1. Prepare Data
# Toy dataset for a one-variable linear regression; the targets follow y = 2x.
# Both tensors are written directly as column vectors of shape (5, 1): one row
# per sample and one column per feature, which is the layout the linear layer
# consumes.
X_train = torch.tensor(
    [[1.0], [2.0], [3.0], [4.0], [5.0]],
    dtype=torch.float32,
)  # Training inputs (features).
y_train = torch.tensor(
    [[2.0], [4.0], [6.0], [8.0], [10.0]],
    dtype=torch.float32,
)  # Training labels (the correct answers), y = 2x.

# 2. Build the Model
# Define a simple linear regression model
class LinearRegressionModel(nn.Module):
    """Linear regression model backed by a single ``nn.Linear`` layer.

    With the default sizes the model computes ``y = w * x + b`` for one input
    feature and one output value. Other sizes may be passed to fit
    multi-feature or multi-output linear models.

    Args:
        in_features: Number of input features per sample. Defaults to 1.
        out_features: Number of values predicted per sample. Defaults to 1.
    """

    def __init__(self, in_features: int = 1, out_features: int = 1) -> None:
        super().__init__()  # Initialise nn.Module before registering sub-modules.
        # The attribute name ``linear`` determines the state_dict keys
        # ("linear.weight", "linear.bias"); keep it stable so saved
        # checkpoints remain loadable.
        self.linear = nn.Linear(in_features=in_features, out_features=out_features)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the layer's predictions for ``x``.

        Args:
            x: Input tensor whose last dimension has size ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size ``out_features``.
        """
        return self.linear(x)

model = LinearRegressionModel()  # The model instance that will be trained.

# 3. Define Loss Function and Optimizer
# Mean Squared Error is the loss being minimised; a common choice for
# regression tasks.
criterion = nn.MSELoss()
# Stochastic Gradient Descent over all of the model's learnable parameters,
# with a learning rate of 0.01.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

# 4. Train the Model
# Full-batch training: every epoch runs the entire training set through the
# model once and applies a single optimizer update.
print("Starting model training...")
num_epochs = 100  # Number of passes over the training set.
log_interval = 50  # Report the loss every `log_interval` epochs.
for epoch in range(num_epochs):
    # Forward pass: predictions, then their loss against the true labels.
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backward and optimize. Gradients accumulate across backward() calls,
    # so they are cleared before computing this epoch's gradients.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Progress report. The last epoch is always reported so that the final
    # loss is visible even when num_epochs is not a multiple of log_interval
    # (or is smaller than it). Note the value shown was measured before this
    # epoch's parameter update. .item() extracts the Python float.
    if (epoch + 1) % log_interval == 0 or epoch + 1 == num_epochs:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
print("Model training complete.")

# 5. Evaluate the Model (Optional)
# Query the trained model at X=10. The input is a (1, 1) tensor: one sample
# with one feature.
with torch.no_grad():
    # Gradient tracking is not needed for inference, so it is switched off.
    prediction_tensor = model(torch.tensor([[10.0]], dtype=torch.float32))

# Pull the plain Python number out of the single-element result tensor and
# print it with two decimal places.
prediction_value = prediction_tensor.item()
print(f"Prediction for X=10: {prediction_value:.2f}")

# 6. Save the Model
# Only the state dictionary (the learnable weights and biases) is written to
# disk, not the model object itself.
model_save_path = 'my_linear_model.pth'  # Destination file for the parameters.
torch.save(obj=model.state_dict(), f=model_save_path)
print(f"Model saved to: {model_save_path}")

# 7. Load the Model (Optional, for demonstration)
# A state dictionary holds parameters only, so a fresh model with the same
# architecture is created first and the saved parameters are copied into it.
loaded_model = LinearRegressionModel()
loaded_model.load_state_dict(
    torch.load(model_save_path, weights_only=True)
)
# Switch to evaluation mode. This matters for models containing layers such
# as dropout or batch normalization.
loaded_model.eval()
print(f"Model loaded from: {model_save_path}")

# Repeat the X=10 query against the reloaded model to confirm that the saved
# parameters were restored and the model works.
with torch.no_grad():
    # Inference only, so gradient calculation is disabled.
    loaded_prediction_tensor = loaded_model(torch.tensor([[10.0]], dtype=torch.float32))

# Convert the single-element tensor to a Python number and print it.
loaded_prediction_value = loaded_prediction_tensor.item()
print(f"Prediction with loaded model for X=10: {loaded_prediction_value:.2f}")

Starting model training...
Epoch [50/88], Loss: 0.1005
Model training complete.
Prediction for X=10: 18.85
Model saved to: my_linear_model.pth
Model loaded from: my_linear_model.pth
Prediction with loaded model for X=10: 18.85
