# Libraries | Install

In [17]:
!pip install torch sklearn numpy matplotlib tqdm torchviz ann_visualizer torchsummary

You should consider upgrading via the '/Users/fermibot/PycharmProjects/python-projects/venv/bin/python -m pip install --upgrade pip' command.[0m


# Libraries | Import

In [18]:
from random import random, randint, choice, seed

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from torchsummary import summary
from torchviz import make_dot
from tqdm.notebook import tqdm

# Functions | Define

In [36]:
def randomize(factor):
    """Return a random perturbation whose magnitude is below `factor`.

    A sign is drawn uniformly from {-1, +1} and combined with a uniform
    draw from [0, 1), scaled by `factor`.
    """
    sign = choice((-1, 1))
    magnitude = random()
    return sign * factor * magnitude


class Config:
    """Hyper-parameters and runtime settings shared by the cells of this notebook."""

    x_n = 16  # number of input features per sample
    lr = 0.01  # learning rate handed to the SGD optimizer
    epochs = 10  # number of passes over the training loader
    split_train = 0.8  # fraction of the samples placed in the training split
    shuffle = True  # shuffle flag intended for the data loading step
    batch_size = 64  # mini-batch size used by the DataLoaders
    # Prefer Apple's MPS backend (the original setting), but fall back to CUDA
    # and finally the CPU so the notebook also runs on machines without MPS.
    device = torch.device(
        "mps" if torch.backends.mps.is_available()
        else "cuda" if torch.cuda.is_available()
        else "cpu"
    )


def print_separator():
    """Print an empty line followed by a rule made of 100 dashes."""
    rule = "-" * 100
    print("\n" + rule)


# Single settings object read by the data, model and training cells below.
config = Config()

# Dataset | Synthesize

$Y_j = \sum_{i} c_i \, x_{ij} + \varepsilon_j \mid c_i = 0.0, 0.1, 0.2, \dots$

In [19]:
# Seed the RNGs used by the notebook so that "Restart & Run All" regenerates
# the same dataset (Python `random`) and the same torch-driven randomness.
SEED = 42
seed(SEED)
torch.manual_seed(SEED)

# One base value per sample, drawn uniformly from [0, 1).
seed_list = [random() for _ in range(2 ** 14)]
x_n = config.x_n
# True coefficients: 0.0, 0.1, 0.2, ... (one per feature).
coe = [0.1 * x for x in range(x_n)]

# Every feature of a sample is that sample's base value plus a small perturbation,
# so the feature columns are almost identical to one another (strongly collinear).
X = [[x + randomize(0.01) for _ in range(x_n)] for x in seed_list]
# Per-feature contributions to the target, each with its own perturbation.
X_y = [[x_i * coe_j + randomize(0.01) for x_i, coe_j in zip(x, coe)] for x in X]
# Target: sum of the perturbed contributions (drawn independently of X_y's noise).
y = [sum([x_i * coe_j + randomize(0.01) for x_i, coe_j in zip(x, coe)]) for x in X]

# Modeling | Linear Regression

In [20]:
# Ordinary least-squares baseline, fitted on every sample of the synthetic dataset.
lr = LinearRegression().fit(X, y)

# Report the fitted parameters.
print(f"\nCoefficients: {lr.coef_}")
print(f"\nIntercept: {lr.intercept_}")


Coefficients: [-0.00613923  0.03658898  0.21007615  0.25801465  0.38538827  0.54504808
  0.60173573  0.69332478  0.89242505  0.91598773  0.99019966  1.05970078
  1.20230315  1.33688507  1.42144093  1.45715284]

Intercept: -3.336649853480367e-05


# $R^2 = 1 - \frac{u}{v}$

- where $u = \sum (y_{true} - y_{pred})^2$ is the residual sum of squares
- and $v = \sum (y_{true} - y_{mean})^2$ is the total sum of squares


In [21]:
# Score of the fitted regression, evaluated on the same X and y it was fitted on.
print(f"\nFit score: {lr.score(X, y)}")


Fit score: 0.9999546397039892



# Modeling | Neural Network

In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [23]:
# Materialise the synthetic data as float32 tensors on the configured device.
tensor_X = torch.tensor(X, dtype=torch.float32, device=config.device)
tensor_y = torch.tensor(y, dtype=torch.float32, device=config.device)

# Sequential split: the first `split_train` fraction trains, the remainder tests.
size_train = int(config.split_train * len(tensor_X))
size_test = len(tensor_X) - size_train
assert size_train + size_test == len(tensor_X)

dataset_train = TensorDataset(tensor_X[:size_train], tensor_y[:size_train])
dataset_test = TensorDataset(tensor_X[size_train:], tensor_y[size_train:])

# Honour config.shuffle for the training batches (it was previously defined but
# never passed on); the test loader keeps its fixed order.
loader_train = DataLoader(dataset=dataset_train, batch_size=config.batch_size, shuffle=config.shuffle)
loader_test = DataLoader(dataset=dataset_test, batch_size=config.batch_size)

In [24]:
class LinearModel(nn.Module):
    """A single fully-connected layer mapping `in_features` inputs to one output."""

    def __init__(self, in_features):
        super().__init__()
        # One weight per input feature plus a bias term.
        self.linear = nn.Linear(in_features, 1, bias=True)

    def forward(self, x):
        """Apply the linear layer to `x` and return the result."""
        prediction = self.linear(x)
        return prediction


# Build the network and print its layer summary with device="cpu" before the
# model is moved; the move to the configured device happens afterwards and the
# returned module is what the cell displays.
model = LinearModel(config.x_n)
summary(model, (config.x_n,), device="cpu")
model.to(config.device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                    [-1, 1]              17
Total params: 17
Trainable params: 17
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


LinearModel(
  (linear): Linear(in_features=16, out_features=1, bias=True)
)

## Modeling | Linear Regression | Neural Network | Train

In [25]:
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=config.lr)
loss_all = []  # one entry per epoch: sum of the batch losses, stored as a Python float

model.train()  # make sure the model is in training mode even if this cell is re-run after evaluation
for epoch in tqdm(range(config.epochs)):
    loss_epoch_train = 0.0
    for batch_X, batch_y in loader_train:
        optimizer.zero_grad()
        outputs = model(batch_X)
        # Give the targets a trailing axis so they line up with the model output.
        loss_batch = criterion(outputs, batch_y.unsqueeze(1))
        loss_batch.backward()
        optimizer.step()

        # Accumulate a detached copy: adding the raw loss tensor would keep every
        # batch's autograd graph referenced through the running sum.
        loss_epoch_train += loss_batch.detach()
    # float() handles both the 0-dim tensor and the initial 0.0 (empty loader).
    loss_all.append(float(loss_epoch_train))


  0%|          | 0/10 [00:00<?, ?it/s]

## Modeling | Linear Regression | Neural Network | Evaluate

In [26]:
model.eval()
with torch.no_grad():
    # Copy the trained layer's parameters into host-side NumPy values.
    linear_layer = model.linear
    weights_nn = linear_layer.weight.detach().cpu().numpy().flatten()
    bias = linear_layer.bias.detach().cpu().numpy().flatten()[0]

In [33]:
# Line up the true coefficients with each model's estimates and absolute errors.
df_weights = pd.DataFrame(
    {
        "coeffs_original": coe,
        "coeffs_lr": lr.coef_,
        "coeffs_lr_delta": np.abs(lr.coef_ - coe),
        "weights_nn": weights_nn,
        "weights_nn_delta": np.abs(weights_nn - coe),
    }
)

df_weights

Unnamed: 0,coeffs_original,coeffs_lr,coeffs_lr_delta,weights_nn,weights_nn_delta
0,0.0,-0.006139,0.006139,0.702421,0.702421
1,0.1,0.036589,0.063411,0.873634,0.773634
2,0.2,0.210076,0.010076,0.940504,0.740504
3,0.3,0.258015,0.041985,0.605463,0.305463
4,0.4,0.385388,0.014612,0.770953,0.370953
5,0.5,0.545048,0.045048,0.815619,0.315619
6,0.6,0.601736,0.001736,0.928441,0.328441
7,0.7,0.693325,0.006675,0.738034,0.038034
8,0.8,0.892425,0.092425,0.6158,0.1842
9,0.9,0.915988,0.015988,0.751254,0.148746


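Sum of the weights and of their absolute deviations ($\delta$s) from the true coefficients. Every feature is the same base value plus a small amount of noise, so the 16 columns are almost perfectly collinear: the data pin down the *sum* of the weights far better than any individual weight, which is why both models can match the true total while individual weights differ from the true coefficients.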

In [34]:
# Column totals of the comparison table, shown as a one-column frame.
column_totals = df_weights.sum()
pd.DataFrame({"sum": column_totals})

Unnamed: 0,sum
coeffs_original,12.0
coeffs_lr,12.000133
coeffs_lr_delta,0.451671
weights_nn,11.999956
weights_nn_delta,7.150184


# Modeling | Compare

In [35]:
# Network predictions for every sample (training and test rows alike).
model.eval()
with torch.no_grad():
    y_pred_pytorch = model(tensor_X).cpu().numpy().flatten()
y_true = tensor_y.cpu().numpy().flatten()

# scikit-learn predictions for the same rows
X_np, y_np = np.array(X), np.array(y)
y_pred_sklearn = lr.predict(X_np)

# R² of each model against the same targets
r2_sklearn = r2_score(y_true, y_pred_sklearn)
r2_pytorch = r2_score(y_true, y_pred_pytorch)

# Parameters learned by the network's single linear layer
linear_layer = model.linear
weights = linear_layer.weight.detach().cpu().numpy().flatten()
bias = linear_layer.bias.detach().cpu().numpy().flatten()[0]

# Side-by-side comparison: a fixed-width text table with one left-aligned column
# per model and, where one exists, the ground-truth value.

print("MODEL COMPARISON SUMMARY")
print_separator()
print(f"{'Metric':<25} {'Sklearn':<15} {'PyTorch':<15} {'True':<10}")
print_separator()
print(f"{'R² Score':<25} {r2_sklearn:<15.6f} {r2_pytorch:<15.6f} {'-':<10}")
# The "True" entry is the sum of the coefficients used to generate the targets.
print(f"{'Sum of Weights':<25} {np.sum(lr.coef_):<15.6f} {np.sum(weights):<15.6f} {np.sum(coe):<10.6f}")
# The targets are generated without an intercept term, hence the literal 0.0.
print(f"{'Bias/Intercept':<25} {lr.intercept_:<15.6f} {bias:<15.6f} {'0.0':<10}")
print_separator()


# Quality assessment
def get_quality(r2):
    """Translate an R² value into a coarse verbal rating.

    Returns "Excellent" above 0.9, "Good" above 0.7, "Fair" above 0.5
    and "Poor" otherwise (all bounds are exclusive).
    """
    thresholds = ((0.9, "Excellent"), (0.7, "Good"), (0.5, "Fair"))
    for lower_bound, label in thresholds:
        if r2 > lower_bound:
            return label
    return "Poor"


# Verbal rating of each model's R².
print(f"{'Fit Quality':<25} {get_quality(r2_sklearn):<15} {get_quality(r2_pytorch):<15} {'-':<10}")

# Build the percentage text first and pad afterwards, so the "%" sign stays
# attached to the number instead of trailing after the column padding.
pct_sklearn = f"{r2_sklearn * 100:.1f}%"
pct_pytorch = f"{r2_pytorch * 100:.1f}%"
print(f"{'Variance Explained':<25} {pct_sklearn:<15} {pct_pytorch:<15} {'-':<10}")

MODEL COMPARISON SUMMARY

----------------------------------------------------------------------------------------------------
Metric                    Sklearn         PyTorch         True      

----------------------------------------------------------------------------------------------------
R² Score                  0.999955        0.999942        -         
Sum of Weights            12.000133       11.999956       12.000000 
Bias/Intercept            -0.000033       0.000106        0.0       

----------------------------------------------------------------------------------------------------
Fit Quality               Excellent       Excellent       -         
Variance Explained        100.0         % 100.0         % -         
