In [25]:
import numpy as np
import pandas as pd
import torch
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [26]:
def linear_model(t_u, *params):
    """Evaluate a linear model: one weight per feature plus a trailing bias.

    Args:
        t_u: Indexable collection of features; ``t_u[i]`` is multiplied by
            the i-th weight. Entries beyond the number of weights are ignored.
        *params: The weights, followed by the bias as the last entry.

    Returns:
        The sum of ``t_u[i] * params[i]`` over every weight, plus the bias.

    Raises:
        IndexError: If no parameters are supplied.
    """
    weights = params[:-1]
    prediction = 0
    for idx in range(len(weights)):
        prediction += t_u[idx] * weights[idx]

    # The last parameter is the bias (intercept) term.
    bias = params[-1]
    return prediction + bias

def loss_fn(t_p, t_c):
  """Mean squared error between predictions and targets.

  Args:
    t_p: Predicted values.
    t_c: Target values; must support ``t_p - t_c``.

  Returns:
    The mean of the element-wise squared differences.
  """
  return ((t_p - t_c) ** 2).mean()

def training_loop(n_epochs, model, optimizer, params, train_t_u, val_t_u, train_t_c, val_t_c, log_every=500):
  """Fit ``params`` by gradient descent while tracking a validation loss.

  Each epoch computes the training loss, evaluates the validation loss under
  ``torch.no_grad()``, and then takes one optimizer step. The losses reported
  for an epoch are therefore measured *before* that epoch's update.

  Args:
    n_epochs: Number of epochs (optimizer steps) to run.
    model: Callable invoked as ``model(t_u, *params)``; returns predictions.
    optimizer: Object exposing ``zero_grad()`` and ``step()``.
    params: Parameters unpacked into ``model``.
    train_t_u: Training inputs passed to ``model``.
    val_t_u: Validation inputs passed to ``model``.
    train_t_c: Training targets.
    val_t_c: Validation targets.
    log_every: Print progress every this many epochs; the first three epochs
      are always printed. A falsy value disables the periodic print.

  Returns:
    Tuple ``(params, final_train_loss, final_val_loss)``. ``params`` is the
    object that was passed in. The losses are those of the last epoch and do
    not require grad; both are ``-1`` if no epoch was run.
  """
  final_train_loss = -1
  final_val_loss = -1

  for epoch in range(1, n_epochs + 1):
    train_t_p = model(train_t_u, *params)
    train_loss = loss_fn(train_t_p, train_t_c)

    # Validation is evaluation only: keep it out of the autograd graph.
    with torch.no_grad():
      val_t_p = model(val_t_u, *params)
      val_loss = loss_fn(val_t_p, val_t_c)
      assert not val_loss.requires_grad

    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    if epoch <= 3 or (log_every and epoch % log_every == 0):
      print(f'Epoch={epoch}, training loss={train_loss.item():.4f}, validation loss={val_loss.item():.4f}')

  if n_epochs >= 1:
    # Detach so callers that store the result do not keep the autograd graph
    # of the last forward pass alive.
    final_train_loss = train_loss.detach()
    final_val_loss = val_loss

  return params, final_train_loss, final_val_loss

In [27]:
url = 'https://raw.githubusercontent.com/HamedTabkhi/Intro-to-ML/refs/heads/main/Dataset/Housing.csv'
# read_csv already returns a DataFrame, so no extra wrapping is needed.
dataset = pd.read_csv(url)

# Features: every column except the target ('price') and 'furnishingstatus'.
x = dataset.drop(columns=['price','furnishingstatus'])
varlist = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']

def binary_map(x):
    """Map a Series of 'yes'/'no' strings to 1/0; any other value becomes NaN."""
    return x.map({'yes': 1, 'no': 0})

x[varlist] = x[varlist].apply(binary_map)
# Series.map yields NaN for anything other than 'yes'/'no'; fail loudly here
# rather than letting NaNs propagate silently into the loss.
assert not x[varlist].isna().any().any(), 'unexpected value in a yes/no column'

# NOTE(review): feature standardisation is disabled, so features and target
# are used in their raw units — confirm this is intended.
# scaler = StandardScaler()
# x = pd.DataFrame(scaler.fit_transform(x))

y = dataset['price']

# The split below is given an explicit random_state, so it is reproducible
# independently of this global NumPy seed.
np.random.seed(0)
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size = 0.8, test_size = 0.2, random_state = 100)

# One 1-D tensor per feature column. Build everything as float32 up front
# instead of relying on implicit int -> float promotion in every operation.
train_t_u = [torch.tensor(x_train[col].values, dtype=torch.float32) for col in x_train.columns]
val_t_u = [torch.tensor(x_test[col].values, dtype=torch.float32) for col in x_test.columns]
train_t_c = torch.tensor(y_train.values, dtype=torch.float32)
val_t_c = torch.tensor(y_test.values, dtype=torch.float32)

In [28]:
# Sweep Adam learning rates; every run restarts from all-ones parameters.
learning_rate = [1e0, 1e-1, 1e-2, 1e-3, 1e-4]
final_train_loss = []
final_val_loss = []

model = linear_model
for lr in learning_rate:
  print(f'Learning rate: {lr}')
  # One weight per feature tensor plus a bias, as a column of ones.
  params = torch.ones((len(train_t_u) + 1, 1), dtype=torch.float32, requires_grad=True)
  optimizer = optim.Adam([params], lr=lr)
  params, train_loss, val_loss = training_loop(
      5000, model, optimizer, params, train_t_u, val_t_u, train_t_c, val_t_c)
  final_train_loss.append(train_loss)
  final_val_loss.append(val_loss)
  print()


Learning rate: 1.0
Epoch=1, training loss=26423936942080.0000, validation loss=25105851744256.0000
Epoch=2, training loss=26370333736960.0000, validation loss=25053001416704.0000
Epoch=3, training loss=26316791349248.0000, validation loss=25000218198016.0000
Epoch=500, training loss=9185849769984.0000, validation loss=8316158738432.0000
Epoch=1000, training loss=4210597298176.0000, validation loss=3749862965248.0000
Epoch=1500, training loss=3424268582912.0000, validation loss=3183293497344.0000
Epoch=2000, training loss=3369570664448.0000, validation loss=3196197273600.0000
Epoch=2500, training loss=3366372507648.0000, validation loss=3204169072640.0000
Epoch=3000, training loss=3363909140480.0000, validation loss=3202571042816.0000
Epoch=3500, training loss=3360956088320.0000, validation loss=3199595446272.0000
Epoch=4000, training loss=3357518594048.0000, validation loss=3196088221696.0000
Epoch=4500, training loss=3353641484288.0000, validation loss=3192128536576.0000
Epoch=5000, t

In [29]:
# Summarise the losses recorded for each learning rate in the sweep.
print("Final losses:")
for i, rate in enumerate(learning_rate):
  train_mse, val_mse = final_train_loss[i], final_val_loss[i]
  print(f'Learning rate={rate:.2e}, training loss={train_mse:.2e}, validation loss={val_mse:.2e}')

Final losses:
Learning rate=1.00e+00, training loss=3.35e+12, validation loss=3.19e+12
Learning rate=1.00e-01, training loss=8.59e+12, validation loss=7.75e+12
Learning rate=1.00e-02, training loss=2.38e+13, validation loss=2.26e+13
Learning rate=1.00e-03, training loss=2.62e+13, validation loss=2.48e+13
Learning rate=1.00e-04, training loss=2.64e+13, validation loss=2.51e+13
