In [159]:
%load_ext autoreload
%autoreload 2

from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import fetch_california_housing
import pandas as pd
import torch

from torch_ols.regression import LinearRegression

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [160]:
# dataloader
def sklearn_to_df(data_loader):
    # y = average house value in units of 100,000
    # X = potential predictors of y

    X_data = data_loader.data
    X_columns = data_loader.feature_names
    X = pd.DataFrame(X_data, columns=X_columns)

    y_data = data_loader.target
    y = pd.Series(y_data, name='target')

    return X, y

In [162]:
cpu = torch.device("cpu")
gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(cpu, gpu)

cpu cpu


---
# Simple Linear Regression

In [54]:
# Use just one variable as predictor
x, y = sklearn_to_df(fetch_california_housing()) 
##toy dataset. y = average house value in units of 100,000
x = x[['MedInc']] 

div = int(len(x)*0.5)
end = None
x_tr = x[:div]
y_tr = y[:div]
x_te = x[div:end]
y_te = y[div:end]

## Coefficients

In [139]:
print("TORCH")
torch_slr = LinearRegression(device=cpu)
torch_slr.fit(x_tr, y_tr)
torch_slr.summary()

TORCH
False False


Unnamed: 0,Unnamed: 1,coefficient
0,intercept,0.481262
1,MedInc,0.419849
0,--,score
1,R^2,0.484937
2,Adj. R^2,0.484937


In [140]:
print("SKLEARN")
sk_slr = linear_model.LinearRegression()
sk_slr.fit(x_tr, y_tr)
{"intercept": sk_slr.intercept_.item(), "MedInc": sk_slr.coef_.item()}

SKLEARN


{'intercept': 0.4812621396162051, 'MedInc': 0.4198487357383301}

## Predictions & Test R^2

In [142]:
preds = torch_slr.predict(x_te)
r2, adj_r2 = torch_slr.r2_score(y_true=y_te, y_pred=preds)

print(f"TorchOLS:\t Test R^2 = {r2},\t Adjusted R^2 = {adj_r2}")

TorchOLS:	 Test R^2 = 0.4594688335880929,	 Adjusted R^2 = 0.4594688335880929


In [143]:
sk_preds = sk_slr.predict(x_te)
sk_r2 = sk_slr.score(x_te, y_te)
print(f"Sklearn:\t Test R^2 = {sk_r2},\t Adjusted R^2 = {1 - (1 - sk_r2) * ((x_te.shape[0] - 1) / (x_te.shape[0] - x_te.shape[1]))}")

Sklearn:	 Test R^2 = 0.4594688335880929,	 Adjusted R^2 = 0.4594688335880929


___
# Multiple Linear Regression

In [144]:
# Use all the variables as predictors
x, y = sklearn_to_df(fetch_california_housing()) 
##toy dataset. y = average house value in units of 100,000

div = int(len(x)*0.5)
end = None
x_tr = x[:div]
y_tr = y[:div]
x_te = x[div:end]
y_te = y[div:end]

## Coefficients

In [146]:
print("TORCH")
torch_mlr = LinearRegression(device=cpu)
torch_mlr.fit(x_tr, y_tr)
# torch_mlr.coefficients, torch_mlr.intercept
torch_mlr.summary()

TORCH
False False


Unnamed: 0,Unnamed: 1,coefficient
0,intercept,-28.871818
1,MedInc,0.449446
2,HouseAge,0.005251
3,AveRooms,-0.115188
4,AveBedrms,0.649832
5,Population,-0.000004
6,AveOccup,-0.007494
7,Latitude,-0.41276
8,Longitude,-0.365412
0,--,score


In [153]:
print("SKLEARN")
sk_mlr = linear_model.LinearRegression()
sk_mlr.fit(x_tr, y_tr)

{'intercept':sk_mlr.intercept_,
'coefs' : {x.columns[i] : sk_mlr.coef_[i] for i in range(x.shape[1])}
}

SKLEARN


{'intercept': -28.871818045412475,
 'coefs': {'MedInc': 0.4494456752691039,
  'HouseAge': 0.005251468965176776,
  'AveRooms': -0.1151879647470256,
  'AveBedrms': 0.6498316037875798,
  'Population': -4.428311747297145e-06,
  'AveOccup': -0.00749431094243891,
  'Latitude': -0.41276023907180504,
  'Longitude': -0.36541182697464963}}

## Predictions & Test R^2

In [157]:
preds = torch_mlr.predict(x_te)
r2, adj_r2 = torch_mlr.r2_score(y_true=y_te, y_pred=preds)

print(f"TorchOLS:\t Test R^2 = {r2},\t Adjusted R^2 = {adj_r2}")

TorchOLS:	 Test R^2 = 0.5861947272818564,	 Adjusted R^2 = 0.5859138276591811


In [158]:
sk_preds = sk_mlr.predict(x_te)
sk_r2 = sk_mlr.score(x_te, y_te)
print(f"Sklearn:\t Test R^2 = {sk_r2}.\t\t Adjusted R^2 = {1 - (1 - sk_r2) * ((x_te.shape[0] - 1) / (x_te.shape[0] - x_te.shape[1]))}")

Sklearn:	 Test R^2 = 0.586194727280809.		 Adjusted R^2 = 0.585913827658133
