In [1]:
%load_ext autoreload
%autoreload 2


import torch
from torch_ols.regression import LinearRegression

from sklearn import linear_model ##for comparing with TorchOLS 
from sklearn.metrics import r2_score ##for evaluating sklearn model to compare with TorchOLS
from sklearn.datasets import fetch_california_housing ##a toy dataset
import pandas as pd ##for messing with the dataset

In [2]:
# dataloader
def sklearn_to_df(data_loader):
    # California Houseing Dataset
    # y = average house value in units of $100,000
    # X = potential predictors of y

    X_data = data_loader.data
    X_columns = data_loader.feature_names
    X = pd.DataFrame(X_data, columns=X_columns)

    y_data = data_loader.target
    y = pd.Series(y_data, name='target')

    return X, y

In [16]:
def adjusted_r2(r2_score, x): # for getting adj. r2 from sklearn
    return 1 - (1 - r2_score) * ((x.shape[0] - 1) / (x.shape[0] - x.shape[1]))

In [27]:
cpu = torch.device("cpu")
gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(cpu, gpu)

cpu cpu


---
# Simple Linear Regression

In [5]:
# Use just one variable as predictor
x, y = sklearn_to_df(fetch_california_housing()) 
##toy dataset. y = average house value in units of 100,000
x = x[['MedInc']] 

div = int(len(x)*0.5) ##just split the data in half so we can have a test sample
end = None
x_tr = x[:div]
y_tr = y[:div]
x_te = x[div:end]
y_te = y[div:end]

## Coefficients

In [8]:
print("TORCH")
torch_slr = LinearRegression(device=cpu)
torch_slr.fit(x_tr, y_tr)
torch_slr.summary()

TORCH
______________________________________________
     Model Summary     | R^2        =   +0.48494
   'target' = Xβ + ε   | Adj. R^2   =   +0.48494
______________________________________________
Intercept  |   +0.48126
MedInc     |   +0.41985
``````````````````````````````````````````````
R^2        =   +0.48494
Adj. R^2   =   +0.48494


In [17]:
print("SKLEARN")
sk_slr = linear_model.LinearRegression()
sk_slr.fit(x_tr, y_tr)
sk_r2 = sk_slr.score(x_tr, y_tr)
{"intercept": sk_slr.intercept_.item(), "MedInc": sk_slr.coef_.item(),
"scores" : {
    "r2" : sk_r2, 'adj_r2' : adjusted_r2(sk_r2, x_tr)
}
}

SKLEARN


{'intercept': 0.4812621396162051,
 'MedInc': 0.4198487357383301,
 'scores': {'r2': 0.4849374063598769, 'adj_r2': 0.4849374063598769}}

## Predictions & Test R^2

In [18]:
preds = torch_slr.predict(x_te)
r2, a_r2 = torch_slr.r2_score(y_true=y_te, y_pred=preds)

print(f"TorchOLS:\t Test R^2 = {r2},\t Adjusted R^2 = {a_r2}")

TorchOLS:	 Test R^2 = 0.4594688335880929,	 Adjusted R^2 = 0.4594688335880929


In [19]:
sk_preds = sk_slr.predict(x_te)
sk_r2 = sk_slr.score(x_te, y_te)
print(f"Sklearn:\t Test R^2 = {sk_r2},\t Adjusted R^2 = {adjusted_r2(sk_r2, x_te)}")

Sklearn:	 Test R^2 = 0.4594688335880929,	 Adjusted R^2 = 0.4594688335880929


___
# Multiple Linear Regression

In [20]:
# Use all the variables as predictors
x, y = sklearn_to_df(fetch_california_housing()) 
##toy dataset. y = average house value in units of 100,000

div = int(len(x)*0.5)
end = None
x_tr = x[:div]
y_tr = y[:div]
x_te = x[div:end]
y_te = y[div:end]

## Coefficients

In [21]:
print("TORCH")
torch_mlr = LinearRegression(device=cpu)
torch_mlr.fit(x_tr, y_tr)
torch_mlr.summary()

TORCH
______________________________________________
     Model Summary     | R^2        =   +0.59431
   'target' = Xβ + ε   | Adj. R^2   =   +0.59403
______________________________________________
Intercept  |  -28.87182
MedInc     |   +0.44945
HouseAge   |   +0.00525
AveRooms   |   -0.11519
AveBedrms  |   +0.64983
Population |   -0.00000
AveOccup   |   -0.00749
Latitude   |   -0.41276
Longitude  |   -0.36541
``````````````````````````````````````````````
R^2        =   +0.59431
Adj. R^2   =   +0.59403


In [26]:
print("SKLEARN")
sk_mlr = linear_model.LinearRegression()
sk_mlr.fit(x_tr, y_tr)
sk_r2 = sk_mlr.score(x_tr, y_tr)

{'intercept':sk_mlr.intercept_,
'Bs' : {x.columns[i] : sk_mlr.coef_[i] for i in range(x.shape[1])},
"scores" : {
    "r2" : sk_r2, 'adj_r2' : adjusted_r2(sk_r2, x_tr)}
}

SKLEARN


{'intercept': -28.871818045412475,
 'Bs': {'MedInc': 0.4494456752691039,
  'HouseAge': 0.005251468965176776,
  'AveRooms': -0.1151879647470256,
  'AveBedrms': 0.6498316037875798,
  'Population': -4.428311747297145e-06,
  'AveOccup': -0.00749431094243891,
  'Latitude': -0.41276023907180504,
  'Longitude': -0.36541182697464963},
 'scores': {'r2': 0.5943061238454341, 'adj_r2': 0.5940307304073928}}

## Predictions & Test R^2

In [23]:
preds = torch_mlr.predict(x_te)
r2, a_r2 = torch_mlr.r2_score(y_true=y_te, y_pred=preds)

print(f"TorchOLS:\t Test R^2 = {r2},\t Adjusted R^2 = {a_r2}")

TorchOLS:	 Test R^2 = 0.5861947272818564,	 Adjusted R^2 = 0.5859138276591811


In [24]:
sk_preds = sk_mlr.predict(x_te)
sk_r2 = sk_mlr.score(x_te, y_te)
print(f"Sklearn:\t Test R^2 = {sk_r2}.\t\t Adjusted R^2 = {adjusted_r2(sk_r2, x_te)}")

Sklearn:	 Test R^2 = 0.586194727280809.		 Adjusted R^2 = 0.585913827658133
