In [8]:
import numpy as np
import scipy as sp
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import PolynomialFeatures,StandardScaler


In [9]:
# Creating the data matrix
california = fetch_california_housing()
D = california.data
y = california.target
n,d = D.shape
print(n,d)


20640 8


In [10]:
# Creating a design matrix with polynomial standardized features
aff = PolynomialFeatures(2,include_bias=True)
scaler = StandardScaler()
X = aff.fit_transform(scaler.fit_transform(D))
features = aff.get_feature_names_out(california.feature_names)
features

array(['1', 'MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population',
       'AveOccup', 'Latitude', 'Longitude', 'MedInc^2', 'MedInc HouseAge',
       'MedInc AveRooms', 'MedInc AveBedrms', 'MedInc Population',
       'MedInc AveOccup', 'MedInc Latitude', 'MedInc Longitude',
       'HouseAge^2', 'HouseAge AveRooms', 'HouseAge AveBedrms',
       'HouseAge Population', 'HouseAge AveOccup', 'HouseAge Latitude',
       'HouseAge Longitude', 'AveRooms^2', 'AveRooms AveBedrms',
       'AveRooms Population', 'AveRooms AveOccup', 'AveRooms Latitude',
       'AveRooms Longitude', 'AveBedrms^2', 'AveBedrms Population',
       'AveBedrms AveOccup', 'AveBedrms Latitude', 'AveBedrms Longitude',
       'Population^2', 'Population AveOccup', 'Population Latitude',
       'Population Longitude', 'AveOccup^2', 'AveOccup Latitude',
       'AveOccup Longitude', 'Latitude^2', 'Latitude Longitude',
       'Longitude^2'], dtype=object)

In [14]:
# Minimize RSS by solving the system of equations given by Grad(RSS)=0
beta = sp.linalg.solve(X.T@X,X.T@y)
beta

array([ 1.95724199,  0.92243689,  0.1322746 , -0.39583612,  0.56212582,
        0.03851036, -1.68052409, -1.27092793, -1.16447981, -0.11299565,
        0.04446976,  0.20353088, -0.16758436,  0.12323738, -0.05110633,
       -0.61882733, -0.54901295,  0.03353488, -0.03930124,  0.06328855,
        0.04017423, -0.23751839, -0.27066427, -0.25213086,  0.04449819,
       -0.08079146, -0.19118335,  0.69090139,  0.46243774,  0.40789392,
        0.03597354,  0.2759851 , -0.42913941, -0.44237578, -0.38372053,
        0.00351097,  0.22514157,  0.05551793,  0.03325349,  0.00940107,
        0.47326301,  0.33794513,  0.28233172,  0.46261665,  0.16018745])

In [15]:
# Fetch beta values for given features
print(beta[np.where(features == "MedInc")])
print(beta[np.where(features == "MedInc AveBedrms")])
print(beta[np.where(features == "HouseAge AveBedrms")])

[0.92243689]
[-0.16758436]
[0.06328855]


In [18]:
# Minimize Objective function f(β)=1/n*‖y-Xβ‖²+λ‖β‖² by solving the system of equations 
# given by Grad(f(β))=0 for λ=0.1
beta_b = sp.linalg.solve(X.T@X+n*0.1,X.T@y)
beta_b

array([ 1.95843968,  0.92658988,  0.13369509, -0.39944498,  0.56872288,
        0.03994686, -1.67735339, -1.26172709, -1.15550851, -0.11202827,
        0.04732486,  0.20113733, -0.16526669,  0.12686591, -0.04152643,
       -0.59673877, -0.52825062,  0.0341816 , -0.04284778,  0.06685842,
        0.04239596, -0.23805741, -0.25857465, -0.24076018,  0.04415151,
       -0.07983879, -0.19575158,  0.69121165,  0.44602199,  0.39143491,
        0.03539262,  0.28479486, -0.40821177, -0.42422221, -0.36598655,
        0.00376446,  0.22263601,  0.06551324,  0.04208779,  0.00961806,
        0.46878047,  0.36365074,  0.29916091,  0.49773502,  0.17801013])

In [19]:
# Fetch beta values for given features
print(beta_b[np.where(features == "MedInc")])
print(beta_b[np.where(features == "MedInc AveBedrms")])
print(beta_b[np.where(features == "HouseAge AveBedrms")])

[0.92658988]
[-0.16526669]
[0.06685842]
