In [1]:
import numpy as np
import scipy as sp
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import PolynomialFeatures,StandardScaler


In [2]:
# Load the California housing dataset and split it into the raw
# feature matrix D and the target vector y.
california = fetch_california_housing()
D, y = california.data, california.target

# n = number of observations (rows of D), d = number of raw features (columns of D)
n, d = D.shape
print(n, d)


20640 8


In [3]:
# Build the design matrix X in two steps:
#   1. pass the raw columns of D through a StandardScaler,
#   2. expand the scaled columns into all polynomial terms up to degree 2
#      (a constant bias column is included).
scaler = StandardScaler()
D_scaled = scaler.fit_transform(D)

aff = PolynomialFeatures(degree=2, include_bias=True)
X = aff.fit_transform(D_scaled)

# Name of every column of X, derived from the original feature names
features = aff.get_feature_names_out(california.feature_names)
features

array(['1', 'MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population',
       'AveOccup', 'Latitude', 'Longitude', 'MedInc^2', 'MedInc HouseAge',
       'MedInc AveRooms', 'MedInc AveBedrms', 'MedInc Population',
       'MedInc AveOccup', 'MedInc Latitude', 'MedInc Longitude',
       'HouseAge^2', 'HouseAge AveRooms', 'HouseAge AveBedrms',
       'HouseAge Population', 'HouseAge AveOccup', 'HouseAge Latitude',
       'HouseAge Longitude', 'AveRooms^2', 'AveRooms AveBedrms',
       'AveRooms Population', 'AveRooms AveOccup', 'AveRooms Latitude',
       'AveRooms Longitude', 'AveBedrms^2', 'AveBedrms Population',
       'AveBedrms AveOccup', 'AveBedrms Latitude', 'AveBedrms Longitude',
       'Population^2', 'Population AveOccup', 'Population Latitude',
       'Population Longitude', 'AveOccup^2', 'AveOccup Latitude',
       'AveOccup Longitude', 'Latitude^2', 'Latitude Longitude',
       'Longitude^2'], dtype=object)

In [4]:
# Ordinary least squares: setting Grad(RSS)=0 leads to the normal
# equations (XᵀX)β = Xᵀy, which are solved here as a linear system.
gram = X.T @ X      # left-hand side XᵀX
moment = X.T @ y    # right-hand side Xᵀy
beta = sp.linalg.solve(gram, moment)
beta

array([ 1.95724199,  0.92243689,  0.1322746 , -0.39583612,  0.56212582,
        0.03851036, -1.68052409, -1.27092793, -1.16447981, -0.11299565,
        0.04446976,  0.20353088, -0.16758436,  0.12323738, -0.05110633,
       -0.61882733, -0.54901295,  0.03353488, -0.03930124,  0.06328855,
        0.04017423, -0.23751839, -0.27066427, -0.25213086,  0.04449819,
       -0.08079146, -0.19118335,  0.69090139,  0.46243774,  0.40789392,
        0.03597354,  0.2759851 , -0.42913941, -0.44237578, -0.38372053,
        0.00351097,  0.22514157,  0.05551793,  0.03325349,  0.00940107,
        0.47326301,  0.33794513,  0.28233172,  0.46261665,  0.16018745])

In [5]:
# Print the fitted coefficient in beta that belongs to each selected feature.
# A boolean mask over the feature names picks the matching entry of beta.
for name in ("MedInc", "MedInc AveBedrms", "HouseAge AveBedrms"):
    print(beta[features == name])

[0.92243689]
[-0.16758436]
[0.06328855]


In [6]:
# Ridge regression: minimize the objective f(β)=1/n*‖y-Xβ‖²+λ‖β‖².
# Setting Grad(f(β))=0 gives the linear system (XᵀX+nλI)β = Xᵀy.
# The identity matrix spans all p columns, so the coefficient of the
# bias column '1' is penalized as well.
lam = 0.1  # regularization strength λ
p = X.shape[1]
# For λ>0 the matrix XᵀX+nλI is symmetric positive definite, so the
# dedicated positive-definite solver can be selected via assume_a="pos".
beta_b = sp.linalg.solve(X.T @ X + n * lam * np.eye(p), X.T @ y, assume_a="pos")
beta_b

array([ 1.45624916,  0.71957545,  0.14820683, -0.14838337,  0.00862983,
       -0.00186671, -0.17832345, -0.47130572, -0.33996516, -0.02359837,
        0.05021859,  0.12503662, -0.09998862,  0.05668295, -0.03998195,
       -0.06939659, -0.05699217,  0.15959297, -0.0215094 ,  0.0092316 ,
        0.01565444, -0.0450344 , -0.03672704, -0.00781034,  0.06979771,
       -0.11923069, -0.09952622,  0.05196717,  0.06136376,  0.00995201,
        0.04975143,  0.01407802,  0.00709968, -0.00351464,  0.03412183,
        0.00363   ,  0.02554756,  0.03123771,  0.00882566,  0.0015518 ,
        0.03225161, -0.01687138,  0.04437523, -0.03313319,  0.17547435])

In [7]:
# Print the ridge coefficient in beta_b that belongs to each selected feature.
# A boolean mask over the feature names picks the matching entry of beta_b.
selected = ("MedInc", "MedInc AveBedrms", "HouseAge AveBedrms")
for name in selected:
    print(beta_b[features == name])

[0.71957545]
[-0.09998862]
[0.0092316]
