In [1]:
# Import dependencies for ridge regression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_validate

In [2]:
# Download the Manhattan training CSV and keep only the response
# (TotalValue) plus the ten predictor columns used by the models below.
path = 'https://jaredlander.com/data/manhattan_Train.csv'
manhattan = pd.read_csv(path).loc[:, [
    'TotalValue',
    'LotArea', 'NumFloors', 'UnitsTotal',
    'LotFront', 'LotDepth',
    'BldgFront', 'BldgDepth',
    'BuiltFAR', 'ResidFAR', 'CommFAR',
]]
# Preview the first rows as the cell output
manhattan.head()

Unnamed: 0,TotalValue,LotArea,NumFloors,UnitsTotal,LotFront,LotDepth,BldgFront,BldgDepth,BuiltFAR,ResidFAR,CommFAR
0,327600.0,769,4.5,3,19.0,53.92,19.0,54.0,5.34,10.0,15.0
1,943650.0,1512,5.0,7,36.17,46.67,36.0,44.0,4.94,10.0,15.0
2,897300.0,2180,3.0,3,34.92,69.75,34.0,69.0,2.81,10.0,15.0
3,914400.0,2275,4.0,3,42.17,55.25,41.0,63.0,3.57,10.0,15.0
4,927900.0,1885,5.5,2,29.0,66.92,29.0,66.0,4.9,10.0,15.0


In [3]:
# Train/test split: every column except TotalValue is a feature and
# TotalValue is the target. No test_size is passed, so scikit-learn's
# default proportions apply; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    manhattan.drop(columns='TotalValue'),
    manhattan['TotalValue'],
    random_state=28,
)

In [4]:
# Build the (unfitted) ridge regression estimator with a fixed penalty.
# NOTE(review): the solver is left at its default; per the scikit-learn
# docs, max_iter / tol / random_state only matter for the iterative or
# stochastic solvers -- confirm they have any effect here.
ridge = Ridge(
    alpha=100,         # L2 penalty strength
    max_iter=600,
    tol=1e-5,
    random_state=700,
)

In [5]:
# Train the ridge model on the training split; fit() returns the
# estimator, so its repr is shown as the cell output.
ridge.fit(X=X_train, y=y_train)

Ridge(alpha=100, max_iter=600, random_state=700, tol=1e-05)

In [8]:
# In-sample predictions: the fitted model applied back to the rows it
# was trained on (reused for the training MSE below).
y_hat = ridge.predict(X=X_train)

In [6]:
# R^2 (coefficient of determination) of the ridge model.
# NOTE(review): this is computed on the training split, so it measures
# in-sample fit; X_test / y_test are not scored in the visible cells.
ridge.score(X=X_train, y=y_train)

0.6085007741100154

In [9]:
# Training-set mean squared error: actual TotalValue vs. the in-sample
# predictions stored in y_hat.
mean_squared_error(y_true=y_train, y_pred=y_hat)

4002037864607.81

In [10]:
# 10-fold cross-validation of the ridge model on the training split.
# Two metrics are tracked -- R^2 and negated mean absolute error -- and
# return_train_score=True keeps the scores on the fitting folds as well
# as on the held-out folds.
cv = cross_validate(
    estimator=ridge,
    X=X_train,
    y=y_train,
    cv=10,
    scoring=('r2', 'neg_mean_absolute_error'),
    return_train_score=True,
)

In [12]:
# Per-fold MAE measured on the folds used for fitting (one value per CV
# split). Values are negative because the scorer is
# 'neg_mean_absolute_error'; flip the sign to read them as MAE.
cv['train_neg_mean_absolute_error']

array([-1090296.41898035, -1092529.53886564, -1092166.23738617,
       -1084614.73305327, -1086986.47629155, -1084696.5716497 ,
       -1107043.19397628, -1091515.74298786, -1073657.04768024,
       -1104164.32805818])

In [13]:
# Per-fold MAE stored under the 'test_' key of the cross-validation
# results. These folds are carved out of X_train (the data passed to
# cross_validate), not the separate X_test split. Values are negated MAE.
cv['test_neg_mean_absolute_error']

array([-1067422.62022572, -1067951.37678406, -1108239.32681711,
       -1122796.52312031, -1096489.21602482, -1134588.67991004,
       -1133740.81953848, -1070263.79949657, -1138389.75553896,
       -1036599.56264995])

In [14]:
# Ridge regression that chooses its own penalty: each candidate alpha is
# evaluated with 10-fold cross-validation when fit() is called.
# NOTE(review): the candidates top out at 0.1, whereas the standalone
# Ridge earlier in the notebook uses alpha=100 -- confirm this narrow
# grid is intentional.
ridge_cv = RidgeCV(
    alphas=(0.001, 0.01, 0.1),
    cv=10,
)

In [15]:
# Fit the cross-validated ridge model on the training split; the
# returned estimator's repr is the cell output.
ridge_cv.fit(X=X_train, y=y_train)

RidgeCV(alphas=array([0.001, 0.01 , 0.1  ]), cv=10)

In [16]:
# R^2 of the cross-validated ridge model, evaluated on the training
# split (in-sample fit, comparable to the plain Ridge score above).
ridge_cv.score(X=X_train, y=y_train)

0.6085007985407218

In [17]:
# Training-set mean squared error of the cross-validated ridge model;
# predictions are generated inline rather than stored.
mean_squared_error(
    y_true=y_train,
    y_pred=ridge_cv.predict(X=X_train),
)

4002037614868.8438