In [1]:
# Import dependencies for lasso regression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, LassoCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_validate

In [2]:
# Loading data from the web into pd dataframe
path = 'https://jaredlander.com/data/manhattan_Train.csv'
manhattan = pd.read_csv(path)
manhattan = manhattan[['TotalValue', 'LotArea', 'NumFloors', 'UnitsTotal',
                       'LotFront', 'LotDepth', 'BldgFront', 'BldgDepth',
                       'BuiltFAR', 'ResidFAR', "CommFAR"]]
manhattan.head()

Unnamed: 0,TotalValue,LotArea,NumFloors,UnitsTotal,LotFront,LotDepth,BldgFront,BldgDepth,BuiltFAR,ResidFAR,CommFAR
0,327600.0,769,4.5,3,19.0,53.92,19.0,54.0,5.34,10.0,15.0
1,943650.0,1512,5.0,7,36.17,46.67,36.0,44.0,4.94,10.0,15.0
2,897300.0,2180,3.0,3,34.92,69.75,34.0,69.0,2.81,10.0,15.0
3,914400.0,2275,4.0,3,42.17,55.25,41.0,63.0,3.57,10.0,15.0
4,927900.0,1885,5.5,2,29.0,66.92,29.0,66.0,4.9,10.0,15.0


In [3]:
X_train, X_test, y_train, y_test = train_test_split(
                                        manhattan.drop('TotalValue', axis=1),
                                        manhattan['TotalValue'],
                                        random_state=42
)

In [4]:
# Create the lasso regression object
lasso = Lasso(alpha=100,
             max_iter=500,
             tol=1e-3,
             random_state=800)

In [5]:
# Fit the data to the model object
lasso.fit(X_train, y_train)

Lasso(alpha=100, max_iter=500, random_state=800, tol=0.001)

In [6]:
# Predict with the training data
y_hat = lasso.predict(X_train)

In [7]:
# Calculate the goodness of fit of the model, R^2
lasso.score(X_train, y_train)

0.6099028999977545

In [8]:
# Calculate the mean squared error of the model
mean_squared_error(y_hat, y_train)

4020142919299.2637

In [10]:
# Calculate the cross-validation R^2 and mse
cv = cross_validate(estimator=lasso, X=X_train, y=y_train, cv=8,
                   scoring=('r2', 'neg_mean_squared_error'),
                   return_train_score=True)

In [11]:
# Check the CV train R^2
cv['train_r2']

array([0.60520274, 0.60748689, 0.61442403, 0.60937997, 0.60745066,
       0.60670995, 0.61431741, 0.61786227])

In [12]:
# Check the CV test R^2
cv['test_r2']

array([0.64219354, 0.62696012, 0.57229938, 0.6074151 , 0.62461146,
       0.6293562 , 0.57115546, 0.55314649])

In [13]:
# Create the regularized regression with cross-validation
lasso_cv = LassoCV(cv=8)

In [14]:
# Fit the training the data with the model
lasso_cv.fit(X_train, y_train)

LassoCV(cv=8)

In [15]:
# Calculate the goodness of fit of the model, R^2
lasso_cv.score(X_train, y_train)

0.34826575824229056

In [16]:
# Calculate the mean squared error of the model
mean_squared_error(y_train, lasso_cv.predict(X_train))

6716442642747.276