# Gradient Descent Models

We will predict the price (`price` column) of an AirBNB dataset used last week.

**Therefore, our unit of analysis is an AIRBNB LISTING**

## 1. Setup

In [68]:
# Common imports
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import SGDRegressor 
from sklearn.dummy import DummyRegressor

np.random.seed(1)

# 2. Load the data

We will use the AirBNB data that we cleaned in last class (the original, not the one that you altered for last weeks exercise).

In [69]:
X_train = pd.read_csv("airbnb_train_X_price.csv")
X_test = pd.read_csv("airbnb_test_X_price.csv")
y_train = pd.read_csv("airbnb_train_y_price.csv")
y_test = pd.read_csv("airbnb_test_y_price.csv")

## 3. Model the data

First, we will create a dataframe to hold all the results of our models.

In [70]:
results = pd.DataFrame(y_train, columns=["actual"])

rmses = pd.DataFrame({"model": [], "rmse": []})

### 3.1 Fit and test the baseline Model

In [71]:
dummy_reg = DummyRegressor(strategy="mean")
_ = dummy_reg.fit(X_train, y_train)

results["dummy"] = dummy_reg.predict(X_train)

In [72]:
#Baseline Test RMSE
dummy_test_pred = dummy_reg.predict(X_test)
dummy_test_rmse = np.sqrt(mean_squared_error(y_test, dummy_test_pred))

rmses = pd.concat([rmses, pd.DataFrame({'model':"baseline", 'rmse': dummy_test_rmse}, index=[0])])

print(f"Baseline Test RMSE: {dummy_test_rmse:.3f}")


Baseline Test RMSE: 102.625


### 3.2  Fit and test a Linear Regression Using Stochastic Gradient Descent

In [73]:
#Stochastic Gradient Descent:
# https://scikit-learn.org/stable/modules/sgd.html
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDRegressor.html#sklearn.linear_model.SGDRegressor


# eta0 = learning rate
# max_iter = number of passes over training data (i.e., epochs)

sgd_reg = SGDRegressor(max_iter=100, penalty=None, eta0=0.01)
X_train=X_train.drop(columns=['price_category'])
sgd_reg.fit(X_train, np.ravel(y_train))


print(f"Number of iterations = {sgd_reg.n_iter_}")

results["SGD_preds"] = sgd_reg.predict(X_train)

Number of iterations = 46


In [74]:
#SGD Test RMSE
X_test=X_test.drop(columns=['price_category'])
SGD_test_pred = sgd_reg.predict(X_test)
SGD_test_rmse = np.sqrt(mean_squared_error(y_test, SGD_test_pred))

rmses = pd.concat([rmses, pd.DataFrame({'model':"SGD", 'rmse': SGD_test_rmse}, index=[0])])

print(f"SGD Test RMSE: {SGD_test_rmse:.3f}")

SGD Test RMSE: 57.286


### 3.3 Fit and test SGDRegression using L2 Regularization

In [75]:
#Stochastic Gradient with L2 regularization:

# eta0 = learning rate
# penalty = regularization term
# alphe = regularization strength (lambda)
# max_iter = number of passes over training data (i.e., epochs)

#sklearn.linear_model.SGDRegressor(loss='squared_error', *, penalty='l2', alpha=0.0001, l1_ratio=0.15, fit_intercept=True, max_iter=1000, tol=0.001, shuffle=True, verbose=0, epsilon=0.1, random_state=None, learning_rate='invscaling', eta0=0.01, power_t=0.25, early_stopping=False, validation_fraction=0.1, n_iter_no_change=5, warm_start=False, average=False

sgd_reg_l2 = SGDRegressor(max_iter=100, penalty='l2', alpha = 0.1, eta0=0.01)
_ = sgd_reg_l2.fit(X_train, np.ravel(y_train))

# print(f"Number of iterations = {sgd_reg_l2.n_iter_}")

# results["SGD_preds_l2"] = sgd_reg_l2.predict(X_train)


In [76]:
#Train RMSE
#SGD Test RMSE
SGD_test_pred_l2 = sgd_reg_l2.predict(X_test)
SGD_test_rmse_l2 = np.sqrt(mean_squared_error(y_test, SGD_test_pred_l2))

rmses = pd.concat([rmses, pd.DataFrame({'model':"SGD L2", 'rmse': SGD_test_rmse_l2}, index=[0])])

print(f"SGD Test with l2 RMSE: {SGD_test_rmse_l2:.3f}")

SGD Test with l2 RMSE: 59.598


### 3.4 Fit and test SGDRegression Using L1 Regularization

In [77]:
#Stochastic Gradient with L1 regularization:

# eta0 = learning rate
# penalty = regularization term
# alphe = regularization strength (lambda)
# max_iter = number of passes over training data (i.e., epochs)

sgd_reg_l1 = SGDRegressor(max_iter=100, penalty='l1', alpha = 0.1, eta0=0.01)
_ = sgd_reg_l1.fit(X_train, np.ravel(y_train))

print(f"Number of iterations = {sgd_reg_l1.n_iter_}")

results["SGD_preds_l1"] = sgd_reg_l1.predict(X_train)

Number of iterations = 46


In [78]:
#Train RMSE
#SGD Test RMSE with L1
SGD_test_pred_l1 = sgd_reg_l1.predict(X_test)
SGD_test_rmse_l1 = np.sqrt(mean_squared_error(y_test, SGD_test_pred_l1))

rmses = pd.concat([rmses, pd.DataFrame({'model':"SGD L1", 'rmse': SGD_test_rmse_l1}, index=[0])])

print(f"SGD with l1 Test RMSE: {SGD_test_rmse_l1:.3f}")

SGD with l1 Test RMSE: 57.354


### 3.5 Fit and test SGDRegression using ElasticNet Regularization

In [79]:
#Stochastic Gradient with Elastic Net regularization:

# eta0 = learning rate
# penalty = regularization term
# alphe = regularization strength (lambda)
# max_iter = number of passes over training data (i.e., epochs)

sgd_reg_elastic = SGDRegressor(max_iter=100, penalty='elasticnet', l1_ratio=0.5, alpha = 0.1, eta0=0.01)
sgd_reg_elastic.fit(X_train, np.ravel(y_train))

print(f"Number of iterations = {sgd_reg_elastic.n_iter_}")

results["SGD_preds_elastic"] = sgd_reg_elastic.predict(X_train)

Number of iterations = 34


In [80]:
#Train RMSE
#SGD Test RMSE with ElasticNet
SGD_test_pred_elastic = sgd_reg_elastic.predict(X_test)
SGD_test_rmse_elastic = np.sqrt(mean_squared_error(y_test, SGD_test_pred_elastic))

rmses = pd.concat([rmses, pd.DataFrame({'model':"SGD Elastic", 'rmse': SGD_test_rmse_elastic}, index=[0])])

print(f"SGD wt ElasticNet Test RMSE: {SGD_test_rmse_elastic:.3f}")

SGD wt ElasticNet Test RMSE: 58.570


### 3.6 Fit and test SGDRegression using Early Stopping 

In [81]:
#Stochastic Gradient with Early Stopping regularization:
# 
# # tol is the early stopping criteria

sgd_reg_es = SGDRegressor(max_iter=500, early_stopping=True, n_iter_no_change=5, tol=0.0001, validation_fraction=0.2, eta0=0.01)
sgd_reg_es.fit(X_train, np.ravel(y_train))

print(f"Number of iterations = {sgd_reg_es.n_iter_}")

results["SGD_preds_es"] = sgd_reg_es.predict(X_train)

Number of iterations = 8


In [82]:
#Train RMSE
#SGD Test RMSE with Eaarly Stopping
SGD_test_pred_es = sgd_reg_es.predict(X_test)
SGD_test_rmse_es = np.sqrt(mean_squared_error(y_test, SGD_test_pred_es))

rmses = pd.concat([rmses, pd.DataFrame({'model':"SGD ES", 'rmse': SGD_test_rmse_es}, index=[0])])

print(f"SGD wt Early Stopping Test RMSE: {SGD_test_rmse_es:.3f}")

SGD wt Early Stopping Test RMSE: 58.172


## 4.0 Polynomial Regression

We've already seen an example of this. Let's now try applying this to our AirBnb data.

This is done by creating the polynomial "variables" of the existing variables, then fitting them in a regular regression model

In [83]:
from sklearn.preprocessing import PolynomialFeatures

# Create second degree terms and interaction terms
poly_features = PolynomialFeatures(degree=2).fit(X_train)
X_train_poly = poly_features.transform(X_train)
X_test_poly = poly_features.transform(X_test)

#This will create the polynomial terms of the categorical variables too (since they are encoded as numbers)

#if degree=3, then it creates all combinations: a, a^2, a^3, b, b^2, b^3, a.b, a^2.b, a.b^2, a^2.b^2 

In [84]:
#We still fit a linear regression model

poly_lin_reg = SGDRegressor(max_iter=1000, penalty=None, eta0=0.01) 
poly_lin_reg.fit(X_train_poly, np.ravel(y_train))

print(f"Number of iterations = {poly_lin_reg.n_iter_}")

results["SGD_preds_ using polynomial"] = poly_lin_reg.predict(X_train_poly)

Number of iterations = 6


In [85]:
# Train RMSE
# SGD with polynomial input
poly_test_pred = poly_lin_reg.predict(X_test_poly)
poly_test_rmse = np.sqrt(mean_squared_error(y_test, poly_test_pred))

rmses = pd.concat([rmses, pd.DataFrame({'model':"SGD Poly", 'rmse': poly_test_rmse}, index=[0])])

print(f"SGD wt Polynomial input Test RMSE: {poly_test_rmse:.3f}")

SGD wt Polynomial input Test RMSE: 1751813817643.830


The RMSE result from the polynomial is very large, a strong indicator that this may not be a good model. The problem is most likely related to having mamy coeficients that are not significant. We can use Lasso to reduce the size of some of the coeficients, or reduce the degree of the polynomial.

In [86]:
poly_lin_reg_l1 = SGDRegressor(max_iter=1000, penalty='l1', alpha=0.5,  eta0=0.01) 
poly_lin_reg_l1.fit(X_train_poly, np.ravel(y_train))

print(f"Number of iterations = {poly_lin_reg_l1.n_iter_}")

results["SGD_preds_ using polynomial with l1"] = poly_lin_reg_l1.predict(X_train_poly)

poly_test_pred_l1 = poly_lin_reg_l1.predict(X_test_poly)
poly_test_rmse_l1 = np.sqrt(mean_squared_error(y_test, poly_test_pred_l1))

rmses = pd.concat([rmses, pd.DataFrame({'model':"SGD Poly l1", 'rmse': poly_test_rmse_l1}, index=[0])])

print(f"SGD wt Polynomial input l1 regularization Test RMSE: {poly_test_rmse_l1:.3f}")

Number of iterations = 14
SGD wt Polynomial input l1 regularization Test RMSE: 293879615419.934


In [87]:
poly_lin_reg_l2 = SGDRegressor(max_iter=1000, penalty='l2', alpha=0.5,  eta0=0.01) 
poly_lin_reg_l2.fit(X_train_poly, np.ravel(y_train))

print(f"Number of iterations = {poly_lin_reg_l2.n_iter_}")

results["SGD_preds_ using polynomial with l2"] = poly_lin_reg_l2.predict(X_train_poly)

poly_test_pred_l2 = poly_lin_reg_l2.predict(X_test_poly)
poly_test_rmse_l2 = np.sqrt(mean_squared_error(y_test, poly_test_pred_l2))

rmses = pd.concat([rmses, pd.DataFrame({'model':"SGD Poly l2", 'rmse': poly_test_rmse_l2}, index=[0])])

print(f"SGD wt Polynomial input l2 regularization Test RMSE: {poly_test_rmse_l2:.3f}")

Number of iterations = 15
SGD wt Polynomial input l2 regularization Test RMSE: 323895794292.400


In [88]:
poly_lin_reg_elastic = SGDRegressor(max_iter=1000, penalty='elasticnet', l1_ratio=.5, alpha=0.5,  eta0=0.01) 
poly_lin_reg_elastic.fit(X_train_poly, np.ravel(y_train))

print(f"Number of iterations = {poly_lin_reg_elastic.n_iter_}")

results["SGD_preds_ using polynomial with elastic net"] = poly_lin_reg_elastic.predict(X_train_poly)

poly_test_pred_elastic = poly_lin_reg_l1.predict(X_test_poly)
poly_test_rmse_elastic= np.sqrt(mean_squared_error(y_test, poly_test_pred_elastic))

rmses = pd.concat([rmses, pd.DataFrame({'model':"SGD Poly elastic", 'rmse': poly_test_rmse_elastic}, index=[0])])

print(f"SGD wt Polynomial input elastic net regularization Test RMSE: {poly_test_rmse_elastic:.3f}")

Number of iterations = 6
SGD wt Polynomial input elastic net regularization Test RMSE: 293879615419.934


In [89]:
#Stochastic Gradient with Early Stopping regularization:
# 
# # tol is the early stopping criteria

sgd_reg_es_poly = SGDRegressor(max_iter=500, early_stopping=True)
sgd_reg_es_poly.fit(X_train_poly, np.ravel(y_train))

print(f"Number of iterations = {sgd_reg_es_poly.n_iter_}")

results["sgd_reg_es_poly"] = sgd_reg_es_poly.predict(X_train_poly)

Number of iterations = 10


In [90]:
#Train RMSE
#SGD Test RMSE with Eaarly Stopping
SGD_test_pred_es_poly = sgd_reg_es_poly.predict(X_test_poly)
SGD_test_pred_es_poly = np.sqrt(mean_squared_error(y_test, SGD_test_pred_es_poly))

rmses = pd.concat([rmses, pd.DataFrame({'model':"SGD ES ploy", 'rmse': SGD_test_pred_es_poly}, index=[0])])

print(f"SGD wt Early Stopping Test RMSE: {SGD_test_pred_es_poly:.3f}")

SGD wt Early Stopping Test RMSE: 1660040251149.665


In [91]:
results

Unnamed: 0,actual,dummy,SGD_preds,SGD_preds_l1,SGD_preds_elastic,SGD_preds_es,SGD_preds_ using polynomial,SGD_preds_ using polynomial with l1,SGD_preds_ using polynomial with l2,SGD_preds_ using polynomial with elastic net,sgd_reg_es_poly
0,,166.546624,249.565962,252.457911,243.102832,251.943191,9.419895e+10,3.816165e+10,1.680854e+11,-9.022714e+10,-2.995860e+11
1,,166.546624,249.768089,251.993255,239.858208,252.145687,2.446842e+11,4.836272e+11,5.218145e+09,3.645263e+11,-1.676677e+11
2,,166.546624,231.252827,231.226838,219.125122,229.405656,6.456501e+11,5.656021e+09,2.388699e+11,2.654888e+11,-9.404306e+11
3,,166.546624,74.649825,73.988271,75.849447,78.818278,-4.759169e+10,1.123824e+11,3.108923e+11,-1.756905e+11,-2.372206e+10
4,,166.546624,94.513466,96.399080,106.522560,99.986766,5.470134e+11,2.657246e+11,1.655537e+11,4.310337e+10,-8.265141e+11
...,...,...,...,...,...,...,...,...,...,...,...
2483,,166.546624,68.277087,73.114627,74.199526,75.729599,-1.060756e+10,2.796130e+11,1.915206e+11,3.091634e+11,-8.606628e+10
2484,,166.546624,81.989970,79.509789,85.507881,91.722860,1.255416e+10,1.859497e+11,2.626738e+11,-2.157456e+11,-1.816932e+11
2485,,166.546624,233.776809,231.486506,220.904647,233.751342,-9.358796e+10,2.161722e+11,4.080996e+11,-3.493911e+11,9.500508e+10
2486,,166.546624,72.666509,72.395952,69.442214,81.683539,1.264260e+11,1.479414e+11,3.093240e+11,-4.297890e+11,6.530024e+10


In the given example, the SGD model with early_stop = True achieved an SSE of 1.660040e+12, which is considerably lower than the SSE of the non-early stopping models (SGD Poly). This indicates that early stopping helped to prevent overfitting and resulted in a model that generalized better to new data. But not necessarily lower than the SSE values of the other regularized linear regression models (SGD Ploy L1,  SGD Poly L2, SGD Ploy Elastic) with Lasso and Elastic Net regularization. We can produce a model with a better SSE walue using Early Stop by changing the hyper parameter but that would be at the risk of Overfitting. 

## 5.0 Summary

In [92]:
rmses

Unnamed: 0,model,rmse
0,baseline,102.6252
0,SGD,57.2865
0,SGD L2,59.59775
0,SGD L1,57.35449
0,SGD Elastic,58.57045
0,SGD ES,58.17222
0,SGD Poly,1751814000000.0
0,SGD Poly l1,293879600000.0
0,SGD Poly l2,323895800000.0
0,SGD Poly elastic,293879600000.0
