Build a regression model.

In [2]:
import numpy as np
import pandas as pd
from sklearn import linear_model, datasets

In [3]:
fsq_van_bikes_df = pd.read_csv('../data/fsq_van_bikes_df.csv')
fsq_van_bikes_df.describe()

Unnamed: 0,total_bikes,tattoo_500m_rad,bars_500m_rad,cafes_500m_rad,restaurants_500m_rad,parks_500m_rad
count,241.0,241.0,241.0,241.0,241.0,241.0
mean,18.701245,1.46888,12.80083,17.195021,36.211618,3.929461
std,5.341851,1.897385,14.832459,14.65518,15.991692,2.355668
min,11.0,0.0,0.0,0.0,0.0,0.0
25%,15.0,0.0,3.0,6.0,25.0,2.0
50%,18.0,1.0,7.0,13.0,43.0,4.0
75%,20.0,2.0,15.0,23.0,50.0,6.0
max,40.0,8.0,50.0,50.0,50.0,13.0


In [21]:
import statsmodels.api as sm

y = fsq_van_bikes_df['total_bikes']
indep = fsq_van_bikes_df.drop(['total_bikes','name','lat_long'], axis=1)

X = [sm.add_constant(indep[column]) for column in indep.columns] 
X[0]


Unnamed: 0,const,tattoo_500m_rad
0,1.0,3
1,1.0,6
2,1.0,5
3,1.0,2
4,1.0,0
...,...,...
236,1.0,0
237,1.0,0
238,1.0,0
239,1.0,5


Provide model output and an interpretation of the results. 

In [22]:
Models = [sm.OLS(y,x) for x in X] #list of models
Results = [model.fit() for model in Models] #list of results
Adj_Rsquared = [results.rsquared_adj for results in Results] #list of rsquared
Pval = [results.pvalues for results in Results] #list of p-values
Params = [results.params for results in Results] #list of params

In [23]:
for i in range(len(Adj_Rsquared)):
     print(f'adj_R2: {Adj_Rsquared[i]:.3f}, P-values: {*Pval[i],}, column: {indep.columns[i]}')

adj_R2: -0.003, P-values: (1.5290853664255428e-113, 0.6125014942183048), column: tattoo_500m_rad
adj_R2: 0.017, P-values: (7.89371360765777e-108, 0.023836086375881128), column: bars_500m_rad
adj_R2: 0.022, P-values: (1.048863217241922e-92, 0.011777493021707115), column: cafes_500m_rad
adj_R2: -0.002, P-values: (8.101821547957382e-57, 0.4436968771090508), column: restaurants_500m_rad
adj_R2: -0.000, P-values: (1.0464888136548998e-74, 0.3259011592447463), column: parks_500m_rad


In [25]:
remaining_var = fsq_van_bikes_df[['bars_500m_rad', 'cafes_500m_rad']]
z = sm.add_constant(remaining_var)
z


Unnamed: 0,const,bars_500m_rad,cafes_500m_rad
0,1.0,5,25
1,1.0,50,24
2,1.0,40,50
3,1.0,4,18
4,1.0,7,26
...,...,...,...
236,1.0,0,5
237,1.0,8,31
238,1.0,0,3
239,1.0,33,33


In [27]:
Model = sm.OLS(y,z)
Results = Model.fit()
Adj_Rsquared = Results.rsquared_adj
Pval = Results.pvalues

print(f'adj_R2: {Adj_Rsquared}, P-values: {Pval}, column: {remaining_var.columns}')

adj_R2: 0.018131076747166297, P-values: const             2.000662e-90
bars_500m_rad     9.003249e-01
cafes_500m_rad    2.639289e-01
dtype: float64, column: Index(['bars_500m_rad', 'cafes_500m_rad'], dtype='object')


# Stretch

How can you turn the regression model into a classification model?