In [1]:
# Dependencies
import pandas as pd
import plotly.express as plt
import os
import statsmodels.api as sm

In [2]:
# Read the bike-count CSV from the Resources folder one level above the notebook,
# then convert the 'Day' column from text to datetime values.
bikecounts_file = os.path.join('../', 'Resources', 'bikecounts.csv')
bikecounts_df = pd.read_csv(bikecounts_file).assign(
    Day=lambda frame: pd.to_datetime(frame['Day'])
)
bikecounts_df.head()

Unnamed: 0.1,Unnamed: 0,Date,Day,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total
0,0,2016-04-01,2016-04-01,78.1,66.0,0.01,1704.0,3126,4115.0,2552.0,11497
1,1,2016-04-02,2016-04-02,55.0,48.9,0.15,827.0,1646,2565.0,1884.0,6922
2,2,2016-04-03,2016-04-03,39.9,34.0,0.09,526.0,1232,1695.0,1306.0,4759
3,3,2016-04-04,2016-04-04,44.1,33.1,0.47 (S),521.0,1067,1440.0,1307.0,4335
4,4,2016-04-05,2016-04-05,42.1,26.1,0,1416.0,2617,3081.0,2357.0,9471


In [3]:
# Normalise non-numeric annotations in the precipitation readings
def replace_unwanted_values(value_to_replace):
    """Return a copy of the input with non-numeric precipitation markers cleaned.

    Two kinds of entries are rewritten so the result can be cast to float:

    * a value that is exactly ``T`` (ignoring surrounding whitespace) becomes
      ``'0'`` -- presumably a "trace" marker; confirm against the data source.
    * a trailing ``(S)`` annotation is stripped from any reading, e.g.
      ``'0.47 (S)'`` becomes ``'0.47'`` -- presumably a snow flag; confirm.

    Parameters
    ----------
    value_to_replace : pandas.Series
        Raw precipitation values, typically strings as read from the CSV.

    Returns
    -------
    pandas.Series
        New Series with the markers replaced; the input is not modified.
        Entries matching neither pattern are returned unchanged.
    """
    # Anchored patterns so only a lone 'T' or a trailing '(S)' is touched;
    # this covers every '(S)'-flagged reading rather than one hard-coded value.
    cleanup_patterns = {
        r'^\s*T\s*$': '0',
        r'\s*\(S\)\s*$': '',
    }
    replaced_value = value_to_replace.replace(regex=cleanup_patterns)
    return replaced_value

In [4]:
# Clean the precipitation strings with the helper above, then cast the column to float
bikecounts_df['Precipitation'] = replace_unwanted_values(
    bikecounts_df['Precipitation']
).astype('float64')

# Midpoint of the daily high and low temperature
# (with only two values, the midpoint is also their median)
bikecounts_df['Median Temp (°F)'] = (
    bikecounts_df['High Temp (°F)']
    .add(bikecounts_df['Low Temp (°F)'])
    .div(2)
)
bikecounts_df.head()

Unnamed: 0.1,Unnamed: 0,Date,Day,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total,Median Temp (°F)
0,0,2016-04-01,2016-04-01,78.1,66.0,0.01,1704.0,3126,4115.0,2552.0,11497,72.05
1,1,2016-04-02,2016-04-02,55.0,48.9,0.15,827.0,1646,2565.0,1884.0,6922,51.95
2,2,2016-04-03,2016-04-03,39.9,34.0,0.09,526.0,1232,1695.0,1306.0,4759,36.95
3,3,2016-04-04,2016-04-04,44.1,33.1,0.47,521.0,1067,1440.0,1307.0,4335,38.6
4,4,2016-04-05,2016-04-05,42.1,26.1,0.0,1416.0,2617,3081.0,2357.0,9471,34.1


# HYPOTHESIS
1. The number of bicycles crossing the Brooklyn Bridge depends on precipitation and temperature.
1. More bicycles cross the Brooklyn Bridge on days with lower precipitation and a higher median temperature.

## NULL HYPOTHESIS
1. The number of bikes crossing the Brooklyn Bridge does not depend on precipitation or temperature; that is, higher or lower precipitation and temperature do not affect the number of bikes crossing the bridge.

In [5]:
# Response: daily Brooklyn Bridge bike counts.
dependent_value = bikecounts_df['Brooklyn Bridge']

# Predictors: temperature midpoint and precipitation, plus an explicit
# intercept column ('const') added by add_constant.
independent_variables = bikecounts_df[['Median Temp (°F)', 'Precipitation']]
X = sm.add_constant(independent_variables)

# Ordinary least squares fit of the response on the predictors.
regression_model = sm.OLS(endog=dependent_value, exog=X)
results = regression_model.fit()

In [6]:
# Print the plain-text summary table of the fitted OLS model
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:        Brooklyn Bridge   R-squared:                       0.605
Model:                            OLS   Adj. R-squared:                  0.576
Method:                 Least Squares   F-statistic:                     20.70
Date:                Fri, 08 Oct 2021   Prob (F-statistic):           3.55e-06
Time:                        22:23:29   Log-Likelihood:                -235.22
No. Observations:                  30   AIC:                             476.4
Df Residuals:                      27   BIC:                             480.6
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const             -237.9800    697.754  

In [7]:
# Alternate summary layout of the same fit; its output additionally lists the Scale entry
results.summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.576
Dependent Variable:,Brooklyn Bridge,AIC:,476.4342
Date:,2021-10-08 22:23,BIC:,480.6378
No. Observations:,30,Log-Likelihood:,-235.22
Df Model:,2,F-statistic:,20.7
Df Residuals:,27,Prob (F-statistic):,3.55e-06
R-squared:,0.605,Scale:,420260.0

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
const,-237.9800,697.7538,-0.3411,0.7357,-1669.6526,1193.6925
Median Temp (°F),51.1740,12.4783,4.1010,0.0003,25.5705,76.7774
Precipitation,-4395.3591,1208.1137,-3.6382,0.0011,-6874.2036,-1916.5145

0,1,2,3
Omnibus:,3.614,Durbin-Watson:,1.016
Prob(Omnibus):,0.164,Jarque-Bera (JB):,2.743
Skew:,-0.741,Prob(JB):,0.254
Kurtosis:,3.026,Condition No.:,571.0


## CONCLUSION
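Since the p-values for Precipitation (0.0011) and Median Temperature (0.0003) are very close to 0 (well below the conventional 0.05 significance level), we can reject the null hypothesis. The coefficient signs also match the hypothesis: the count rises with Median Temperature (about +51 bikes per °F) and falls with Precipitation. Hence, there is a statistically significant relationship between the number of bikes crossing the Brooklyn Bridge and both the median temperature of the day and precipitation.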