# Q1. What are the main factors determining when people cycle?

# Investigation into regression analysis

## Prepare workstation and import files

In [1]:
# Import libraries and packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import sklearn

from sklearn import datasets
from sklearn import linear_model
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from statsmodels.formula.api import ols

Rationale for libraries and packages:
- Numpy: used for statistical calculations
- Pandas: used for data manipulation and analysis
- Statsmodels: used to get OLS to fit a regression line and to calculate variance inflation factor
- Sklearn: used to calculate simple and multiple linear regression
- Matplotlib: used for plotting functions
- Seaborn: used for plotting functions

In [2]:
# Turn off warnings
import warnings
warnings.filterwarnings("ignore")

Rationale: Warning usually occur when there are obsolete programming elements. Warnings have been turned off to make
the code cleaner. However, this will still allow errors to appear.

In [3]:
# Import data files
central_london = pd.read_csv("Central London.csv")
inner_london = pd.read_csv("Inner London.csv")
outer_london = pd.read_csv("Outer London.csv")

## Explore and clean raw data

### Central London

In [4]:
# Print shape
print(central_london.shape)

(1048366, 17)


In [5]:
# Examine columns
print(central_london.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048366 entries, 0 to 1048365
Data columns (total 17 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   Survey wave (calendar quarter)  758163 non-null  object 
 1   Equivalent financial quarter    758163 non-null  object 
 2   Site ID                         758163 non-null  object 
 3   Location                        758163 non-null  object 
 4   Survey date                     748007 non-null  object 
 5   Weather                         746329 non-null  object 
 6   Time                            758163 non-null  object 
 7   Period                          758163 non-null  object 
 8   Direction                       758163 non-null  object 
 9   Start hour                      758163 non-null  float64
 10  Start minute                    758163 non-null  float64
 11  Number of private cycles        758099 non-null  float64
 12  Number of cycl

In [6]:
# Check for missing values
print(central_london.isnull().sum())

Survey wave (calendar quarter)     290203
Equivalent financial quarter       290203
Site ID                            290203
Location                           290203
Survey date                        300359
Weather                            302037
Time                               290203
Period                             290203
Direction                          290203
Start hour                         290203
Start minute                       290203
Number of private cycles           290267
Number of cycle hire bikes         290267
Total cycles                       290203
Unnamed: 14                       1048366
Unnamed: 15                       1048366
Unnamed: 16                       1048366
dtype: int64


In [7]:
# Drop unnamed columns from data frame
central_london_clean = central_london.drop(['Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16'], axis=1)
print(central_london_clean.head())

  Survey wave (calendar quarter) Equivalent financial quarter   Site ID  \
0        2014 Q1 (January-March)                   2013-14 Q4  CENCY001   
1        2014 Q1 (January-March)                   2013-14 Q4  CENCY001   
2        2014 Q1 (January-March)                   2013-14 Q4  CENCY001   
3        2014 Q1 (January-March)                   2013-14 Q4  CENCY001   
4        2014 Q1 (January-March)                   2013-14 Q4  CENCY001   

                             Location    Survey date Weather         Time  \
0  Millbank (south of Thorney Street)  ven, 24/01/14     Dry  0600 - 0615   
1  Millbank (south of Thorney Street)  ven, 24/01/14     Dry  0615 - 0630   
2  Millbank (south of Thorney Street)  ven, 24/01/14     Dry  0630 - 0645   
3  Millbank (south of Thorney Street)  ven, 24/01/14     Dry  0645 - 0700   
4  Millbank (south of Thorney Street)  ven, 24/01/14     Dry  0700 - 0715   

                        Period   Direction  Start hour  Start minute  \
0  Early Morni

In [8]:
# Print values for Survey wave (calendar quarter)
central_london_clean['Survey wave (calendar quarter)'].value_counts()

2020 Q3 (July-September)       26880
2020 Q1 (January-March)        26880
2019 Q2 (April-June)           26880
2019 Q1 (January-March)        26880
2018 Q4 (October-December)     26880
2018 Q3 (July-September)       26880
2018 Q2 (April-June)           26880
2017 Q4 (October-December)     26752
2017 Q3 (July-September)       26752
2019 Q4 (October-December)     26752
2019 Q3 (July-September)       26752
2021 Q4 (October-December)     26624
2021 Q3 (July-September)       26584
2017 Q1 (January-March)        25728
2015 Q4 (October-December)     25728
2014 Q3 (July-September)       25728
2014 Q4 (October-December)     25728
2015 Q1 (January-March)        25728
2015 Q2 (April-June)           25728
2017 Q2 (April-June)           25728
2015 Q3 (July-September)       25728
2016 Q2 (April-June)           25728
2018 Q1 (January-March)        25728
2014 Q2 (April-June)           25728
2016 Q4 (October-December)     25728
2016 Q1 (January-March)        25600
2016 Q3 (July-September)       25600
2

In [9]:
# Create Month column
central_london_clean['Month'] = central_london_clean['Survey date'].str.split('/').str[1]

In [10]:
# Check values of new Month column
print(central_london_clean['Month'].shape)
print(central_london_clean['Month'].value_counts())
print(central_london_clean['Month'].isnull().sum())

(1048366,)
11    77101
09    76192
01    71934
08    69512
06    67257
07    60838
03    58606
05    56326
12    55862
02    51936
04    51132
10    51055
Name: Month, dtype: int64
300615


In [11]:
# Examine what Survey date looks like when Month is missing but Survey date is not
sub_df = central_london_clean[central_london_clean['Month'].isnull() & ~ central_london_clean['Survey date'].isnull()]
print(sub_df['Survey date'])

654784     
654785     
654786     
654787     
654788     
         ..
664251     
664252     
664253     
664254     
664255     
Name: Survey date, Length: 256, dtype: object


Observations: There are 256 survey date variables that are formatted incorrectly, thus resulting in 256 missing Month values. Given the large size of the data set, these values will be ignored rather than fixing them.

In [12]:
# Create Year column
central_london_clean['Year'] = central_london_clean['Survey date'].str.split('/').str[2]

In [13]:
# Check values of new Year column
print(central_london_clean['Year'].shape)
print(central_london_clean['Year'].value_counts())
print(central_london_clean['Year'].isnull().sum())

(1048366,)
19    106880
18    105300
14    102540
17    102444
16    101632
15    100916
21     77651
20     50388
Name: Year, dtype: int64
300615


In [14]:
# Create quarter column
central_london_clean['Quarter'] = central_london_clean['Survey wave (calendar quarter)'].str.split(" ").str[1]

In [15]:
# Check values of new Quarter column
print(central_london_clean['Quarter'].shape)
print(central_london_clean['Quarter'].value_counts())
print(central_london_clean['Quarter'].isnull().sum())

(1048366,)
Q3    211032
Q4    184192
Q1    181952
Q2    180987
Name: Quarter, dtype: int64
290203


In [16]:
# Check dtype of new variables
central_london_clean.dtypes

Survey wave (calendar quarter)     object
Equivalent financial quarter       object
Site ID                            object
Location                           object
Survey date                        object
Weather                            object
Time                               object
Period                             object
Direction                          object
Start hour                        float64
Start minute                      float64
Number of private cycles          float64
Number of cycle hire bikes        float64
Total cycles                      float64
Month                              object
Year                               object
Quarter                            object
dtype: object

In [17]:
# Change Month and Year to float
central_london_clean['Month'] = central_london_clean['Month'].astype(float)
central_london_clean['Year'] = central_london_clean['Year'].astype(float)

print(central_london_clean.dtypes)

Survey wave (calendar quarter)     object
Equivalent financial quarter       object
Site ID                            object
Location                           object
Survey date                        object
Weather                            object
Time                               object
Period                             object
Direction                          object
Start hour                        float64
Start minute                      float64
Number of private cycles          float64
Number of cycle hire bikes        float64
Total cycles                      float64
Month                             float64
Year                              float64
Quarter                            object
dtype: object


In [18]:
# Create Region column
central_london_clean['Region'] = "Central London"

In [19]:
# Check values of new Region column
central_london_clean['Region'].value_counts()

Central London    1048366
Name: Region, dtype: int64

**Outer London**

In [20]:
# Print shape
print(outer_london.shape)

(375660, 14)


In [21]:
# Examine first few rows of data
print(outer_london.head())

   Survey wave (year)   Site ID          Location    Survey date Weather  \
0                2015  OUTCY001  High Road Leyton  ven, 26/06/15     Dry   
1                2015  OUTCY001  High Road Leyton  ven, 26/06/15     Dry   
2                2015  OUTCY001  High Road Leyton  ven, 26/06/15     Dry   
3                2015  OUTCY001  High Road Leyton  ven, 26/06/15     Dry   
4                2015  OUTCY001  High Road Leyton  ven, 26/06/15     Dry   

          Time                       Period   Direction  Start hour  \
0  0600 - 0615  Early Morning (06:00-07:00)  Northbound           6   
1  0615 - 0630  Early Morning (06:00-07:00)  Northbound           6   
2  0630 - 0645  Early Morning (06:00-07:00)  Northbound           6   
3  0645 - 0700  Early Morning (06:00-07:00)  Northbound           6   
4  0700 - 0715        AM peak (07:00-10:00)  Northbound           7   

   Start minute  Number of male cycles  Number of female cycles  \
0             0                      2           

In [22]:
# Examine columns
print(outer_london.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 375660 entries, 0 to 375659
Data columns (total 14 columns):
 #   Column                    Non-Null Count   Dtype 
---  ------                    --------------   ----- 
 0   Survey wave (year)        375660 non-null  int64 
 1   Site ID                   375660 non-null  object
 2   Location                  375660 non-null  object
 3   Survey date               374492 non-null  object
 4   Weather                   374692 non-null  object
 5   Time                      375660 non-null  object
 6   Period                    375660 non-null  object
 7   Direction                 375660 non-null  object
 8   Start hour                375660 non-null  int64 
 9   Start minute              375660 non-null  int64 
 10  Number of male cycles     375660 non-null  int64 
 11  Number of female cycles   375660 non-null  int64 
 12  Number of unknown cycles  375660 non-null  int64 
 13  Total cycles              375660 non-null  int64 
dtypes: i

In [23]:
# Check for missing values
print(outer_london.isnull().sum())

Survey wave (year)             0
Site ID                        0
Location                       0
Survey date                 1168
Weather                      968
Time                           0
Period                         0
Direction                      0
Start hour                     0
Start minute                   0
Number of male cycles          0
Number of female cycles        0
Number of unknown cycles       0
Total cycles                   0
dtype: int64


In [24]:
# Create Month column
outer_london['Month'] = outer_london['Survey date'].str.split('/').str[1]

In [25]:
# Check values of new Month column
print(outer_london['Month'].shape)
print(outer_london['Month'].value_counts())
print(outer_london['Month'].isnull().sum())

(375660,)
06    125316
05    116664
04     54884
07     46768
10     16992
09     13580
08       160
11       128
Name: Month, dtype: int64
1168


In [26]:
# Create Year column
outer_london['Year'] = outer_london['Survey date'].str.split('/').str[2]

In [27]:
# Check values of new Year column
print(outer_london['Year'].shape)
print(outer_london['Year'].value_counts())
print(outer_london['Year'].isnull().sum())

(375660,)
18    57728
19    57584
21    57472
15    57344
16    57344
17    57344
20    29676
Name: Year, dtype: int64
1168


In [28]:
# Check dtypes
outer_london.dtypes

Survey wave (year)           int64
Site ID                     object
Location                    object
Survey date                 object
Weather                     object
Time                        object
Period                      object
Direction                   object
Start hour                   int64
Start minute                 int64
Number of male cycles        int64
Number of female cycles      int64
Number of unknown cycles     int64
Total cycles                 int64
Month                       object
Year                        object
dtype: object

In [29]:
# Change Month and Year to float
outer_london['Month'] = outer_london['Month'].astype(float)
outer_london['Year'] = outer_london['Year'].astype(float)
print(outer_london.dtypes)

Survey wave (year)            int64
Site ID                      object
Location                     object
Survey date                  object
Weather                      object
Time                         object
Period                       object
Direction                    object
Start hour                    int64
Start minute                  int64
Number of male cycles         int64
Number of female cycles       int64
Number of unknown cycles      int64
Total cycles                  int64
Month                       float64
Year                        float64
dtype: object


In [30]:
# Create a Quarter column

# Create list of conditions
conditions = [
    (outer_london['Month'] > 0) & (outer_london['Month'] <= 3),
    (outer_london['Month'] > 3) & (outer_london['Month'] <= 6),
    (outer_london['Month'] > 6) & (outer_london['Month'] <= 9),
    (outer_london['Month'] > 9) & (outer_london['Month'] <= 12)
]

# Create a list of values to assign for each condition
values = ['Q1', 'Q2', 'Q3', 'Q4']

# Create new 'Quarter' column and use np.select to assign values to it
outer_london['Quarter'] = np.select(conditions, values)

# Display updated DataFrame
print(outer_london.Quarter.value_counts())
print(outer_london.head())
print(outer_london.tail())

Q2    296864
Q3     60508
Q4     17120
0       1168
Name: Quarter, dtype: int64
   Survey wave (year)   Site ID          Location    Survey date Weather  \
0                2015  OUTCY001  High Road Leyton  ven, 26/06/15     Dry   
1                2015  OUTCY001  High Road Leyton  ven, 26/06/15     Dry   
2                2015  OUTCY001  High Road Leyton  ven, 26/06/15     Dry   
3                2015  OUTCY001  High Road Leyton  ven, 26/06/15     Dry   
4                2015  OUTCY001  High Road Leyton  ven, 26/06/15     Dry   

          Time                       Period   Direction  Start hour  \
0  0600 - 0615  Early Morning (06:00-07:00)  Northbound           6   
1  0615 - 0630  Early Morning (06:00-07:00)  Northbound           6   
2  0630 - 0645  Early Morning (06:00-07:00)  Northbound           6   
3  0645 - 0700  Early Morning (06:00-07:00)  Northbound           6   
4  0700 - 0715        AM peak (07:00-10:00)  Northbound           7   

   Start minute  Number of male cycl

In [31]:
# Create Region column
outer_london['Region'] = "Outer London"

In [32]:
# Check Region column
outer_london['Region'].value_counts()

Outer London    375660
Name: Region, dtype: int64

### Inner London

In [33]:
# Print shape
print(inner_london.shape)

(615168, 13)


In [34]:
# Examine first few rows of data
print(inner_london.head())

   Survey wave (year)   Site ID    Location    Survey date Weather  \
0              2015.0  INNCY001  Grove Road  mer, 20/05/15     Dry   
1              2015.0  INNCY001  Grove Road  mer, 20/05/15     Dry   
2              2015.0  INNCY001  Grove Road  mer, 20/05/15     Dry   
3              2015.0  INNCY001  Grove Road  mer, 20/05/15     Dry   
4              2015.0  INNCY001  Grove Road  mer, 20/05/15     Dry   

          Time                       Period   Direction  Start hour  \
0  0600 - 0615  Early Morning (06:00-07:00)  Northbound         6.0   
1  0615 - 0630  Early Morning (06:00-07:00)  Northbound         6.0   
2  0630 - 0645  Early Morning (06:00-07:00)  Northbound         6.0   
3  0645 - 0700  Early Morning (06:00-07:00)  Northbound         6.0   
4  0700 - 0715        AM peak (07:00-10:00)  Northbound         7.0   

   Start minute  Number of private cycles  Number of cycle hire bikes  \
0           0.0                       1.0                         0.0   
1     

In [35]:
# Examine columns
print(inner_london.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 615168 entries, 0 to 615167
Data columns (total 13 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   Survey wave (year)          523776 non-null  float64
 1   Site ID                     523776 non-null  object 
 2   Location                    523776 non-null  object 
 3   Survey date                 521024 non-null  object 
 4   Weather                     519102 non-null  object 
 5   Time                        523770 non-null  object 
 6   Period                      523770 non-null  object 
 7   Direction                   523776 non-null  object 
 8   Start hour                  523770 non-null  float64
 9   Start minute                523770 non-null  float64
 10  Number of private cycles    523776 non-null  float64
 11  Number of cycle hire bikes  523776 non-null  float64
 12  Total cycles                523776 non-null  float64
dtypes: float64(6),

In [36]:
# Check for missing values
print(inner_london.isnull().sum())

Survey wave (year)            91392
Site ID                       91392
Location                      91392
Survey date                   94144
Weather                       96066
Time                          91398
Period                        91398
Direction                     91392
Start hour                    91398
Start minute                  91398
Number of private cycles      91392
Number of cycle hire bikes    91392
Total cycles                  91392
dtype: int64


In [37]:
# Create Month column
inner_london['Month'] = inner_london['Survey date'].str.split('/').str[1]

In [38]:
# Check values of new Month column
print(inner_london['Month'].shape)
print(inner_london['Month'].value_counts())
print(inner_london['Month'].isnull().sum())

(615168,)
06    149524
05    149140
07     89840
04     81278
09     26338
10     23902
11       704
08       208
03        90
Name: Month, dtype: int64
94144


In [39]:
# Create Year column
inner_london['Year'] = inner_london['Survey date'].str.split('/').str[2]

In [40]:
# Check values of new Year column
print(inner_london['Year'].shape)
print(inner_london['Year'].value_counts())
print(inner_london['Year'].isnull().sum())

(615168,)
21    91392
15    76032
16    76032
18    76032
19    75968
17    75264
20    50304
Name: Year, dtype: int64
94144


In [41]:
# Check dtypes
inner_london.dtypes

Survey wave (year)            float64
Site ID                        object
Location                       object
Survey date                    object
Weather                        object
Time                           object
Period                         object
Direction                      object
Start hour                    float64
Start minute                  float64
Number of private cycles      float64
Number of cycle hire bikes    float64
Total cycles                  float64
Month                          object
Year                           object
dtype: object

In [42]:
# Change Month and Year to float
inner_london['Month'] = inner_london['Month'].astype(float)
inner_london['Year'] = inner_london['Year'].astype(float)
print(inner_london.dtypes)

Survey wave (year)            float64
Site ID                        object
Location                       object
Survey date                    object
Weather                        object
Time                           object
Period                         object
Direction                      object
Start hour                    float64
Start minute                  float64
Number of private cycles      float64
Number of cycle hire bikes    float64
Total cycles                  float64
Month                         float64
Year                          float64
dtype: object


In [43]:
# Create a Quarter column

# Create list of conditions
conditions = [
    (inner_london['Month'] > 0) & (inner_london['Month'] <= 3),
    (inner_london['Month'] > 3) & (inner_london['Month'] <= 6),
    (inner_london['Month'] > 6) & (inner_london['Month'] <= 9),
    (inner_london['Month'] > 9) & (inner_london['Month'] <= 12)
]

# Create a list of values to assign for each condition
values = ['Q1', 'Q2', 'Q3', 'Q4']

# Create new 'Seasons' column and use np.select to assign values to it
inner_london['Quarter'] = np.select(conditions, values)

# Display updated DataFrame
print(inner_london.Quarter.value_counts())
print(inner_london.head())
print(inner_london.tail())

Q2    379942
Q3    116386
0      94144
Q4     24606
Q1        90
Name: Quarter, dtype: int64
   Survey wave (year)   Site ID    Location    Survey date Weather  \
0              2015.0  INNCY001  Grove Road  mer, 20/05/15     Dry   
1              2015.0  INNCY001  Grove Road  mer, 20/05/15     Dry   
2              2015.0  INNCY001  Grove Road  mer, 20/05/15     Dry   
3              2015.0  INNCY001  Grove Road  mer, 20/05/15     Dry   
4              2015.0  INNCY001  Grove Road  mer, 20/05/15     Dry   

          Time                       Period   Direction  Start hour  \
0  0600 - 0615  Early Morning (06:00-07:00)  Northbound         6.0   
1  0615 - 0630  Early Morning (06:00-07:00)  Northbound         6.0   
2  0630 - 0645  Early Morning (06:00-07:00)  Northbound         6.0   
3  0645 - 0700  Early Morning (06:00-07:00)  Northbound         6.0   
4  0700 - 0715        AM peak (07:00-10:00)  Northbound         7.0   

   Start minute  Number of private cycles  Number of cycle 

In [44]:
# Create Region column
inner_london['Region'] = "Inner London"

In [45]:
# Check Region column
inner_london['Region'].value_counts()

Inner London    615168
Name: Region, dtype: int64

## Linear relationship analysis

### Central London

**Private cycles**

In [46]:
# Define the dependent variable
y= central_london_clean['Total cycles']

In [47]:
# Define the indepedent variable
x= central_london_clean['Number of private cycles']

In [48]:
# Create formula and pass through OLS methods
f = 'y ~ x'
test = ols(f, data= central_london_clean).fit()

In [49]:
# Print the regression table
test.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.993
Model:,OLS,Adj. R-squared:,0.993
Method:,Least Squares,F-statistic:,109700000.0
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,0.0
Time:,15:08:38,Log-Likelihood:,-1731600.0
No. Observations:,758099,AIC:,3463000.0
Df Residuals:,758097,BIC:,3463000.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.5193,0.003,165.748,0.000,0.513,0.525
x,1.0884,0.000,1.05e+04,0.000,1.088,1.089

0,1,2,3
Omnibus:,619814.141,Durbin-Watson:,0.846
Prob(Omnibus):,0.0,Jarque-Bera (JB):,76860772.388
Skew:,3.279,Prob(JB):,0.0
Kurtosis:,51.89,Cond. No.,34.6


**Cycle hires**

In [50]:
# Define the dependent variable
y= central_london_clean['Total cycles']

In [51]:
# Define the indepedent variable
x= central_london_clean['Number of cycle hire bikes']

In [52]:
# Create formula and pass through OLS methods
f = 'y ~ x'
test = ols(f, data= central_london_clean).fit()

In [53]:
# Print the regression table
test.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.571
Model:,OLS,Adj. R-squared:,0.571
Method:,Least Squares,F-statistic:,1010000.0
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,0.0
Time:,15:08:38,Log-Likelihood:,-3299000.0
No. Observations:,758099,AIC:,6598000.0
Df Residuals:,758097,BIC:,6598000.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.7138,0.025,191.429,0.000,4.666,4.762
x,6.5247,0.006,1004.868,0.000,6.512,6.537

0,1,2,3
Omnibus:,668319.727,Durbin-Watson:,0.522
Prob(Omnibus):,0.0,Jarque-Bera (JB):,81094434.476
Skew:,3.741,Prob(JB):,0.0
Kurtosis:,53.113,Cond. No.,4.4


**Time of Year**

In [54]:
# Define the dependent variable
y= central_london_clean['Total cycles']

In [55]:
# Define the indepedent variable
x= central_london_clean['Quarter']

In [56]:
# Create formula and pass through OLS methods
f = 'y ~ x'
test = ols(f, data= central_london_clean).fit()

In [57]:
# Print the regression table
test.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.002
Model:,OLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,485.0
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,7.26e-315
Time:,15:08:41,Log-Likelihood:,-3619500.0
No. Observations:,758163,AIC:,7239000.0
Df Residuals:,758159,BIC:,7239000.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,14.9158,0.067,222.093,0.000,14.784,15.047
x[T.Q2],2.8017,0.095,29.459,0.000,2.615,2.988
x[T.Q3],2.9909,0.092,32.635,0.000,2.811,3.171
x[T.Q4],0.9711,0.095,10.255,0.000,0.785,1.157

0,1,2,3
Omnibus:,773540.085,Durbin-Watson:,0.119
Prob(Omnibus):,0.0,Jarque-Bera (JB):,59932113.042
Skew:,5.036,Prob(JB):,0.0
Kurtosis:,45.376,Cond. No.,4.88


**Time of day**

In [58]:
# Define the dependent variable
y= central_london_clean['Total cycles']

In [59]:
# Define the indepedent variable
x= central_london_clean['Period']

In [60]:
# Create formula and pass through OLS methods
f = 'y ~ x'
test = ols(f, data= central_london_clean).fit()

In [61]:
# Print the regression table
test.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.116
Model:,OLS,Adj. R-squared:,0.116
Method:,Least Squares,F-statistic:,24780.0
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,0.0
Time:,15:08:43,Log-Likelihood:,-3573600.0
No. Observations:,758163,AIC:,7147000.0
Df Residuals:,758158,BIC:,7147000.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,29.8323,0.072,417.130,0.000,29.692,29.972
x[T.Early Morning (06:00-07:00)],-23.1096,0.143,-161.565,0.000,-23.390,-22.829
x[T.Evening (19:00-22:00)],-17.4037,0.101,-172.055,0.000,-17.602,-17.205
x[T.Inter-peak (10:00-16:00)],-21.7927,0.088,-248.799,0.000,-21.964,-21.621
x[T.PM peak (16:00-19:00)],-1.5956,0.101,-15.775,0.000,-1.794,-1.397

0,1,2,3
Omnibus:,767708.124,Durbin-Watson:,0.158
Prob(Omnibus):,0.0,Jarque-Bera (JB):,67983000.181
Skew:,4.91,Prob(JB):,0.0
Kurtosis:,48.339,Cond. No.,6.47


**Weather**

In [62]:
# Subset data based on top weather conditions
central_london_topweather = central_london_clean.groupby('Weather').filter(lambda x : len(x) > 1000)

In [63]:
# Define the dependent variable
y= central_london_topweather['Total cycles']

In [64]:
# Define the indepedent variable
x= central_london_topweather['Weather']

In [65]:
# Create formula and pass through OLS methods
f = 'y ~ x'
test = ols(f, data= central_london_topweather).fit()

In [66]:
# Print the regression table
test.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.003
Model:,OLS,Adj. R-squared:,0.003
Method:,Least Squares,F-statistic:,392.4
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,0.0
Time:,15:08:45,Log-Likelihood:,-3491500.0
No. Observations:,731231,AIC:,6983000.0
Df Residuals:,731224,BIC:,6983000.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,8.3357,0.433,19.271,0.000,7.488,9.183
x[T.Drizzle],6.3000,1.001,6.297,0.000,4.339,8.261
x[T.Dry],8.9920,0.434,20.715,0.000,8.141,9.843
x[T.Fine],8.5408,0.480,17.781,0.000,7.599,9.482
x[T.Rain],3.9626,0.582,6.805,0.000,2.821,5.104
x[T.Sunny],7.4866,0.704,10.631,0.000,6.106,8.867
x[T.Wet],4.3342,0.445,9.750,0.000,3.463,5.205

0,1,2,3
Omnibus:,746170.659,Durbin-Watson:,0.12
Prob(Omnibus):,0.0,Jarque-Bera (JB):,58061462.488
Skew:,5.036,Prob(JB):,0.0
Kurtosis:,45.476,Cond. No.,50.0


### Inner London

**Private cycles**

In [67]:
# Define the dependent variable
y= inner_london['Total cycles']

In [68]:
# Define the indepedent variable
x= inner_london['Number of private cycles']

In [69]:
# Create formula and pass through OLS methods
f = 'y ~ x'
test = ols(f, data= inner_london).fit()

In [70]:
# Print the regression table
test.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.994
Model:,OLS,Adj. R-squared:,0.994
Method:,Least Squares,F-statistic:,81190000.0
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,0.0
Time:,15:08:45,Log-Likelihood:,-691180.0
No. Observations:,523776,AIC:,1382000.0
Df Residuals:,523774,BIC:,1382000.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0847,0.001,59.507,0.000,0.082,0.087
x,1.0307,0.000,9010.702,0.000,1.030,1.031

0,1,2,3
Omnibus:,474627.364,Durbin-Watson:,0.78
Prob(Omnibus):,0.0,Jarque-Bera (JB):,868877984.754
Skew:,3.115,Prob(JB):,0.0
Kurtosis:,202.435,Cond. No.,14.2


**Cycle hires**

In [71]:
# Define the dependent variable
y= inner_london['Total cycles']

In [72]:
# Define the indepedent variable
x= inner_london['Number of cycle hire bikes']

In [73]:
# Create formula and pass through OLS methods
f = 'y ~ x'
test = ols(f, data= inner_london).fit()

In [74]:
# Print the regression table
test.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.185
Model:,OLS,Adj. R-squared:,0.185
Method:,Least Squares,F-statistic:,119200.0
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,0.0
Time:,15:08:45,Log-Likelihood:,-1960000.0
No. Observations:,523776,AIC:,3920000.0
Df Residuals:,523774,BIC:,3920000.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.8149,0.015,328.426,0.000,4.786,4.844
x,5.1559,0.015,345.282,0.000,5.127,5.185

0,1,2,3
Omnibus:,572340.779,Durbin-Watson:,0.262
Prob(Omnibus):,0.0,Jarque-Bera (JB):,69106087.322
Skew:,5.489,Prob(JB):,0.0
Kurtosis:,58.19,Cond. No.,1.32


**Time of Year**

In [75]:
# Define the dependent variable
y= inner_london['Total cycles']

In [76]:
# Define the indepedent variable
x= inner_london['Quarter']

In [77]:
# Create formula and pass through OLS methods
f = 'y ~ x'
test = ols(f, data= inner_london).fit()

In [78]:
# Print the regression table
test.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.003
Model:,OLS,Adj. R-squared:,0.003
Method:,Least Squares,F-statistic:,343.4
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,8.940000000000001e-296
Time:,15:08:46,Log-Likelihood:,-2013000.0
No. Observations:,523776,AIC:,4026000.0
Df Residuals:,523771,BIC:,4026000.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,5.1148,0.215,23.756,0.000,4.693,5.537
x[T.Q1],-3.8593,1.210,-3.190,0.001,-6.231,-1.488
x[T.Q2],0.7998,0.216,3.701,0.000,0.376,1.223
x[T.Q3],1.5831,0.218,7.267,0.000,1.156,2.010
x[T.Q4],3.1996,0.227,14.093,0.000,2.755,3.645

0,1,2,3
Omnibus:,586298.188,Durbin-Watson:,0.15
Prob(Omnibus):,0.0,Jarque-Bera (JB):,69563271.507
Skew:,5.747,Prob(JB):,0.0
Kurtosis:,58.275,Cond. No.,99.0


**Time of day**

In [79]:
# Define the dependent variable
y= inner_london['Total cycles']

In [80]:
# Define the indepedent variable
x= inner_london['Period']

In [81]:
# Create formula and pass through OLS methods
f = 'y ~ x'
test = ols(f, data= inner_london).fit()

In [82]:
# Print the regression table
test.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.061
Model:,OLS,Adj. R-squared:,0.061
Method:,Least Squares,F-statistic:,8492.0
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,0.0
Time:,15:08:48,Log-Likelihood:,-1997200.0
No. Observations:,523770,AIC:,3994000.0
Df Residuals:,523765,BIC:,3995000.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,9.6320,0.035,275.411,0.000,9.563,9.701
x[T.Early Morning (06:00-07:00)],-6.5737,0.070,-93.982,0.000,-6.711,-6.437
x[T.Evening (19:00-22:00)],-4.2035,0.049,-84.988,0.000,-4.300,-4.107
x[T.Inter-peak (10:00-16:00)],-6.0172,0.043,-140.480,0.000,-6.101,-5.933
x[T.PM peak (16:00-19:00)],0.1058,0.049,2.138,0.032,0.009,0.203

0,1,2,3
Omnibus:,580143.279,Durbin-Watson:,0.171
Prob(Omnibus):,0.0,Jarque-Bera (JB):,71224652.06
Skew:,5.621,Prob(JB):,0.0
Kurtosis:,59.011,Cond. No.,6.47


**Weather**

In [83]:
# Subset data based on top weather conditions
inner_london_topweather = inner_london.groupby('Weather').filter(lambda x : len(x) > 1000)

In [84]:
# Define the dependent variable
y= inner_london_topweather['Total cycles']

In [85]:
# Define the indepedent variable
x= inner_london_topweather['Weather']

In [86]:
# Create formula and pass through OLS methods
f = 'y ~ x'
test = ols(f, data= inner_london_topweather).fit()

In [87]:
# Print the regression table
test.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.002
Model:,OLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,150.1
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,4.6499999999999997e-191
Time:,15:08:49,Log-Likelihood:,-1963000.0
No. Observations:,510267,AIC:,3926000.0
Df Residuals:,510260,BIC:,3926000.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,7.3481,0.150,49.110,0.000,7.055,7.641
x[T.Dry],-0.9834,0.151,-6.529,0.000,-1.279,-0.688
x[T.Fine],-4.6858,0.334,-14.009,0.000,-5.341,-4.030
x[T.N/a],-1.9370,0.292,-6.638,0.000,-2.509,-1.365
x[T.Rain],-2.9747,0.256,-11.619,0.000,-3.476,-2.473
x[T.Sunny],-0.7464,0.196,-3.809,0.000,-1.130,-0.362
x[T.Wet],-2.2340,0.157,-14.219,0.000,-2.542,-1.926

0,1,2,3
Omnibus:,569754.505,Durbin-Watson:,0.151
Prob(Omnibus):,0.0,Jarque-Bera (JB):,67084142.842
Skew:,5.724,Prob(JB):,0.0
Kurtosis:,57.993,Cond. No.,37.2


### Outer London

**Time of Year**

In [88]:
# Define the dependent variable
y= outer_london['Total cycles']

In [89]:
# Define the indepedent variable
x= outer_london['Quarter']

In [90]:
# Create formula and pass through OLS methods
f = 'y ~ x'
test = ols(f, data= outer_london).fit()

In [91]:
# Print the regression table
test.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.011
Model:,OLS,Adj. R-squared:,0.011
Method:,Least Squares,F-statistic:,1418.0
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,0.0
Time:,15:08:50,Log-Likelihood:,-963760.0
No. Observations:,375660,AIC:,1928000.0
Df Residuals:,375656,BIC:,1928000.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,5.0283,0.092,54.599,0.000,4.848,5.209
x[T.Q2],-3.5898,0.092,-38.904,0.000,-3.771,-3.409
x[T.Q3],-2.9094,0.093,-31.292,0.000,-3.092,-2.727
x[T.Q4],-2.9199,0.095,-30.677,0.000,-3.106,-2.733

0,1,2,3
Omnibus:,389909.777,Durbin-Watson:,0.419
Prob(Omnibus):,0.0,Jarque-Bera (JB):,33198412.09
Skew:,5.149,Prob(JB):,0.0
Kurtosis:,47.888,Cond. No.,46.9


**Time of day**

In [92]:
# Define the dependent variable
y= outer_london['Total cycles']

In [93]:
# Define the indepedent variable
x= outer_london['Period']

In [94]:
# Create formula and pass through OLS methods
f = 'y ~ x'
test = ols(f, data= outer_london).fit()

In [95]:
# Print the regression table
test.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.033
Model:,OLS,Adj. R-squared:,0.033
Method:,Least Squares,F-statistic:,3185.0
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,0.0
Time:,15:08:51,Log-Likelihood:,-959610.0
No. Observations:,375660,AIC:,1919000.0
Df Residuals:,375655,BIC:,1919000.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2.1588,0.012,184.066,0.000,2.136,2.182
x[T.Early Morning (06:00-07:00)],-0.9260,0.023,-39.478,0.000,-0.972,-0.880
x[T.Evening (19:00-22:00)],-0.9517,0.017,-57.372,0.000,-0.984,-0.919
x[T.Inter-peak (10:00-16:00)],-1.0460,0.014,-72.821,0.000,-1.074,-1.018
x[T.PM peak (16:00-19:00)],0.3169,0.017,19.108,0.000,0.284,0.349

0,1,2,3
Omnibus:,386587.041,Durbin-Watson:,0.435
Prob(Omnibus):,0.0,Jarque-Bera (JB):,32707916.479
Skew:,5.074,Prob(JB):,0.0
Kurtosis:,47.572,Cond. No.,6.47


**Weather**

In [96]:
# Subset data based on top weather conditions
outer_london_topweather = outer_london.groupby('Weather').filter(lambda x : len(x) > 1000)

In [97]:
# Define the dependent variable
y= outer_london_topweather['Total cycles']

In [98]:
# Define the indepedent variable
x= outer_london_topweather['Weather']

In [99]:
# Create formula and pass through OLS methods
f = 'y ~ x'
test = ols(f, data= outer_london_topweather).fit()

In [100]:
# Print the regression table
test.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.002
Model:,OLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,204.6
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,1.3100000000000001e-132
Time:,15:08:51,Log-Likelihood:,-944230.0
No. Observations:,367806,AIC:,1888000.0
Df Residuals:,367802,BIC:,1889000.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.6328,0.006,294.735,0.000,1.622,1.644
x[T.Fine],-0.4167,0.081,-5.114,0.000,-0.576,-0.257
x[T.Sunny],-0.2714,0.089,-3.053,0.002,-0.446,-0.097
x[T.Wet],-0.3995,0.016,-24.220,0.000,-0.432,-0.367

0,1,2,3
Omnibus:,382096.782,Durbin-Watson:,0.417
Prob(Omnibus):,0.0,Jarque-Bera (JB):,32335538.164
Skew:,5.16,Prob(JB):,0.0
Kurtosis:,47.76,Cond. No.,17.2


**Outer London - Male**

In [101]:
# Define the dependent variable
y= outer_london['Total cycles']

In [102]:
# Define the indepedent variable
x= outer_london['Number of male cycles']

In [103]:
# Create formula and pass through OLS methods
f = 'y ~ x'
test = ols(f, data= outer_london).fit()

In [104]:
# Print the regression table
test.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.956
Model:,OLS,Adj. R-squared:,0.956
Method:,Least Squares,F-statistic:,8098000.0
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,0.0
Time:,15:08:52,Log-Likelihood:,-380600.0
No. Observations:,375660,AIC:,761200.0
Df Residuals:,375658,BIC:,761200.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0602,0.001,49.613,0.000,0.058,0.063
x,1.1455,0.000,2845.640,0.000,1.145,1.146

0,1,2,3
Omnibus:,331245.688,Durbin-Watson:,1.37
Prob(Omnibus):,0.0,Jarque-Bera (JB):,22237526.246
Skew:,3.965,Prob(JB):,0.0
Kurtosis:,39.848,Cond. No.,3.44


**Outer London - Female**

In [105]:
# Define the dependent variable
y= outer_london['Total cycles']

In [106]:
# Define the indepedent variable
x= outer_london['Number of female cycles']

In [107]:
# Create formula and pass through OLS methods
f = 'y ~ x'
test = ols(f, data= outer_london).fit()

In [108]:
# Print the regression table
test.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.457
Model:,OLS,Adj. R-squared:,0.457
Method:,Least Squares,F-statistic:,315600.0
Date:,"Tue, 16 Aug 2022",Prob (F-statistic):,0.0
Time:,15:08:52,Log-Likelihood:,-851330.0
No. Observations:,375660,AIC:,1703000.0
Df Residuals:,375658,BIC:,1703000.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.9049,0.004,226.389,0.000,0.897,0.913
x,2.8455,0.005,561.795,0.000,2.836,2.855

0,1,2,3
Omnibus:,341848.118,Durbin-Watson:,0.845
Prob(Omnibus):,0.0,Jarque-Bera (JB):,22179936.912
Skew:,4.197,Prob(JB):,0.0
Kurtosis:,39.695,Cond. No.,1.49
