In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNetCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LassoCV


In [None]:
# Read the transit / visits table (one row per month, per the Date column)
# from the exported spreadsheet CSV.
df = pd.read_csv(
    filepath_or_buffer='/content/transit_simulator - Sheet1 (1).csv',
)

# Preview the first five rows as the cell's rich output.
df.head(5)

Unnamed: 0,Date,BART,MUNI,Ferry,Bridge,Visits,Month
0,1/2020,2083734,323400,420907,5420879,5538395,1
1,2/2020,1953373,336300,408246,5222944,5169274,2
2,3/2020,804768,237000,158585,3848968,2547834,3
3,4/2020,70475,46800,5591,2506819,660987,4
4,5/2020,84218,61400,7067,3443831,880423,5


In [None]:
# Subset holding only the raw count series (no Date / Month columns).
DF2 = df.loc[:, ['BART', 'MUNI', 'Ferry', 'Bridge', 'Visits']]

In [None]:
# Append the natural log of each count series to df. The pairs are listed in
# the order the new columns are created: ln_visits, ln_bart, ln_muni,
# ln_ferry, ln_bridge.
for raw_col, log_col in [
    ('Visits', 'ln_visits'),
    ('BART', 'ln_bart'),
    ('MUNI', 'ln_muni'),
    ('Ferry', 'ln_ferry'),
    ('Bridge', 'ln_bridge'),
]:
    df[log_col] = np.log(df[raw_col])

# OLS

In [None]:
# Response: log of visits.
y = df['ln_visits']

# Month indicators as 0/1 integers; drop_first=True omits the first month
# level so it serves as the baseline category.
month_dummies = pd.get_dummies(df['Month'], drop_first=True).astype(int)

# Regressors: month indicators followed by the four logged transit series,
# with a constant column added for the OLS intercept.
X = sm.add_constant(
    pd.concat(
        [month_dummies, df[['ln_bart', 'ln_muni', 'ln_ferry', 'ln_bridge']]],
        axis=1,
    )
)

In [None]:
# Fit ordinary least squares of y on X (default, non-robust covariance).
model = sm.OLS(endog=y, exog=X).fit()

# Show the full regression table as the cell output.
model.summary()

0,1,2,3
Dep. Variable:,ln_visits,R-squared:,0.995
Model:,OLS,Adj. R-squared:,0.994
Method:,Least Squares,F-statistic:,724.2
Date:,"Wed, 29 Oct 2025",Prob (F-statistic):,8.22e-55
Time:,17:01:26,Log-Likelihood:,144.74
No. Observations:,68,AIC:,-257.5
Df Residuals:,52,BIC:,-222.0
Df Model:,15,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,2.1872,1.075,2.034,0.047,0.029,4.345
2,0.0072,0.019,0.371,0.712,-0.032,0.046
3,-0.0135,0.019,-0.709,0.482,-0.052,0.025
4,-0.0368,0.020,-1.803,0.077,-0.078,0.004
5,-0.0226,0.020,-1.140,0.259,-0.062,0.017
6,-0.0128,0.021,-0.605,0.548,-0.055,0.030
7,-0.0056,0.023,-0.242,0.810,-0.052,0.041
8,-0.0557,0.022,-2.580,0.013,-0.099,-0.012
9,-0.0583,0.021,-2.834,0.007,-0.100,-0.017

0,1,2,3
Omnibus:,5.523,Durbin-Watson:,1.599
Prob(Omnibus):,0.063,Jarque-Bera (JB):,5.645
Skew:,-0.372,Prob(JB):,0.0595
Kurtosis:,4.2,Cond. No.,7140.0


# Elastic Net Regression (LASSO + Ridge penalty)

In [None]:
# Design matrix for the penalized fit: month indicators plus the four logged
# transit series.
#
# No explicit constant column is added here (unlike the OLS section): the
# pipeline that consumes X standardizes every column, which turns a constant
# column into all zeros, and ElasticNetCV fits its own intercept by default.
# A `const` column would therefore only appear as a dead, always-zero
# "feature" and shift the positions of the real regressors in the report.
X = pd.concat([month_dummies, df[['ln_bart', 'ln_muni', 'ln_ferry', 'ln_bridge']]], axis=1)

# Response: log of visits.
y = df['ln_visits']

In [None]:
# Hyperparameter search grids, named so the selected values can be checked
# against the grid boundaries after fitting.
l1_ratio_grid = [0.6, 0.4]
alpha_grid = np.logspace(-3, 1, 50)

# Standardize every feature, then choose (l1_ratio, alpha) by 5-fold
# cross-validation over the grids above.
enet_pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("enet", ElasticNetCV(
        l1_ratio=l1_ratio_grid,
        alphas=alpha_grid,
        cv=5,
        max_iter=100000,
        random_state=42
    ))
])

# The month dummy columns carry integer labels; cast every label to str so
# the frame has uniform string feature names before it is passed to sklearn.
X.columns = X.columns.astype(str)

enet_pipe.fit(X, y)
enet = enet_pipe.named_steps["enet"]

# NOTE: these coefficients are on the standardized-feature scale (the scaler
# runs before the regression), so they are not directly comparable to the
# unscaled OLS coefficients. Rows are sorted by absolute magnitude.
enet_df = pd.DataFrame({"variable": X.columns, "coef": enet.coef_}) \
             .sort_values("coef", key=abs, ascending=False)
print(f"Chosen l1_ratio={enet.l1_ratio_}, alpha={enet.alpha_}")
print(enet_df[enet_df.coef != 0])

# A CV optimum sitting on the edge of the alpha grid means the true optimum
# may lie outside the searched range; surface that instead of reporting the
# boundary value silently.
if np.isclose(enet.alpha_, alpha_grid.min()) or np.isclose(enet.alpha_, alpha_grid.max()):
    print("Note: selected alpha is on the edge of the search grid; "
          "consider widening `alphas`.")

Chosen l1_ratio=0.4, alpha=0.001
     variable      coef
12    ln_bart  0.329487
15  ln_bridge  0.052148
14   ln_ferry  0.046280
8           9 -0.013579
10         11 -0.012569
7           8 -0.011086
9          10 -0.008238
3           4 -0.006678
4           5 -0.003987
6           7  0.003741
2           3 -0.001951
1           2  0.001750
5           6  0.000585
