In [38]:
import os
import warnings

import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sqlalchemy
import statsmodels.api as sm
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base

In [39]:
from sklearn import datasets
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import ElasticNet

In [40]:
#connecting to database server
# The connection URL (which embeds the credentials) can be supplied through the
# MYDB_URL environment variable so that it does not have to live in the notebook.
# The previous hard-coded URL is kept only as a fallback for existing local setups.
db_url = os.environ.get("MYDB_URL", 'mysql+mysqlconnector://root:123456@localhost/mydb')
engine = sqlalchemy.create_engine(db_url, pool_size=25, max_overflow=10, pool_timeout=60, pool_recycle=3600)

## Simulating energy values for various road grades, vehicle masses, and auxiliary power levels

In [82]:
# Inputs for the energy-consumption simulation: constants, the rider-dependent
# mass and auxiliary-power levels, and the (sin(grade), mass, auxiliary power)
# grid stored in `data` (one row per combination).

#Define constants
g = 9.81  # m/s^2
m_empty = 14000  # kg (empty bus)
m_pax = 70  # kg
C_rr = 0.00697  # (without rain)
#C_rr=0.00763 #(with rain)
rho = 1.2  # kg/m^3 (was assigned twice with the same value; kept once)
A = 7.92  # m^2
C_d = 0.65
#p_aux = np.arange(4000,10000,1000) #4000 # W (without HVAC)
#p_auxi=12 #kW (with HVAC)
eta_m = 0.85
eta_bat = 0.95
power = np.empty((0, 1))

# Rider counts 0, 5, ..., 40
riders = list(5 * np.arange(0, 9, 1))

q_heat = 30000  # 30kW
p_auxx = 9000  # 9 kW
h_gain = 1.8  # W
R_th = 0.0174  # K/W
T_in = 18.9  # celsius
T_out = -20
# NOTE(review): C_p is given in kJ/(kg.K) while the other terms of p_aux are in W,
# so the two air-exchange terms below may be a factor 1000 too small -- confirm.
C_p = 1.005  # KJ/KG.K
V_inf = 0
V_hv = 1.13  # m^3/sec
psi = 0.20  # 20%

# One total mass and one auxiliary-power level per rider count.
masses = [m_empty + m_pax * rider for rider in riders]
p_aux = [
    (q_heat - ((T_in-T_out)/R_th) - (rho*C_p*(T_in-T_out)*V_inf ) - (psi*V_hv* rho*C_p*(T_in-T_out)) + (h_gain*rider)+ p_auxx)
    for rider in riders
]

#generate a possible sampling distribution to select data from ????
grades = np.arange(-0.1, 0.1, 0.005)
#masses = np.arange(14000, 15000,70)
time = 0.1  # 0.1 sec
p_max = 90000  # 90kw
e_bat = 215  # 215 kwh

# Full grid, grade varying slowest and auxiliary power fastest. Built in a single
# pass: growing the array with np.append inside the loops copies it on every row.
data = np.array(
    [[np.sin(grade), m_bus, p_auxi]
     for grade in grades
     for m_bus in masses
     for p_auxi in p_aux],
    dtype=float,
).reshape(-1, 3)





In [77]:
# Load the whole simu_data table into a DataFrame.
query = "SELECT * FROM simu_data"
ener = pd.read_sql(sql=query, con=engine)

In [78]:
# Remove the "index" column that comes back with the simu_data table.
# errors="ignore" keeps this cell re-runnable: without it a second execution
# raises KeyError because the column is already gone.
ener = ener.drop(columns="index", errors="ignore")
ener

Unnamed: 0,0
0,-4.077792
1,-4.076930
2,-4.076068
3,-4.075206
4,-4.074344
...,...
3235,6.064800
3236,6.065662
3237,6.066524
3238,6.067386


## Fitting the polynomial regression model 

In [83]:

# Fit an ElasticNet regression on degree-6 polynomial features of the simulation
# grid `data` (sin(grade), mass, auxiliary power) against the energies in `ener`,
# then report train / validation MSE and the fitted coefficients.

# Add a bias term to the dataset
# NOTE(review): PolynomialFeatures adds its own bias column by default, so this
# constant column is redundant and inflates the feature count. It is kept because
# the prediction cells build their inputs the same way.
x = sm.add_constant(data)

# Create polynomial features
# NOTE(review): the inputs are not scaled, so mass**6 terms are many orders of
# magnitude larger than the grade terms. The truncated output under this cell looks
# like the tail of a solver warning -- consider standardising the inputs first.
poly_feats = PolynomialFeatures(degree=6)
x = poly_feats.fit_transform(x)

# Split into training and validation set
x_train, x_val, y_train, y_val = train_test_split(x, ener, test_size=0.2, random_state=0)

# Fit the elastic net regression model
# max_iter must be an integer: scikit-learn's parameter validation rejects the
# float 1e5 in current releases.
my_reg = ElasticNet(alpha=0.001, l1_ratio=0.8, max_iter=100_000).fit(x_train, y_train)

# Make predictions
val_preds = my_reg.predict(x_val)
train_preds = my_reg.predict(x_train)
val_mse = mean_squared_error(y_val, val_preds)
train_mse = mean_squared_error(y_train, train_preds)
print("Train MSE:", train_mse, "\n", "Valid MSE:", val_mse, "\n", "coef :", my_reg.coef_)


Train MSE: 0.019283392279136884 
 Valid MSE: 0.018673069388103754 
 coef : [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.53439591e-04
 -0.00000000e+00  0.00000000e+00  0.00000000e+00  3.68700428e-09
 -0.00000000e+00 -0.00000000e+00  3.31930314e-03  5.52662021e-05
  4.04603364e-09 -1.14528691e-09 -1.06694208e-07  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -0.00000000e+00 -0.00000000e+00
  0.00000000e+00  2.80004650e-07  1.34950214e-09 -1.08496683e-09
  1.74745256e-10  0.00000000e+00 -0.00000000e+00 -0.00000000e+00
 -5.47577076e-09  2.82960775e-10  8.34372977e-10  2.44422555e-14
 -1.31528280e-15 -2.94068934e-14  4.95266506e-13  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -0.00000000e+00 -0.00000000e+00
  0.00000000e+00  1.98490536e-07  2.06314036e-09 -1.09809440e-09
 -2.66105590e-08  0.00000000e+00 -0.00000000e+00 -0.00000000e+00
 -5.39503216e-09  2.68421723e-10  8.28230465e-10  2.48357275e-14
 -1.25658838e-15 -2.93582889e-14  4.94272016e-13 -0.00000000e+00
  0.00000000e+0

  positive)


In [47]:
# Predict over the full feature matrix `x` (training and validation rows together)
# and print the raw array.
print(my_reg.predict(x))

[-0.80384324 -0.80358854 -0.80332534 -0.80305362 -0.80277337 -0.8024846
 -0.80218729 -0.80188143 -0.80156701 -0.77605069 -0.77573848 -0.77541775
 -0.77508848 -0.77475067 -0.77440431 -0.77404939 -0.77368592 -0.77331387
 -0.74816414 -0.74779274 -0.7474128  -0.74702431 -0.74662726 -0.74622165
 -0.74580746 -0.74538468 -0.74495333 -0.72032422 -0.71989192 -0.71945105
 -0.71900162 -0.71854361 -0.71807701 -0.71760183 -0.71711804 -0.71662565
 -0.6926901  -0.69219513 -0.69169157 -0.69117943 -0.69065869 -0.69012934
 -0.68959139 -0.68904482 -0.68848963 -0.66544037 -0.66488091 -0.66431284
 -0.66373616 -0.66315087 -0.66255696 -0.66195442 -0.66134325 -0.66072343
 -0.63877392 -0.63814808 -0.63751363 -0.63687056 -0.63621885 -0.6355585
 -0.63488951 -0.63421185 -0.63352554 -0.61291082 -0.61221671 -0.61151396
 -0.61080256 -0.61008251 -0.6093538  -0.60861642 -0.60787037 -0.60711564
 -0.58809335 -0.58732898 -0.58655595 -0.58577425 -0.58498388 -0.58418483
 -0.5833771  -0.58256067 -0.58173555 -0.25362144 -0.2

In [117]:
# Load the per-trip table; later cells read its route_id, trip_id, weight and p_ext columns.
query = "SELECT * FROM mbta_route_data ;"
trip_data = pd.read_sql(sql=query, con=engine)

In [120]:
# Load the real Boston data; later cells read its trip_id, slope and dist_m columns.
query = "SELECT * FROM mbta_datafinal ;"
df = pd.read_sql(sql=query, con=engine)

In [146]:
# Per-trip energy estimate using the "30ft_wt" weights: predict a value for every
# segment of the trip with my_reg and take the dot product with the segment
# lengths (dist_m / 1000). One entry per row of trip_data, in the same order.
# (The 40 ft and 60 ft cells repeat this logic with a different weight column.)
MBTA_energy_30ft = []

# The regression is a degree-6 polynomial, so inputs outside the range of the
# training grid `data` (columns: sin(grade), mass, auxiliary power) are
# extrapolated and can be badly wrong. Say so instead of failing silently.
for train_col, trip_col in ((1, "30ft_wt"), (2, "p_ext")):
    train_lo, train_hi = data[:, train_col].min(), data[:, train_col].max()
    route_means = trip_data.groupby("route_id")[trip_col].mean()
    if route_means.min() < train_lo or route_means.max() > train_hi:
        warnings.warn(
            f"{trip_col}: route means span [{route_means.min():.1f}, {route_means.max():.1f}] "
            f"but my_reg was trained on [{train_lo:.1f}, {train_hi:.1f}]; "
            "predictions are polynomial extrapolations.",
            stacklevel=2,
        )

for trip in trip_data.trip_id.values:
    # Filter the segment table once per trip instead of once per segment.
    trip_rows = df[df.trip_id == trip]
    if trip_rows.empty:
        # No segments: keep the list aligned with trip_data and mark the gap.
        MBTA_energy_30ft.append(np.nan)
        continue

    # Route-level means are the same for every segment of the trip.
    route = trip_data[trip_data.trip_id == trip]["route_id"].values[0]
    route_trips = trip_data[trip_data.route_id == route]
    n_seg = trip_rows.shape[0]

    # Same column layout as the training grid: sin(slope), weight, external power.
    data1 = np.column_stack((
        np.sin(trip_rows["slope"].values),
        np.full(n_seg, route_trips["30ft_wt"].mean()),
        np.full(n_seg, route_trips["p_ext"].mean()),
    ))
    # Add a bias term to the dataset ('add' forces it even though two columns are constant)
    x1 = sm.add_constant(data1, has_constant='add')
    # Expand with the transformer fitted for training so the feature layout matches my_reg
    x1 = poly_feats.transform(x1)
    p = my_reg.predict(x1)
    MBTA_energy_30ft.append(np.matmul(p, trip_rows["dist_m"].values / 1000))

In [147]:
# Per-trip energy estimate using the "40ft_wt" weights: predict a value for every
# segment of the trip with my_reg and take the dot product with the segment
# lengths (dist_m / 1000). One entry per row of trip_data, in the same order.
# (The 30 ft and 60 ft cells repeat this logic with a different weight column.)
MBTA_energy_40ft = []

# The regression is a degree-6 polynomial, so inputs outside the range of the
# training grid `data` (columns: sin(grade), mass, auxiliary power) are
# extrapolated and can be badly wrong. Say so instead of failing silently.
for train_col, trip_col in ((1, "40ft_wt"), (2, "p_ext")):
    train_lo, train_hi = data[:, train_col].min(), data[:, train_col].max()
    route_means = trip_data.groupby("route_id")[trip_col].mean()
    if route_means.min() < train_lo or route_means.max() > train_hi:
        warnings.warn(
            f"{trip_col}: route means span [{route_means.min():.1f}, {route_means.max():.1f}] "
            f"but my_reg was trained on [{train_lo:.1f}, {train_hi:.1f}]; "
            "predictions are polynomial extrapolations.",
            stacklevel=2,
        )

for trip in trip_data.trip_id.values:
    # Filter the segment table once per trip instead of once per segment.
    trip_rows = df[df.trip_id == trip]
    if trip_rows.empty:
        # No segments: keep the list aligned with trip_data and mark the gap.
        MBTA_energy_40ft.append(np.nan)
        continue

    # Route-level means are the same for every segment of the trip.
    route = trip_data[trip_data.trip_id == trip]["route_id"].values[0]
    route_trips = trip_data[trip_data.route_id == route]
    n_seg = trip_rows.shape[0]

    # Same column layout as the training grid: sin(slope), weight, external power.
    data1 = np.column_stack((
        np.sin(trip_rows["slope"].values),
        np.full(n_seg, route_trips["40ft_wt"].mean()),
        np.full(n_seg, route_trips["p_ext"].mean()),
    ))
    # Add a bias term to the dataset ('add' forces it even though two columns are constant)
    x1 = sm.add_constant(data1, has_constant='add')
    # Expand with the transformer fitted for training so the feature layout matches my_reg
    x1 = poly_feats.transform(x1)
    p = my_reg.predict(x1)
    MBTA_energy_40ft.append(np.matmul(p, trip_rows["dist_m"].values / 1000))

In [148]:
# Per-trip energy estimate using the "60ft_wt" weights: predict a value for every
# segment of the trip with my_reg and take the dot product with the segment
# lengths (dist_m / 1000). One entry per row of trip_data, in the same order.
# (The 30 ft and 40 ft cells repeat this logic with a different weight column.)
MBTA_energy_60ft = []

# The regression is a degree-6 polynomial, so inputs outside the range of the
# training grid `data` (columns: sin(grade), mass, auxiliary power) are
# extrapolated and can be badly wrong. Say so instead of failing silently.
# NOTE(review): the saved results for this bus size are all negative, which is
# consistent with such extrapolation -- treat them as unreliable until checked.
for train_col, trip_col in ((1, "60ft_wt"), (2, "p_ext")):
    train_lo, train_hi = data[:, train_col].min(), data[:, train_col].max()
    route_means = trip_data.groupby("route_id")[trip_col].mean()
    if route_means.min() < train_lo or route_means.max() > train_hi:
        warnings.warn(
            f"{trip_col}: route means span [{route_means.min():.1f}, {route_means.max():.1f}] "
            f"but my_reg was trained on [{train_lo:.1f}, {train_hi:.1f}]; "
            "predictions are polynomial extrapolations.",
            stacklevel=2,
        )

for trip in trip_data.trip_id.values:
    # Filter the segment table once per trip instead of once per segment.
    trip_rows = df[df.trip_id == trip]
    if trip_rows.empty:
        # No segments: keep the list aligned with trip_data and mark the gap.
        MBTA_energy_60ft.append(np.nan)
        continue

    # Route-level means are the same for every segment of the trip.
    route = trip_data[trip_data.trip_id == trip]["route_id"].values[0]
    route_trips = trip_data[trip_data.route_id == route]
    n_seg = trip_rows.shape[0]

    # Same column layout as the training grid: sin(slope), weight, external power.
    data1 = np.column_stack((
        np.sin(trip_rows["slope"].values),
        np.full(n_seg, route_trips["60ft_wt"].mean()),
        np.full(n_seg, route_trips["p_ext"].mean()),
    ))
    # Add a bias term to the dataset ('add' forces it even though two columns are constant)
    x1 = sm.add_constant(data1, has_constant='add')
    # Expand with the transformer fitted for training so the feature layout matches my_reg
    x1 = poly_feats.transform(x1)
    p = my_reg.predict(x1)
    MBTA_energy_60ft.append(np.matmul(p, trip_rows["dist_m"].values / 1000))

In [154]:
# Attach the per-trip energy estimates to trip_data, one column per bus size.
# The lists were built by iterating trip_data.trip_id, so they line up with its rows.
for column, estimates in (
    ("MBTA_energy_30ft", MBTA_energy_30ft),
    ("MBTA_energy_40ft", MBTA_energy_40ft),
    ("MBTA_energy_60ft", MBTA_energy_60ft),
):
    trip_data[column] = estimates

In [156]:
# Display the per-trip estimates computed with the "30ft_wt" weights.
MBTA_energy_30ft

[17.702699072236413,
 12.801722633852567,
 9.466264986894034,
 11.464183000811726,
 20.507363239744965,
 12.911028793610743,
 10.381514395977119,
 28.62172220425925,
 13.086479723556227,
 11.055752557708253,
 19.523991184850132,
 20.50525479711394,
 62.85993062726224,
 39.93762894991304,
 12.038939573993664,
 14.952292722882632,
 43.21713529297357,
 27.18666036793603,
 31.619483501739946,
 24.539641799066924,
 30.787598396041655,
 13.112129914745546,
 18.44332937667718,
 10.832129701707757,
 13.714109182524531,
 16.042064232090276,
 17.983492912621784,
 11.843799822853804,
 16.566138463722485,
 9.7953523283064,
 9.575861676622933,
 34.8380954827725,
 42.807309089269545,
 37.1795180263554,
 37.14707434447349,
 30.51965638945889,
 24.001996215873433,
 20.593194854050292,
 19.60524515301323,
 15.323721109110906,
 16.23157655547438,
 10.706511023395699,
 62.66145468915971,
 52.60433809047682,
 41.60741355985581,
 22.272836621315328,
 28.69886866170429,
 27.87840538979175,
 46.6862565136919

In [155]:
# Display the per-trip estimates computed with the "60ft_wt" weights.
# NOTE(review): every value in the saved output below is negative. The "60ft_wt"
# values shown in the trip_data table (~31,000) lie far above the masses used to
# build the training grid (14000 + 70*rider), so these look like polynomial
# extrapolation artifacts rather than real energy figures -- confirm before use.
MBTA_energy_60ft

[-44.09211732039166,
 -34.13643153454197,
 -22.47253552866429,
 -29.741494613828674,
 -52.55299482181341,
 -33.934831193601575,
 -25.433163255000437,
 -68.56418427089889,
 -32.94865356553501,
 -28.493593164619256,
 -46.57488202577276,
 -49.7257067661293,
 -159.58616002771137,
 -102.04985223818366,
 -28.78841832998002,
 -42.24534991250633,
 -130.99611812554713,
 -77.74332009337913,
 -92.74948959651563,
 -78.63176702566625,
 -95.10253417753918,
 -37.65208751845752,
 -53.51132279979397,
 -30.577800746857214,
 -44.9586212514578,
 -52.137322116433374,
 -78.82447759829111,
 -57.596444623274216,
 -72.21067269330494,
 -44.917402994818055,
 -43.24501138533265,
 -108.2160532649084,
 -137.31936578665986,
 -127.8878525628091,
 -118.9371113715154,
 -98.96107625093488,
 -84.9422113948333,
 -67.78632527973281,
 -73.2478197825649,
 -44.14341108884101,
 -46.834775614998584,
 -32.51670565923183,
 -233.21135659121052,
 -196.2226455389163,
 -157.00300481880586,
 -77.1040919688849,
 -102.67402964235876,
 -

In [158]:
# Load the per-route table and display it.
query = "SELECT * FROM mbta_e_data ;"
route_data = pd.read_sql(sql=query, con=engine)
route_data

Unnamed: 0,index,route_id,route_length,num_buses,MBTA_energy_30ft,MBTA_energy_40ft,MBTA_energy_60ft,avg_speed
0,0,201,671572.0,2.0,1.450868,1.529698,2.140397,5.92067
1,1,202,204479.0,4.0,1.376865,1.657608,2.114057,5.179345
2,2,210,533152.0,4.0,1.360542,1.804997,2.095427,7.328337
3,3,211,804990.0,8.0,1.799849,1.54735,2.12776,6.300936
4,4,212,189424.0,2.0,1.495068,1.566624,2.338906,5.997131
5,5,214,322743.0,4.0,1.287326,1.634344,2.989357,6.568696
6,6,215,1395334.0,20.0,1.947277,1.955252,2.308941,6.819996
7,7,216,807525.0,4.0,1.485901,1.541651,2.50804,8.105051
8,8,217,114998.0,2.0,1.849854,1.699853,2.326275,6.939686
9,9,220,2756083.0,8.0,1.259148,1.58248,2.487096,10.125741


In [160]:
# Show trip_data without the "level_0" and "index" columns.
# The result is only displayed, not assigned: trip_data itself keeps both columns.
trip_data.drop(columns = ["level_0", "index"])

Unnamed: 0,route_id,trip_id,trip_count,avg_speed,route_length,30ft_wt,40ft_wt,60ft_wt,p_ext,MBTA_energy_30ft,MBTA_energy_40ft,MBTA_energy_60ft
0,201,42949724,38,5.786905,4861.0,14181.320500,20381.320500,31281.320500,39542.988435,17.702699,18.767153,-44.092117
1,201,42949676,2,6.296667,3778.0,14181.320500,20381.320500,31281.320500,39542.988435,12.801723,13.494605,-34.136432
2,201,42949628,48,8.621429,3621.0,14181.320500,20381.320500,31281.320500,39542.988435,9.466265,10.076920,-22.472536
3,201,42842961_1,39,3.576923,2790.0,14181.320500,20381.320500,31281.320500,39542.988435,11.464183,12.150949,-29.741495
4,201,42949723,44,5.321429,4470.0,14181.320500,20381.320500,31281.320500,39542.988435,20.507363,21.664494,-52.552995
...,...,...,...,...,...,...,...,...,...,...,...,...
88,245,42949494,3,7.476042,14354.0,14604.849063,20804.849063,31704.849063,39553.879170,40.718961,42.666314,-124.708834
89,245,42949556,5,8.466667,14224.0,14604.849063,20804.849063,31704.849063,39553.879170,40.387800,42.280644,-123.643350
90,245,42949557,11,8.101923,12639.0,14604.849063,20804.849063,31704.849063,39553.879170,45.480774,47.703167,-135.278788
91,245,42949426,1,7.820690,13608.0,14604.849063,20804.849063,31704.849063,39553.879170,42.640949,44.545251,-133.399291


In [166]:
#estimating total energy consumed by different bus types on each route
# For every route, sum trip_count * per-trip energy over that route's trips and
# store the total in the matching route_data row (a route with no trips gets 0).
# NOTE(review): the saved 60 ft totals are negative; check the per-trip estimates
# before relying on them.
energy_cols = ["MBTA_energy_30ft", "MBTA_energy_40ft", "MBTA_energy_60ft"]
for route in route_data.route_id:
    ab = trip_data[trip_data.route_id == route]
    route_rows = route_data.route_id == route
    for col in energy_cols:
        route_data.loc[route_rows, col] = (ab["trip_count"] * ab[col]).sum()

#storing estimated data in database
# index=False: route_data was read from this same table and already carries an
# "index" column. Writing the DataFrame index again adds another index column on
# every round trip (probably the origin of the "index" / "level_0" columns seen in
# the saved tables) and eventually fails on a duplicate name.
route_data.to_sql('mbta_e_data', con=engine, if_exists='replace', index=False)

In [168]:
# Display route_data with the aggregated per-route energy totals.
route_data

Unnamed: 0,index,route_id,route_length,num_buses,MBTA_energy_30ft,MBTA_energy_40ft,MBTA_energy_60ft,avg_speed
0,0,201,671572.0,2.0,2502.113849,2650.957927,-6294.705089,5.92067
1,1,202,204479.0,4.0,705.225027,747.908936,-1758.106133,5.179345
2,2,210,533152.0,4.0,2076.019475,2198.005609,-5234.182161,7.328337
3,3,211,804990.0,8.0,2411.84323,2517.418685,-7275.354527,6.300936
4,4,212,189424.0,2.0,605.955598,628.239532,-1972.496668,5.997131
5,5,214,322743.0,4.0,958.253214,924.181897,-4251.414338,6.568696
6,6,215,1395334.0,20.0,3812.915283,3934.876103,-12876.738583,6.819996
7,7,216,807525.0,4.0,2217.320212,2318.893011,-6475.761947,8.105051
8,8,217,114998.0,2.0,382.539407,389.56188,-1407.082903,6.939686
9,9,220,2756083.0,8.0,6986.666459,7264.728839,-21834.799468,10.125741


In [43]:
# Number of rows in df that belong to trip "42949921".
df[df.trip_id == "42949921"].shape[0]

12

In [124]:
# Scratch cell: rebuild the model input for whatever `data1` currently holds
# (it is left over from the last trip processed by the per-trip loops).
poly_feats = PolynomialFeatures(degree=6)
# Prepend the bias column ('add' forces it even if a column is already constant),
# then expand to the degree-6 polynomial features.
x1 = poly_feats.fit_transform(sm.add_constant(data1, has_constant='add'))


In [125]:
# Predict with the fitted model on the polynomial features currently held in x1.
p = my_reg.predict(x1)

In [101]:
# "duration_min" values for the rows of trip "42949921".
# NOTE(review): the saved output below is a shape tuple, which does not match this
# expression -- the cell was probably edited after it last ran; re-run to refresh.
df[df.trip_id == "42949921"]["duration_min"].values

(12,)