In [12]:
import copy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_squared_error,  r2_score, mean_absolute_error
from sklearn.neural_network import MLPRegressor

In [13]:
import warnings

# Silence every warning category for the rest of the session.
# NOTE(review): a blanket ignore also hides any convergence or deprecation
# warnings raised during model training -- confirm this is intended.
warnings.simplefilter('ignore')


In [14]:
# Read the running-time table from disk and keep only rows with week_no below 25.
path = '../data/bus_running_times_feature_added_all.csv'
df = pd.read_csv(path).loc[lambda frame: frame['week_no'] < 25]


In [15]:
# Restrict the frame to the columns used by the rest of the notebook; the last
# entry, 'run_time_in_seconds', is the regression target.
selected_columns = [
    'deviceid', 'week_no', 'segment', 'length', 'direction',
    'month', 'day', 'day_of_week',
    'time_of_day',
    'dt(n-1)', 'rt(w-1)', 'rt(w-2)', 'rt(w-3)',
    'rt(t-1)', 'rt(t-2)', 'rt(n-1)', 'rt(n-2)', 'rt(n-3)',
    'precip', 'windspeed', 'temp', 'run_time_in_seconds',
]
df = df.loc[:, selected_columns]
df

Unnamed: 0,deviceid,week_no,segment,length,direction,month,day,day_of_week,time_of_day,dt(n-1),...,rt(w-3),rt(t-1),rt(t-2),rt(n-1),rt(n-2),rt(n-3),precip,windspeed,temp,run_time_in_seconds
0,262.0,1.0,1.0,0.6261,1.0,10.0,1.0,4.0,6.50,0.0,...,96.0,96.0,96.0,96.0,96.0,96.0,0.0,6.1,20.0,69.0
1,262.0,1.0,2.0,1.2808,1.0,10.0,1.0,4.0,6.50,74.0,...,247.0,247.0,247.0,69.0,247.0,247.0,0.0,6.1,20.0,210.0
2,262.0,1.0,3.0,2.1125,1.0,10.0,1.0,4.0,6.75,0.0,...,506.0,506.0,506.0,210.0,69.0,506.0,0.0,6.1,20.0,496.0
3,262.0,1.0,4.0,1.5513,1.0,10.0,1.0,4.0,6.75,6.0,...,192.0,192.0,192.0,496.0,210.0,69.0,0.0,6.1,20.0,195.0
4,262.0,1.0,5.0,0.8450,1.0,10.0,1.0,4.0,6.75,0.0,...,114.0,114.0,114.0,195.0,496.0,210.0,0.0,6.1,20.0,97.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173930,121.0,24.0,30.0,2.5600,2.0,7.0,3.0,6.0,13.00,15.0,...,439.0,439.0,439.0,307.0,210.0,95.0,0.0,9.4,22.1,501.0
173931,121.0,24.0,31.0,0.4200,2.0,7.0,3.0,6.0,13.25,15.0,...,65.0,65.0,65.0,501.0,307.0,210.0,0.0,9.4,22.1,79.0
173932,121.0,24.0,32.0,1.3000,2.0,7.0,3.0,6.0,13.25,15.0,...,262.0,262.0,262.0,79.0,501.0,307.0,0.0,9.4,22.1,217.0
173933,121.0,24.0,33.0,1.2200,2.0,7.0,3.0,6.0,13.25,15.0,...,200.0,200.0,200.0,217.0,79.0,501.0,0.0,9.4,22.1,172.0


In [16]:
# Discard rows without a target value, then show the per-column null counts
# that remain.
has_target = df['run_time_in_seconds'].notna()
df = df[has_target]
df.isna().sum()

deviceid               0
week_no                0
segment                0
length                 0
direction              0
month                  0
day                    0
day_of_week            0
time_of_day            0
dt(n-1)                0
rt(w-1)                0
rt(w-2)                0
rt(w-3)                0
rt(t-1)                0
rt(t-2)                0
rt(n-1)                0
rt(n-2)                0
rt(n-3)                0
precip                 0
windspeed              0
temp                   0
run_time_in_seconds    0
dtype: int64

In [17]:
def predict(model, dt, t):
    """Print the mean absolute error of ``model`` on one batch.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``.
    dt : array-like
        Feature matrix handed to ``model.predict``.
    t : DataFrame or mapping
        Container holding the ground-truth values under the
        ``'run_time_in_seconds'`` key.

    Returns
    -------
    None
        The score is only printed, in the form ``MAE (1): <value>``.
    """
    preds = model.predict(dt)
    # Only MAE is reported, so no other metric is computed here.
    mae = mean_absolute_error(t['run_time_in_seconds'], preds)
    print("MAE (1): %f" % (mae))
    

<hr>

In [18]:
# First batch: every week up to and including 19. Then one batch per week for
# weeks 20 through 23.
batch = [df[df['week_no'] <= 19]]
batch.extend(df[df['week_no'] == week] for week in range(20, 24))

# For each batch, pair the feature frame (week number and target removed) with
# the target column.
dbatch = [
    [frame.drop(columns=['week_no', 'run_time_in_seconds']), frame['run_time_in_seconds']]
    for frame in batch
]

In [19]:
def inc_train(is_initial, data_batch, prev_model):
    """Fit a new MLP on the first batch, or incrementally update an earlier model.

    Parameters
    ----------
    is_initial : bool
        True to build and fit a fresh ``MLPRegressor``; False to continue
        training from ``prev_model`` via ``partial_fit``.
    data_batch : sequence
        Two-element sequence: features at index 0, targets at index 1.
    prev_model : estimator or None
        Previously trained model. Ignored when ``is_initial`` is True.

    Returns
    -------
    estimator
        The newly trained model. In the incremental case this is an
        independent copy, so ``prev_model`` keeps the weights it had before
        the call.

    Raises
    ------
    ValueError
        If ``is_initial`` is False and ``prev_model`` is None.
    """
    if is_initial:
        # NOTE(review): scikit-learn documents learning_rate as only used with
        # solver='sgd'; with the default solver this setting is likely a
        # no-op -- confirm the intended solver.
        curr_model = MLPRegressor(hidden_layer_sizes=(100, 50), activation='relu', learning_rate='adaptive')
        curr_model.fit(data_batch[0], data_batch[1])
        return curr_model

    if prev_model is None:
        raise ValueError("prev_model is required when is_initial is False")

    # partial_fit updates the estimator it is called on. Training a deep copy
    # keeps the earlier model intact, so each stored model is a distinct
    # snapshot rather than another reference to one shared object.
    curr_model = copy.deepcopy(prev_model)
    curr_model.partial_fit(data_batch[0], data_batch[1])
    return curr_model

In [20]:
# Train one model per batch: the first from scratch, each later one continuing
# from the model before it. After every training step, score the current model
# on all batches.
models = []
n_batches = len(batch)
for i in range(n_batches):
    is_first = (i == 0)
    previous = None if is_first else models[i - 1]
    curr_model = inc_train(is_first, dbatch[i], previous)
    models.append(curr_model)
    for j in range(n_batches):
        print(f"Results of data batch {j+1}  with model {i+1}")
        predict(curr_model, dbatch[j][0], batch[j])

Results of data batch 1  with model 1
MAE (1): 31.375669
Results of data batch 2  with model 1
MAE (1): 34.808634
Results of data batch 3  with model 1
MAE (1): 38.646756
Results of data batch 4  with model 1
MAE (1): 35.200892
Results of data batch 5  with model 1
MAE (1): 40.674452
Results of data batch 1  with model 2
MAE (1): 31.987584
Results of data batch 2  with model 2
MAE (1): 32.164228
Results of data batch 3  with model 2
MAE (1): 37.060248
Results of data batch 4  with model 2
MAE (1): 33.846276
Results of data batch 5  with model 2
MAE (1): 39.326317
Results of data batch 1  with model 3
MAE (1): 33.908375
Results of data batch 2  with model 3
MAE (1): 33.852020
Results of data batch 3  with model 3
MAE (1): 36.208299
Results of data batch 4  with model 3
MAE (1): 34.267437
Results of data batch 5  with model 3
MAE (1): 39.283138
Results of data batch 1  with model 4
MAE (1): 33.818133
Results of data batch 2  with model 4
MAE (1): 33.928787
Results of data batch 3  with m