In [1]:
# Library Imports.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns

# Allows plots to appear directly in the notebook.
%matplotlib inline

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score 

In [2]:
# Read the source CSV through a chunked reader (1,000,000 rows per piece)
# instead of parsing the whole file in one call.
chunk = pd.read_csv(
    'v2_dtypes_LT_2018.csv',
    sep=',',
    chunksize=1000000,
)

# Consume the reader and stitch every piece into a single dataframe.
leave = pd.concat(chunk)

In [3]:
leave.head()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
0,01-JAN-18 00:00:00,5972116,12,119,48030,48030,48030,48030,2693211
1,01-JAN-18 00:00:00,5966674,12,119,54001,54001,54001,54001,2693267
2,01-JAN-18 00:00:00,5959105,12,119,60001,60001,60001,60001,2693263
3,01-JAN-18 00:00:00,5966888,12,119,58801,58801,58801,58801,2693284
4,01-JAN-18 00:00:00,5965960,12,119,56401,56401,56401,56401,2693209


In [4]:
leave.shape

(116360526, 9)

In [5]:
leave.dtypes

DAYOFSERVICE       object
TRIPID              int64
PROGRNUMBER         int64
STOPPOINTID         int64
PLANNEDTIME_ARR     int64
PLANNEDTIME_DEP     int64
ACTUALTIME_ARR      int64
ACTUALTIME_DEP      int64
VEHICLEID           int64
dtype: object

<h3>Changing Data Types to make size of leave dataframe smaller</h3>

In [6]:
# Store TRIPID as 32-bit integers instead of int64 to shrink the frame
# (assumes every trip id fits in int32 -- TODO confirm against the full column).
leave['TRIPID'] = leave['TRIPID'].astype(np.int32)

In [7]:
# Store PROGRNUMBER as 16-bit integers
# (assumes no value exceeds the int16 maximum of 32767 -- TODO confirm).
leave['PROGRNUMBER'] = leave['PROGRNUMBER'].astype(np.int16)

In [8]:
# Store STOPPOINTID as 16-bit integers
# (assumes no stop id exceeds the int16 maximum of 32767 -- TODO confirm).
leave['STOPPOINTID'] = leave['STOPPOINTID'].astype(np.int16)

In [9]:
# Store PLANNEDTIME_ARR as 32-bit integers instead of int64.
leave['PLANNEDTIME_ARR'] = leave['PLANNEDTIME_ARR'].astype(np.int32)

In [10]:
# Store PLANNEDTIME_DEP as 32-bit integers instead of int64.
leave['PLANNEDTIME_DEP'] = leave['PLANNEDTIME_DEP'].astype(np.int32)

In [11]:
# Downcast ACTUALTIME_ARR to int32 from its OWN values.
# The right-hand side must read ACTUALTIME_ARR: reading PLANNEDTIME_DEP here would
# overwrite every recorded arrival time with the planned departure time.
leave['ACTUALTIME_ARR'] = leave['ACTUALTIME_ARR'].astype('int32')

In [12]:
# Downcast ACTUALTIME_DEP to int32 from its OWN values.
# The right-hand side must read ACTUALTIME_DEP: reading PLANNEDTIME_DEP here would
# overwrite every recorded departure time with the planned departure time.
leave['ACTUALTIME_DEP'] = leave['ACTUALTIME_DEP'].astype('int32')

In [13]:
# Store VEHICLEID as 32-bit integers instead of int64
# (assumes every vehicle id fits in int32 -- TODO confirm against the full column).
leave['VEHICLEID'] = leave['VEHICLEID'].astype(np.int32)

In [14]:
leave.dtypes

DAYOFSERVICE       object
TRIPID              int32
PROGRNUMBER         int16
STOPPOINTID         int16
PLANNEDTIME_ARR     int32
PLANNEDTIME_DEP     int32
ACTUALTIME_ARR      int32
ACTUALTIME_DEP      int32
VEHICLEID           int32
dtype: object

In [15]:
leave.head()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
0,01-JAN-18 00:00:00,5972116,12,119,48030,48030,48030,48030,2693211
1,01-JAN-18 00:00:00,5966674,12,119,54001,54001,54001,54001,2693267
2,01-JAN-18 00:00:00,5959105,12,119,60001,60001,60001,60001,2693263
3,01-JAN-18 00:00:00,5966888,12,119,58801,58801,58801,58801,2693284
4,01-JAN-18 00:00:00,5965960,12,119,56401,56401,56401,56401,2693209


## Sort the dataframe by Trip ID

In [16]:
# Order the rows by ascending TRIPID so that all rows of a trip sit next to
# each other before the frame is cut into parts. The original `leave` is untouched.
leave_sort = leave.sort_values(by='TRIPID', ascending=True)

In [17]:
leave_sort.shape

(116360526, 9)

In [18]:
leave_sort.head()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
1580830,06-JAN-18 00:00:00,5955221,69,2861,42184,42184,42184,42184,2693254
1578400,06-JAN-18 00:00:00,5955221,65,2857,42001,42001,42001,42001,2693254
9777438,06-JAN-18 00:00:00,5955221,74,2865,42457,42457,42457,42457,2693254
1572261,06-JAN-18 00:00:00,5955221,52,1045,41336,41336,41336,41336,2693254
1578368,06-JAN-18 00:00:00,5955221,64,2856,41956,41956,41956,41956,2693254


## Splitting the Sorted Dataframe into Six Dataframes

In [19]:
# Cut the sorted frame into six consecutive, near-equal row ranges.
# Explicit positional slicing is used instead of np.array_split because NumPy
# splits a DataFrame via DataFrame.swapaxes, which recent pandas versions deprecate.
# The sizing rule mirrors np.array_split: the first `extra_rows` parts get one more row.
N_PARTS = 6
base_rows, extra_rows = divmod(len(leave_sort), N_PARTS)

# Row offsets where each part starts/ends: edges[i] .. edges[i + 1].
edges = [0]
for part_no in range(N_PARTS):
    edges.append(edges[-1] + base_rows + (1 if part_no < extra_rows else 0))

# .copy() makes every part an independent frame, so later in-place edits of a
# part cannot act on (or warn about) a slice of leave_sort.
leave1, leave2, leave3, leave4, leave5, leave6 = (
    leave_sort.iloc[start:stop].copy()
    for start, stop in zip(edges[:-1], edges[1:])
)

In [20]:
leave1.shape

(19393421, 9)

In [21]:
leave1.tail()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
18749270,26-FEB-18 00:00:00,6288438,10,4692,26244,26244,26244,26244,2868356
18081673,28-FEB-18 00:00:00,6288438,82,2622,31943,31943,31943,31943,1932364
18253354,28-FEB-18 00:00:00,6288438,38,619,27948,27948,27948,27948,1932364
17818731,27-FEB-18 00:00:00,6288438,5,1258,26035,26035,26035,26035,2868356
18232791,28-FEB-18 00:00:00,6288438,29,665,27305,27305,27305,27305,1932364


In [22]:
leave2.shape

(19393421, 9)

In [23]:
leave2.head()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
19870167,28-FEB-18 00:00:00,6288438,62,2332,30444,30444,30444,30444,1932364
18343791,27-FEB-18 00:00:00,6288438,52,1406,29759,29759,29759,29759,2868356
18731627,26-FEB-18 00:00:00,6288438,5,1258,26035,26035,26035,26035,2868356
18306170,27-FEB-18 00:00:00,6288438,87,2669,32127,32127,32127,32127,2868356
18262240,28-FEB-18 00:00:00,6288438,42,4495,28659,28659,28659,28659,1932364


In [24]:
leave2.tail()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
37491553,30-APR-18 00:00:00,6651088,8,2676,27350,27350,27350,27350,2172259
44644531,04-MAY-18 00:00:00,6651088,30,1989,28628,28628,28628,28628,2693290
44001571,02-MAY-18 00:00:00,6651088,32,1992,28787,28787,28787,28787,2868347
38832489,30-APR-18 00:00:00,6651088,79,987,32378,32378,32378,32378,2172259
43598285,01-MAY-18 00:00:00,6651088,62,1595,31483,31483,31483,31483,2172300


In [25]:
leave3.shape

(19393421, 9)

In [26]:
leave3.head()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
37497811,30-APR-18 00:00:00,6651088,12,2121,27558,27558,27558,27558,2172259
44015990,02-MAY-18 00:00:00,6651088,27,2716,28392,28392,28392,28392,2868347
43602310,01-MAY-18 00:00:00,6651088,67,1599,31698,31698,31698,31698,2172300
44271498,03-MAY-18 00:00:00,6651088,1,4795,27000,27000,27000,27000,1932309
37516259,30-APR-18 00:00:00,6651088,34,1994,28980,28980,28980,28980,2172259


In [27]:
leave3.tail()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
56599825,27-JUN-18 00:00:00,7110198,30,762,42211,42211,42211,42211,1000582
56623323,27-JUN-18 00:00:00,7110198,5,811,40004,40004,40004,40004,1000582
57404784,29-JUN-18 00:00:00,7110198,36,435,42551,42551,42551,42551,1000296
58879315,28-JUN-18 00:00:00,7110198,53,2031,43473,43473,43473,43473,1000578
57629768,29-JUN-18 00:00:00,7110198,24,756,41820,41820,41820,41820,1000296


In [28]:
leave4.shape

(19393421, 9)

In [29]:
leave4.head()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
56345236,26-JUN-18 00:00:00,7110198,48,2021,43116,43116,43116,43116,1000302
58916136,27-JUN-18 00:00:00,7110198,14,6059,40745,40745,40745,40745,1000582
56345138,26-JUN-18 00:00:00,7110198,33,2008,42380,42380,42380,42380,1000302
58890403,29-JUN-18 00:00:00,7110198,4,810,39962,39962,39962,39962,1000296
56807793,27-JUN-18 00:00:00,7110198,20,846,41588,41588,41588,41588,1000582


In [30]:
leave4.tail()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
76131694,27-AUG-18 00:00:00,7502493,3,229,54117,54117,54117,54117,1932370
77843761,31-AUG-18 00:00:00,7502493,11,4432,54591,54591,54591,54591,1932370
76906162,29-AUG-18 00:00:00,7502493,38,2804,57009,57009,57009,57009,1932370
77848485,31-AUG-18 00:00:00,7502493,16,46,54812,54812,54812,54812,1932370
76476803,28-AUG-18 00:00:00,7502493,8,1642,54456,54456,54456,54456,1932370


In [31]:
leave5.shape

(19393421, 9)

In [32]:
leave5.head()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
76491067,28-AUG-18 00:00:00,7502493,14,7603,54710,54710,54710,54710,1932370
76322295,27-AUG-18 00:00:00,7502493,17,47,54891,54891,54891,54891,1932370
76510200,28-AUG-18 00:00:00,7502493,25,350,56029,56029,56029,56029,1932370
76329266,28-AUG-18 00:00:00,7502493,5,230,54257,54257,54257,54257,1932370
76117608,27-AUG-18 00:00:00,7502493,33,390,56703,56703,56703,56703,1932370


In [33]:
leave5.tail()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
85334676,11-SEP-18 00:00:00,8089321,16,2154,31045,31045,31045,31045,2693187
85334677,11-SEP-18 00:00:00,8089321,17,6245,31078,31078,31078,31078,2693187
85334678,11-SEP-18 00:00:00,8089321,18,4671,31158,31158,31158,31158,2693187
85334679,11-SEP-18 00:00:00,8089321,19,4672,31231,31231,31231,31231,2693187
85339184,11-SEP-18 00:00:00,8089321,82,105,36003,36003,36003,36003,2693187


In [34]:
leave6.shape

(19393421, 9)

In [35]:
leave6.head()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
85498785,10-SEP-18 00:00:00,8089321,52,2002,33600,33600,33600,33600,2693187
85498783,10-SEP-18 00:00:00,8089321,50,1999,33376,33376,33376,33376,2693187
85334680,11-SEP-18 00:00:00,8089321,20,3465,31298,31298,31298,31298,2693187
85498782,10-SEP-18 00:00:00,8089321,49,1998,33285,33285,33285,33285,2693187
85334681,11-SEP-18 00:00:00,8089321,23,2159,31432,31432,31432,31432,2693187


In [36]:
leave6.tail()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
113795534,23-DEC-18 00:00:00,8592207,64,3913,67391,67391,67391,67391,3265681
114168242,26-DEC-18 00:00:00,8592207,43,3947,66487,66487,66487,66487,3265682
113781124,23-DEC-18 00:00:00,8592207,13,1450,65040,65040,65040,65040,3265681
115865288,30-DEC-18 00:00:00,8592207,27,2213,65798,65798,65798,65798,2868369
113764483,23-DEC-18 00:00:00,8592207,3,494,64063,64063,64063,64063,3265681


## Moving Trips That Were Split Across Two Dataframes into a Single Dataframe

<h3>1 to 2:</h3>

In [37]:
leave1.TRIPID.value_counts()[6288438]

93

In [38]:
leave2.TRIPID.value_counts()[6288438]

151

In [39]:
# Trip 6288438 has rows in both leave1 and leave2; pick out the ones held in leave1.
tripMove = leave1.loc[leave1['TRIPID'] == 6288438]

# Number of rows (and columns) about to be moved.
tripMove.shape

(93, 9)

In [40]:
leave2

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
19870167,28-FEB-18 00:00:00,6288438,62,2332,30444,30444,30444,30444,1932364
18343791,27-FEB-18 00:00:00,6288438,52,1406,29759,29759,29759,29759,2868356
18731627,26-FEB-18 00:00:00,6288438,5,1258,26035,26035,26035,26035,2868356
18306170,27-FEB-18 00:00:00,6288438,87,2669,32127,32127,32127,32127,2868356
18262240,28-FEB-18 00:00:00,6288438,42,4495,28659,28659,28659,28659,1932364
...,...,...,...,...,...,...,...,...,...
37491553,30-APR-18 00:00:00,6651088,8,2676,27350,27350,27350,27350,2172259
44644531,04-MAY-18 00:00:00,6651088,30,1989,28628,28628,28628,28628,2693290
44001571,02-MAY-18 00:00:00,6651088,32,1992,28787,28787,28787,28787,2868347
38832489,30-APR-18 00:00:00,6651088,79,987,32378,32378,32378,32378,2172259


In [41]:
# Add the rows held in tripMove to the end of leave2 (original index labels are kept).
# pd.concat is used because DataFrame.append is deprecated and removed in pandas 2.0.
leave2 = pd.concat([leave2, tripMove])

  leave2 = leave2.append(tripMove)


In [42]:
leave2.shape

(19393514, 9)

In [43]:
# Remove trip 6288438 from leave1 now that its rows have been copied elsewhere.
# Rows are dropped by index label, in place.
leave1.drop(index=leave1.index[leave1['TRIPID'] == 6288438], inplace=True)

In [44]:
leave1.shape

(19393328, 9)

In [45]:
leave1.tail()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
19969441,03-MAR-18 00:00:00,6288425,19,2062,82805,82805,82805,82805,2406873
27931010,03-MAR-18 00:00:00,6288425,52,795,84924,84924,84924,84924,2406873
19969764,03-MAR-18 00:00:00,6288425,10,4569,82352,82352,82352,82352,2406873
19969779,03-MAR-18 00:00:00,6288425,11,7658,82364,82364,82364,82364,2406873
27931170,03-MAR-18 00:00:00,6288425,4,2042,82124,82124,82124,82124,2406873


<h3>2 to 3:</h3>

In [46]:
leave2.TRIPID.value_counts()[6651088]

135

In [47]:
leave3.TRIPID.value_counts()[6651088]

283

In [48]:
# Trip 6651088 has rows in both leave2 and leave3; pick out the ones held in leave2.
tripMove = leave2.loc[leave2['TRIPID'] == 6651088]

# Number of rows (and columns) about to be moved.
tripMove.shape

(135, 9)

In [49]:
leave3

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
37497811,30-APR-18 00:00:00,6651088,12,2121,27558,27558,27558,27558,2172259
44015990,02-MAY-18 00:00:00,6651088,27,2716,28392,28392,28392,28392,2868347
43602310,01-MAY-18 00:00:00,6651088,67,1599,31698,31698,31698,31698,2172300
44271498,03-MAY-18 00:00:00,6651088,1,4795,27000,27000,27000,27000,1932309
37516259,30-APR-18 00:00:00,6651088,34,1994,28980,28980,28980,28980,2172259
...,...,...,...,...,...,...,...,...,...
56599825,27-JUN-18 00:00:00,7110198,30,762,42211,42211,42211,42211,1000582
56623323,27-JUN-18 00:00:00,7110198,5,811,40004,40004,40004,40004,1000582
57404784,29-JUN-18 00:00:00,7110198,36,435,42551,42551,42551,42551,1000296
58879315,28-JUN-18 00:00:00,7110198,53,2031,43473,43473,43473,43473,1000578


In [50]:
# Add the rows held in tripMove to the end of leave3 (original index labels are kept).
# pd.concat is used because DataFrame.append is deprecated and removed in pandas 2.0.
leave3 = pd.concat([leave3, tripMove])

  leave3 = leave3.append(tripMove)


In [51]:
leave3.shape

(19393556, 9)

In [52]:
# Remove trip 6651088 from leave2 now that its rows have been copied elsewhere.
# Rows are dropped by index label, in place.
leave2.drop(index=leave2.index[leave2['TRIPID'] == 6651088], inplace=True)

In [53]:
leave2.shape

(19393379, 9)

In [54]:
leave2.tail()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
18749270,26-FEB-18 00:00:00,6288438,10,4692,26244,26244,26244,26244,2868356
18081673,28-FEB-18 00:00:00,6288438,82,2622,31943,31943,31943,31943,1932364
18253354,28-FEB-18 00:00:00,6288438,38,619,27948,27948,27948,27948,1932364
17818731,27-FEB-18 00:00:00,6288438,5,1258,26035,26035,26035,26035,2868356
18232791,28-FEB-18 00:00:00,6288438,29,665,27305,27305,27305,27305,1932364


<h3>3 to 4:</h3>

In [55]:
leave3.TRIPID.value_counts()[7110198]

57

In [56]:
leave4.TRIPID.value_counts()[7110198]

237

In [57]:
# Trip 7110198 has rows in both leave3 and leave4; pick out the ones held in leave3.
tripMove = leave3.loc[leave3['TRIPID'] == 7110198]

# Number of rows (and columns) about to be moved.
tripMove.shape

(57, 9)

In [58]:
leave4

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
56345236,26-JUN-18 00:00:00,7110198,48,2021,43116,43116,43116,43116,1000302
58916136,27-JUN-18 00:00:00,7110198,14,6059,40745,40745,40745,40745,1000582
56345138,26-JUN-18 00:00:00,7110198,33,2008,42380,42380,42380,42380,1000302
58890403,29-JUN-18 00:00:00,7110198,4,810,39962,39962,39962,39962,1000296
56807793,27-JUN-18 00:00:00,7110198,20,846,41588,41588,41588,41588,1000582
...,...,...,...,...,...,...,...,...,...
76131694,27-AUG-18 00:00:00,7502493,3,229,54117,54117,54117,54117,1932370
77843761,31-AUG-18 00:00:00,7502493,11,4432,54591,54591,54591,54591,1932370
76906162,29-AUG-18 00:00:00,7502493,38,2804,57009,57009,57009,57009,1932370
77848485,31-AUG-18 00:00:00,7502493,16,46,54812,54812,54812,54812,1932370


In [59]:
# Add the rows held in tripMove to the end of leave4 (original index labels are kept).
# pd.concat is used because DataFrame.append is deprecated and removed in pandas 2.0.
leave4 = pd.concat([leave4, tripMove])

  leave4 = leave4.append(tripMove)


In [60]:
leave4.shape

(19393478, 9)

In [61]:
# Remove trip 7110198 from leave3 now that its rows have been copied elsewhere.
# Rows are dropped by index label, in place.
leave3.drop(index=leave3.index[leave3['TRIPID'] == 7110198], inplace=True)

In [62]:
leave3.shape

(19393499, 9)

In [63]:
leave3.tail()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
37491553,30-APR-18 00:00:00,6651088,8,2676,27350,27350,27350,27350,2172259
44644531,04-MAY-18 00:00:00,6651088,30,1989,28628,28628,28628,28628,2693290
44001571,02-MAY-18 00:00:00,6651088,32,1992,28787,28787,28787,28787,2868347
38832489,30-APR-18 00:00:00,6651088,79,987,32378,32378,32378,32378,2172259
43598285,01-MAY-18 00:00:00,6651088,62,1595,31483,31483,31483,31483,2172300


<h3>4 to 5:</h3>

In [64]:
leave4.TRIPID.value_counts()[7502493]

77

In [65]:
leave5.TRIPID.value_counts()[7502493]

112

In [66]:
# Trip 7502493 has rows in both leave4 and leave5; pick out the ones held in leave4.
tripMove = leave4.loc[leave4['TRIPID'] == 7502493]

# Number of rows (and columns) about to be moved.
tripMove.shape

(77, 9)

In [67]:
leave5

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
76491067,28-AUG-18 00:00:00,7502493,14,7603,54710,54710,54710,54710,1932370
76322295,27-AUG-18 00:00:00,7502493,17,47,54891,54891,54891,54891,1932370
76510200,28-AUG-18 00:00:00,7502493,25,350,56029,56029,56029,56029,1932370
76329266,28-AUG-18 00:00:00,7502493,5,230,54257,54257,54257,54257,1932370
76117608,27-AUG-18 00:00:00,7502493,33,390,56703,56703,56703,56703,1932370
...,...,...,...,...,...,...,...,...,...
85334676,11-SEP-18 00:00:00,8089321,16,2154,31045,31045,31045,31045,2693187
85334677,11-SEP-18 00:00:00,8089321,17,6245,31078,31078,31078,31078,2693187
85334678,11-SEP-18 00:00:00,8089321,18,4671,31158,31158,31158,31158,2693187
85334679,11-SEP-18 00:00:00,8089321,19,4672,31231,31231,31231,31231,2693187


In [68]:
# Add the rows held in tripMove to the end of leave5 (original index labels are kept).
# pd.concat is used because DataFrame.append is deprecated and removed in pandas 2.0.
leave5 = pd.concat([leave5, tripMove])

  leave5 = leave5.append(tripMove)


In [69]:
leave5.shape

(19393498, 9)

In [70]:
# Remove trip 7502493 from leave4 now that its rows have been copied elsewhere.
# Rows are dropped by index label, in place.
leave4.drop(index=leave4.index[leave4['TRIPID'] == 7502493], inplace=True)

In [71]:
leave4.shape

(19393401, 9)

In [72]:
leave4.tail()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
56599825,27-JUN-18 00:00:00,7110198,30,762,42211,42211,42211,42211,1000582
56623323,27-JUN-18 00:00:00,7110198,5,811,40004,40004,40004,40004,1000582
57404784,29-JUN-18 00:00:00,7110198,36,435,42551,42551,42551,42551,1000296
58879315,28-JUN-18 00:00:00,7110198,53,2031,43473,43473,43473,43473,1000578
57629768,29-JUN-18 00:00:00,7110198,24,756,41820,41820,41820,41820,1000296


<h3>6 to 5:</h3>

In [73]:
leave5.TRIPID.value_counts()[8089321]

742

In [74]:
leave6.TRIPID.value_counts()[8089321]

128

In [75]:
# Trip 8089321 has rows in both leave5 and leave6; pick out the ones held in leave6.
tripMove = leave6.loc[leave6['TRIPID'] == 8089321]

# Number of rows (and columns) about to be moved.
tripMove.shape

(128, 9)

In [76]:
leave5

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
76491067,28-AUG-18 00:00:00,7502493,14,7603,54710,54710,54710,54710,1932370
76322295,27-AUG-18 00:00:00,7502493,17,47,54891,54891,54891,54891,1932370
76510200,28-AUG-18 00:00:00,7502493,25,350,56029,56029,56029,56029,1932370
76329266,28-AUG-18 00:00:00,7502493,5,230,54257,54257,54257,54257,1932370
76117608,27-AUG-18 00:00:00,7502493,33,390,56703,56703,56703,56703,1932370
...,...,...,...,...,...,...,...,...,...
76131694,27-AUG-18 00:00:00,7502493,3,229,54117,54117,54117,54117,1932370
77843761,31-AUG-18 00:00:00,7502493,11,4432,54591,54591,54591,54591,1932370
76906162,29-AUG-18 00:00:00,7502493,38,2804,57009,57009,57009,57009,1932370
77848485,31-AUG-18 00:00:00,7502493,16,46,54812,54812,54812,54812,1932370


In [77]:
# Add the rows held in tripMove to the end of leave5 (original index labels are kept).
# Note the direction: this step extends leave5, not leave6.
# pd.concat is used because DataFrame.append is deprecated and removed in pandas 2.0.
leave5 = pd.concat([leave5, tripMove])

  leave5 = leave5.append(tripMove)


In [78]:
leave5.shape

(19393626, 9)

In [79]:
# Remove trip 8089321 from leave6 now that its rows have been copied elsewhere.
# Rows are dropped by index label, in place.
leave6.drop(index=leave6.index[leave6['TRIPID'] == 8089321], inplace=True)

In [80]:
leave6.shape

(19393293, 9)

In [81]:
leave6.tail()

Unnamed: 0,DAYOFSERVICE,TRIPID,PROGRNUMBER,STOPPOINTID,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,VEHICLEID
113795534,23-DEC-18 00:00:00,8592207,64,3913,67391,67391,67391,67391,3265681
114168242,26-DEC-18 00:00:00,8592207,43,3947,66487,66487,66487,66487,3265682
113781124,23-DEC-18 00:00:00,8592207,13,1450,65040,65040,65040,65040,3265681
115865288,30-DEC-18 00:00:00,8592207,27,2213,65798,65798,65798,65798,2868369
113764483,23-DEC-18 00:00:00,8592207,3,494,64063,64063,64063,64063,3265681


## Send Parts to CSVs

In [82]:
# Write leave1 to its own CSV file; the dataframe index is not written.
leave1.to_csv(path_or_buf='v3_leave1_LT_2018.csv', index=False)

## Continued in 'v3a_leave1_data_prep.ipynb'

In [83]:
# Write leave2 to its own CSV file; the dataframe index is not written.
leave2.to_csv(path_or_buf='v3_leave2_LT_2018.csv', index=False)

## Continued in 'v3a_leave2_data_prep.ipynb'

In [84]:
# Write leave3 to its own CSV file; the dataframe index is not written.
leave3.to_csv(path_or_buf='v3_leave3_LT_2018.csv', index=False)

## Continued in 'v3a_leave3_data_prep.ipynb'

In [85]:
# Write leave4 to its own CSV file; the dataframe index is not written.
leave4.to_csv(path_or_buf='v3_leave4_LT_2018.csv', index=False)

## Continued in 'v3a_leave4_data_prep.ipynb'

In [86]:
# Write leave5 to its own CSV file; the dataframe index is not written.
leave5.to_csv(path_or_buf='v3_leave5_LT_2018.csv', index=False)

## Continued in 'v3a_leave5_data_prep.ipynb'

In [87]:
# Write leave6 to its own CSV file; the dataframe index is not written.
leave6.to_csv(path_or_buf='v3_leave6_LT_2018.csv', index=False)

## Continued in 'v3a_leave6_data_prep.ipynb'