In [1]:
# Library Imports.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Allows plots to appear directly in the notebook.
%matplotlib inline

from patsy import dmatrices
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score 

In [2]:
# Read CSV files into Data Frames:
weather = pd.read_csv('cleaned_2018_OpenWeather.csv', keep_default_na=True, delimiter=',', skipinitialspace=True)
trips = pd.read_csv('trips_2018.csv', keep_default_na=True, delimiter=',', skipinitialspace=True)

In [3]:
weather.shape

(26280, 29)

In [4]:
weather.dtypes

dt                       int64
dt_iso                  object
timezonetext            object
timezone                 int64
city_name               object
lat                    float64
lon                    float64
temp                   float64
visibility             float64
dew_point              float64
feels_like             float64
temp_min               float64
temp_max               float64
pressure                 int64
sea_level              float64
grnd_level             float64
humidity                 int64
wind_speed             float64
wind_deg                 int64
wind_gust              float64
rain_1h                float64
rain_3h                float64
snow_1h                float64
snow_3h                float64
clouds_all               int64
weather_id               int64
weather_main            object
weather_description     object
weather_icon            object
dtype: object

In [5]:
trips.shape

(1048575, 16)

In [6]:
trips.dtypes

DATASOURCE          object
DAYOFSERVICE        object
TRIPID               int64
LINEID              object
ROUTEID             object
DIRECTION            int64
PLANNEDTIME_ARR      int64
PLANNEDTIME_DEP      int64
ACTUALTIME_ARR     float64
ACTUALTIME_DEP     float64
BASIN               object
TENDERLOT          float64
SUPPRESSED         float64
JUSTIFICATIONID    float64
LASTUPDATE          object
NOTE                object
dtype: object

In [7]:
weather.head()

Unnamed: 0,dt,dt_iso,timezonetext,timezone,city_name,lat,lon,temp,visibility,dew_point,...,wind_gust,rain_1h,rain_3h,snow_1h,snow_3h,clouds_all,weather_id,weather_main,weather_description,weather_icon
0,1483228800,1/1/2017 0:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,,2.3,,,,75,501,Rain,moderate rain,10n
1,1483232400,1/1/2017 1:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,,1.51,,,,75,501,Rain,moderate rain,10n
2,1483236000,1/1/2017 2:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,,0.64,,,,75,500,Rain,light rain,10n
3,1483239600,1/1/2017 3:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,3.36,...,,0.17,,,,75,500,Rain,light rain,10n
4,1483243200,1/1/2017 4:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,2.42,...,,,,,,75,803,Clouds,broken clouds,04n


In [8]:
weather2 = weather.rename(columns={'dt_iso': 'DAYOFSERVICE'})

In [9]:
weather2.head()

Unnamed: 0,dt,DAYOFSERVICE,timezonetext,timezone,city_name,lat,lon,temp,visibility,dew_point,...,wind_gust,rain_1h,rain_3h,snow_1h,snow_3h,clouds_all,weather_id,weather_main,weather_description,weather_icon
0,1483228800,1/1/2017 0:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,,2.3,,,,75,501,Rain,moderate rain,10n
1,1483232400,1/1/2017 1:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,,1.51,,,,75,501,Rain,moderate rain,10n
2,1483236000,1/1/2017 2:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,,0.64,,,,75,500,Rain,light rain,10n
3,1483239600,1/1/2017 3:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,3.36,...,,0.17,,,,75,500,Rain,light rain,10n
4,1483243200,1/1/2017 4:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,2.42,...,,,,,,75,803,Clouds,broken clouds,04n


In [10]:
trips.head()

Unnamed: 0,DATASOURCE,DAYOFSERVICE,TRIPID,LINEID,ROUTEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,BASIN,TENDERLOT,SUPPRESSED,JUSTIFICATIONID,LASTUPDATE,NOTE
0,DB,2/7/2018 0:00,6253783,68,68_80,1,87245,84600,87524.0,84600.0,BasDef,,,,2/28/2018 12:05,",2967409,"
1,DB,2/7/2018 0:00,6262138,25B,25B_271,2,30517,26460,32752.0,,BasDef,,,,2/28/2018 12:05,",2580260,"
2,DB,2/7/2018 0:00,6254942,45A,45A_70,2,35512,32100,36329.0,32082.0,BasDef,,,,2/28/2018 12:05,",2448968,"
3,DB,2/7/2018 0:00,6259460,25A,25A_273,1,57261,54420,58463.0,54443.0,BasDef,,,,2/28/2018 12:05,",3094242,"
4,DB,2/7/2018 0:00,6253175,14,14_15,1,85383,81600,84682.0,81608.0,BasDef,,,,2/28/2018 12:05,",2526331,"


In [11]:
trips['planDep_time'] = (trips['PLANNEDTIME_DEP'] / 3600).round()

In [12]:
trips.head()

Unnamed: 0,DATASOURCE,DAYOFSERVICE,TRIPID,LINEID,ROUTEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,BASIN,TENDERLOT,SUPPRESSED,JUSTIFICATIONID,LASTUPDATE,NOTE,planDep_time
0,DB,2/7/2018 0:00,6253783,68,68_80,1,87245,84600,87524.0,84600.0,BasDef,,,,2/28/2018 12:05,",2967409,",24.0
1,DB,2/7/2018 0:00,6262138,25B,25B_271,2,30517,26460,32752.0,,BasDef,,,,2/28/2018 12:05,",2580260,",7.0
2,DB,2/7/2018 0:00,6254942,45A,45A_70,2,35512,32100,36329.0,32082.0,BasDef,,,,2/28/2018 12:05,",2448968,",9.0
3,DB,2/7/2018 0:00,6259460,25A,25A_273,1,57261,54420,58463.0,54443.0,BasDef,,,,2/28/2018 12:05,",3094242,",15.0
4,DB,2/7/2018 0:00,6253175,14,14_15,1,85383,81600,84682.0,81608.0,BasDef,,,,2/28/2018 12:05,",2526331,",23.0


In [13]:
trips[['xDOS', 'hour']] = trips.DAYOFSERVICE.str.split(' ', expand=True)
trips.head()

Unnamed: 0,DATASOURCE,DAYOFSERVICE,TRIPID,LINEID,ROUTEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,BASIN,TENDERLOT,SUPPRESSED,JUSTIFICATIONID,LASTUPDATE,NOTE,planDep_time,xDOS,hour
0,DB,2/7/2018 0:00,6253783,68,68_80,1,87245,84600,87524.0,84600.0,BasDef,,,,2/28/2018 12:05,",2967409,",24.0,2/7/2018,0:00
1,DB,2/7/2018 0:00,6262138,25B,25B_271,2,30517,26460,32752.0,,BasDef,,,,2/28/2018 12:05,",2580260,",7.0,2/7/2018,0:00
2,DB,2/7/2018 0:00,6254942,45A,45A_70,2,35512,32100,36329.0,32082.0,BasDef,,,,2/28/2018 12:05,",2448968,",9.0,2/7/2018,0:00
3,DB,2/7/2018 0:00,6259460,25A,25A_273,1,57261,54420,58463.0,54443.0,BasDef,,,,2/28/2018 12:05,",3094242,",15.0,2/7/2018,0:00
4,DB,2/7/2018 0:00,6253175,14,14_15,1,85383,81600,84682.0,81608.0,BasDef,,,,2/28/2018 12:05,",2526331,",23.0,2/7/2018,0:00


In [14]:
trips[['xMonth', 'xDay', 'xYear']] = trips.xDOS.str.split('/', expand=True)
trips.head()

Unnamed: 0,DATASOURCE,DAYOFSERVICE,TRIPID,LINEID,ROUTEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,...,SUPPRESSED,JUSTIFICATIONID,LASTUPDATE,NOTE,planDep_time,xDOS,hour,xMonth,xDay,xYear
0,DB,2/7/2018 0:00,6253783,68,68_80,1,87245,84600,87524.0,84600.0,...,,,2/28/2018 12:05,",2967409,",24.0,2/7/2018,0:00,2,7,2018
1,DB,2/7/2018 0:00,6262138,25B,25B_271,2,30517,26460,32752.0,,...,,,2/28/2018 12:05,",2580260,",7.0,2/7/2018,0:00,2,7,2018
2,DB,2/7/2018 0:00,6254942,45A,45A_70,2,35512,32100,36329.0,32082.0,...,,,2/28/2018 12:05,",2448968,",9.0,2/7/2018,0:00,2,7,2018
3,DB,2/7/2018 0:00,6259460,25A,25A_273,1,57261,54420,58463.0,54443.0,...,,,2/28/2018 12:05,",3094242,",15.0,2/7/2018,0:00,2,7,2018
4,DB,2/7/2018 0:00,6253175,14,14_15,1,85383,81600,84682.0,81608.0,...,,,2/28/2018 12:05,",2526331,",23.0,2/7/2018,0:00,2,7,2018


In [15]:
trips['planDep_time'] = np.where(trips['planDep_time'] == 24, 0, trips['planDep_time'])

In [16]:
trips.head()

Unnamed: 0,DATASOURCE,DAYOFSERVICE,TRIPID,LINEID,ROUTEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,...,SUPPRESSED,JUSTIFICATIONID,LASTUPDATE,NOTE,planDep_time,xDOS,hour,xMonth,xDay,xYear
0,DB,2/7/2018 0:00,6253783,68,68_80,1,87245,84600,87524.0,84600.0,...,,,2/28/2018 12:05,",2967409,",0.0,2/7/2018,0:00,2,7,2018
1,DB,2/7/2018 0:00,6262138,25B,25B_271,2,30517,26460,32752.0,,...,,,2/28/2018 12:05,",2580260,",7.0,2/7/2018,0:00,2,7,2018
2,DB,2/7/2018 0:00,6254942,45A,45A_70,2,35512,32100,36329.0,32082.0,...,,,2/28/2018 12:05,",2448968,",9.0,2/7/2018,0:00,2,7,2018
3,DB,2/7/2018 0:00,6259460,25A,25A_273,1,57261,54420,58463.0,54443.0,...,,,2/28/2018 12:05,",3094242,",15.0,2/7/2018,0:00,2,7,2018
4,DB,2/7/2018 0:00,6253175,14,14_15,1,85383,81600,84682.0,81608.0,...,,,2/28/2018 12:05,",2526331,",23.0,2/7/2018,0:00,2,7,2018


In [17]:
trips['planDep_time'] = trips['planDep_time'].astype('int')

In [18]:
trips.head()

Unnamed: 0,DATASOURCE,DAYOFSERVICE,TRIPID,LINEID,ROUTEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,...,SUPPRESSED,JUSTIFICATIONID,LASTUPDATE,NOTE,planDep_time,xDOS,hour,xMonth,xDay,xYear
0,DB,2/7/2018 0:00,6253783,68,68_80,1,87245,84600,87524.0,84600.0,...,,,2/28/2018 12:05,",2967409,",0,2/7/2018,0:00,2,7,2018
1,DB,2/7/2018 0:00,6262138,25B,25B_271,2,30517,26460,32752.0,,...,,,2/28/2018 12:05,",2580260,",7,2/7/2018,0:00,2,7,2018
2,DB,2/7/2018 0:00,6254942,45A,45A_70,2,35512,32100,36329.0,32082.0,...,,,2/28/2018 12:05,",2448968,",9,2/7/2018,0:00,2,7,2018
3,DB,2/7/2018 0:00,6259460,25A,25A_273,1,57261,54420,58463.0,54443.0,...,,,2/28/2018 12:05,",3094242,",15,2/7/2018,0:00,2,7,2018
4,DB,2/7/2018 0:00,6253175,14,14_15,1,85383,81600,84682.0,81608.0,...,,,2/28/2018 12:05,",2526331,",23,2/7/2018,0:00,2,7,2018


In [21]:
trips['planDep_time'] = trips['planDep_time'].astype('string')

In [22]:
trips['DOS_ID'] = trips.xMonth + trips.xDay + trips.xYear + trips.planDep_time
trips.head()

Unnamed: 0,DATASOURCE,DAYOFSERVICE,TRIPID,LINEID,ROUTEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,...,JUSTIFICATIONID,LASTUPDATE,NOTE,planDep_time,xDOS,hour,xMonth,xDay,xYear,DOS_ID
0,DB,2/7/2018 0:00,6253783,68,68_80,1,87245,84600,87524.0,84600.0,...,,2/28/2018 12:05,",2967409,",0,2/7/2018,0:00,2,7,2018,2720180
1,DB,2/7/2018 0:00,6262138,25B,25B_271,2,30517,26460,32752.0,,...,,2/28/2018 12:05,",2580260,",7,2/7/2018,0:00,2,7,2018,2720187
2,DB,2/7/2018 0:00,6254942,45A,45A_70,2,35512,32100,36329.0,32082.0,...,,2/28/2018 12:05,",2448968,",9,2/7/2018,0:00,2,7,2018,2720189
3,DB,2/7/2018 0:00,6259460,25A,25A_273,1,57261,54420,58463.0,54443.0,...,,2/28/2018 12:05,",3094242,",15,2/7/2018,0:00,2,7,2018,27201815
4,DB,2/7/2018 0:00,6253175,14,14_15,1,85383,81600,84682.0,81608.0,...,,2/28/2018 12:05,",2526331,",23,2/7/2018,0:00,2,7,2018,27201823


In [23]:
trips.drop('xDOS', axis=1, inplace=True)
trips.drop('hour', axis=1, inplace=True)
trips.drop('xMonth', axis=1, inplace=True)
trips.drop('xDay', axis=1, inplace=True)
trips.drop('xYear', axis=1, inplace=True)

trips.head()

Unnamed: 0,DATASOURCE,DAYOFSERVICE,TRIPID,LINEID,ROUTEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,BASIN,TENDERLOT,SUPPRESSED,JUSTIFICATIONID,LASTUPDATE,NOTE,planDep_time,DOS_ID
0,DB,2/7/2018 0:00,6253783,68,68_80,1,87245,84600,87524.0,84600.0,BasDef,,,,2/28/2018 12:05,",2967409,",0,2720180
1,DB,2/7/2018 0:00,6262138,25B,25B_271,2,30517,26460,32752.0,,BasDef,,,,2/28/2018 12:05,",2580260,",7,2720187
2,DB,2/7/2018 0:00,6254942,45A,45A_70,2,35512,32100,36329.0,32082.0,BasDef,,,,2/28/2018 12:05,",2448968,",9,2720189
3,DB,2/7/2018 0:00,6259460,25A,25A_273,1,57261,54420,58463.0,54443.0,BasDef,,,,2/28/2018 12:05,",3094242,",15,27201815
4,DB,2/7/2018 0:00,6253175,14,14_15,1,85383,81600,84682.0,81608.0,BasDef,,,,2/28/2018 12:05,",2526331,",23,27201823


In [24]:
weather2[['xDOS', 'hour']] = weather2.DAYOFSERVICE.str.split(' ', expand=True)
weather2.head()

Unnamed: 0,dt,DAYOFSERVICE,timezonetext,timezone,city_name,lat,lon,temp,visibility,dew_point,...,rain_3h,snow_1h,snow_3h,clouds_all,weather_id,weather_main,weather_description,weather_icon,xDOS,hour
0,1483228800,1/1/2017 0:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,,,,75,501,Rain,moderate rain,10n,1/1/2017,0:00
1,1483232400,1/1/2017 1:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,,,,75,501,Rain,moderate rain,10n,1/1/2017,1:00
2,1483236000,1/1/2017 2:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,,,,75,500,Rain,light rain,10n,1/1/2017,2:00
3,1483239600,1/1/2017 3:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,3.36,...,,,,75,500,Rain,light rain,10n,1/1/2017,3:00
4,1483243200,1/1/2017 4:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,2.42,...,,,,75,803,Clouds,broken clouds,04n,1/1/2017,4:00


In [25]:
weather2[['xMonth', 'xDay', 'xYear']] = weather2.xDOS.str.split('/', expand=True)
weather2.head()

Unnamed: 0,dt,DAYOFSERVICE,timezonetext,timezone,city_name,lat,lon,temp,visibility,dew_point,...,clouds_all,weather_id,weather_main,weather_description,weather_icon,xDOS,hour,xMonth,xDay,xYear
0,1483228800,1/1/2017 0:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,75,501,Rain,moderate rain,10n,1/1/2017,0:00,1,1,2017
1,1483232400,1/1/2017 1:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,75,501,Rain,moderate rain,10n,1/1/2017,1:00,1,1,2017
2,1483236000,1/1/2017 2:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,75,500,Rain,light rain,10n,1/1/2017,2:00,1,1,2017
3,1483239600,1/1/2017 3:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,3.36,...,75,500,Rain,light rain,10n,1/1/2017,3:00,1,1,2017
4,1483243200,1/1/2017 4:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,2.42,...,75,803,Clouds,broken clouds,04n,1/1/2017,4:00,1,1,2017


In [27]:
weather2[['xHour', 'xMin']] = weather2.hour.str.split(':', expand=True)
weather2.head()

Unnamed: 0,dt,DAYOFSERVICE,timezonetext,timezone,city_name,lat,lon,temp,visibility,dew_point,...,weather_main,weather_description,weather_icon,xDOS,hour,xMonth,xDay,xYear,xHour,xMin
0,1483228800,1/1/2017 0:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,Rain,moderate rain,10n,1/1/2017,0:00,1,1,2017,0,0
1,1483232400,1/1/2017 1:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,Rain,moderate rain,10n,1/1/2017,1:00,1,1,2017,1,0
2,1483236000,1/1/2017 2:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,Rain,light rain,10n,1/1/2017,2:00,1,1,2017,2,0
3,1483239600,1/1/2017 3:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,3.36,...,Rain,light rain,10n,1/1/2017,3:00,1,1,2017,3,0
4,1483243200,1/1/2017 4:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,2.42,...,Clouds,broken clouds,04n,1/1/2017,4:00,1,1,2017,4,0


In [28]:
weather2['DOS_ID'] = weather2.xMonth + weather2.xDay + weather2.xYear + weather2.xHour
weather2.head()

Unnamed: 0,dt,DAYOFSERVICE,timezonetext,timezone,city_name,lat,lon,temp,visibility,dew_point,...,weather_description,weather_icon,xDOS,hour,xMonth,xDay,xYear,xHour,xMin,DOS_ID
0,1483228800,1/1/2017 0:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,moderate rain,10n,1/1/2017,0:00,1,1,2017,0,0,1120170
1,1483232400,1/1/2017 1:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,moderate rain,10n,1/1/2017,1:00,1,1,2017,1,0,1120171
2,1483236000,1/1/2017 2:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,light rain,10n,1/1/2017,2:00,1,1,2017,2,0,1120172
3,1483239600,1/1/2017 3:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,3.36,...,light rain,10n,1/1/2017,3:00,1,1,2017,3,0,1120173
4,1483243200,1/1/2017 4:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,2.42,...,broken clouds,04n,1/1/2017,4:00,1,1,2017,4,0,1120174


In [29]:
weather2.drop('xDOS', axis=1, inplace=True)
weather2.drop('hour', axis=1, inplace=True)
weather2.drop('xMonth', axis=1, inplace=True)
weather2.drop('xDay', axis=1, inplace=True)
weather2.drop('xYear', axis=1, inplace=True)
weather2.drop('xHour', axis=1, inplace=True)
weather2.drop('xMin', axis=1, inplace=True)

weather2.head()

Unnamed: 0,dt,DAYOFSERVICE,timezonetext,timezone,city_name,lat,lon,temp,visibility,dew_point,...,rain_1h,rain_3h,snow_1h,snow_3h,clouds_all,weather_id,weather_main,weather_description,weather_icon,DOS_ID
0,1483228800,1/1/2017 0:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,2.3,,,,75,501,Rain,moderate rain,10n,1120170
1,1483232400,1/1/2017 1:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,1.51,,,,75,501,Rain,moderate rain,10n,1120171
2,1483236000,1/1/2017 2:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,0.64,,,,75,500,Rain,light rain,10n,1120172
3,1483239600,1/1/2017 3:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,3.36,...,0.17,,,,75,500,Rain,light rain,10n,1120173
4,1483243200,1/1/2017 4:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,2.42,...,,,,,75,803,Clouds,broken clouds,04n,1120174


In [30]:
tripWeather = pd.merge(trips, weather2, how="left", on=["DOS_ID"])

In [31]:
tripWeather.head()

Unnamed: 0,DATASOURCE,DAYOFSERVICE_x,TRIPID,LINEID,ROUTEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,...,wind_gust,rain_1h,rain_3h,snow_1h,snow_3h,clouds_all,weather_id,weather_main,weather_description,weather_icon
0,DB,2/7/2018 0:00,6253783,68,68_80,1,87245,84600,87524.0,84600.0,...,,,,,,20,801,Clouds,few clouds,02n
1,DB,2/7/2018 0:00,6262138,25B,25B_271,2,30517,26460,32752.0,,...,,,,,,20,801,Clouds,few clouds,02n
2,DB,2/7/2018 0:00,6254942,45A,45A_70,2,35512,32100,36329.0,32082.0,...,,,,,,75,803,Clouds,broken clouds,04d
3,DB,2/7/2018 0:00,6259460,25A,25A_273,1,57261,54420,58463.0,54443.0,...,,0.2,,,,90,300,Drizzle,light intensity drizzle,09d
4,DB,2/7/2018 0:00,6253175,14,14_15,1,85383,81600,84682.0,81608.0,...,,,,,,75,300,Drizzle,light intensity drizzle,09n


In [32]:
# Send merged df to csv:
tripWeather.to_csv('tripWeather.csv', index=False)