# Joining Weather Data
Now that we have collected all of our data, we need to join it into CSVs so we can use it for our model.

In [116]:
import pandas as pd
from datetime import datetime
import pytz
%matplotlib inline

In [117]:
pdf = pd.read_csv('data/utah_positive_examples.csv')
ndf = pd.read_csv('data/utah_negative_examples.csv')
wdf = pd.read_csv('data/utah_weather_2010-2018_grouped.csv')

In [118]:
pdf.head()

Unnamed: 0,timestamp,segment_id,station_id,hour,weekday,month,pre_dir,street_type,suf_dir,one_way,...,aadt,sinuosity,euclidean_length,segment_length,near_major_road,road_orient_approx,at_intersection,near_billboard,accident_counts,target
0,2016-11-26 10:00:00,1,72572024127,10,5,11,N,,,0,...,16689.0,1.0,110.198977,110.198977,0,1.022803,1,0,4.0,1
1,2017-10-16 16:00:00,1,72572024127,16,0,10,N,,,0,...,16689.0,1.0,110.198977,110.198977,0,1.022803,1,0,4.0,1
2,2010-01-16 15:00:00,1,72572024127,15,5,1,N,,,0,...,16689.0,1.0,110.198977,110.198977,0,1.022803,1,0,4.0,1
3,2013-12-16 15:00:00,1,72572024127,15,0,12,N,,,0,...,16689.0,1.0,110.198977,110.198977,0,1.022803,1,0,4.0,1
4,2015-09-04 21:00:00,2,72572024127,21,4,9,N,,,0,...,16689.0,1.0,87.835598,87.835598,0,1.187714,1,0,7.0,1


In [119]:
pdf['timestamp'] = pd.DatetimeIndex(pd.to_datetime(pdf.timestamp))\
    .tz_localize('US/Mountain',ambiguous='NaT',errors='coerce').tz_convert('utc')
ndf['timestamp'] = pd.DatetimeIndex(pd.to_datetime(ndf.timestamp))\
    .tz_localize('US/Mountain',ambiguous='NaT',errors='coerce').tz_convert('utc')
wdf['timestamp'] = pd.DatetimeIndex(pd.to_datetime(wdf.timestamp))\
    .tz_localize('utc')

In [120]:
def add_join_key(df):
    df['join_key'] = df.station_id.map(int).map(str)+df.timestamp.map(datetime.isoformat)
    df = df.set_index('join_key')
    return df

In [121]:
pdf = add_join_key(pdf)
ndf = add_join_key(ndf)
wdf = add_join_key(wdf)

In [122]:
pdf = pdf.join(wdf.drop(columns=['timestamp', 'station_id']))
ndf = ndf.join(wdf.drop(columns=['timestamp', 'station_id']))

In [123]:
tset = pdf.append(ndf)

In [124]:
tset[tset.target==0].head(50)

Unnamed: 0_level_0,timestamp,segment_id,station_id,hour,weekday,month,pre_dir,street_type,suf_dir,one_way,...,visibility,temperature,precip_depth,snow_depth,snowing,raining,foggy,icy,hailing,thunderstorm
join_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
724700931410001-01-01T00:00:00,NaT,27310,72470093141,2,6,3,E,,,0,...,,,,,,,,,,
724700931410001-01-01T00:00:00,NaT,364035,72470093141,1,6,11,,,,0,...,,,,,,,,,,
724700931410001-01-01T00:00:00,NaT,117834,72470093141,2,6,3,S,,W,0,...,,,,,,,,,,
724700931412010-01-01T07:00:00+00:00,2010-01-01 07:00:00+00:00,382101,72470093141,0,4,1,E,RD,,0,...,16093.0,-8.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
724700931412010-01-01T07:00:00+00:00,2010-01-01 07:00:00+00:00,94235,72470093141,0,4,1,,,,0,...,16093.0,-8.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
724700931412010-01-01T07:00:00+00:00,2010-01-01 07:00:00+00:00,32221,72470093141,0,4,1,S,,,0,...,16093.0,-8.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
724700931412010-01-01T08:00:00+00:00,2010-01-01 08:00:00+00:00,60227,72470093141,1,4,1,,RD,,0,...,16093.0,-7.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
724700931412010-01-01T08:00:00+00:00,2010-01-01 08:00:00+00:00,167579,72470093141,1,4,1,,,,0,...,16093.0,-7.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
724700931412010-01-01T09:00:00+00:00,2010-01-01 09:00:00+00:00,32221,72470093141,2,4,1,S,,,0,...,16093.0,-7.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
724700931412010-01-01T09:00:00+00:00,2010-01-01 09:00:00+00:00,113512,72470093141,2,4,1,,,,0,...,16093.0,-7.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [125]:
tset.to_csv('data/utah_training_set.csv')