# Create Traffic Predictions GeoJSON

This notebook assumes the modeling pipeline has already been run:
- `save_dir`, as defined in the args file, has been populated with `test_predictions.csv`
- the database has been populated with the `segments_selected` table and the `time_N` table, where N = `time_resolution` as defined in the args file

1. get predictions data
2. create geojson and write to file
3. create timestamp lookup and bounding box and write to file

In [None]:
import os
import shutil
import joblib
import ast
import json
import pandas as pd
import psycopg2 as pg
import datetime as dt
from AWS import AWS
from Utility import Utility

In [None]:
# set environment
# `aws` stays None unless a later cell replaces it with an AWS helper object
aws = None
s3_bucket_name = 'dse-cohort3-group3'
s3_dat_dir = 'PreprocessedWazeData'

# get args file - its text is parsed below as one Python literal and then used as a dict
sampling_args_file = '../conf/pipeline_args.txt'
# read inside a context manager so the handle is closed even if the read fails
with open(sampling_args_file, 'r') as fr:
    fa = fr.read()
# literal_eval only accepts Python literals, so the args file cannot run arbitrary code
file_args = ast.literal_eval(fa)

# assume save_dir already exists
save_dir = file_args['save_dir']

# assume connection file is always present
conn_str_file = file_args['conn_str_file']

In [None]:
# viz files live in their own subdirectory of save_dir (save_dir itself must already exist)
viz_dir = '{}/viz_files'.format(save_dir)

# start from an empty viz_dir: drop whatever a previous run left behind, then recreate it
viz_dir_exists = os.path.isdir(viz_dir)
if viz_dir_exists:
    shutil.rmtree(viz_dir)

os.mkdir(viz_dir)

## 1. get data

In [None]:
# create utility object and connect to database
util = Utility(file_args)

# the AWS helper is only built when the utility object reports an AWS run
if util.isAWS():
    aws = AWS(s3_bucket_name, s3_dat_dir)

# read the connection string inside a context manager so the file handle gets closed
with open(conn_str_file, 'r') as conn_file:
    pg_conn_str = conn_file.read()

conn = pg.connect(pg_conn_str)
# hand the open connection to the utility object as well
util.conn = conn

In [None]:
def mile_to_meter(mile):
    """Convert a distance in miles to whole meters and return it as a string."""
    meters_per_mile = 1609.344
    whole_meters = int(round(mile * meters_per_mile))
    return str(whole_meters)

def bounding_box(nw, se):
    """Build the SQL WHERE fragment selecting geometries inside a bounding box.

    nw and se are corner strings of the form '<lat> N, <lon> W'. The ' N' / ' W'
    markers are stripped and each longitude is prefixed with '-' in the output.
    The fragment also requires ST_Length(geom) > 0.
    """
    def _split_corner(corner):
        # drop the hemisphere markers, then split into [lat, lon] text pieces
        return corner.replace(" N", "").replace(" W", "").split(', ')

    nw_parts = _split_corner(nw)
    se_parts = _split_corner(se)

    template = "geom @ ST_MakeEnvelope (-{}, {}, -{}, {}) and ST_Length(geom) > 0"
    return template.format(nw_parts[1], nw_parts[0], se_parts[1], se_parts[0])

def sql_radius(google_lat_lon, mile):
    """Build an ST_DWithin SQL fragment around a point.

    google_lat_lon is a string of the form '<lat> N, <lon> W'; the longitude is
    prefixed with '-' in the output. mile is converted to meters via mile_to_meter.
    """
    coords = google_lat_lon.replace(" N", "").replace(" W", "").split(', ')
    latitude = coords[0]
    longitude = coords[1]
    return "ST_DWithin(geom, ST_MakePoint(-{},{})::geography,{})".format(
        longitude, latitude, mile_to_meter(mile))

In [None]:
# get segments that fall inside the bounding box configured in the args file
select_attributes = "segment_id, street, road_type, ST_AsGeoJSON(geom) AS geometry"
bbox_args = file_args['segment_queries']['bounding_box']
where_conditions = bounding_box(bbox_args['input_nw_corner'], bbox_args['input_se_corner'])
segments_sql = "SELECT {} FROM segments_selected WHERE {}".format(select_attributes, where_conditions)

segments_df = pd.read_sql(segments_sql, con=conn)
print("{} rows".format(len(segments_df)))

In [None]:
# get time windows for range of test dates
# the table name carries the time resolution (time_<N>); both date bounds are inclusive
times_sql = (
    "\nselect date, time, day_of_week\nfrom time_" + str(file_args['time_resolution'])
    + "\nwhere date >= '" + file_args['test_date_start']
    + "' and date <= '" + file_args['test_date_end'] + "';"
)

time_df = pd.read_sql(times_sql, con=conn)
# order the windows chronologically, in place
time_df.sort_values(by=['date', 'time'], inplace=True)
print("{} rows".format(len(time_df)))

In [None]:
# cartesian product of segments and times:
# give both frames the same constant key so that joining on it pairs every time row
# with every segment row (the helper column 'tmp' stays on both inputs and on the result)
for frame in (time_df, segments_df):
    frame['tmp'] = 1
segments_time_df = time_df.merge(segments_df, how='outer', on=['tmp'])
print("{} rows".format(len(segments_time_df)))

In [None]:
# # get actuals and predictions from database
# preds_df = pd.read_sql('select * from test_predictions;', con=conn)
# print(str(len(preds_df))+" rows")

In [None]:
# get actuals and predictions from the csv written to save_dir
fn = os.path.join(save_dir, 'test_predictions.csv')
preds_df = pd.read_csv(fn)

# parse the text columns and keep only the calendar-date / time-of-day part of each
parsed_dates = pd.to_datetime(preds_df['date'], format='%Y-%m-%d')
preds_df['date'] = parsed_dates.dt.date
parsed_times = pd.to_datetime(preds_df['time'], format='%H:%M:%S')
preds_df['time'] = parsed_times.dt.time

print("{} rows".format(len(preds_df)))

In [None]:
# Left join preds_df onto the cartesian product of all time slices and segments of interest,
# carrying over only the join keys and the columns whose name starts with 'level'
level_cols = [col for col in preds_df.columns if col.startswith('level')]
join_keys = ['segment_id', 'date', 'time']
preds_subset = preds_df[['date', 'time', 'segment_id'] + level_cols]
geojson_df = segments_time_df.merge(preds_subset, how='left', on=join_keys)
print("{} rows".format(len(geojson_df)))

# Rows without a matching prediction come back null; treat them as 0 (no congestion)
filled_levels = geojson_df[level_cols].fillna(0)
geojson_df.update(filled_levels)

In [None]:
#level_cols = ['level_max','level_max_preds_cluster_logistic_regression','level_max_preds_full_knn',
#              'level_max_preds_cluster_ensemble','level_max_preds_avg_baseline']

## 2. create geojson

In [None]:
# create day of week dictionary - might want to make this actual date range?
dow_dict = {
    '1':'Sunday',
    '2':'Monday',
    '3':'Tuesday',
    '4':'Wednesday',
    '5':'Thursday',
    '6':'Friday',
    '7':'Saturday'
}

In [None]:
# function to extract segment data by day of week
def extract_segment_data(seg):
    seg_data_dict = {}
    for dow,dow_name in dow_dict.iteritems():
        # extract data for given segment and dow
        seg_dow_df = geojson_df.loc[(geojson_df['segment_id']==seg) & (geojson_df['day_of_week']==dow),level_cols]
        seg_data_dict[dow_name] = seg_dow_df.to_dict(orient='list')
    return seg_data_dict

In [None]:
# create dict to fill in with geojson
geojson_dict = {
    'type': 'FeatureCollection',
    'features': []
}

# fill geojson: one Feature per distinct segment id found in geojson_df
for seg in geojson_df['segment_id'].unique():
    # look the segment up once instead of re-filtering segments_df for every attribute
    seg_rows = segments_df[segments_df['segment_id']==seg]
    features_dict = {
        'type': 'Feature',
        # the geometry column holds the text returned by ST_AsGeoJSON; literal_eval turns
        # it into nested dicts/lists.
        # NOTE(review): this only parses while that text is also a valid Python literal
        # (JSON null/true/false would raise) - confirm for the geometries in use
        'geometry': ast.literal_eval(seg_rows['geometry'].values[0]),
        'properties': {
            'segment_id': seg,
            'street': seg_rows['street'].values[0],
            'road_type': seg_rows['road_type'].values[0],
            # per-day-of-week lists of the level columns for this segment
            'data': extract_segment_data(seg)
        }
    }
    geojson_dict['features'].append(features_dict)

In [None]:
# serialize the feature collection (4-space indent, keys sorted) into viz_dir
fn = os.path.join(viz_dir, 'segment_preds.geojson')
with open(fn, 'w') as f:
    f.write(json.dumps(geojson_dict, indent=4, sort_keys=True))

In [None]:
# write geojson as a js file (a single `var` assignment) into viz_dir
geojson_prefix = 'var segment_preds_geojson = '
fn = os.path.join(viz_dir, 'segment_preds_geojson.js')
with open(fn, 'w') as f:
    # json.dumps emits a literal JavaScript can parse; str() would emit the Python repr
    # (None, True/False, u'' prefixes, raw byte escapes), which is not valid JavaScript
    f.write(geojson_prefix + json.dumps(geojson_dict))

## 3. create timestamp lookup and bounding box variable files

In [None]:
# function to format string of type 'hh:mm:ss' to 'hh:mm a.m.'
def ts_conversion(ts_str):
    """Format an 'hh:mm:ss' string as 'h:mm a.m.' / 'h:mm p.m.' on a 12-hour clock."""
    hour_24 = int(ts_str[:2])
    minutes = ts_str[3:5]
    suffix = 'p.m.' if hour_24 >= 12 else 'a.m.'
    # hours 0 and 12 both display as 12 on a 12-hour clock
    hour_12 = hour_24 % 12 or 12
    return '{}:{} {}'.format(hour_12, minutes, suffix)

In [None]:
# create df with all possible timestamps to ensure none are left out
time_interval = file_args['time_resolution']
# one dt.time per slot: each hour of the day combined with each multiple of
# time_interval minutes below 60
all_slots = []
for h in range(0, 24):
    for m in range(0, 60, time_interval):
        all_slots.append(dt.time(h, m, 0))
ts_df = pd.DataFrame.from_dict({'time_interval': all_slots})

In [None]:
# convert time_interval to string and write to dict
# build an explicit list: on Python 3 map() returns a lazy iterator rather than a list,
# so the column assignment must not depend on map()'s return type
ts_df['time_interval'] = [ts_conversion(ts) for ts in ts_df['time_interval'].astype(str)]
# to_dict() is keyed by column name; the inner dict maps row index -> formatted label
timestamp_dict = ts_df.to_dict()['time_interval']

In [None]:
# write the timestamp lookup as a js file (a single `var` assignment) into viz_dir
ts_prefix = 'var timestamp_lookup = '
fn = os.path.join(viz_dir, 'timestamp_lookup.js')
with open(fn, 'w') as f:
    ts_js_source = ts_prefix + str(timestamp_dict)
    f.write(ts_js_source)

In [None]:
# write bounding box bounds as a js file (a single `var` assignment) into viz_dir
bb_prefix = 'var bounds = '


def _corner_to_lat_lon(corner):
    """Parse a corner string of the form '<lat> N, <lon> W' into a list of floats.

    The ' N' / ' W' markers are stripped and the second value (longitude) is negated.
    """
    parts = corner.replace(" N","").replace(" W", "").split(', ')
    parts[1] = '-'+parts[1] # add negative
    # build a real list so the result can be indexed on Python 3 as well as Python 2
    return [float(part) for part in parts]


input_nw_corner = file_args['segment_queries']['bounding_box']['input_nw_corner']
nw = _corner_to_lat_lon(input_nw_corner)

input_se_corner = file_args['segment_queries']['bounding_box']['input_se_corner']
se = _corner_to_lat_lon(input_se_corner)

# [[nw_lat, nw_lon], [se_lat, se_lon]]
bounds = [[nw[0],nw[1]],[se[0],se[1]]]

fn = os.path.join(viz_dir, 'boundingbox.js')
with open(fn, 'w') as f:
    f.write(bb_prefix + str(bounds))