# DC Criminalistics: Transit Recommender

## Imports

In [1]:
import joblib
import pickle
import pandas as pd
import numpy as np
import datetime
import requests
import censusgeocode as cg
import ipywidgets as widgets
from ipywidgets import HBox, VBox
pd.options.mode.chained_assignment = None

## Import Transit data

In [2]:
# Station reference tables for bus, rail and bike share, loaded in that order.
# Each table carries a 'census_index' column that later cells normalize and join on.
df_bus, df_rail, df_cabi = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/wmata-data/bus_station_data_full.csv',
        '../data/wmata-data/rail_station_data_full.csv',
        '../data/cabi-station-data/cabi_station_data_full.csv',
    )
)

In [3]:
# Turn every census_index into a 7-character, zero-padded string so it can be
# joined against the string 'BlockGroup' key built from the geocoder result.
# (Presumably the CSV reader parsed these as numbers and dropped leading
# zeros -- confirm against the raw files.)
for _stations in (df_bus, df_rail, df_cabi):
    _stations['census_index'] = _stations['census_index'].map(
        lambda code: str(code).zfill(7)
    )

## Load model, encoder, and scaler

In [4]:
# Trained classifier, persisted with joblib.
filename = '../model/BaggingClassifier.sav'
loaded_model = joblib.load(filename)

# NOTE(security): pickle (and joblib) execute arbitrary code while loading.
# Only load artifact files that come from a trusted source.

# Encoder applied to the 'bg_cat' feature before prediction.
# The context manager closes the handle even if unpickling raises.
with open('../model/bg_cat_encoder.sav', 'rb') as bg_encoder_file:
    bg_encoder = pickle.load(bg_encoder_file)

# Scaler applied to the assembled feature row.
with open('../model/scaler_final.sav', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)



## Defining Functions

#### Geocode address and retrieve census tract and block group

In [5]:
def addressLookup(address_input):
    """Geocode a street address in Washington, DC and cache the result.

    ", Washington, DC" is appended to ``address_input`` and the combined
    string is sent to ``cg.onelineaddress``. Only the first match is used.

    Nothing is returned; the values are stored as attributes on the function:

    * ``addressLookup.block_group`` -- 'BLKGRP' of the first
      '2010 Census Blocks' entry
    * ``addressLookup.tract`` -- 'TRACT' of the same entry
    * ``addressLookup.lat`` -- the match's 'y' coordinate
    * ``addressLookup.lon`` -- the match's 'x' coordinate

    Raises IndexError when the geocoder returns no match, and KeyError when
    the expected keys are missing from the response.
    """
    matches = cg.onelineaddress(address_input + ", Washington, DC")
    best_match = matches[0]

    # NOTE(review): depends on the response containing a '2010 Census Blocks'
    # layer -- confirm the geocoder vintage in use still returns it.
    census_block = best_match['geographies']['2010 Census Blocks'][0]
    addressLookup.block_group = census_block['BLKGRP']
    addressLookup.tract = census_block['TRACT']

    coordinates = best_match['coordinates']
    addressLookup.lat = coordinates['y']
    addressLookup.lon = coordinates['x']

#### Retrieve weather data

In [16]:
def darkSkyAPICall(lat,lon,date_time):
    """Fetch Dark Sky conditions for a location and time and cache them.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        date_time: Timestamp to request, e.g. the ISO string produced by
            ``cleanDateTimeInput``. When falsy, no time is sent and the API
            is asked for current conditions.

    Returns:
        None. The 'currently' data points are stored as attributes on the
        function: ``summary``, ``precip_intensity``, ``precip_probability``,
        ``temp``, ``dewpoint``, ``humidity``, ``pressure``, ``wind_speed``,
        ``wind_gust``, ``wind_bearing``, ``cloud_cover``, ``uv_index`` and
        ``visibility``. A data point missing from the response is stored as
        ``None``. The summary line is also printed.

    Raises:
        requests.exceptions.RequestException: The request failed, timed out,
            or returned an HTTP error status.
        ValueError: The response was not JSON or had no 'currently' block.
    """
    # NOTE(review): the Dark Sky API was reportedly retired in 2023 --
    # confirm this endpoint is still reachable before relying on it.
    base_url = 'https://api.darksky.net/forecast/'
    # NOTE(security): the API key is hard-coded in source; move it to
    # configuration or an environment variable and rotate it.
    api_key = 'c9274e7c52c1a5b7e99be6f22db98855'
    # The exclude list must be comma-delimited with no spaces.
    params = {'exclude': 'minutely,hourly,daily,flags'}

    # Previously date_time was passed to format() but the template had only
    # two placeholders, so the requested time was silently dropped.
    query = "/{},{}".format(lat, lon)
    if date_time:
        query += ",{}".format(date_time)

    url = base_url + api_key + query

    # Let request failures propagate instead of continuing with an unbound
    # response; the timeout keeps the notebook from hanging indefinitely.
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()

    try:
        currently = response.json()['currently']
    except (ValueError, KeyError, TypeError) as err:
        raise ValueError(
            "Dark Sky response did not contain a 'currently' block"
        ) from err

    # Attribute name on this function -> key in the 'currently' block.
    data_points = {
        'summary': 'summary',
        'precip_intensity': 'precipIntensity',
        'precip_probability': 'precipProbability',
        'temp': 'temperature',
        'dewpoint': 'dewPoint',
        'humidity': 'humidity',
        'pressure': 'pressure',
        'wind_speed': 'windSpeed',
        'wind_gust': 'windGust',
        'wind_bearing': 'windBearing',
        'cloud_cover': 'cloudCover',
        'uv_index': 'uvIndex',
        'visibility': 'visibility',
    }
    for attribute, key in data_points.items():
        # Individual data points may be absent; store None rather than
        # failing on a field the model does not use.
        setattr(darkSkyAPICall, attribute, currently.get(key))

    weather = "Summary: {}".format(darkSkyAPICall.summary)

    # Printed so a run visibly confirms that the weather lookup worked.
    print(weather)

#### Transforming user input date and time data

In [6]:
def cleanDateTimeInput(date,time,am_pm):
    """Combine a date, a 12-hour clock time and an AM/PM marker.

    Args:
        date: A ``datetime.date`` (for example the DatePicker value).
        time: 12-hour clock time as 'HH:MM', e.g. '09:30'.
        am_pm: 'AM' or 'PM'. Case and surrounding whitespace are ignored.

    Returns:
        The combined timestamp as an ISO string, 'YYYY-MM-DDTHH:MM:SS'.

    Side effects:
        Stores intermediate results as attributes on the function:
        ``time24`` ('HH:MM:SS' on a 24-hour clock), ``time24_clean``
        (``datetime.time``), ``datetime_combined`` (``datetime.datetime``)
        and ``date_time_clean`` (the returned string).

    Raises:
        TypeError: ``date`` is None (no date was picked).
        ValueError: ``am_pm`` is not AM/PM, or ``time`` is not a valid
            12-hour 'HH:MM' value.
    """
    if date is None:
        # An untouched DatePicker has value None; fail with a clear message.
        raise TypeError("date is None; pick a date before running this cell")

    meridiem = am_pm.strip().upper()
    if meridiem not in ('AM', 'PM'):
        # Previously anything other than exactly 'AM' was treated as PM.
        raise ValueError("am_pm must be 'AM' or 'PM', got {!r}".format(am_pm))

    hour_text, _, minute_text = time.strip().partition(':')
    hour12 = int(hour_text)
    if not 1 <= hour12 <= 12:
        raise ValueError(
            "time must be a 12-hour 'HH:MM' value, got {!r}".format(time))

    # 12 AM is hour 0 and 12 PM is hour 12; every other PM hour gains 12.
    hour24 = hour12 % 12 + (12 if meridiem == 'PM' else 0)
    cleanDateTimeInput.time24 = '{:02d}:{}:00'.format(hour24, minute_text)

    # strptime also validates the minutes part.
    cleanDateTimeInput.time24_clean = datetime.datetime.strptime(
        cleanDateTimeInput.time24, '%H:%M:%S').time()

    cleanDateTimeInput.datetime_combined = datetime.datetime.combine(
        date, cleanDateTimeInput.time24_clean)
    cleanDateTimeInput.date_time_clean = (
        cleanDateTimeInput.datetime_combined.isoformat())

    return cleanDateTimeInput.date_time_clean

#### Categorize time of day into appropriate bucket

In [7]:
def timeOfDayBucket(mydatetime):
    """Assign the hour of ``mydatetime`` to a numbered time-of-day bucket.

    ``mydatetime`` is any object with an ``hour`` attribute. The bucket is
    stored in ``timeOfDayBucket.tod_num``; nothing is returned.

    Buckets by hour: 2-4 -> 1, 5-7 -> 2, 8-10 -> 3, 11-13 -> 4,
    14-16 -> 5, 17-19 -> 6, 20-22 -> 7, and 23, 0, 1 -> 8.
    """
    hour = mydatetime.hour

    # The late-night bucket wraps around midnight (23:00 through 01:59).
    if hour >= 23 or 0 <= hour < 2:
        timeOfDayBucket.tod_num = 8
        return

    # The remaining buckets are consecutive three-hour windows from 02:00.
    for bucket, window_start in enumerate(range(2, 23, 3), start=1):
        if window_start <= hour < window_start + 3:
            timeOfDayBucket.tod_num = bucket
            return

#### Function to return transit recommendation

In [8]:
def transitOrLyft(predicted):
    """Print a travel recommendation for the predicted label.

    For 'High' or 'Med-High' the advice is to take a Lyft. For any other
    label, the transit options are printed for the block group held in the
    module-level ``df``: ``df['BlockGroup']`` is left-joined onto
    ``df_bus``, ``df_rail`` and ``df_cabi`` via their 'census_index' column.
    A row with no matching station prints that section's "no nearby" message.

    Returns None.
    """
    if predicted == 'High' or predicted == 'Med-High':
        print('Take a Lyft!')
        return

    join_args = dict(how='left', left_on='BlockGroup', right_on='census_index')

    # (heading, joined options, columns to print, message when nothing matched)
    # All three joins run before anything is printed.
    sections = (
        ('Bus Options:',
         pd.merge(df['BlockGroup'], df_bus, **join_args),
         ('Stop_Name', 'Routes_Available'),
         'No nearby bus options'),
        ('Metro Rail Options:',
         pd.merge(df['BlockGroup'], df_rail, **join_args),
         ('Description', 'Station_Entrance'),
         'No nearby Metro Rail options'),
        ('Capitol Bike Share Options:',
         pd.merge(df['BlockGroup'], df_cabi, **join_args),
         ('name',),
         'No nearby Capitol Bike Share Options'),
    )

    for position, (heading, options, columns, fallback) in enumerate(sections):
        if position > 0:
            # Blank separator line between sections.
            print(' ')
        print(heading)

        for _, station in options.iterrows():
            # A null first column means the left join found no station.
            if pd.isnull(station[columns[0]]):
                print(fallback)
            else:
                print(*(station[column] for column in columns))

## Setting up widgets

#### Address, date, and time input widgets

In [11]:
# Street address (without city/state; addressLookup appends those).
# The example is a placeholder hint rather than the initial value, so an
# untouched box is empty instead of submitting the "Ex: ..." text as an address.
address = widgets.Text(
    value='',
    placeholder='Ex: 640 Massachusetts Ave NW',
    description='Address: ',
    disabled=False
)

# Travel date; .value stays None until the user picks one.
date = widgets.DatePicker(
    description='Pick a Date',
    disabled=False
)

# Departure time on a 12-hour clock in half-hour steps:
# '12:00', '12:30', '01:00', ..., '11:30'.
time = widgets.Dropdown(
    options=['{:02d}:{}'.format(hour, minutes)
             for hour in [12] + list(range(1, 12))
             for minutes in ('00', '30')],
    value='09:00',
    description='Depart Time:',
    disabled=False,
)

# AM/PM marker that goes with the departure time.
am_pm = widgets.Dropdown(
    options=['AM','PM'],
    value='AM',
    description='AM or PM? ',
    disabled=False,
)

# User Inputs

In [23]:
# Render the address text box; its .value is passed to addressLookup below.
address

Text(value='800 Independence ave SW', description='Address: ', placeholder='')

In [13]:
# Render the date picker; its .value is passed to cleanDateTimeInput below.
date

DatePicker(value=None, description='Pick a Date')

In [21]:
# Render the departure-time and AM/PM dropdowns side by side.
HBox([time, am_pm])

HBox(children=(Dropdown(description='Depart Time:', index=19, options=('12:00', '12:30', '01:00', '01:30', '02…

## Call functions, encode, scale, and model

In [26]:
# Turn the widget values into a timestamp; results are cached as attributes
# on cleanDateTimeInput.
cleanDateTimeInput(date.value,time.value,am_pm.value)

# Geocode the address, then fetch the weather for that point and time.
addressLookup(address.value)
darkSkyAPICall(addressLookup.lat,addressLookup.lon,cleanDateTimeInput.date_time_clean)

timeOfDayBucket(cleanDateTimeInput.time24_clean)

weekday = cleanDateTimeInput.datetime_combined.weekday()
day = cleanDateTimeInput.datetime_combined.day
month = cleanDateTimeInput.datetime_combined.month

tract_input = addressLookup.tract.rjust(6, '0')
block_group_input = addressLookup.block_group
# Build the join key from the padded tract so it is always 7 characters,
# matching the zero-padded 'census_index' in the station tables and staying
# consistent with bg_cat below.
block_group = tract_input + block_group_input
bg_cat = tract_input + ' ' + block_group_input

list_values = [bg_cat, weekday, timeOfDayBucket.tod_num, darkSkyAPICall.uv_index, 
               darkSkyAPICall.temp, day, block_group]
list_columns = ['bg_cat', 'weekday', 'tod_num', 'uv_index',
                'temperature', 'day', 'BlockGroup']

df = pd.DataFrame([list_values],columns=list_columns)

# Copy so that encoding bg_cat does not write into a slice of df.
df_modeling = df[['bg_cat', 'weekday', 'tod_num', 'uv_index',
                'temperature', 'day']].copy()

df_modeling['bg_cat'] = bg_encoder.transform(df_modeling['bg_cat'])

scaled_df = scaler.transform(df_modeling)

# Predict on the scaled features. Previously scaled_df was computed and then
# ignored, so the model saw unscaled inputs.
# NOTE(review): assumes the model was trained on scaler output -- confirm
# against the training notebook.
predicted = loaded_model.predict(scaled_df)

# predict() returns one label per row; pass the single label itself instead
# of relying on the truth value of a one-element array.
transitOrLyft(predicted[0])

Summary: Clear
Bus Options:
7TH ST SW + C ST SW 74, A9
M ST SW + 4TH ST SW 74, A9
7TH ST SW + I ST SW 52, 52v2, 74, V1
7TH ST SW + MAINE AVE SW 52, 52v1
MAINE AVE SW + 9TH ST SW 52, 52v2
MAINE AVE SW + 7TH ST SW 74
12TH + D ST N/S N/B 52, 52v1
7TH ST SW + G ST SW 52, 52v1, 74, V1
7TH ST SW + E ST SW 74, V1
D ST SW + 12TH ST SW V1
D ST SW + 12TH ST SW V1
D ST SW + 7TH ST SW V1
12TH ST SW + C ST SW 52, 52v2, V1
C ST SW + 12TH ST SW V1
C ST SW + 14TH ST SW V1
14TH ST SW + C ST SW 11Y, 11Yv2, 16Cv1, 16E
INDEPENDENCE AVE SW + 12TH ST SW 16Cv1, 52, 52v2, V1
INDEPENDENCE AVE SW + 14TH ST SW 16Cv1, 52, 52v2, V1
D ST SW + 9TH ST SW 5A, V1
D ST SW + 9TH ST SW V1
MAINE AVE SW + 9TH ST SW 52, 52v1
M ST SW + 6TH ST SW 74
3RD ST SW + L ST SW P6, P6v4, V1
6TH ST SW + K ST SW 74
I ST SW + 7TH ST SW V1
I ST SW + WESLEY PL SW P6, P6v4, V1
I ST SW + MAKEMIE PL SW V1
7TH ST SW + I ST SW 52, 52v1, 74, V1
 
Metro Rail Options:
Station entrance adjacent to Department of Agriculture building located southwest