## Transit Recommender Demo Notebook

# Imports

In [1]:
import joblib
import pickle
import pandas as pd
import numpy as np
import datetime
import requests
import censusgeocode as cg
import ipywidgets as widgets
from ipywidgets import HBox, VBox
import s

# Import Transit data

In [2]:
df_bus = pd.read_csv('../data/wmata-data/bus_station_data_full.csv')
df_rail = pd.read_csv('../data/wmata-data/rail_station_data_full.csv')
df_cabi = pd.read_csv('../data/cabi-station-data/cabi_station_data_full.csv')

In [3]:
df_bus['census_index'] = df_bus['census_index'].apply(str)
df_bus['census_index'] = df_bus['census_index'].apply(lambda x: x.zfill(7))

df_rail['census_index'] = df_rail['census_index'].apply(str)
df_rail['census_index'] = df_rail['census_index'].apply(lambda x: x.zfill(7))

df_cabi['census_index'] = df_cabi['census_index'].apply(str)
df_cabi['census_index'] = df_cabi['census_index'].apply(lambda x: x.zfill(7))

# Load model

In [88]:
filename = '../JobLib/BaggingClassifier.sav'
loaded_model = joblib.load(filename)

# Load encoder and scaler

In [8]:
bg_encoder_file = open('../FeaturesModels/bg_cat_encoder_2.sav','rb')
bg_encoder = pickle.load(bg_encoder_file)
bg_encoder_file.close()

In [63]:
scaler_file = open('../FeaturesModels/scaler_3.sav','rb')
scaler = pickle.load(scaler_file)
scaler_file.close()

In [11]:
bg_encoder

LabelEncoder()

In [64]:
scaler

StandardScaler(copy=True, with_mean=True, with_std=True)

# Defining Functions

#### Geocode address and return census information

In [13]:
def addressLookup(address_input):
    address = cg.onelineaddress(address_input)

    addressLookup.block_group = address[0]['geographies']['2010 Census Blocks'][0]['BLKGRP']
    addressLookup.tract = address[0]['geographies']['2010 Census Blocks'][0]['TRACT']
    addressLookup.lat = address[0]['coordinates']['y']
    addressLookup.lon = address[0]['coordinates']['x']

#### Gather weather information

In [14]:
def darkSkyAPICall(lat,lon,date_time):

    base_url = 'https://api.darksky.net/forecast/'
    api_key = 'c9274e7c52c1a5b7e99be6f22db98855'
    exclude = 'minutely, hourly, daily, flags'
    params = {'exclude': exclude}

    lat_address = lat
    lon_address = lon

    query = "/{},{}".format(lat_address,lon_address,date_time)

    url = base_url + api_key + query

    try:
        response = requests.get(url, params=params)
    except ConnectionError:
        pass

    try:
        response_json = response.json()
    except:
        response_json = {}

    darkSkyAPICall.summary = response_json['currently']['summary']
    darkSkyAPICall.precip_intensity = response_json['currently']['precipIntensity']
    darkSkyAPICall.precip_probability = response_json['currently']['precipProbability']
    darkSkyAPICall.temp = response_json['currently']['temperature']
    darkSkyAPICall.dewpoint = response_json['currently']['dewPoint']
    darkSkyAPICall.humidity = response_json['currently']['humidity']
    darkSkyAPICall.pressure = response_json['currently']['pressure']
    darkSkyAPICall.wind_speed = response_json['currently']['windSpeed']
    darkSkyAPICall.wind_gust = response_json['currently']['windGust']
    darkSkyAPICall.wind_bearing = response_json['currently']['windBearing']
    darkSkyAPICall.cloud_cover = response_json['currently']['cloudCover']
    darkSkyAPICall.uv_index = response_json['currently']['uvIndex']
    darkSkyAPICall.visibility = response_json['currently']['visibility']

    weather = "Summary: {}".format(darkSkyAPICall.summary)
    
    #include print of weather to make sure all tests are working
    
    print(weather)

#### Transforming user input date and time data

In [17]:
def transformTimeInput(time,am_pm):
    time_transformed = time + ':00' + am_pm
    return time_transformed

def convert24(time): 
    if time[-2:] == "AM" and time[:2] == "12": 
        return "00" + time[2:-2]    
    elif time[-2:] == "AM": 
        return time[:-2] 
    elif time[-2:] == "PM" and time[:2] == "12": 
        return time[:-2] 
    else: 
        return str(int(time[:2]) + 12) + time[2:8] 

#### Transform day of week

In [18]:
def weekdayTranform(mydatetime):
    if mydatetime.weekday() == 0:
        weekdayTranform.wkday_0 = 1
        weekdayTranform.wkday_1 = 0
        weekdayTranform.wkday_2 = 0
        weekdayTranform.wkday_3 = 0
        weekdayTranform.wkday_4 = 0
        weekdayTranform.wkday_5 = 0
        weekdayTranform.wkday_6 = 0

### Time of Day

In [19]:
def timeOfDayBucket(mydatetime):
    if 23 <= mydatetime.hour:
        timeOfDayBucket.tod_num = 8

    if 0 <= mydatetime.hour < 2:
        timeOfDayBucket.tod_num = 8

    elif 2 <= mydatetime.hour < 5:
        timeOfDayBucket.tod_num = 1

    elif 5 <= mydatetime.hour < 8:
        timeOfDayBucket.tod_num = 2
        
    elif 8 <= mydatetime.hour < 11:
        timeOfDayBucket.tod_num = 3
        
    elif 11 <= mydatetime.hour < 14:
        timeOfDayBucket.tod_num = 4
        
    elif 14 <= mydatetime.hour < 17:
        timeOfDayBucket.tod_num = 5

    elif 17 <= mydatetime.hour < 20:
        timeOfDayBucket.tod_num = 6

    elif 20 <= mydatetime.hour < 23:
        timeOfDayBucket.tod_num = 7

#### Function to return transit recommendation

In [135]:
def transitOrLyft(predicted):
    
    if predicted == 'High' or predicted == 'Med-High':
        print('Take a Lyft!')

    else:
        bus_options = pd.merge(df['BlockGroup'], df_bus, how='left',
            left_on='BlockGroup', right_on='census_index')
        rail_options = pd.merge(df['BlockGroup'], df_rail, how='left',
            left_on='BlockGroup', right_on='census_index')
        capitol_bike_share_options = pd.merge(df['BlockGroup'], df_cabi, how='left',
            left_on='BlockGroup', right_on='census_index')
        
        print('Bus Options:')

        for index, row in bus_options.iterrows():
            if(pd.isnull(row['Stop_Name'])):
                print('No nearby bus options')
            else:
                print(row['Stop_Name'], row['Routes_Available'])
        print(' ')        
        print('Metro Rail Options:')

        for index, row in rail_options.iterrows():
            if(pd.isnull(row['Description'])):
               print('No nearby Metro Rail options')
            else:
               print(row['Description'], row['Station_Entrance'])

        print(' ')    
        print('Capitol Bike Share Options:')

        for index, row in capitol_bike_share_options.iterrows():
            if(pd.isnull(row['name'])):
                print('No nearby Capitol Bike Share Options')
            else:
                print(row['name'])

## Setting up widgets

#### Address input widget

In [21]:
address = widgets.Text(
    value='Ex: 640 Massachusetts Ave NW',
    placeholder='',
    description='Address: ',
    disabled=False
)

#### Date input widget

In [22]:
date = widgets.DatePicker(
    description='Pick a Date',
    disabled=False
)

#### Time input widget

In [23]:
time = widgets.Dropdown(
    options=['12:00', '12:30', '01:00', '01:30', '02:00', '02:30', '03:00',
'03:30', '04:00', '04:30','05:00', '05:30', '06:00', '06:30', '07:00', '07:30', 
'08:00', '08:30', '09:00', '09:30', '10:00', '10:30', '11:00', '11:30'],
    value='09:00',
    description='Depart Time:',
    disabled=False,
)

am_pm = widgets.Dropdown(
    options=['AM','PM'],
    value='AM',
    description='AM or PM? ',
    disabled=False,
)

# Inputs

In [25]:
address

Text(value='4120 14th st NW', description='Address: ', placeholder='')

In [27]:
date

DatePicker(value=datetime.date(2019, 6, 14), description='Pick a Date')

In [29]:
HBox([time, am_pm])

HBox(children=(Dropdown(description='Depart Time:', index=18, options=('12:00', '12:30', '01:00', '01:30', '02…

# Extract values from inputs

In [30]:
address_input = address.value + ", Washington, DC"
time_input = time.value
am_pm_input = am_pm.value
date_input = date.value

# Call Functions

#### Reformat date and time input

In [31]:
time_input_transformed = transformTimeInput(time_input,am_pm_input)
time_24 = convert24(time_input_transformed)
time_input_clean = datetime.datetime.strptime(time_24, '%H:%M:%S').time()
mydatetime = datetime.datetime.combine(date_input, time_input_clean)
date_time_input = mydatetime.isoformat()
date_time_input

'2019-06-14T21:00:00'

#### Encode day of week and time of day buckets for modeling

In [32]:
weekday = mydatetime.weekday()
weekday

4

In [33]:
day = mydatetime.day
day

14

In [34]:
month = mydatetime.month
month

6

In [35]:
timeOfDayBucket(mydatetime)

### Call API functions, print weather forecast to make sure it's working

In [36]:
addressLookup(address_input)
darkSkyAPICall(addressLookup.lat,addressLookup.lon,date_time_input)

Summary: Partly Cloudy


## Modeling!

### Reformat Block Group for encoding

In [95]:
tract_input = addressLookup.tract.rjust(6, '0')
block_group_input = addressLookup.block_group
block_group = addressLookup.tract + addressLookup.block_group
bg_cat = tract_input + ' ' + block_group_input
bg_cat

'002502 4'

### In progress: Set up dataframe of info for model input, encode and transform

In [113]:
list_values = [month, day, timeOfDayBucket.tod_num, darkSkyAPICall.humidity,
      darkSkyAPICall.temp, darkSkyAPICall.uv_index, weekday, bg_cat, block_group]

In [114]:
list_columns = ['month', 'day', 'tod_num', 'humidity', 'temperature', 'uv_index', 
                     'weekday', 'bg_cat','BlockGroup']

In [115]:
df = pd.DataFrame([list_values],columns=list_columns)

In [116]:
df_modeling = df[['month', 'day', 'tod_num', 'humidity', 'temperature', 'uv_index', 
                     'weekday', 'bg_cat']]

In [117]:
df_modeling

Unnamed: 0,month,day,tod_num,humidity,temperature,uv_index,weekday,bg_cat
0,6,14,7,0.51,66.32,4,4,002502 4


In [118]:
df_modeling['bg_cat'] = bg_encoder.transform(df_modeling['bg_cat'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [119]:
df_modeling

Unnamed: 0,month,day,tod_num,humidity,temperature,uv_index,weekday,bg_cat
0,6,14,7,0.51,66.32,4,4,125


In [120]:
scaled_df = scaler.transform(df_modeling)

In [121]:
scaled_df

array([[-0.1990505 , -0.22192035,  1.02848007, -0.75935701,  0.34399307,
         0.99531307,  0.49785785, -0.99027067]])

# Predict and return transit options

In [123]:
predicted = loaded_model.predict(df_modeling)

In [136]:
transitOrLyft(predicted)

Take a Lyft!
