In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
#import time

import random as random
import gmaps
from pprint import pprint
#import pycountry
#from IPython.display import (Image, HTML)
#import logging
#import math
#from scipy import stats

# Keys
from api_config import (key_openweathermap, key_gmaps)

# Incorporated citipy to determine city based on latitude and longitude
#from citipy import citipy


# Data sources to explore (so far)
* Chicago:
    * Chicago Transit Authority: https://www.transitchicago.com/developers/
        * CTA Transit Stops - General Transit Feed Specification (GTFS): "Data/access-Chicago-CTA-stops.txt"
        
    * Chicago Taxi Trips: https://www.kaggle.com/chicago/chicago-taxi-trips-bq/home
    * Chicago Taxi Rides 2016: https://www.kaggle.com/chicago/chicago-taxi-rides-2016
* Los Angeles
* New York
* San Francisco
* Washington, D.C.


In [38]:
# Load the Chicago CTA GTFS (General Transit Feed Specification) stops.txt file
i_file = "./access-Chicago-CTA-stops.txt"

# Data cleaning: read 'stop_code' and 'parent_station' as "object" instead of
#  'int64'. Some rows leave these fields empty, and (per the original note)
#  asking for 'int64' on them makes the read return an error.
i_dtypes = {
    'stop_id': 'int64',
    'stop_code': 'object',
    'stop_name': 'object',
    'stop_desc': 'object',
    'stop_lat': 'float64',
    'stop_lon': 'float64',
    'location_type': 'int64',
    'parent_station': 'object',
    'wheelchair_boarding': 'int64',
}
c_cta_stops_df = pd.read_csv(i_file, dtype=i_dtypes)

In [39]:
# Preview the first rows of the loaded stops table
c_cta_stops_df.head()

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,location_type,parent_station,wheelchair_boarding
0,1,1,Jackson & Austin Terminal,"Jackson & Austin Terminal, Northeastbound, Bus...",41.876322,-87.774105,0,,1
1,2,2,5900 W Jackson,"5900 W Jackson, Eastbound, Southside of the St...",41.877067,-87.771318,0,,1
2,3,3,Jackson & Menard,"Jackson & Menard, Eastbound, Southside of the ...",41.876957,-87.76975,0,,1
3,4,4,5700 W Jackson,"5700 W Jackson, Eastbound, Southside of the St...",41.877024,-87.767451,0,,1
4,6,6,Jackson & Lotus,"Jackson & Lotus, Eastbound, Southeast Corner",41.876513,-87.761446,0,,1


In [50]:
# Keep only the rows where parent_station is NaN.
# (Some rows are direction-specific 'stop_id' entries, with each
#  'parent_station' allowed one or more direction-specific 'stop_id' entries,
#  but for this analysis we only need the parent_station-level information.)
# .copy() makes the filtered result an independent DataFrame, so adding
#  columns to it later does not raise pandas' SettingWithCopyWarning.
c_cta_stops_df = c_cta_stops_df.loc[c_cta_stops_df['parent_station'].isna()].copy()
c_cta_stops_df.head()

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,location_type,parent_station,wheelchair_boarding
0,1,1,Jackson & Austin Terminal,"Jackson & Austin Terminal, Northeastbound, Bus...",41.876322,-87.774105,0,,1
1,2,2,5900 W Jackson,"5900 W Jackson, Eastbound, Southside of the St...",41.877067,-87.771318,0,,1
2,3,3,Jackson & Menard,"Jackson & Menard, Eastbound, Southside of the ...",41.876957,-87.76975,0,,1
3,4,4,5700 W Jackson,"5700 W Jackson, Eastbound, Southside of the St...",41.877024,-87.767451,0,,1
4,6,6,Jackson & Lotus,"Jackson & Lotus, Eastbound, Southeast Corner",41.876513,-87.761446,0,,1


In [95]:
# Use reverse geocode lookup to find a
#  postal_code (zipcode) associated with a lat/long coord
def zipcode_from_latlong( a_lat, a_long ):
    """Return the postal code reported for a latitude/longitude pair.

    Sends a reverse-geocoding request to the Google Maps Geocoding API
    (authenticated with the module-level ``key_gmaps``) and scans the
    address components of the first result for one typed 'postal_code'.

    Args:
        a_lat: Latitude of the point to look up.
        a_long: Longitude of the point to look up.

    Returns:
        The 'long_name' of the first 'postal_code' address component in the
        first result, or None when the response has no results or the first
        result carries no postal code.
    """
    baseurl = "https://maps.googleapis.com/maps/api/geocode/json"
    query = {"latlng": f"{a_lat},{a_long}", "key": key_gmaps}

    # Perform a reverse geocode lookup for this lat/long coord
    g_response = requests.get(baseurl, params=query)
    g_json = g_response.json()

    # An empty or missing 'results' list means there is nothing to inspect;
    # report "no zipcode" instead of failing on results[0].
    results = g_json.get('results') or []
    if not results:
        return None

    # Traverse the first result to find a zipcode for this address
    for component in results[0].get('address_components', []):
        if 'postal_code' in component.get('types', []):
            return component['long_name']

    # No postal_code component was present
    return None

In [99]:
# Now, populate postal_code (zipcode) for all of the CTA stops.
# Each stop is looked up from its own coordinates and the column is assigned
#  once from the per-row results. (Assigning a single lookup result to
#  c_cta_stops_df['postal_code'] inside a loop would overwrite the whole column
#  each time, leaving every row with the last stop's zipcode.)
c_cta_stops_df['postal_code'] = [
    zipcode_from_latlong(stop_lat, stop_lon)
    for stop_lat, stop_lon in zip(c_cta_stops_df['stop_lat'],
                                  c_cta_stops_df['stop_lon'])
]
    


In [100]:
# Inspect the first 20 rows of the stops table
c_cta_stops_df.head(20)

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,location_type,parent_station,wheelchair_boarding,postal_code
0,1,1,Jackson & Austin Terminal,"Jackson & Austin Terminal, Northeastbound, Bus...",41.876322,-87.774105,0,,1,60644
1,2,2,5900 W Jackson,"5900 W Jackson, Eastbound, Southside of the St...",41.877067,-87.771318,0,,1,60644
2,3,3,Jackson & Menard,"Jackson & Menard, Eastbound, Southside of the ...",41.876957,-87.76975,0,,1,60644
3,4,4,5700 W Jackson,"5700 W Jackson, Eastbound, Southside of the St...",41.877024,-87.767451,0,,1,60644
4,6,6,Jackson & Lotus,"Jackson & Lotus, Eastbound, Southeast Corner",41.876513,-87.761446,0,,1,60644
5,7,7,5351 W Jackson,"5351 W Jackson, Eastbound, Southside of the St...",41.876552,-87.758925,0,,1,60644
6,8,8,Jackson & Lockwood,"Jackson & Lockwood, Eastbound, Southeast Corner",41.876564,-87.757313,0,,1,60644
7,9,9,Jackson & Laramie,"Jackson & Laramie, Eastbound, Southeast Corner",41.876595,-87.754615,0,,1,60644
8,10,10,Jackson & Leamington,"Jackson & Leamington, Eastbound, Southeast Corner",41.876626,-87.753168,0,,1,60644
9,11,11,5047 W Jackson,"5047 W Jackson, Eastbound, Southside of the St...",41.876651,-87.751283,0,,1,60644


In [101]:
# Write the stops table to a csv file (without the index column) for later use
o_file = "./chicago_cta_stops.csv"
c_cta_stops_df.to_csv(path_or_buf=o_file, index=False)

NameError: name 'c_cta_stops_df' is not defined