In [1]:
import pandas as pd
import os
import glob
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import folium
from pprint import pprint
# from geopy.distance import great_circle
# from geopy.distance import vincenty

from haversine import haversine
%pylab inline



Populating the interactive namespace from numpy and matplotlib


In [2]:
# List the metadata files available in the 2015 "d11" directory.
!ls ../../cohort2/output_unzipped/meta/2015/d11

d11_text_meta_2015_01_01.txt d11_text_meta_2015_07_31.txt
d11_text_meta_2015_02_06.txt d11_text_meta_2015_08_01.txt
d11_text_meta_2015_02_20.txt d11_text_meta_2015_08_04.txt
d11_text_meta_2015_04_03.txt d11_text_meta_2015_08_11.txt
d11_text_meta_2015_05_23.txt d11_text_meta_2015_08_21.txt
d11_text_meta_2015_06_03.txt d11_text_meta_2015_09_03.txt
d11_text_meta_2015_06_18.txt d11_text_meta_2015_09_18.txt
d11_text_meta_2015_06_25.txt d11_text_meta_2015_09_24.txt
d11_text_meta_2015_06_26.txt d11_text_meta_2015_10_23.txt
d11_text_meta_2015_07_22.txt d11_text_meta_2015_12_17.txt


In [3]:
# Peek at the header line and first rows of one metadata file to see which columns it provides.
!head -n 5 ../../cohort2/output_unzipped/meta/2015/d11/d11_text_meta_2015_01_01.txt

ID	Fwy	Dir	District	County	City	State_PM	Abs_PM	Latitude	Longitude	Length	Type	Lanes	Name	User_ID_1	User_ID_2	User_ID_3	User_ID_4
1100313	5	N	11	73	66000	R22.324	22.215	32.79154	-117.20716		FR	1	CLAIREMONT DR	151			
1100323	5	N	11	73	66000	R34.142	34.033	32.950106	-117.243571		FR	2	DEL MAR HTS RD	129			
1100326	5	N	11	73	66000	R36.232	36.123	32.979357	-117.252719		FR	2	VIA DE LA VALLE	130			
1100330	5	N	11	73	66000	R37.37	37.261	32.995494	-117.25635		FR	2	LOMAS SANTA FE	341			


In [4]:
def freeway_stations(freeway, direction, year=2015, data=None,
                     meta_dir='../../cohort2/output_unzipped/meta/%s/d11/*text_meta_*.txt',
                     debug=False):
    """
    Return the station metadata rows for one freeway and direction, sorted by Abs_PM.

    To use this function the parameters freeway and direction are required.

    When ``data`` is not supplied, every metadata file matching ``meta_dir`` for the
    requested year is loaded, rows without a Latitude/Longitude are dropped, and a
    ``file_date`` column (the ``YYYY_MM_DD`` suffix of the file name) is added.
    Files are processed in ascending ``file_date`` order and, for each station ID,
    only the row from the most recent file is kept.

    The current implementation assumes that the last entry in the year is the correct one.
    Further analysis will be needed to determine if that's true or not.

    :param freeway: The freeway number, as an int or anything ``int()`` accepts (e.g. ``'5'``)
    :param str direction: The direction of the freeway. IE: N, S, E, W
    :param year: The year to load, as an int or numeric string, between 2008 and 2016 inclusive
    :param Dataframe data: If provided will assume that the dataframe has already been loaded and will use
        it to return the appropriate data. Dataframe should have the columns that are expected in the
        metadata files (at least Fwy, Dir and Abs_PM are used here)
    :param str meta_dir: Glob pattern used for loading the metadata. Note: The %s is required since the
        year is an expected folder in the directory structure
    :param bool debug: If enabled will print the list of metadata files found.  Default: False
    :return: DataFrame of the matching rows with a fresh 0..n-1 index. N and E are sorted by ascending
        Abs_PM, S and W by descending Abs_PM
    :raises RuntimeError: if freeway, direction or year is invalid
    :raises ValueError: if no metadata file matches ``meta_dir`` for the year
    """
    def _file_date(path):
        # The date is the last three '_'-separated pieces of the file name,
        # e.g. 'd11_text_meta_2015_01_01.txt' -> '2015_01_01'.  Working from the
        # base name keeps this independent of underscores in the directory path.
        stem = os.path.splitext(os.path.basename(path))[0]
        return '_'.join(stem.split('_')[-3:])

    try:
        freeway = int(freeway)
    except (TypeError, ValueError):
        raise RuntimeError("freeway needs to be an int. Value Found: %s" % (freeway,))

    if direction not in ('N', 'S', 'E', 'W'):
        raise RuntimeError("Invalid direction")

    # Accept numeric strings for the year as well as ints.
    try:
        year = int(year)
    except (TypeError, ValueError):
        raise RuntimeError("Invalid year: %s" % (year,))
    if not 2008 <= year <= 2016:
        raise RuntimeError("Invalid year: %s" % year)

    # N/E are listed by increasing absolute postmile, S/W by decreasing.
    ascending = direction in ('N', 'E')

    # "is None" rather than truthiness: bool(DataFrame) raises ValueError.
    if data is None:
        pattern = meta_dir % year
        # glob returns files in arbitrary order; sort by date so that
        # keep='last' below really keeps the most recent row per station.
        meta_files = sorted(glob.glob(pattern), key=_file_date)
        if debug:
            print(meta_files)
        if not meta_files:
            # Fail early with the pattern in the message instead of letting
            # pd.concat fail on an empty list.
            raise ValueError("No metadata files found matching: %s" % pattern)

        frames = []
        for meta_file in meta_files:
            df = pd.read_csv(meta_file, sep='\t', index_col=None, header=0)
            # drop rows that are missing latitude / longitude values TODO: determine if this is ok.
            df = df.dropna(subset=['Latitude', 'Longitude'], how='any')
            frames.append(df.assign(file_date=_file_date(meta_file)))
        meta_frame = pd.concat(frames).drop_duplicates(subset='ID', keep='last')
    else:
        meta_frame = data

    selected = meta_frame[(meta_frame.Fwy == freeway) & (meta_frame.Dir == direction)]
    return selected.sort_values(by='Abs_PM', ascending=ascending).reset_index(drop=True)

In [16]:
def graph_it(dataframe_to_graph, group_markers=True, html_filename='mthood.html'):
    """
    Plot every row of a metadata dataframe as a folium marker and save the map as HTML.

    The map is centred on the first row of the dataframe.  Each marker's popup shows
    the row's index label, its Name and its Latitude/Longitude.

    :param pandas.DataFrame dataframe_to_graph: The meta dataframe to graph; it must have
        the columns Name, Latitude and Longitude and at least one row
    :param bool group_markers: True to add the markers to a marker cluster, else they are
        added to the map individually
    :param string html_filename: the location to save html version of the map to
    :return: the folium map that was built
    :raises ValueError: if the dataframe has no rows (there is nothing to centre the map on)
    """
    if len(dataframe_to_graph) == 0:
        raise ValueError("dataframe_to_graph has no rows to plot")

    first_row = dataframe_to_graph.iloc[0]
    map_1 = folium.Map(location=[first_row['Latitude'], first_row['Longitude']], zoom_start=12,
                       tiles='Stamen Terrain')

    # Only build a cluster layer when the markers are actually going into it.
    if group_markers:
        part_of = folium.MarkerCluster().add_to(map_1)
    else:
        part_of = map_1

    for label, row in dataframe_to_graph.iterrows():
        popup_text = "%s %s lat: %s lon: %s" % (label, row['Name'], row['Latitude'], row['Longitude'])
        folium.Marker([row['Latitude'], row['Longitude']], popup=popup_text).add_to(part_of)

    map_1.save(html_filename)
    return map_1

In [18]:
# Westbound stations on freeway 52, drawn as individual (ungrouped) markers.
data = freeway_stations(freeway=52, direction='W')
map_data = graph_it(dataframe_to_graph=data, group_markers=False)
map_data

In [22]:
# Southbound stations on freeway 15, using graph_it's default marker grouping.
data = freeway_stations(freeway=15, direction='S')
map_data = graph_it(dataframe_to_graph=data)
map_data