In [1]:
import pandas as pd
import numpy as np
import pickle
import re
from matplotlib import pyplot as plt
import ImprovedOSMToGraph as mkgraph
import xml.sax
import copy
import networkx
import scipy.spatial as spa
import time
import numpy.linalg as nla
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
# Load one table per city; encode() below reads the "NODE" column of each.
seattle = pd.read_csv("Featured_Seattle.csv")
baton = pd.read_csv("Featured_Baton.csv")
bloom = pd.read_csv("Featured_Bloom.csv")

In [3]:
def load_graph(filename):
    """
        Unpickle and return the object stored in the file at `filename`.

        The file is opened in binary mode and closed again before returning.
        NOTE: unpickling can run arbitrary code embedded in the file, so only
        use this on pickles that come from a trusted source.
    """
    with open(filename, 'rb') as pickled:
        return pickle.load(pickled)

# Unpickle one graph per city (the file names carry no extension).
seattle_graph = load_graph("seattle_graph")
baton_graph = load_graph("baton_rouge_graph")
bloom_graph = load_graph("bloomington_graph")

In [66]:
def gather_key_values(c_node_ids, graph, wanted_tag):
    """
        Look up one tag on every requested node and return a label per node.

        Parameters
        ----------
        c_node_ids : iterable
            Ids of the nodes of interest. Each id is converted with `str()`
            before it is used as the node key in `graph`.
        graph : graph-like
            Object exposing per-node attribute dicts through `graph.node`
            (removed in NetworkX 2.4) or, failing that, `graph.nodes`.
            Every looked-up node must have a 'tags' mapping.
        wanted_tag : str
            Tag key to read from each node's 'tags' mapping.

        Returns
        -------
        list of str
            One label per id, in input order: "<wanted_tag>_<value>" when the
            node carries the tag, "<wanted_tag>_UNKNOWN" when it does not.
            The value "elevator" is also reported as UNKNOWN.

        Raises
        ------
        KeyError
            If a node id is not in the graph or a node has no 'tags' entry.
    """
    # `Graph.node` only exists on older NetworkX releases; newer ones expose
    # the same per-node attribute mapping as `Graph.nodes`.
    node_attrs = getattr(graph, "node", None)
    if node_attrs is None:
        node_attrs = graph.nodes

    per_node = []
    for node_id in c_node_ids:
        node_tags = node_attrs[str(node_id)]['tags']
        if wanted_tag in node_tags and node_tags[wanted_tag] != "elevator":
            per_node.append(wanted_tag + "_" + node_tags[wanted_tag])
        else:
            # Missing tag and the "elevator" value share the same label.
            per_node.append(wanted_tag + "_" + "UNKNOWN")

    return per_node
        

In [67]:
# Node tag keys that encode() turns into one label column plus indicator
# columns each (presumably OpenStreetMap tag keys -- confirm against the graph builder).
wanted_keys = ["traffic_signals", "crossing", "access", "junction",
               "highway", "traffic_calming","stop"]

def encode(crash_data, graph, wanted_keys):
    """
        Append tag labels and their indicator columns to a table of nodes.

        For every key in `wanted_keys`, in order, the result gains a column
        named after the key holding the labels from `gather_key_values`,
        followed by one indicator column per distinct label
        (`pd.get_dummies`).

        Parameters
        ----------
        crash_data : pandas.DataFrame
            Table with a "NODE" column of node ids. It is left unmodified.
        graph : graph-like
            Graph passed through to `gather_key_values`.
        wanted_keys : iterable of str
            Tag keys to encode.

        Returns
        -------
        pandas.DataFrame
            A new frame: the original columns followed by the added ones.
    """
    # Work on a copy: assigning the first label column directly would write
    # into the caller's frame before the name is rebound by concat.
    encoded = crash_data.copy()

    for key in wanted_keys:
        encoded[key] = gather_key_values(list(encoded["NODE"]), graph, key)
        indicators = pd.get_dummies(encoded[key])
        encoded = pd.concat([encoded, indicators], axis=1)

    return encoded
    

In [69]:
# Add the tag label and indicator columns to each city's table.
# NOTE(review): get_dummies only creates columns for labels present in the
# given table, so the three frames may end up with different column sets.
finished_seattle = encode(seattle, seattle_graph, wanted_keys)
finished_baton = encode(baton, baton_graph, wanted_keys)
finished_bloom = encode(bloom, bloom_graph, wanted_keys)

In [71]:
# Write the encoded Seattle table to disk.
# NOTE(review): to_csv also writes the index by default, which is the likely
# origin of the 'Unnamed: 0' columns in the column listing below; consider
# index=False once downstream readers are confirmed not to rely on it.
# NOTE(review): finished_baton / finished_bloom are not saved in the visible cells.
finished_seattle.to_csv("Complete_Seattle.csv")

In [72]:
# Show the column names of the encoded Seattle table.
finished_seattle.columns

Index(['Unnamed: 0', 'Unnamed: 0.1', 'LONGITUDE', 'LATITUDE', 'INJURIES',
       'SERIOUSINJURIES', 'FATALITIES', 'INCDATE', 'INCDTTM', 'NODE', '1CR',
       '2CR', '3CR', '4CR', '5CR', '6CR', '7CR', '0OneWay', '1OneWay',
       '2OneWay', '3OneWay', '4OneWay', '5OneWay', '6OneWay', '0Ingoing',
       '1Ingoing', '2Ingoing', '3Ingoing', '4Ingoing', '5Ingoing', '6Ingoing',
       '0Outgoing', '1Outgoing', '2Outgoing', '3Outgoing', '4Outgoing',
       '5Outgoing', '6Outgoing', 'traffic_signals', 'traffic_signals_UNKNOWN',
       'traffic_signals_ramp_meter', 'traffic_signals_signal', 'crossing',
       'crossing_UNKNOWN', 'crossing_controlled', 'crossing_island',
       'crossing_no', 'crossing_traffic_signals', 'crossing_uncontrolled',
       'crossing_unmarked', 'crossing_yes', 'crossing_zebra', 'access',
       'access_UNKNOWN', 'access_private', 'junction', 'junction_UNKNOWN',
       'junction_roundabout', 'highway', 'highway_UNKNOWN', 'highway_bus_stop',
       'highway_crossing', '

In [33]:
# Distinct "highway" labels over the node ids in the Seattle table.
# NOTE(review): the recorded output lacks the "highway_" prefix that
# gather_key_values adds, so it predates the current definition; re-run to refresh.
highway = gather_key_values(list(seattle["NODE"]), seattle_graph, "highway")
highway_uniques = set(highway)
print(highway_uniques)

{'turning_loop', 'give_way', 'UNKNOWN', 'mini_roundabout', 'stop', 'traffic_signals', 'motorway_junction', 'turning_circle', 'crossing', 'bus_stop'}


In [19]:
# Distinct "traffic_signals" labels over the node ids in the Seattle table.
# NOTE(review): the recorded output lacks the "traffic_signals_" prefix, so it is stale.
traffic_signals = gather_key_values(list(seattle["NODE"]), seattle_graph, "traffic_signals")
signals_uniques = set(traffic_signals)
print(signals_uniques)

{'signal', 'ramp_meter', 'UNKNOWN'}


In [22]:
# Distinct "crossing" labels over the node ids in the Seattle table.
# NOTE(review): the recorded output lacks the "crossing_" prefix, so it is stale.
crossing = gather_key_values(list(seattle["NODE"]), seattle_graph, "crossing")
crossing_uniques = set(crossing)
print(crossing_uniques)

{'controlled', 'uncontrolled', 'island', 'UNKNOWN', 'unmarked', 'zebra', 'traffic_signals', 'yes', 'no'}


In [23]:
# Distinct "access" labels over the node ids in the Seattle table.
# NOTE(review): the recorded output lacks the "access_" prefix, so it is stale.
access = gather_key_values(list(seattle["NODE"]), seattle_graph, "access")
access_uniques = set(access)
print(access_uniques)

{'private', 'UNKNOWN'}


In [25]:
# Distinct "junction" labels over the node ids in the Seattle table.
# NOTE(review): the recorded output lacks the "junction_" prefix, so it is stale.
junction = gather_key_values(list(seattle["NODE"]), seattle_graph, "junction")
junction_uniques = set(junction)
print(junction_uniques)

{'roundabout', 'UNKNOWN'}


In [27]:
# Distinct "traffic_calming" labels over the node ids in the Seattle table.
# NOTE(review): the recorded output lacks the "traffic_calming_" prefix, so it is stale.
calming = gather_key_values(list(seattle["NODE"]), seattle_graph, "traffic_calming")
calming_uniques = set(calming)
print(calming_uniques)

{'cushion', 'island', 'UNKNOWN', 'hump', 'bump', 'yes', 'chicane', 'choker'}


In [28]:
# Distinct "stop" labels over the node ids in the Seattle table.
# NOTE(review): the recorded output lacks the "stop_" prefix, so it is stale.
stop = gather_key_values(list(seattle["NODE"]), seattle_graph, "stop")
stop_uniques = set(stop)
print(stop_uniques)

{'UNKNOWN', 'all'}
