#### Import Libraries for Scraping

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.feature_extraction import stop_words

### Road Suffix List

In [2]:
road_suffix_df = pd.read_csv("../Data/Road_Suffix_List.csv")

In [3]:
suffix_list = list(set(road_suffix_df["Primary Street Name"].str.lower().tolist()))

In [4]:
suffix_dict = {}
for roads in road_suffix_df.index:
    name = road_suffix_df.loc[roads, "Primary Street Name"]
    suffix_dict[name] = road_suffix_df[road_suffix_df["Primary Street Name"] == name]["Abbreviations"].str.lower().to_list()

In [5]:
suffix_dict["ROUTE"] = ["route", "rte"]

In [6]:
def check_road_suffix(text, suffix_dict):
    text_suffix = []
    for suffix, abbrevs in suffix_dict.items():
        for abbrev in abbrevs:
            if abbrev in text:
                text_suffix.append(suffix)
    return text_suffix

### Scrape Main Interstate Highways

In [7]:
interstate_df = pd.read_csv("../Data/Interstate_List.csv")

In [8]:
interstate_list = list(set(interstate_df["Interstate Highway"].tolist()))

In [9]:
interstate_dict = {}
for interstate in interstate_list:
    interstate_dict[interstate] = {"states" : interstate_df[interstate_df["Interstate Highway"] == interstate]["State"].to_list(),
                                   "names" : []}

In [10]:
for roads, values in interstate_dict.items():
    lst = []
    lower = roads.lower()
    space = lower.replace("-", " ")
    remove = lower.replace("-", "")
    lst.append(lower)
    lst.append(space)
    lst.append(remove)
    base_list = [lower, space, remove]
    replacements = [{"i" : "interstate"}]
    for dicts in replacements:
        keys = list(dicts.keys())[0]
        values = list(dicts.values())[0]
        for bases in base_list:
            lst.append(bases.replace(keys, values))
    interstate_dict[roads]["names"] = lst

In [11]:
def get_state_interstates(state, i_dict):
    new_dict = {}
    for roads, values in i_dict.items():
        if(state in i_dict[roads]["states"]):
            new_dict[roads] = i_dict[roads]["names"]
    return new_dict

In [12]:
virginia_interstates = get_state_interstates("Virginia", interstate_dict)
texas_interstates = get_state_interstates("Texas", interstate_dict)
ncarolina_interstates = get_state_interstates("North Carolina", interstate_dict)
scarolina_interstates = get_state_interstates("South Carolina", interstate_dict)
florida_interstates = get_state_interstates("Florida", interstate_dict)

## Auxiliary Interstate Roads

In [13]:
aux_df = pd.read_csv("../Data/Auxiliary_List.csv")

In [14]:
aux_list = list(set(aux_df["Interstate Route"].tolist()))

In [15]:
aux_dict = {}

In [16]:
for aux_routes in aux_list:
    aux_dict[aux_routes] = {"states" : [], "names" : []}

In [17]:
for highways, values in aux_dict.items():
    values["states"] = aux_df[aux_df["Interstate Route"] == highways]["State"].to_list()

In [18]:
for roads, values in aux_dict.items():
    lst = []
    lower = roads.lower()
    space = lower.replace("-", " ")
    remove = lower.replace("-", "")
    lst.append(lower)
    lst.append(space)
    lst.append(remove)
    base_list = [lower, space, remove]
    replacements = [{"i" : "interstate"}]
    for dicts in replacements:
        keys = list(dicts.keys())[0]
        values = list(dicts.values())[0]
        for bases in base_list:
            lst.append(bases.replace(keys, values))
    aux_dict[roads]["names"] = lst

In [19]:
virginia_aux = get_state_interstates("Virginia", aux_dict)
ncarolina_aux = get_state_interstates("North Carolina", aux_dict)
scarolina_aux = get_state_interstates("South Carolina", aux_dict)
florida_aux = get_state_interstates("Florida", aux_dict)
texas_aux = get_state_interstates("Texas", aux_dict)

## US Routes

In [20]:
us_routes_df = pd.read_csv("../Data/US_Routes.csv")

In [21]:
us_routes_dict = {}
for roads in us_routes_df.index:
    us_routes_dict[us_routes_df.loc[roads, "US Routes"]] = []

In [22]:
for roads in us_routes_df.index:
    name = us_routes_df.loc[roads, "US Routes"]
    lst = []
    lower = name.lower()
    dash = lower.replace(" ", "-")
    remove = lower.replace(" ", "")
    lst.append(lower)
    lst.append(dash)
    lst.append(remove)
    base_list = [lower, dash, remove]
    replacements = [{"us" : "u.s."},
                   {"us" : "us route"},
                   {"us" : "u.s. route"},
                   {"us" : "us rte"},
                   {"us" : "u.s. rte"},
                   {"us" : "us-route"},
                   {"us" : "u.s.-route"},
                   {"us" : "us-rte"},
                   {"us" : "u.s.-rte"}]
    for dicts in replacements:
        keys = list(dicts.keys())[0]
        values = list(dicts.values())[0]
        for bases in base_list:
            lst.append(bases.replace(keys, values))
    us_routes_dict[name] = lst    

## State Routes

In [23]:
state_roads_df = pd.read_csv("../Data/State_Road_List.csv")

In [24]:
texas_roads_df = state_roads_df[state_roads_df["State"] == "Texas"][["Road Type", "Name"]]
ncarolina_roads_df = state_roads_df[state_roads_df["State"] == "North Carolina"][["Road Type", "Name"]]
scarolina_roads_df = state_roads_df[state_roads_df["State"] == "South Carolina"][["Road Type", "Name"]]
florida_roads_df = state_roads_df[state_roads_df["State"] == "Florida"][["Road Type", "Name"]]
virginia_roads_df = state_roads_df[state_roads_df["State"] == "Virginia"][["Road Type", "Name"]]

### Create dictionaries for each state that contain state roads as keys and list "names" to be populated with alternative spellings

In [25]:
texas_roads_dict = {}
for roads in texas_roads_df.index:
    texas_roads_dict[texas_roads_df.loc[roads, "Name"]] = []

In [26]:
ncarolina_roads_dict = {}
for roads in ncarolina_roads_df.index:
    ncarolina_roads_dict[ncarolina_roads_df.loc[roads, "Name"]] = []

In [27]:
scarolina_roads_dict = {}
for roads in scarolina_roads_df.index:
    scarolina_roads_dict[scarolina_roads_df.loc[roads, "Name"]] = []

In [28]:
florida_roads_dict = {}
for roads in florida_roads_df.index:
    florida_roads_dict[florida_roads_df.loc[roads, "Name"]] = []

In [29]:
virginia_roads_dict = {}
for roads in virginia_roads_df.index:
    virginia_roads_dict[virginia_roads_df.loc[roads, "Name"]] = []

### Create list of alternative highway/road spellings for each state highway

In [30]:
for roads in texas_roads_df.index:
    name = texas_roads_df.loc[roads, "Name"]
    road_type = texas_roads_df.loc[roads, "Road Type"]
    lst = []
    lower = name.lower()
    dash = lower.replace(" ", "-")
    remove = lower.replace(" ", "")
    lst.append(lower)
    lst.append(dash)
    lst.append(remove)
    base_list = [lower, dash, remove]
    if(road_type == "State Highway"):
        for abbrevs in suffix_dict["HIGHWAY"]:
            lst.append(lower.replace("sh", "state " + abbrevs.lower()))
            lst.append(lower.replace("sh", "st " + abbrevs.lower()))
            lst.append(dash.replace("sh", "state " + abbrevs.lower()))
            lst.append(dash.replace("sh", "st " + abbrevs.lower()))
            lst.append(remove.replace("sh", "state " + abbrevs.lower()))
            lst.append(remove.replace("sh", "st " + abbrevs.lower()))
            lst.append(lower.replace("sh", "state " + abbrevs.lower() + "s"))
            lst.append(lower.replace("sh", "st " + abbrevs.lower() + "s"))
            lst.append(dash.replace("sh", "state " + abbrevs.lower() + "s"))
            lst.append(dash.replace("sh", "st " + abbrevs.lower() + "s"))
            lst.append(remove.replace("sh", "state " + abbrevs.lower() + "s"))
            lst.append(remove.replace("sh", "st " + abbrevs.lower() + "s"))
    replacements = [{"loop" : "sl"},
                    {"fm" : "farm to market"},
                    {"fm" : "farm-to-market"},
                    {"fm" : "farm to market road"},
                    {"fm" : "farm-to-market road"},
                    {"fm" : "farm to market roads"},
                    {"fm" : "farm-to-market roads"},
                    {"pr" : "park road"},
                    {"recreational road" : "re"},
                    {"recreational road" : "recreational roads"},
                    {"ranch road" : "rr"}]
    for dicts in replacements:
        keys = list(dicts.keys())[0]
        values = list(dicts.values())[0]
        for bases in base_list:
            lst.append(bases.replace(keys, values))
    texas_roads_dict[name] = lst    

In [31]:
for roads in ncarolina_roads_df.index:
    name = ncarolina_roads_df.loc[roads, "Name"]
    road_type = ncarolina_roads_df.loc[roads, "Road Type"]
    lst = []
    lower = name.lower()
    dash = lower.replace(" ", "-")
    remove = lower.replace(" ", "")
    lst.append(lower)
    lst.append(dash)
    lst.append(remove)
    base_list = [lower, dash, remove]
    for abbrevs in suffix_dict["HIGHWAY"]:
        for bases in base_list:
            lst.append(bases.replace("nc", abbrevs))
    ncarolina_roads_dict[name] = lst    

In [32]:
for roads in scarolina_roads_df.index:
    name = scarolina_roads_df.loc[roads, "Name"]
    road_type = scarolina_roads_df.loc[roads, "Road Type"]
    lst = []
    lower = name.lower()
    dash = lower.replace(" ", "-")
    remove = lower.replace(" ", "")
    lst.append(lower)
    lst.append(dash)
    lst.append(remove)
    base_list = [lower, dash, remove]
    for abbrevs in suffix_dict["HIGHWAY"]:
        for bases in base_list:
            lst.append(bases.replace("sc", abbrevs))
    scarolina_roads_dict[name] = lst    

In [33]:
for roads in virginia_roads_df.index:
    name = virginia_roads_df.loc[roads, "Name"]
    road_type = virginia_roads_df.loc[roads, "Road Type"]
    lst = []
    lower = name.lower()
    dash = lower.replace(" ", "-")
    remove = lower.replace(" ", "")
    lst.append(lower)
    lst.append(dash)
    lst.append(remove)
    base_list = [lower, dash, remove]
    for bases in base_list:
        lst.append(bases.replace("sr", "state route"))
        lst.append(bases.replace("sr", "st route"))
        lst.append(bases.replace("sr", "state rte"))
        lst.append(bases.replace("sr", "st rte "))
    virginia_roads_dict[name] = lst

In [34]:
for roads in florida_roads_df.index:
    name = florida_roads_df.loc[roads, "Name"]
    road_type = florida_roads_df.loc[roads, "Road Type"]
    lst = []
    lower = name.lower()
    dash = lower.replace(" ", "-")
    remove = lower.replace(" ", "")
    lst.append(lower)
    lst.append(dash)
    lst.append(remove)
    base_list = [lower, dash, remove]
    replacements = [{"sr" : "state road"},
                    {"sr" : "state rd"},
                    {"sr" : "st road"},
                    {"sr" : "st rd"},
                    {"causeway" : "causwa"},
                    {"causeway" : "cswy"},
                    {"bridge" : "brdge"},
                    {"bridge" : "brg"},
                   {"road" : "rd"},
                   {"expresway" : "exp"},
                   {"expresway" : "expr"},
                   {"expresway" : "express"},
                   {"expresway" : "expw"},
                   {"expresway" : "expy"}]
    for dicts in replacements:
        keys = list(dicts.keys())[0]
        values = list(dicts.values())[0]
        for bases in base_list:
            lst.append(bases.replace(keys, values))
    florida_roads_dict[name] = lst    

In [35]:
def string_from_list(values_list):
    values_string = ""
    if(len(values_list) > 0):
        for i in range(len(values_list)):  
            values_string = str(values_string) + str(values_list[i]) + " "
    else:
        values_string = "None"
    return values_string

## Create dictionary containing a list of roads relevant to each state

In [36]:
master_road_dict = {"Florida" : [florida_interstates, florida_aux, us_routes_dict, florida_roads_dict],
                   "Virginia" : [virginia_interstates, virginia_aux, us_routes_dict, virginia_roads_dict],
                   "North Carolina" : [ncarolina_interstates, ncarolina_aux, us_routes_dict, ncarolina_roads_dict],
                   "South Carolina" : [scarolina_interstates, scarolina_aux, us_routes_dict, scarolina_roads_dict],
                   "Texas" : [texas_interstates, texas_aux, us_routes_dict, texas_roads_dict]}

### Define function to check for presence of road in string
This function iterates through text to create list of all roads that are found in the string, and returns a dataframe that contains 3 columns:

    1) name of the road
    2) which word the road is contained within (when text is split into words by a space)
    3) index position of the road within the word

Roads that are substrings of other roads are removed, but roads that appear more than once in separate parts of the text are not removed

In [83]:
def check_roads(state, text):
# Create variables including dictionary of roads to search over, a list of words in each tweet,
# and lists to track the presence of roads inside text
    state_roads = master_road_dict[state]
    words = text.split()
    relevant_roads = []
    road_list_position = []
    road_substring_indices = []
    index_tracker = 0
# Loop over the dictionary of roads to check if that road is present in the tweet, and keep track of its relative position 
# within the tweet
    for dicts in state_roads:
        for road, abbrevs in dicts.items():
            for abbrev in abbrevs:
                for i in range(len(words)):
                    if((abbrev in words[i]) & (road not in relevant_roads)):
                        relevant_roads.append(road)
                        road_substring_indices.append(words[i].find(abbrev))
                        road_list_position.append(i)
    
    print(road_list_position)
    print(road_substring_indices)
    print(relevant_roads)
# Loop over newly created list of roads found within the tweet, and delete any roads that are a substring of another road.
# For instance, US-10 should only return US-10 and exclude US-1 even though US-1 is found within US-10.
# First, a list of indices to delete is created - first set of loops. Then, new lists are created where only those not to be
# deleted are added - second loop

# First loop
    delete_list = []
    for i in range(len(relevant_roads)):
        for j in range(len(relevant_roads)):
            if ((i != j) &
                (j not in delete_list) &
                (road_substring_indices[i] == road_substring_indices[j]) &
               (road_list_position[i] == road_list_position[j]) &
               (road))
# New lists
    clean_relevant_roads = []
    clean_road_substring_indices = []
    clean_road_list_position = []
# Second loop
    for i in range(len(relevant_roads)):
        if i not in delete_list:
            clean_relevant_roads.append(relevant_roads[i])
            clean_road_substring_indices.append(road_substring_indices[i])
            clean_road_list_position.append(road_list_position[i])
# Create dataframe such that 1) all lists can be sorted in the same way
# and 2) to calculate the overall positioning of a found road within the tweet as a combination of:
# a) which word the road was found in and b) its index within the word.
# This accounts for typos where spaces are not used in between words.
# For instance, in the text "Accident on I-90 between I-93andi-95", we would want to differentiate the relative position
# of i-93 and i-95 within the text, even though both are part of the same word.
    roads_df = pd.DataFrame()
    roads_df["relevant_roads"] = clean_relevant_roads
    roads_df["road_substring_indices"] = clean_road_substring_indices
    roads_df["road_list_position"] = clean_road_list_position
    roads_df = roads_df.sort_values(by=["road_list_position", "road_substring_indices"])
    roads_df = roads_df.reset_index(drop=True)
    roads_df["overall_road_position"] = roads_df.index + 1
# After getting values as a dataframe for each text, below code and use of above function
# takes each series and converts to a list such that text can be stored inside of a dataframe containing many tweets
    roads_string = string_from_list(roads_df["relevant_roads"])
    roads_order = string_from_list(roads_df["overall_road_position"])
    has_road = 1 if roads_string != "None" else 0
    num_roads = roads_df["relevant_roads"].size
    return [roads_string, roads_order, has_road, num_roads]

In [84]:
check_roads("Texas","fm-2855 southbound at us-90 - high water - status: cleared at 6:40 pm")

[3, 3, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0]
['US 9', 'US 90', 'FM 2', 'FM 28', 'FM 285', 'FM 2855']
[1]


['FM 2 FM 28 FM 285 FM 2855 US 9 ', '1 2 3 4 5 ', 1, 5]

### Cities Towns and Counties Matching

In [38]:
cities_list = pd.read_csv("../Data/Cities_List.csv")

In [39]:
counties_list = pd.read_csv("../Data/us_counties.csv",  encoding='ISO-8859-1')

In [40]:
cities_list["City"] = cities_list["City"].str.lower()

In [41]:
counties_list["County or equivalent"] = counties_list["County or equivalent"].str.lower()
relevant_counties = counties_list[(counties_list["State or district"] == "Virginia") |
                                   (counties_list["State or district"] == "North Carolina") |
                                  (counties_list["State or district"] == "South Carolina") |
                                  (counties_list["State or district"] == "Florida") |
                                  (counties_list["State or district"] == "Texas")]

In [42]:
state_list = list(set(relevant_counties["State or district"].to_list()))
state_areas_dict = {}
for state in state_list:
    state_areas_dict[state] = {"counties" : {},
                               "cities" : {}}

In [43]:
for state in state_list:
    counties = relevant_counties[relevant_counties["State or district"] == state]["County or equivalent"]
    cities = cities_list[cities_list["State"] == state]["City"]
    for county in counties:
        state_areas_dict[state]["counties"][county] = [county,
                                                       county.replace("county", "cty"),
                                                       county.replace("county", "cnty"),
                                                       county.replace(" county", "county"),
                                                       county.replace(" county", "cty"),
                                                       county.replace(" county", "cnty"),
                                                      county.replace(" county", "")]
    for city in cities:
        state_areas_dict[state]["cities"][city] = [city]        

In [44]:
def check_city_county(state, state_areas_dict, text, area_type):
    areas_list = []
    areas_list_position = []
    areas_substring_indices = []
    words = text.split()
    for area in state_areas_dict[state][area_type]:
        for abbrev in state_areas_dict[state][area_type][area]:
            for i in range(len(words)):
                if abbrev in words[i]:
                    areas_list.append(area)
                    areas_list_position.append(i)
                    areas_substring_indices.append(words[i].find(abbrev))
    areas_df = pd.DataFrame()
    areas_df[area_type + "_list"] = areas_list
    areas_df[area_type + "_list_position"] = areas_list_position
    areas_df[area_type + "_substring_indices"] = areas_substring_indices
    areas_df = areas_df.sort_values(by = [area_type + "_list_position", area_type + "_substring_indices"])
    areas_df = areas_df.reset_index(drop=True)
    areas_df["overall_" + area_type +"_position"] = areas_df.index + 1
    
    area_string = string_from_list(areas_df[area_type + "_list"])
    area_order = string_from_list(areas_df[area_type + "_list_position"])
    has_area = 1 if area_string != "None" else 0
    num_areas = areas_df[area_type + "_list"].size
    return [area_string, area_order, has_area, num_areas]

In [45]:
def check_county_word(text):
    county_abbrevs = ["county", "cty", "cnty"]
    has_county = False
    for abbrev in county_abbrevs:
        if abbrev in text:
            has_county = True
    return has_county        

### Read in Tweets from Each Hurricane

In [46]:
florence_df = pd.read_csv("../Data/florence_clean.csv")
harvey_df = pd.read_csv("../Data/harvey_clean.csv")
michael_df = pd.read_csv("../Data/michael_clean.csv")

In [49]:
def annotate(df):
    new_cols = pd.DataFrame()
    num = []
    highway_strings = []
    highway_orders = []
    highway_bools = []
    highway_lengths = []
    
    city_strings = []
    city_orders = []
    city_bools = []
    city_lengths = []
    
    county_strings = []
    county_orders = []
    county_bools = []
    county_lengths = []
    county_words=[]
    
    
    for rows in df.index:
        state = df.loc[rows,"state"]
        text = df.loc[rows,"text"]
        roads_values = check_roads(state, text)
        city_values = check_city_county(state, state_areas_dict, text, "cities")
        county_values = check_city_county(state, state_areas_dict, text, "counties")
        county_word = check_county_word(text)
        
        highway_strings.append(roads_values[0])
        highway_orders.append(roads_values[1])
        highway_bools.append(roads_values[2])
        highway_lengths.append(roads_values[3])

        city_strings.append(city_values[0])
        city_orders.append(city_values[1])
        city_bools.append(city_values[2])
        city_lengths.append(city_values[3])

        county_strings.append(county_values[0])
        county_orders.append(county_values[1])
        county_bools.append(county_values[2])
        county_lengths.append(county_values[3])
        
    new_cols["highway_string"] = highway_strings
    new_cols["highway_order"] = highway_orders
    new_cols["has_highway"] = highway_bools
    new_cols["num_highways"] = highway_lengths
    
    new_cols["county_string"] = county_strings
    new_cols["county_order"] = county_orders
    new_cols["has_county"] = county_bools
    new_cols["num_counties"] = county_lengths
    
    new_cols["city_string"] = city_strings
    new_cols["city_order"] = city_orders
    new_cols["has_city"] = city_bools
    new_cols["num_cities"] = city_lengths

    return new_cols

In [85]:
florence_new_values = annotate(florence_df)

[36, 40]
[0, 0]
['US 1', 'US 7']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[23, 23, 23]
[0, 0, 0]
['US 1', 'US 11', 'US 117']
[1]
[34, 34]
[0, 0]
['US 7', 'US 70']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[0, 0]
[0, 0]
['US 1', 'US 15']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[0, 0, 0]
[0, 0, 0]
['NC 9', 'NC 90', 'NC 902']
[1]
[0, 0, 0]
[0, 0, 0]
['NC 9', 'NC 90', 'NC 902']
[1]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[0, 0, 0]
[0, 0, 0]
['US 5', 'US 50', 'US 501']
[1]
[0, 0, 0]
[0, 0, 0]
['US 5', 'US 50', 'US 501']
[1]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[0, 0]
[0, 0]
['NC 8', 'NC 87']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[0, 0]
[0, 0]
['NC 3', 'NC 39']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[0]
[0]
['I-540']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]


[0, 0, 0]
[0, 0, 0]
['US 5', 'US 50', 'US 501']
[1]
[]
[]
[]
[]
[0]
[0]
['I-40']
[]
[0]
[0]
['I-440']
[]
[0]
[0]
['I-40']
[]
[]
[]
[]
[]
[0]
[0]
['I-40']
[]
[0]
[0]
['I-40']
[]
[0]
[0]
['I-40']
[]
[0]
[0]
['I-40']
[]
[0]
[0]
['I-40']
[]
[]
[]
[]
[]
[0]
[0]
['I-95']
[]
[10]
[0]
['I-40']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[5, 8]
[0, 0]
['I-40', 'I-540']
[]
[5, 8]
[0, 0]
['I-40', 'I-540']
[]
[]
[]
[]
[]
[5, 8]
[0, 0]
['I-40', 'I-540']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[3]
[0]
['I-440']
[]
[5]
[0]
['I-40']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[5]
[0]
['I-40']
[]
[]
[]
[]
[]
[5]
[0]
['I-40']
[]
[5]
[0]
['I-40']
[]
[]
[]
[]
[]
[6]
[0]
['I-40']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[7]
[0]
['I-40']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[14, 12, 12]
[0, 0, 0]
['I-40', 'NC 8', 'NC 82']
[1]
[18, 16, 16]
[0, 0, 0]
['I-95', 'NC 4', 'NC 403']
[1]
[]
[]
[]
[]
[]
[]
[]
[]
[11]
[0]
['I-40']
[]
[]
[]
[]
[]
[4]
[0]
['I-40']
[]
[]
[]
[]
[]
[9]
[0]
['I-40']

[]
[]
[]
[]
[7]
[0]
['I-440']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[5]
[0]
['I-40']
[]
[5]
[0]
['I-440']
[]
[5]
[0]
['I-440']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[4]
[0]
['I-440']
[]
[5, 12, 5, 12]
[0, 0, 0, 0]
['US 1', 'US 11', 'US 13', 'US 117']
[1]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[25]
[0]
['I-85']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[7]
[0]
['I-440']
[]
[18, 16, 16]
[0, 0, 0]
['I-95', 'NC 4', 'NC 403']
[1]
[]
[]
[]
[]
[]
[]
[]
[]
[7]
[0]
['I-40']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[5]
[0]
['I-440']
[]
[]
[]
[]
[]
[7]
[0]
['I-40']
[]
[]
[]
[]
[]
[5]
[0]
['I-440']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[4]
[0]
['I-540']
[]
[5]
[0]
['I-40']
[]
[]
[]
[]
[]
[19]
[0]
['I-85']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[10]
[0]
['I-540']
[]
[7]
[0]
['I-40']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[5]
[0]
['I-540']
[]
[]
[]
[]
[]
[19]
[0]
['I-85']
[]
[5]
[0]
['I

[5, 5]
[0, 0]
['SC 2', 'SC 22']
[]
[5, 5]
[0, 0]
['US 3', 'US 378']
[]
[]
[]
[]
[]
[10, 10]
[0, 0]
['US 3', 'US 378']
[]
[10, 10]
[0, 0]
['US 3', 'US 378']
[]
[6, 10, 6, 10]
[0, 0, 0, 0]
['SC 2', 'SC 5', 'SC 22', 'SC 544']
[1]
[2]
[0]
['I-95']
[]
[1]
[0]
['I-95']
[]
[6, 10, 6, 10]
[0, 0, 0, 0]
['SC 2', 'SC 5', 'SC 22', 'SC 544']
[1]
[11, 11]
[0, 0]
['SC 2', 'SC 22']
[]
[3]
[0]
['I-20']
[]
[3]
[0]
['I-20']
[]
[2]
[0]
['I-20']
[]
[6, 6]
[0, 0]
['US 3', 'US 378']
[]
[10, 10]
[0, 0]
['US 3', 'US 378']
[]
[6, 6]
[0, 0]
['US 3', 'US 378']
[]
[9, 9]
[0, 0]
['US 3', 'US 378']
[]
[5, 5, 5]
[0, 0, 0]
['US 7', 'US 70', 'US 701']
[1]
[7, 7, 7]
[0, 0, 0]
['US 7', 'US 70', 'US 701']
[1]
[7, 7, 7]
[0, 0, 0]
['US 7', 'US 70', 'US 701']
[1]
[6, 6, 6]
[0, 0, 0]
['US 7', 'US 70', 'US 701']
[1]
[1]
[0]
['I-95']
[]
[1]
[0]
['I-95']
[]
[4]
[0]
['I-95']
[]
[4]
[0]
['I-95']
[]
[3]
[0]
['I-95']
[]
[4]
[0]
['I-95']
[]
[4]
[0]
['I-95']
[]
[3]
[0]
['I-95']
[]
[3]
[0]
['I-95']
[]
[4]
[0]
['I-95']
[]
[4]
[0]
['I-95

[3, 7, 7]
[0, 10, 10]
['I-85', 'SC 2', 'SC 291']
[1]
[3]
[0]
['I-385']
[]
[2]
[0]
['I-385']
[]
[2, 6, 6]
[0, 10, 10]
['I-85', 'SC 2', 'SC 291']
[1]
[2, 9, 9]
[0, 4, 4]
['I-85', 'SC 14', 'SC 146']
[1]
[2]
[0]
['I-385']
[]
[2]
[0]
['I-385']
[]
[1]
[0]
['I-385']
[]
[2, 6, 6]
[0, 4, 4]
['I-85', 'SC 14', 'SC 146']
[1]
[2, 6, 6]
[0, 4, 4]
['I-85', 'SC 14', 'SC 146']
[1]
[3]
[0]
['I-385']
[]
[3]
[0]
['I-385']
[]
[2]
[0]
['I-385']
[]
[2, 6, 6]
[0, 4, 4]
['I-85', 'SC 14', 'SC 146']
[1]
[3]
[0]
['I-385']
[]
[1, 5, 5]
[0, 4, 4]
['I-85', 'SC 14', 'SC 146']
[1]
[3]
[0]
['I-385']
[]
[2]
[0]
['I-385']
[]
[2]
[0]
['I-26']
[]
[2]
[0]
['I-26']
[]
[3]
[0]
['I-26']
[]
[2]
[0]
['I-26']
[]
[2]
[0]
['I-26']
[]
[1]
[0]
['I-26']
[]
[2]
[0]
['I-385']
[]
[2]
[0]
['I-385']
[]
[1]
[0]
['I-385']
[]
[2]
[0]
['I-85']
[]
[2]
[0]
['I-85']
[]
[2]
[0]
['I-85']
[]
[2]
[0]
['I-85']
[]
[2]
[0]
['I-85']
[]
[2]
[0]
['I-85']
[]
[1]
[0]
['I-85']
[]
[2]
[0]
['I-85']
[]
[3, 13, 13]
[0, 4, 4]
['I-85', 'SC 14', 'SC 146']
[1]
[2]
[0

[]
[]
[]
[]
[4]
[0]
['I-26']
[]
[3]
[0]
['I-26']
[]
[3]
[0]
['I-526']
[]
[2]
[0]
['I-526']
[]
[2]
[0]
['I-26']
[]
[3]
[0]
['I-526']
[]
[2]
[0]
['I-526']
[]
[1]
[0]
['I-95']
[]
[2]
[0]
['I-95']
[]
[2]
[0]
['I-95']
[]
[1]
[0]
['I-26']
[]
[2]
[0]
['I-26']
[]
[1]
[0]
['I-26']
[]
[2]
[0]
['I-26']
[]
[1]
[0]
['I-95']
[]
[1]
[0]
['I-95']
[]
[2]
[0]
['I-26']
[]
[1]
[0]
['I-26']
[]
[2]
[0]
['I-26']
[]
[2]
[0]
['I-26']
[]
[2]
[0]
['I-26']
[]
[2]
[0]
['I-26']
[]
[1]
[0]
['I-26']
[]
[2]
[0]
['I-26']
[]
[2]
[0]
['I-26']
[]
[1]
[0]
['I-26']
[]
[2]
[0]
['I-526']
[]
[2]
[0]
['I-526']
[]
[2]
[0]
['I-26']
[]
[2]
[0]
['I-26']
[]
[2]
[0]
['I-526']
[]
[2]
[0]
['I-526']
[]
[2]
[0]
['I-26']
[]
[2]
[0]
['I-95']
[]
[2]
[0]
['I-26']
[]
[1]
[0]
['I-26']
[]
[2]
[0]
['I-95']
[]
[2]
[0]
['I-95']
[]
[1]
[0]
['I-95']
[]
[4]
[0]
['I-526']
[]
[3]
[0]
['I-526']
[]
[1]
[0]
['I-95']
[]
[4]
[0]
['I-26']
[]
[3]
[0]
['I-26']
[]
[4]
[0]
['I-95']
[]
[4]
[0]
['I-95']
[]
[4]
[0]
['I-95']
[]
[3]
[0]
['I-95']
[]
[3]
[0]
['I-526']


In [87]:
florence_new_values

Unnamed: 0,highway_string,highway_order,has_highway,num_highways,county_string,county_order,has_county,num_counties,city_string,city_order,has_city,num_cities
0,US 1 US 7,1 2,1,2,,,0,0,,,0,0
1,,,0,0,bertie county martin county,0 24,1,2,,,0,0
2,,,0,0,,,0,0,,,0,0
3,,,0,0,,,0,0,,,0,0
4,,,0,0,,,0,0,,,0,0
5,,,0,0,,,0,0,,,0,0
6,,,0,0,,,0,0,,,0,0
7,,,0,0,,,0,0,,,0,0
8,,,0,0,,,0,0,,,0,0
9,,,0,0,,,0,0,,,0,0


In [88]:
florence_new_values.to_csv("../Data/florence_new_values.csv")

In [53]:
harvey_new_values = annotate(harvey_df)

In [54]:
harvey_new_values.to_csv("../Data/harvey_new_values.csv")

In [55]:
michael_new_values = annotate(michael_df)

KeyError: 'Georgia'

In [None]:
michael_new_values.to_csv("../Data/michael_new_values.csv")