## Get Street Names
---
**Code Adapted from:** [Mitchell Bohman, Nour Zahlan, and Masiur Abik](https://github.com/mchbmn/radio-to-location) and [Joseph Hopkins, Carol, Chiu, Anthony Chapman, Kwamae Delva](https://github.com/delvakwa/police_radio_to_mapping)

#### Import Libraries

In [93]:
import spacy
import re
import pandas as pd
from spacy import displacy
from spacy.attrs import LOWER 
from collections import Counter
from spacy.matcher import Matcher
import numpy as np
import usaddress
nlp = spacy.load('en')

In [94]:
pd.options.display.max_colwidth = 1000

In [95]:
# Refresh stored variables from previous notebooks
%store -r

#### Open Saved File and Print First 5 Rows

In [96]:
df = pd.read_csv('./data/transcripts.csv')
df.head()

Unnamed: 0,transcripts,confidence,tokens
0,903 Cloud hole David Cloud hall because thank you sounds like you have a run in 801 Groban parking because we do we actually have a couple days and see you stand like this for that going to be quality priority 4018 Roi,0.780831,"['903', 'cloud', 'hole', 'david', 'cloud', 'hall', 'because', 'thank', 'you', 'sounds', 'like', 'you', 'have', 'a', 'run', 'in', '801', 'groban', 'parking', 'because', 'we', 'do', 'we', 'actually', 'have', 'a', 'couple', 'days', 'and', 'see', 'you', 'stand', 'like', 'this', 'for', 'that', 'going', 'to', 'be', 'quality', 'priority', '4018', 'roi']"
1,and I am going to throw safe I'll be going oh please,0.865382,"['and', 'i', 'am', 'going', 'to', 'throw', 'safe', ""i'll"", 'be', 'going', 'oh', 'please']"
2,having a psychotic episode in front of the house breaking things and pushing here to back to searching up three I don't want this boy bye,0.705537,"['having', 'a', 'psychotic', 'episode', 'in', 'front', 'of', 'the', 'house', 'breaking', 'things', 'and', 'pushing', 'here', 'to', 'back', 'to', 'searching', 'up', 'three', 'i', ""don't"", 'want', 'this', 'boy', 'bye']"
3,are the same as in a 3-1 one average because there's a male that Shadow Boxing on the sidewalk give us a raise or clothing description and there's a picture attached to the toes the business across the street what's the word,0.751208,"['are', 'the', 'same', 'as', 'in', 'a', '3-1', 'one', 'average', 'because', ""there's"", 'a', 'male', 'that', 'shadow', 'boxing', 'on', 'the', 'sidewalk', 'give', 'us', 'a', 'raise', 'or', 'clothing', 'description', 'and', ""there's"", 'a', 'picture', 'attached', 'to', 'the', 'toes', 'the', 'business', 'across', 'the', 'street', ""what's"", 'the', 'word']"
4,country your way will be in the area searching the enemy might include for DUI,0.660583,"['country', 'your', 'way', 'will', 'be', 'in', 'the', 'area', 'searching', 'the', 'enemy', 'might', 'include', 'for', 'dui']"


In [97]:
df.shape

(17, 3)

---

In [98]:
# function to extract locations using spaCy pre trained labels
def location_extraction(string_in):
    doc = nlp(string_in)
    locations = []
    # loop through every entity in the transcript
    for X in doc.ents:
        if (X.label_ == 'FAC') or (X.label_ == 'GPE'):
            locations.append(X.text)
    if len(locations) != 0:
        return locations
    return None

# Add a column with the extracted locations
df['location_extraction'] = df['transcripts'].map(location_extraction)

In [99]:
df.head(10)

Unnamed: 0,transcripts,confidence,tokens,location_extraction
0,903 Cloud hole David Cloud hall because thank you sounds like you have a run in 801 Groban parking because we do we actually have a couple days and see you stand like this for that going to be quality priority 4018 Roi,0.780831,"['903', 'cloud', 'hole', 'david', 'cloud', 'hall', 'because', 'thank', 'you', 'sounds', 'like', 'you', 'have', 'a', 'run', 'in', '801', 'groban', 'parking', 'because', 'we', 'do', 'we', 'actually', 'have', 'a', 'couple', 'days', 'and', 'see', 'you', 'stand', 'like', 'this', 'for', 'that', 'going', 'to', 'be', 'quality', 'priority', '4018', 'roi']",
1,and I am going to throw safe I'll be going oh please,0.865382,"['and', 'i', 'am', 'going', 'to', 'throw', 'safe', ""i'll"", 'be', 'going', 'oh', 'please']",
2,having a psychotic episode in front of the house breaking things and pushing here to back to searching up three I don't want this boy bye,0.705537,"['having', 'a', 'psychotic', 'episode', 'in', 'front', 'of', 'the', 'house', 'breaking', 'things', 'and', 'pushing', 'here', 'to', 'back', 'to', 'searching', 'up', 'three', 'i', ""don't"", 'want', 'this', 'boy', 'bye']",
3,are the same as in a 3-1 one average because there's a male that Shadow Boxing on the sidewalk give us a raise or clothing description and there's a picture attached to the toes the business across the street what's the word,0.751208,"['are', 'the', 'same', 'as', 'in', 'a', '3-1', 'one', 'average', 'because', ""there's"", 'a', 'male', 'that', 'shadow', 'boxing', 'on', 'the', 'sidewalk', 'give', 'us', 'a', 'raise', 'or', 'clothing', 'description', 'and', ""there's"", 'a', 'picture', 'attached', 'to', 'the', 'toes', 'the', 'business', 'across', 'the', 'street', ""what's"", 'the', 'word']",
4,country your way will be in the area searching the enemy might include for DUI,0.660583,"['country', 'your', 'way', 'will', 'be', 'in', 'the', 'area', 'searching', 'the', 'enemy', 'might', 'include', 'for', 'dui']",
5,Joyce we have them because there's a search for him I'm Peyton and I got a lot of static Anthony four times seven,0.657435,"['joyce', 'we', 'have', 'them', 'because', ""there's"", 'a', 'search', 'for', 'him', ""i'm"", 'peyton', 'and', 'i', 'got', 'a', 'lot', 'of', 'static', 'anthony', 'four', 'times', 'seven']",
6,that's realization for me for any reason yeah 105 I'll find you somebody to L6 Chrysler manual,0.800576,"[""that's"", 'realization', 'for', 'me', 'for', 'any', 'reason', 'yeah', '105', ""i'll"", 'find', 'you', 'somebody', 'to', 'l6', 'chrysler', 'manual']",
7,have you ever No George to Adam feature extraction are located the RP I'll be making the night,0.788395,"['have', 'you', 'ever', 'no', 'george', 'to', 'adam', 'feature', 'extraction', 'are', 'located', 'the', 'rp', ""i'll"", 'be', 'making', 'the', 'night']",[RP]
8,but a device area 30 PL on the noise today's a man on tape,0.781853,"['but', 'a', 'device', 'area', '30', 'pl', 'on', 'the', 'noise', ""today's"", 'a', 'man', 'on', 'tape']",
9,Locke and Larkin going to be like a massage table it's trash full trash bag probably,0.842343,"['locke', 'and', 'larkin', 'going', 'to', 'be', 'like', 'a', 'massage', 'table', ""it's"", 'trash', 'full', 'trash', 'bag', 'probably']",


---

#### Split All List Elements into Single Strings

In [100]:
streets_list[:5]

['10th Ave', '10th St', '11th Ave', '11th St', '12th Ave']

In [101]:
len(streets_list)

2451

In [102]:
streets_list = [i.lower().split(' ') for i in streets_list]

complete_list = []
for i in streets_list:
    for j in i:
        complete_list.append(j)
complete_list = list(set(complete_list))

In [103]:
complete_list[:5]

['', 'agua', 'turner', 'caire', 'dedman']

In [104]:
len(complete_list)

2274

---

#### Create Columns with Possible Addresses

In [105]:
addresses = []
for text in df['transcripts']:
    d = {}
    list_tuples = usaddress.parse(text)
    
    numbers= []
    streets = []
    types = []
    full_address = ''
    
    for i, n in enumerate(list_tuples):
        
        if list_tuples[i][1] == 'StreetName':
            streets.append(n[0].lower())
            
        if list_tuples[i][1] == 'AddressNumber':
            numbers.append(n[0].lower())
            
        if list_tuples[i][1] == 'StreetNamePostType':
            types.append(n[0].lower())
    
    d['streets'] = streets
    d['numbers'] = numbers
    d['types'] = types
    addresses.append(d)

In [106]:
df = pd.concat([df, pd.DataFrame(addresses)], axis=1)

In [111]:
df

Unnamed: 0,transcripts,confidence,tokens,location_extraction,streets,numbers,types
0,903 Cloud hole David Cloud hall because thank you sounds like you have a run in 801 Groban parking because we do we actually have a couple days and see you stand like this for that going to be quality priority 4018 Roi,0.780831,"['903', 'cloud', 'hole', 'david', 'cloud', 'hall', 'because', 'thank', 'you', 'sounds', 'like', 'you', 'have', 'a', 'run', 'in', '801', 'groban', 'parking', 'because', 'we', 'do', 'we', 'actually', 'have', 'a', 'couple', 'days', 'and', 'see', 'you', 'stand', 'like', 'this', 'for', 'that', 'going', 'to', 'be', 'quality', 'priority', '4018', 'roi']",,"[cloud, roi]","[903, 4018]",[]
1,and I am going to throw safe I'll be going oh please,0.865382,"['and', 'i', 'am', 'going', 'to', 'throw', 'safe', ""i'll"", 'be', 'going', 'oh', 'please']",,[],[],[]
2,having a psychotic episode in front of the house breaking things and pushing here to back to searching up three I don't want this boy bye,0.705537,"['having', 'a', 'psychotic', 'episode', 'in', 'front', 'of', 'the', 'house', 'breaking', 'things', 'and', 'pushing', 'here', 'to', 'back', 'to', 'searching', 'up', 'three', 'i', ""don't"", 'want', 'this', 'boy', 'bye']",,[],[],[]
3,are the same as in a 3-1 one average because there's a male that Shadow Boxing on the sidewalk give us a raise or clothing description and there's a picture attached to the toes the business across the street what's the word,0.751208,"['are', 'the', 'same', 'as', 'in', 'a', '3-1', 'one', 'average', 'because', ""there's"", 'a', 'male', 'that', 'shadow', 'boxing', 'on', 'the', 'sidewalk', 'give', 'us', 'a', 'raise', 'or', 'clothing', 'description', 'and', ""there's"", 'a', 'picture', 'attached', 'to', 'the', 'toes', 'the', 'business', 'across', 'the', 'street', ""what's"", 'the', 'word']",,[],[],[]
4,country your way will be in the area searching the enemy might include for DUI,0.660583,"['country', 'your', 'way', 'will', 'be', 'in', 'the', 'area', 'searching', 'the', 'enemy', 'might', 'include', 'for', 'dui']",,[],[],[]
5,Joyce we have them because there's a search for him I'm Peyton and I got a lot of static Anthony four times seven,0.657435,"['joyce', 'we', 'have', 'them', 'because', ""there's"", 'a', 'search', 'for', 'him', ""i'm"", 'peyton', 'and', 'i', 'got', 'a', 'lot', 'of', 'static', 'anthony', 'four', 'times', 'seven']",,[],[],[]
6,that's realization for me for any reason yeah 105 I'll find you somebody to L6 Chrysler manual,0.800576,"[""that's"", 'realization', 'for', 'me', 'for', 'any', 'reason', 'yeah', '105', ""i'll"", 'find', 'you', 'somebody', 'to', 'l6', 'chrysler', 'manual']",,[],[],[]
7,have you ever No George to Adam feature extraction are located the RP I'll be making the night,0.788395,"['have', 'you', 'ever', 'no', 'george', 'to', 'adam', 'feature', 'extraction', 'are', 'located', 'the', 'rp', ""i'll"", 'be', 'making', 'the', 'night']",[RP],[],[],[]
8,but a device area 30 PL on the noise today's a man on tape,0.781853,"['but', 'a', 'device', 'area', '30', 'pl', 'on', 'the', 'noise', ""today's"", 'a', 'man', 'on', 'tape']",,[],[],[]
9,Locke and Larkin going to be like a massage table it's trash full trash bag probably,0.842343,"['locke', 'and', 'larkin', 'going', 'to', 'be', 'like', 'a', 'massage', 'table', ""it's"", 'trash', 'full', 'trash', 'bag', 'probably']",,[],[],[]


---

#### X

In [None]:
# numbers= []
# streets = []
# types = []

# for i in range(len(list_tuples) - 1):
#     if list_tuples[i][1] == 'StreetName':
#         streets.append(list_tuples[i][0].lower())
#     if list_tuples[i][1] == 'AddressNumber':
#         numbers.append(list_tuples[i][0].lower())
#     if list_tuples[i][1] == 'StreetNamePostType':
#         types.append(list_tuples[i][0].lower())  

# full_address = ''

# numbers = [int(i) for i in numbers]
# final_number = max(numbers)

# for s in streets:
#     if s in complete_list:
#         final_street = s
# for t in types:
#     if t in complete_list:
#         final_type = t
# full_address = full_address + str(final_number) + ' ' + final_street + ' ' + final_type

---

In [None]:
df.shape

In [None]:
# Save them as csv
df.to_csv(f"../data/Data/Long_and_Lat_result /{df['filename']}.csv")
df_context.to_csv(f"../data/Data/Long_and_Lat_result_w_o_context/{df_context['filename']}.csv")