In [22]:
import pandas as pd
import numpy as np

In [23]:
# Importing final dataset used by topic clustering model 

clean = pd.read_csv('clean.csv').drop(columns=['Unnamed: 0'])

In [3]:
clean.shape

(4328, 7)

In [4]:
clean.head()

Unnamed: 0,outlet,url,title,authors,publish_date,text,keywords
0,cbc,https://www.cbc.ca/news/covid-19,CBC News | Information about COVID-19 in Canada,'Darren Bernhardt',,Sylvan Lake to clamp down on beach crowds with...,[]
1,cbc,https://www.cbc.ca/news/local,CBC News,'Andrew Kurjata',,What you need to know about COVID-19 in Ottawa...,[]
2,cbc,https://www.cbc.ca/news/politics,Politics,'John Paul Tasker',,WE Charity contract could have been worth up t...,[]
3,cbc,https://www.cbc.ca/news/indigenous,Indigenous,'Jessica Deer',,Manitoba judge points to systemic issues in In...,[]
4,cbc,https://www.cbc.ca/news/business,Business,'Pete Evans',,Twitter says about 130 accounts were targeted ...,[]


There are 21 unique news outlets in this dataset.

In [5]:
clean['outlet'].unique()

array(['cbc', 'ctvnews', 'nationalpost', 'torontosun', 'thestar', 'cp24',
       'mapleridgenews', 'tricitynews', 'langleyadvancetimes', 'abbynews',
       'theprogress', 'northdeltareporter', 'surreynowleader',
       'vancouverobserver', 'vancourier', 'nsnews', 'richmond-news',
       'burnabynow', 'newwestrecord', 'bowenislandundercurrent',
       'The Record'], dtype=object)

**Options for location-based coordinates for Demo Day:**

1. Count number of locations in the text and find coordinates of most frequent 
    - spacy API
    
2. Use location of outlet (21 unique locations) 

**Pipeline:**

- For all text that have GPE (location) items,
        - Count occurrences of location names in text
        - Select most frequently mentioned location  **(1A)**

- For text without GPE item,
        - Select location of news outlet  **(1B)**
        
        
- Find coordinates of location **(2)**
        - use mapbox API and forward geocoding (https://docs.mapbox.com/api/search/#forward-geocoding)
        - Add noise to lat and long coordinates 

## (1) Determining location of article

In [6]:
import spacy

### Create dictionary of news outlets and location

In [7]:
def find_location(text, model):
    
    doc = model(text)
    final_loc = ''
    locations = []
    
    for ent in doc.ents:
        if (ent.label_ == 'GPE'):
            locations.append(ent.text)
            #print(ent.text, ent.start_char, ent.end_char, ent.label_)
    
    if len(locations) > 0:
        loc_counts = pd.DataFrame([[x, locations.count(x)] for x in set(locations)])
        loc_counts.columns = ['location', 'count']
        loc_counts = loc_counts.sort_values(by=['count'], ascending=False)
    
        final_loc = loc_counts.iloc[0, 0]
        
    return final_loc   

### Creating new .csv of articles with locations

In [94]:
new_clean = clean

In [9]:
# Model to use for location 

nlp = spacy.load("en_core_web_sm")

In [95]:
count_spacy = 0
count_outlet = 0

In [101]:
for i in range(len(new_clean)):
    
    # Finding location of article
    print('Row: ' + str(i))
    text_loc = find_location(new_clean.loc[i, 'text'], nlp)
    
    if text_loc=='Surrey':
        text_loc = 'Surrey, British Columbia'
    elif text_loc=='B.C.':
        text_loc = 'British Columbia'
        
    print('Location from spacy: ' + text_loc)
    
    if text_loc != '': 
        try: 
            new_clean.loc[i, 'location'] = text_loc        # (1A)
            loc = gl.geocode(text_loc)
            lat, long = loc.latitude, loc.longitude
            
            print('Coordinates from geopy: [' + str(lat) + ', ' + str(long) + ']' )
            
            new_clean.loc[i, 'lat'] = lat
            new_clean.loc[i, 'long'] = long
                
            count_spacy +=1
            
        except:
            print("Error with .")
            new_clean.loc[i, 'lat'] = 0.0
            new_clean.loc[i, 'long'] = 0.0
            
    else:
        try:
            new_clean.loc[i, 'location'] = loc_dict[new_clean.loc[i, 'outlet']]      # (1B)
            new_clean.loc[i, 'lat'] = lat_dict[new_clean.loc[i, 'outlet']] 
            new_clean.loc[i, 'long'] = long_dict[new_clean.loc[i, 'outlet']]  
            count_outlet += 1
        except:
            print("Error calling from dictionaries.")

Row: 0
Location from spacy: Alberta
Coordinates from geopy: [55.001251, -115.002136]
Row: 1
Location from spacy: Ottawa
Coordinates from geopy: [45.421106, -75.690308]
Row: 2
Location from spacy: 
Row: 3
Location from spacy: Manitoba
Coordinates from geopy: [55.001251, -97.001038]
Row: 4
Location from spacy: 
Row: 5
Location from spacy: 
Row: 6
Location from spacy: 
Row: 7
Location from spacy: N.S.
Coordinates from geopy: [24.4349856, 105.5550331]
Row: 8
Location from spacy: 
Row: 9
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 10
Location from spacy: Greece
Coordinates from geopy: [38.9953683, 21.9877132]
Row: 11
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 12
Location from spacy: 
Row: 13
Location from spacy: Moscow
Coordinates from geopy: [55.7504461, 37.6174943]
Row: 14
Location from spacy: Jordan
Coordinates from geopy: [31.1667049, 36.941628]
Row: 15
Location from spacy: Canada
Coordinates from geopy: [6

Coordinates from geopy: [55.001251, -125.002441]
Row: 101
Location from spacy: Italy
Coordinates from geopy: [42.6384261, 12.674297]
Row: 102
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 103
Location from spacy: Orchestre Métropolitain
Error with .
Row: 104
Location from spacy: Montreal
Coordinates from geopy: [45.4972159, -73.6103642]
Row: 105
Location from spacy: New Orleans
Coordinates from geopy: [29.9499323, -90.0701156]
Row: 106
Location from spacy: Victoria
Coordinates from geopy: [-36.5986096, 144.6780052]
Row: 107
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 108
Location from spacy: Cambodia
Coordinates from geopy: [13.5066394, 104.869423]
Row: 109
Location from spacy: Xoxox
Error with .
Row: 110
Location from spacy: Alessia
Coordinates from geopy: [40.6855514, 14.7325899]
Row: 111
Location from spacy: 
Row: 112
Location from spacy: Weeknd
Coordinates from geopy: [10.4273236, -85.0943199]
Row: 113
Lo

Coordinates from geopy: [45.421106, -75.690308]
Row: 199
Location from spacy: 
Row: 200
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 201
Location from spacy: Ottawa
Coordinates from geopy: [45.421106, -75.690308]
Row: 202
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 203
Location from spacy: 
Row: 204
Location from spacy: Alberta
Coordinates from geopy: [55.001251, -115.002136]
Row: 205
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 206
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 207
Location from spacy: 
Row: 208
Location from spacy: McKnight
Coordinates from geopy: [40.5550681, -80.0364448]
Row: 209
Location from spacy: Edmonton
Coordinates from geopy: [53.535411, -113.507996]
Row: 210
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 211
Location from spacy: Edmonton
Coordinates from geopy:

Coordinates from geopy: [55.001251, -125.002441]
Row: 301
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 302
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 303
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 304
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 305
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 306
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 307
Location from spacy: Langley
Coordinates from geopy: [48.3604648, 6.3258262]
Row: 308
Location from spacy: 
Row: 309
Location from spacy: New Westminster
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 310
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 311
Location from spacy: South Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 312


Coordinates from geopy: [55.001251, -115.002136]
Row: 410
Location from spacy: Wisconsin
Coordinates from geopy: [44.4308975, -89.6884637]
Row: 411
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 412
Location from spacy: Edinburgh
Coordinates from geopy: [55.9533456, -3.1883749]
Row: 413
Location from spacy: U.K.
Coordinates from geopy: [54.7023545, -3.2765753]
Row: 414
Location from spacy: Selma
Coordinates from geopy: [32.4078632, -87.0207473]
Row: 415
Location from spacy: Traore
Coordinates from geopy: [16.068147, 0.0648645]
Row: 416
Location from spacy: China
Coordinates from geopy: [35.000074, 104.999927]
Row: 417
Location from spacy: Calgary
Coordinates from geopy: [51.0534234, -114.0625892]
Row: 418
Location from spacy: Calgary
Coordinates from geopy: [51.0534234, -114.0625892]
Row: 419
Location from spacy: Alberta
Coordinates from geopy: [55.001251, -115.002136]
Row: 420
Location from spacy: Alberta
Coordinates from geopy: [55.001251, -115.0021

Error with .
Row: 513
Location from spacy: Quebec
Coordinates from geopy: [52.4760892, -71.8258668]
Row: 514
Location from spacy: Regina
Coordinates from geopy: [50.44876, -104.61731]
Row: 515
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 516
Location from spacy: India
Coordinates from geopy: [22.3511148, 78.6677428]
Row: 517
Location from spacy: Wisconsin
Coordinates from geopy: [44.4308975, -89.6884637]
Row: 518
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 519
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 520
Location from spacy: OTTAWA
Coordinates from geopy: [45.421106, -75.690308]
Row: 521
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 522
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 523
Location from spacy: the City of Victoria's
Coordinates from geopy: [10.9013412, 123.071494

Coordinates from geopy: [46.500283, -66.750183]
Row: 609
Location from spacy: Colchester County
Coordinates from geopy: [45.4377329, -63.3488873]
Row: 610
Location from spacy: Fredericton
Coordinates from geopy: [45.966425, -66.645813]
Row: 611
Location from spacy: New-Brunswick
Coordinates from geopy: [46.500283, -66.750183]
Row: 612
Location from spacy: 
Row: 613
Location from spacy: N.S.
Coordinates from geopy: [24.4349856, 105.5550331]
Row: 614
Location from spacy: Ontario
Coordinates from geopy: [50.000678, -86.000977]
Row: 615
Location from spacy: N.B.
Coordinates from geopy: [39.830767, -84.923802]
Row: 616
Location from spacy: Ottawa
Coordinates from geopy: [45.421106, -75.690308]
Row: 617
Location from spacy: Ottawa
Coordinates from geopy: [45.421106, -75.690308]
Row: 618
Location from spacy: Ottawa
Coordinates from geopy: [45.421106, -75.690308]
Row: 619
Location from spacy: Ontario
Coordinates from geopy: [50.000678, -86.000977]
Row: 620
Location from spacy: Ottawa
Coordinat

Coordinates from geopy: [-10.3333333, -53.2]
Row: 709
Location from spacy: Turkmenistan
Coordinates from geopy: [39.3763807, 59.3924609]
Row: 710
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 711
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 712
Location from spacy: Trikafta
Error with .
Row: 713
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 714
Location from spacy: Calgary
Coordinates from geopy: [51.0534234, -114.0625892]
Row: 715
Location from spacy: Barrie
Coordinates from geopy: [44.3893113, -79.6901736]
Row: 716
Location from spacy: Thornhill
Coordinates from geopy: [38.2884032, -85.6257945]
Row: 717
Location from spacy: 
Row: 718
Location from spacy: 
Row: 719
Location from spacy: 
Row: 720
Location from spacy: Alberta
Coordinates from geopy: [55.001251, -115.002136]
Row: 721
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 722
Locati

Coordinates from geopy: [43.6534817, -79.3839347]
Row: 813
Location from spacy: Montreal
Coordinates from geopy: [45.4972159, -73.6103642]
Row: 814
Location from spacy: Matheson
Coordinates from geopy: [-29.7520295, 151.4991178]
Row: 815
Location from spacy: 
Row: 816
Location from spacy: 
Row: 817
Location from spacy: Boston
Coordinates from geopy: [42.3602534, -71.0582912]
Row: 818
Location from spacy: Miami Heat
Coordinates from geopy: [51.38419565, -2.3601276511123825]
Row: 819
Location from spacy: Boatloadofnerve
Error with .
Row: 820
Location from spacy: California
Coordinates from geopy: [36.7014631, -118.7559974]
Row: 821
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 822
Location from spacy: Bronco
Coordinates from geopy: [39.3862969, -120.0226966]
Row: 823
Location from spacy: F-450
Coordinates from geopy: [24.0856424, 52.6466381]
Row: 824
Location from spacy: Stang
Coordinates from geopy: [48.5714925, -4.1032139]
Row: 825
Location from sp

Coordinates from geopy: [50.000678, -86.000977]
Row: 915
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 916
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 917
Location from spacy: Brampton
Coordinates from geopy: [43.6858146, -79.7599337]
Row: 918
Location from spacy: Johnston
Coordinates from geopy: [34.31888, -96.6752298]
Row: 919
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 920
Location from spacy: Oshawa
Coordinates from geopy: [43.91768, -78.862953]
Row: 921
Location from spacy: Ontario
Coordinates from geopy: [50.000678, -86.000977]
Row: 922
Location from spacy: the City of Toronto
Coordinates from geopy: [43.7170226, -79.41978303501344]
Row: 923
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 924
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 925
Location from spacy: Ontario
Coordinates from geop

Coordinates from geopy: [43.6534817, -79.3839347]
Row: 1018
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 1019
Location from spacy: Priya
Coordinates from geopy: [43.1982425, -5.4865067]
Row: 1020
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 1021
Location from spacy: 
Row: 1022
Location from spacy: New Brunswick
Coordinates from geopy: [46.500283, -66.750183]
Row: 1023
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 1024
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 1025
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 1026
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 1027
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 1028
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 1029
Location from spacy: Ca

Coordinates from geopy: [42.6384261, 12.674297]
Row: 1118
Location from spacy: Hollywood
Coordinates from geopy: [34.0980031, -118.3295232]
Row: 1119
Location from spacy: Los Angeles
Coordinates from geopy: [34.0536909, -118.2427666]
Row: 1120
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 1121
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 1122
Location from spacy: Lebanon
Coordinates from geopy: [33.8750629, 35.843409]
Row: 1123
Location from spacy: Quebec
Coordinates from geopy: [52.4760892, -71.8258668]
Row: 1124
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 1125
Location from spacy: America
Coordinates from geopy: [51.44770365, 5.966069282055592]
Row: 1126
Location from spacy: NEW YORK
Coordinates from geopy: [40.7127281, -74.0060152]
Row: 1127
Location from spacy: Brittany
Coordinates from geopy: [48.16282835, -2.684147344521837]
Row: 1128
Location from spacy: Toronto
Coo

Coordinates from geopy: [43.6534817, -79.3839347]
Row: 1213
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 1214
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 1215
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 1216
Location from spacy: Spain
Coordinates from geopy: [39.3262345, -4.8380649]
Row: 1217
Location from spacy: Naples
Coordinates from geopy: [40.8359336, 14.2487826]
Row: 1218
Location from spacy: Orlando
Coordinates from geopy: [28.5421109, -81.3790304]
Row: 1219
Location from spacy: Florida
Coordinates from geopy: [27.7567667, -81.4639835]
Row: 1220
Location from spacy: Florida
Coordinates from geopy: [27.7567667, -81.4639835]
Row: 1221
Location from spacy: Florida
Coordinates from geopy: [27.7567667, -81.4639835]
Row: 1222
Location from spacy: New Orleans
Coordinates from geopy: [29.9499323, -90.0701156]
Row: 1223
Location from spacy: Toronto
Coordinates from geopy

Coordinates from geopy: [55.001251, -125.002441]
Row: 1309
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 1310
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 1311
Location from spacy: Surrey, British Columbia
Coordinates from geopy: [49.1913033, -122.8491439]
Row: 1312
Location from spacy: Quebec
Coordinates from geopy: [52.4760892, -71.8258668]
Row: 1313
Location from spacy: Abbotsford
Coordinates from geopy: [49.0521162, -122.3294792]
Row: 1314
Location from spacy: Beverly Hills
Coordinates from geopy: [34.0696501, -118.3963062]
Row: 1315
Location from spacy: Abbotsford
Coordinates from geopy: [49.0521162, -122.3294792]
Row: 1316
Location from spacy: Cowichan
Coordinates from geopy: [48.8359655, -124.1502799]
Row: 1317
Location from spacy: Windermere
Coordinates from geopy: [54.3794828, -2.9061994]
Row: 1318
Location from spacy: Leptospirosis
Error with .
Row: 1319
Location from spacy: 
Row: 132

Coordinates from geopy: [13.5066394, 104.869423]
Row: 1404
Location from spacy: Payton
Coordinates from geopy: [30.113819, -98.3052944]
Row: 1405
Location from spacy: MyCanadaPayday.com
Error with .
Row: 1406
Location from spacy: Shuswap
Coordinates from geopy: [50.538892, -116.0081852]
Row: 1407
Location from spacy: 
Row: 1408
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 1409
Location from spacy: Nakusp
Coordinates from geopy: [50.2396111, -117.7974031]
Row: 1410
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 1411
Location from spacy: Brogan
Coordinates from geopy: [44.2462705, -117.5171733]
Row: 1412
Location from spacy: Benito
Coordinates from geopy: [36.8216159, -120.4362833]
Row: 1413
Location from spacy: Lone Butte
Coordinates from geopy: [47.7327188, -113.2270106]
Row: 1414
Location from spacy: Abbotsford
Coordinates from geopy: [49.0521162, -122.3294792]
Row: 1415
Location from spacy: On

Coordinates from geopy: [49.0521162, -122.3294792]
Row: 1508
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 1509
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 1510
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 1511
Location from spacy: Coquitlam
Coordinates from geopy: [49.2842958, -122.793281]
Row: 1512
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 1513
Location from spacy: Vezina
Coordinates from geopy: [46.8755026, -71.0317576]
Row: 1514
Location from spacy: the City of Edmonton
Coordinates from geopy: [53.510978949999995, -113.5556685469677]
Row: 1515
Location from spacy: Coquitlam
Coordinates from geopy: [49.2842958, -122.793281]
Row: 1516
Location from spacy: Chamberlain
Coordinates from geopy: [43.810817, -99.330178]
Row: 1517
Location from spacy: Salaysay
Coordinates from geopy: [7.2657627, 125.2910812]
Row: 1518
Location

Coordinates from geopy: [49.2842958, -122.793281]
Row: 1619
Location from spacy: 
Row: 1620
Location from spacy: 
Row: 1621
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 1622
Location from spacy: Coquitlam
Coordinates from geopy: [49.2842958, -122.793281]
Row: 1623
Location from spacy: bubblegum
Coordinates from geopy: [48.4623618, 35.0356682]
Row: 1624
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 1625
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 1626
Location from spacy: 
Row: 1627
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 1628
Location from spacy: Surrey, British Columbia
Coordinates from geopy: [49.1913033, -122.8491439]
Row: 1629
Location from spacy: 
Row: 1630
Location from spacy: Coquitlam
Coordinates from geopy: [49.2842958, -122.793281]
Row: 1631
Location from spacy: Coquitlam
Coordinates from geopy: [49.2

Coordinates from geopy: [49.2608724, -123.1139529]
Row: 1725
Location from spacy: Coquitlam
Coordinates from geopy: [49.2842958, -122.793281]
Row: 1726
Location from spacy: 
Row: 1727
Location from spacy: 
Row: 1728
Location from spacy: California
Coordinates from geopy: [36.7014631, -118.7559974]
Row: 1729
Location from spacy: 
Row: 1730
Location from spacy: 
Row: 1731
Location from spacy: New Westminster
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 1732
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 1733
Location from spacy: Coquitlam
Coordinates from geopy: [49.2842958, -122.793281]
Row: 1734
Location from spacy: 
Row: 1735
Location from spacy: Coquitlam
Coordinates from geopy: [49.2842958, -122.793281]
Row: 1736
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 1737
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 1738
Location from spacy: New Westminst

Coordinates from geopy: [64.9841821, -18.1059013]
Row: 1842
Location from spacy: Coquitlam
Coordinates from geopy: [49.2842958, -122.793281]
Row: 1843
Location from spacy: Coquitlam
Coordinates from geopy: [49.2842958, -122.793281]
Row: 1844
Location from spacy: 
Row: 1845
Location from spacy: 
Row: 1846
Location from spacy: Dinakis
Error with .
Row: 1847
Location from spacy: insomnia
Coordinates from geopy: [52.9924536, -6.9860145]
Row: 1848
Location from spacy: Maple Ridge
Coordinates from geopy: [49.2197345, -122.5997078]
Row: 1849
Location from spacy: Netherlands
Coordinates from geopy: [52.5001698, 5.7480821]
Row: 1850
Location from spacy: 
Row: 1851
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 1852
Location from spacy: 
Row: 1853
Location from spacy: Coquitlam
Coordinates from geopy: [49.2842958, -122.793281]
Row: 1854
Location from spacy: Hong Kong
Coordinates from geopy: [22.2793278, 114.1628131]
Row: 1855
Location from spacy: New 

Coordinates from geopy: [55.001251, -125.002441]
Row: 1943
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 1944
Location from spacy: Montreal
Coordinates from geopy: [45.4972159, -73.6103642]
Row: 1945
Location from spacy: Vancouver Island
Coordinates from geopy: [49.592949450000006, -125.70255696124094]
Row: 1946
Location from spacy: Ottawa
Coordinates from geopy: [45.421106, -75.690308]
Row: 1947
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 1948
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 1949
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 1950
Location from spacy: Coquitlam
Coordinates from geopy: [49.2842958, -122.793281]
Row: 1951
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 1952
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 1953
Location from spacy: B

Coordinates from geopy: [39.7837304, -100.4458825]
Row: 2041
Location from spacy: Langley City
Coordinates from geopy: [48.3604648, 6.3258262]
Row: 2042
Location from spacy: Langley
Coordinates from geopy: [48.3604648, 6.3258262]
Row: 2043
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 2044
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 2045
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 2046
Location from spacy: Forest Hills
Coordinates from geopy: [38.2153476, -85.5857931]
Row: 2047
Location from spacy: lodge
Coordinates from geopy: [-35.310735449999996, 149.1165501029762]
Row: 2048
Location from spacy: Fort Langley
Coordinates from geopy: [49.169126, -122.5793628]
Row: 2049
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 2050
Location from spacy: 
Row: 2051
Location from spacy: The Township of Langley
Coordinates from geopy: [49.106

Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 2136
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 2137
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 2138
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 2139
Location from spacy: Alaska
Coordinates from geopy: [64.4459613, -149.680909]
Row: 2140
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 2141
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 2142
Location from spacy: England
Coordinates from geopy: [52.7954791, -0.5402402866174321]
Row: 2143
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 2144
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 2145
Location from spacy: us
Coordinates from geopy: [39.7837304, -100.44588

Coordinates from geopy: [49.157677, -121.9514307]
Row: 2240
Location from spacy: Abbotsford
Coordinates from geopy: [49.0521162, -122.3294792]
Row: 2241
Location from spacy: Harrison
Coordinates from geopy: [32.5233959, -94.3982158]
Row: 2242
Location from spacy: Tickets
Coordinates from geopy: [41.009661, -96.3026869]
Row: 2243
Location from spacy: Nathan
Coordinates from geopy: [31.3355141, 77.3448908]
Row: 2244
Location from spacy: Surrey, British Columbia
Coordinates from geopy: [49.1913033, -122.8491439]
Row: 2245
Location from spacy: Surrey, British Columbia
Coordinates from geopy: [49.1913033, -122.8491439]
Row: 2246
Location from spacy: Whalley
Coordinates from geopy: [53.8244874, -2.4118943]
Row: 2247
Location from spacy: Surrey, British Columbia
Coordinates from geopy: [49.1913033, -122.8491439]
Row: 2248
Location from spacy: Canoe
Coordinates from geopy: [38.9640466, -7.7626565]
Row: 2249
Location from spacy: Bitcoin
Coordinates from geopy: [52.3800847, 16.984005]
Row: 2250


Coordinates from geopy: [38.7251776, -105.6077167]
Row: 2335
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 2336
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 2337
Location from spacy: MyCanadaPayday.com
Error with .
Row: 2338
Location from spacy: Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 2339
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2340
Location from spacy: Cinematheque
Coordinates from geopy: [32.07074595, 34.78343128078966]
Row: 2341
Location from spacy: Australia
Coordinates from geopy: [-24.7761086, 134.755]
Row: 2342
Location from spacy: Masha
Coordinates from geopy: [56.80571865, 124.8745901832994]
Row: 2343
Location from spacy: Sri Lanka
Coordinates from geopy: [7.5554942, 80.7137847]
Row: 2344
Location from spacy: Tel Aviv On Fire
Error with .
Row: 2345
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724,

Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2434
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2435
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2436
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2437
Location from spacy: Garas
Coordinates from geopy: [10.182912, 45.376466]
Row: 2438
Location from spacy: 
Row: 2439
Location from spacy: 
Row: 2440
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2441
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2442
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2443
Location from spacy: Ottawa
Coordinates from geopy: [45.421106, -75.690308]
Row: 2444
Location from spacy: 
Row: 2445
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2446
Location from spacy: Canada
C

Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2533
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 2534
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2535
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2536
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2537
Location from spacy: Taiwan
Coordinates from geopy: [23.59829785, 120.83536313817521]
Row: 2538
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2539
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 2540
Location from spacy: 
Row: 2541
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2542
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2543
Location from spacy: Egypt
Coordinates from geopy: [26.2540493, 29.267546

Coordinates from geopy: [35.65586605, -105.98836165478004]
Row: 2641
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2642
Location from spacy: 
Row: 2643
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2644
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 2645
Location from spacy: the City of Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2646
Location from spacy: West Georgia
Coordinates from geopy: [44.7025462, -73.1540233]
Row: 2647
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2648
Location from spacy: Shaughnessy
Coordinates from geopy: [49.8537374, -112.8417925]
Row: 2649
Location from spacy: Patel
Coordinates from geopy: [13.9142102, -0.2430971]
Row: 2650
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2651
Location from spacy: East Hastings
Coordinates from geop

Coordinates from geopy: [76.1830702, -26.5073332]
Row: 2737
Location from spacy: avocado
Coordinates from geopy: [36.789394, -119.404015]
Row: 2738
Location from spacy: Heywood Gravel Field
Error with .
Row: 2739
Location from spacy: Shipyards
Coordinates from geopy: [44.2246425, -76.48445213621795]
Row: 2740
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 2741
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2742
Location from spacy: 
Row: 2743
Location from spacy: Saskatoon
Coordinates from geopy: [52.131802, -106.660767]
Row: 2744
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2745
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 2746
Location from spacy: Alberta
Coordinates from geopy: [55.001251, -115.002136]
Row: 2747
Location from spacy: North Vancouver
Coordinates from geopy: [49.3207133, -123.0737831]
Row: 2748
Loc

Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2845
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2846
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 2847
Location from spacy: North Vancouver
Coordinates from geopy: [49.3207133, -123.0737831]
Row: 2848
Location from spacy: 
Row: 2849
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2850
Location from spacy: 
Row: 2851
Location from spacy: West Vancouver
Coordinates from geopy: [49.3311208, -123.1594791]
Row: 2852
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2853
Location from spacy: 
Row: 2854
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 2855
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 2856
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Ro

Coordinates from geopy: [61.0666922, -107.9917071]
Row: 2950
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 2951
Location from spacy: West Vancouver
Coordinates from geopy: [49.3311208, -123.1594791]
Row: 2952
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 2953
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 2954
Location from spacy: Dundarave
Coordinates from geopy: [49.3345423, -123.1808065]
Row: 2955
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 2956
Location from spacy: Instagram
Coordinates from geopy: [22.2890784, 114.1346515]
Row: 2957
Location from spacy: 
Row: 2958
Location from spacy: North Vancouver
Coordinates from geopy: [49.3207133, -123.0737831]
Row: 2959
Location from spacy: Instagram
Coordinates from geopy: [22.2890784, 114.1346515]
Row: 2960
Location from spacy: 
Row: 2961
Location from spacy: North Shore
Coordi

Coordinates from geopy: [37.5385087, -77.43428]
Row: 3056
Location from spacy: Liao
Coordinates from geopy: [45.7578137, 4.8320114]
Row: 3057
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3058
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3059
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3060
Location from spacy: Montreal
Coordinates from geopy: [45.4972159, -73.6103642]
Row: 3061
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3062
Location from spacy: Alaska
Coordinates from geopy: [64.4459613, -149.680909]
Row: 3063
Location from spacy: 
Row: 3064
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 3065
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3066
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3067
Location from spacy: Ri

Coordinates from geopy: [37.5385087, -77.43428]
Row: 3159
Location from spacy: 
Row: 3160
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3161
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3162
Location from spacy: 
Row: 3163
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 3164
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3165
Location from spacy: Avia
Coordinates from geopy: [42.0774337, 1.8221304]
Row: 3166
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3167
Location from spacy: Avia
Coordinates from geopy: [42.0774337, 1.8221304]
Row: 3168
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 3169
Location from spacy: Steveston
Coordinates from geopy: [49.1333276, -123.1833306]
Row: 3170
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]

Coordinates from geopy: [37.5385087, -77.43428]
Row: 3259
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3260
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 3261
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 3262
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3263
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3264
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3265
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 3266
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3267
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3268
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3269
Location from spacy: Richmond
Coordinates from g

Coordinates from geopy: [23.59829785, 120.83536313817521]
Row: 3359
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3360
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 3361
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 3362
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3363
Location from spacy: hollowaypainting.ca
Error with .
Row: 3364
Location from spacy: Richmond
Coordinates from geopy: [37.5385087, -77.43428]
Row: 3365
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 3366
Location from spacy: VITA
Coordinates from geopy: [40.8113662, -5.0053801]
Row: 3367
Location from spacy: 
Row: 3368
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 3369
Location from spacy: Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 3370
Location from spacy: Burnaby
C

Coordinates from geopy: [61.0666922, -107.9917071]
Row: 3461
Location from spacy: New West
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3462
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 3463
Location from spacy: New York
Coordinates from geopy: [40.7127281, -74.0060152]
Row: 3464
Location from spacy: Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 3465
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 3466
Location from spacy: 
Row: 3467
Location from spacy: Surrey, British Columbia
Coordinates from geopy: [49.1913033, -122.8491439]
Row: 3468
Location from spacy: Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 3469
Location from spacy: US
Coordinates from geopy: [39.7837304, -100.4458825]
Row: 3470
Location from spacy: 
Row: 3471
Location from spacy: Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 3472
Location from spacy: Philippines
Coordinates fro

Coordinates from geopy: [49.2608724, -123.1139529]
Row: 3560
Location from spacy: Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 3561
Location from spacy: https://burnabyartscouncil.org/news/creativity-chronicles-art-in-isolation/
Error with .
Row: 3562
Location from spacy: Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 3563
Location from spacy: Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 3564
Location from spacy: 
Row: 3565
Location from spacy: 
Row: 3566
Location from spacy: Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 3567
Location from spacy: 
Row: 3568
Location from spacy: 
Row: 3569
Location from spacy: 
Row: 3570
Location from spacy: Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 3571
Location from spacy: Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 3572
Location from spacy: 
Row: 3573
Location from spacy: Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 35

Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 3674
Location from spacy: New Westminster
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3675
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 3676
Location from spacy: East Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 3677
Location from spacy: Burnaby Heights
Coordinates from geopy: [49.2867503, -123.0146338]
Row: 3678
Location from spacy: Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 3679
Location from spacy: Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 3680
Location from spacy: New Westminster
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3681
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 3682
Location from spacy: Delta
Coordinates from geopy: [33.3926893, -95.6749486]
Row: 3683
Location from spacy: British Columbia
Coordinates from geop

Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3772
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 3773
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 3774
Location from spacy: Strata
Coordinates from geopy: [51.4929249, -0.09951761876811618]
Row: 3775
Location from spacy: Abramskey
Error with .
Row: 3776
Location from spacy: New Westminster
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3777
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 3778
Location from spacy: New Westminster
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3779
Location from spacy: New Westminster
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3780
Location from spacy: 
Row: 3781
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 3782
Location from spacy: New West
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3783
Location 

Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3866
Location from spacy: New Westminster
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3867
Location from spacy: Uptown
Coordinates from geopy: [29.9364059, -90.101181]
Row: 3868
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 3869
Location from spacy: mem.com
Coordinates from geopy: [38.78940245, -9.352990923077778]
Row: 3870
Location from spacy: New Westminster
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3871
Location from spacy: New Westminster
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3872
Location from spacy: New Westminster
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3873
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 3874
Location from spacy: 
Row: 3875
Location from spacy: Sapperton
Coordinates from geopy: [49.224545, -122.8895213]
Row: 3876
Location from spacy: British Columbia
Coordinates from geop

Coordinates from geopy: [49.12120935, -56.69629621274099]
Row: 3963
Location from spacy: New Westminster
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3964
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 3965
Location from spacy: Burnaby
Coordinates from geopy: [49.2433804, -122.9725459]
Row: 3966
Location from spacy: New Westminster
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3967
Location from spacy: Coquitlam
Coordinates from geopy: [49.2842958, -122.793281]
Row: 3968
Location from spacy: Sapperton
Coordinates from geopy: [49.224545, -122.8895213]
Row: 3969
Location from spacy: New Westminster
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3970
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 3971
Location from spacy: New Westminster
Coordinates from geopy: [49.2067726, -122.9108818]
Row: 3972
Location from spacy: New Westminster-Burnaby
Coordinates from geopy: [49.2312928, -1

Coordinates from geopy: [-41.5000831, 172.8344077]
Row: 4072
Location from spacy: BC
Coordinates from geopy: [55.001251, -125.002441]
Row: 4073
Location from spacy: 
Row: 4074
Location from spacy: Bastin
Coordinates from geopy: [48.5608587, 18.236212]
Row: 4075
Location from spacy: Vancouver
Coordinates from geopy: [49.2608724, -123.1139529]
Row: 4076
Location from spacy: 
Row: 4077
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 4078
Location from spacy: Bronwyn
Coordinates from geopy: [-33.9406024, 25.486865]
Row: 4079
Location from spacy: Halti
Coordinates from geopy: [69.3080614, 21.263321]
Row: 4080
Location from spacy: 
Row: 4081
Location from spacy: turkey
Coordinates from geopy: [38.9597594, 34.9249653]
Row: 4082
Location from spacy: 
Row: 4083
Location from spacy: British Columbia
Coordinates from geopy: [55.001251, -125.002441]
Row: 4084
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 4085
Location from s

Coordinates from geopy: [55.001251, -115.002136]
Row: 4172
Location from spacy: Daadab
Coordinates from geopy: [-0.7732949, 38.357783]
Row: 4173
Location from spacy: 
Row: 4174
Location from spacy: Nova Scotia
Coordinates from geopy: [45.1960403, -63.1653789]
Row: 4175
Location from spacy: Dorton
Coordinates from geopy: [37.2767726, -82.5790436]
Row: 4176
Location from spacy: New Brunswick
Coordinates from geopy: [46.500283, -66.750183]
Row: 4177
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 4178
Location from spacy: Toronto
Coordinates from geopy: [43.6534817, -79.3839347]
Row: 4179
Location from spacy: Ontario
Coordinates from geopy: [50.000678, -86.000977]
Row: 4180
Location from spacy: London
Coordinates from geopy: [51.5073219, -0.1276474]
Row: 4181
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.9917071]
Row: 4182
Location from spacy: 
Row: 4183
Location from spacy: Canada
Coordinates from geopy: [61.0666922, -107.991707

Coordinates from geopy: [22.2793278, 114.1628131]
Row: 4272
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 4273
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 4274
Location from spacy: Los Angeles
Coordinates from geopy: [34.0536909, -118.2427666]
Row: 4275
Location from spacy: St. Louis
Coordinates from geopy: [38.6268039, -90.1994097]
Row: 4276
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 4277
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 4278
Location from spacy: Mississippi
Coordinates from geopy: [32.9715645, -89.7348497]
Row: 4279
Location from spacy: Michigan
Coordinates from geopy: [43.6211955, -84.6824346]
Row: 4280
Location from spacy: Alaska
Coordinates from geopy: [64.4459613, -149.680909]
Row: 4281
Location from spacy: U.S.
Coordinates from geopy: [45.1895845, -88.73215845]
Row: 4282
Location from spacy: Rayong
Coordinates from geop

### Exploring dataset with locations (new_clean.csv)

In [102]:
count_outlet

684

In [103]:
count_spacy

3585

In [104]:
new_clean.head()

Unnamed: 0,outlet,url,title,authors,publish_date,text,keywords,location,lat,long
0,cbc,https://www.cbc.ca/news/covid-19,CBC News | Information about COVID-19 in Canada,'Darren Bernhardt',,Sylvan Lake to clamp down on beach crowds with...,[],Alberta,55.001251,-115.002136
1,cbc,https://www.cbc.ca/news/local,CBC News,'Andrew Kurjata',,What you need to know about COVID-19 in Ottawa...,[],Ottawa,45.421106,-75.690308
2,cbc,https://www.cbc.ca/news/politics,Politics,'John Paul Tasker',,WE Charity contract could have been worth up t...,[],Toronto,45.518055,-73.55093
3,cbc,https://www.cbc.ca/news/indigenous,Indigenous,'Jessica Deer',,Manitoba judge points to systemic issues in In...,[],Manitoba,55.001251,-97.001038
4,cbc,https://www.cbc.ca/news/business,Business,'Pete Evans',,Twitter says about 130 accounts were targeted ...,[],Toronto,45.518055,-73.55093


In [105]:
new_clean['location'].value_counts()

Vancouver              417
British Columbia       319
Toronto                285
Burnaby                284
Canada                 283
                      ... 
Hydra Perfection         1
C.Dir                    1
hollowaypainting.ca      1
Whonnock                 1
Liberty Village and      1
Name: location, Length: 896, dtype: int64

**Notes:**
- Change "Surrey" to "Surrey, British Columbia"
- Change "B.C." to "British Columbia"

## Exploring geo.csv (locations of outlets) 

In [2]:
geo = pd.read_csv('geo.csv')

In [3]:
clean.columns

NameError: name 'clean' is not defined

In [4]:
geo.columns = ['outlet', 'url', 'title', 'authors', 'publish_date', 'text', 'keywords', 'scope', 'location', 'latitude', 'longitude']

In [5]:
geo.head()

Unnamed: 0,outlet,url,title,authors,publish_date,text,keywords,scope,location,latitude,longitude
0,ctvnews,https://www.ctvnews.ca/health/coronavirus/trac...,Tracking every case of COVID-19 in Canada,,2020-03-13 14:17:00-04:00,July 17 – Ontario health officials recorded 11...,[],national,Ottawa,-75.697472,45.421861
1,ctvnews,https://www.ctvnews.ca/politics/liberals-revis...,"Liberals revise COVID-19 wage subsidy, ease el...",,2020-07-17 10:22:00-04:00,OTTAWA -- Finance Minister Bill Morneau says t...,[],national,Ottawa,-75.697472,45.421861
2,ctvnews,https://www.ctvnews.ca/health/coronavirus/new-...,New Normal: Casinos betting on temperature che...,,2020-07-17 12:03:00-04:00,TORONTO -- Canadian gamblers are trading in ca...,[],national,Ottawa,-75.697472,45.421861
3,ctvnews,https://www.ctvnews.ca/health/coronavirus/it-s...,'It's amazing': Toronto man recovering after 1...,,2020-07-15 22:00:00-04:00,"TORONTO -- For months, Bruno Iozzo’s family wa...",[],national,Ottawa,-75.697472,45.421861
4,ctvnews,https://www.ctvnews.ca/health/coronavirus/ariz...,Arizona man in a coma from coronavirus wakes u...,'Madeline Holcombe',2020-07-16 08:15:00-04:00,"For Eddie Case, recovering from COVID-19 comes...",[],national,Ottawa,-75.697472,45.421861


### Creates dictionaries from scope and location of each outlet

In [6]:
group = geo.groupby('outlet')

In [7]:
# Not all 

scope_dict = group.apply(lambda x: x['scope'].unique()).apply(pd.Series).loc[:, 0].to_dict()

In [28]:
loc_dict = group.apply(lambda x: x['location'].unique()).apply(pd.Series).loc[:, 0].to_dict()

In [29]:
lat_dict = group.apply(lambda x: x['latitude'].unique()).apply(pd.Series).loc[:, 0].to_dict()

In [30]:
long_dict = group.apply(lambda x: x['longitude'].unique()).apply(pd.Series).loc[:, 0].to_dict()

In [31]:
loc_dict['cbc'] = 'Toronto'
loc_dict['tricitynews'] = 'Port Coquitlam'
loc_dict['vancouverobserver'] = 'Vancouver'
loc_dict['vancourier'] = 'Vancouver'
loc_dict['richmond-news'] = 'Richmond'
loc_dict['burnabynow'] = 'Burnaby'
loc_dict['newwestrecord'] = 'Burnaby'
loc_dict['bowenislandundercurrent'] = 'Bowen Island'

In [32]:
lat_dict['cbc'] = 45.518055
lat_dict['tricitynews'] = 49.247012
lat_dict['vancouverobserver'] = -123.10096000000001
lat_dict['vancourier'] = 49.266922
lat_dict['richmond-news'] = 49.174152
lat_dict['burnabynow'] = 49.252834
lat_dict['newwestrecord'] = 49.252857
lat_dict['bowenislandundercurrent'] = 49.379632

In [33]:
long_dict['cbc'] = -73.550930
long_dict['tricitynews'] = -122.760220
long_dict['vancouverobserver'] = 49.318767
long_dict['vancourier'] = -123.110863
long_dict['richmond-news'] = -123.119285
long_dict['burnabynow'] = -122.917969
long_dict['newwestrecord'] = -122.916369
long_dict['bowenislandundercurrent'] = -123.338253

In [34]:
out_lat = pd.DataFrame(lat_dict.items(), columns=['outlet', 'lat'])
out_long = pd.DataFrame(long_dict.items(), columns=['outlet', 'long'])

In [37]:
out_location = pd.DataFrame(loc_dict.items(), columns=['outlet', 'location'])
out_location = pd.merge(out_location, out_lat, how='inner', on=['outlet'])
out_location = pd.merge(out_location, out_long, how='inner', on=['outlet'])

In [39]:
out_location.to_csv('outlet_locations.csv')

### Exploring geopy API for coordinates

In [29]:
from geopy.geocoders import Nominatim

gl = Nominatim(user_agent='newsworthy_ml')

In [35]:
loc = gl.geocode('Kanye West')

In [36]:
loc.address

'Kanye, Southern District, PLOT, Botswana'

In [37]:
loc.latitude

-24.9766112

In [38]:
loc.longitude

25.3358859

## Combining with topic modelled articles

In [106]:
final = pd.read_csv('complete.csv')

In [107]:
final = final.drop(columns='Unnamed: 0')

In [108]:
new_clean2 = new_clean

In [109]:
final.columns

Index(['url', 'num_articles_per_topic', 'topic_resid', 'category',
       'category_probability', 'topic_num', 'topics', 'outlet', 'title',
       'authors', 'publish_date', 'text', 'keywords', 'word_count',
       'article_resid'],
      dtype='object')

In [110]:
new_clean2.columns

Index(['outlet', 'url', 'title', 'authors', 'publish_date', 'text', 'keywords',
       'location', 'lat', 'long'],
      dtype='object')

In [111]:
new_clean2[['url', 'location', 'lat', 'long']]

Unnamed: 0,url,location,lat,long
0,https://www.cbc.ca/news/covid-19,Alberta,55.001251,-115.002136
1,https://www.cbc.ca/news/local,Ottawa,45.421106,-75.690308
2,https://www.cbc.ca/news/politics,Toronto,45.518055,-73.550930
3,https://www.cbc.ca/news/indigenous,Manitoba,55.001251,-97.001038
4,https://www.cbc.ca/news/business,Toronto,45.518055,-73.550930
...,...,...,...,...
4323,https://www.therecord.com/opinion/columnists/2...,Toronto,43.653482,-79.383935
4324,https://www.therecord.com/opinion/columnists/2...,U.S.,45.189585,-88.732158
4325,https://www.therecord.com/opinion/contributors...,America,51.447704,5.966069
4326,https://www.therecord.com/opinion/contributors...,Ottawa,45.421106,-75.690308


In [112]:
new_clean2 = new_clean2[['url', 'location', 'lat', 'long']]

In [113]:
final_df = pd.merge(final, new_clean2, how='inner', on=['url'])

In [114]:
final_df.head()

Unnamed: 0,url,num_articles_per_topic,topic_resid,category,category_probability,topic_num,topics,outlet,title,authors,publish_date,text,keywords,word_count,article_resid,location,lat,long
0,https://www.cbc.ca/parents/learning/view/covid...,222,0.959933,lifestyle,86.343329,0,famili just time peopl year home life day,cbc,I Don’t Know If My Kids Will Ever See Their Gr...,'Jennifer Power Scott',2020-07-07,On a sticky New Brunswick afternoon — the kind...,[],1120,0.932753,Newfoundland,49.121209,-56.696296
1,https://www.thestar.com/life/health_wellness/2...,222,0.959933,lifestyle,86.343329,0,famili just time peopl year home life day,thestar,Why your workout may not be helping you lose w...,'Christine Sismondo',2020-01-06,This is the first in a week-long series on die...,[],954,0.968334,Toronto,43.653482,-79.383935
2,https://www.thestar.com/life/health_wellness/2...,222,0.959933,lifestyle,86.343329,0,famili just time peopl year home life day,thestar,She lost 100 pounds and became an Instagram se...,'Jen Kirsch',2020-01-09,This story is part of a week-long series on di...,[],1131,0.917396,Landry,45.57,6.73991
3,https://www.thestar.com/life/relationships/adv...,222,0.959933,lifestyle,86.343329,0,famili just time peopl year home life day,thestar,How do I stop my adult children from being so ...,'Ellieadvice Columnisttue.',2020-07-14,"Q: I’m a male, 45, doing well professionally, ...",[],600,0.928283,Toronto,-79.540139,43.779361
4,https://www.ctvnews.ca/entertainment/magic-sch...,222,0.959933,lifestyle,86.343329,0,famili just time peopl year home life day,ctvnews,'Magic School Bus' author Joanna Cole dies at ...,,2020-07-16 07:32:00-04:00,"NEW YORK -- Author Joanna Cole, whose ""Magic S...",[],374,0.982396,Sioux City,42.496682,-96.405878


In [115]:
final_df.shape

(1786, 18)

In [116]:
final_df.to_csv('final_df.csv')