# Learning to use spaCy with their amazing tutorial at https://course.spacy.io/chapter1

In [4]:
# Load the scraped NYT articles from disk into a dict
import json

# Explicit UTF-8 so decoding does not depend on the platform's default locale
with open('nytimes_articles.json', encoding='utf-8') as json_file:
    nytimes_articles = json.load(json_file)

In [5]:
# Import the English language class
from spacy.lang.en import English

# Create the nlp object
nlp = English()

In [6]:
# Calling the nlp object on a string produces a processed Doc
doc = nlp("Hello world!")

# A Doc is a sequence of tokens: show each token's text on its own line
print(*(token.text for token in doc), sep="\n")

Hello
world
!


In [7]:
# Re-create the sample Doc so this cell only depends on `nlp`
doc = nlp("Hello world!")

# Index into the Doc to get a single Token (position 1 is the second token)
token = doc[1]

# Get the token text via the .text attribute
print(token.text)

world


In [8]:
# Re-create the sample Doc so this cell only depends on `nlp`
doc = nlp("Hello world!")

# A slice from the Doc is a Span object; 1:3 covers tokens 1 and 2 (end is exclusive)
span = doc[1:3]

# Get the span text via the .text attribute
print(span.text)

world!


In [9]:
doc = nlp("It costs $5.")

# (printed label, token attribute) pairs, in display order
attribute_rows = (
    ('Index:   ', 'i'),
    ('Text:    ', 'text'),
    ('is_alpha:', 'is_alpha'),
    ('is_punct:', 'is_punct'),
    ('like_num:', 'like_num'),
)

# One line per attribute, listing its value for every token in the doc
for row_label, attr_name in attribute_rows:
    print(row_label, [getattr(token, attr_name) for token in doc])

Index:    [0, 1, 2, 3, 4]
Text:     ['It', 'costs', '$', '5', '.']
is_alpha: [True, True, False, False, False]
is_punct: [False, False, False, False, True]
like_num: [False, False, False, True, False]


In [10]:
# Import the English language class
from spacy.lang.en import English

# Create the nlp object (replaces any `nlp` defined by an earlier cell)
nlp = English()

# Process a text into a Doc
doc = nlp("This is a sentence.")

# Print the document text; .text returns the full string of the Doc
print(doc.text)

This is a sentence.


In [None]:
# Import the English language class and create the nlp object
from spacy.lang.en import English

nlp = English()

# Process the text
doc = nlp("I like tree kangaroos and narwhals.")

# Select the first token (index 0)
first_token = doc[0]

# Print the first token's text
print(first_token.text)

In [None]:
# Import the English language class and create the nlp object
from spacy.lang.en import English

nlp = English()

# Process the text
doc = nlp("I like tree kangaroos and narwhals.")

# A slice of the Doc for "tree kangaroos" (tokens 2 and 3; the end index is exclusive)
tree_kangaroos = doc[2:4]
print(tree_kangaroos.text)

# A slice of the Doc for "tree kangaroos and narwhals" (without the ".")
# Tokens 2 through 5; the end index 6 is exclusive
tree_kangaroos_and_narwhals = doc[2:6]
print(tree_kangaroos_and_narwhals.text)

In [None]:
# Process the text
doc = nlp(
    "In 1990, more than 60% of people in East Asia were in extreme poverty. "
    "Now less than 4% are."
)

# Report every number-like token that is immediately followed by a "%" token
for token in doc:
    if not token.like_num:
        continue
    # A number that is the very last token has no successor; skip it instead
    # of indexing past the end of the doc (which would raise IndexError)
    if token.i + 1 >= len(doc):
        continue
    next_token = doc[token.i + 1]
    if next_token.text == "%":
        print("Percentage found:", token.text)

In [None]:
# Match exact token texts:
#   [{'TEXT': 'iPhone'}, {'TEXT': 'X'}]
# Match lexical attributes:
#   [{'LOWER': 'iphone'}, {'LOWER': 'x'}]
# Match any token attributes:
#   [{'LEMMA': 'buy'}, {'POS': 'NOUN'}]

In [None]:
import spacy
from spacy.matcher import Matcher

# Load the small English model; the matcher is built on the model's vocab
nlp = spacy.load('en_core_web_sm')
matcher = Matcher(nlp.vocab)

# Two consecutive tokens whose exact text is "iPhone" and then "X"
pattern = [{'TEXT': word} for word in ('iPhone', 'X')]
matcher.add('IPHONE_PATTERN', None, pattern)

doc = nlp("New iPhone X release date leaked")

# Each match is a (match_id, start, end) triple of token indices into the doc
matches = matcher(doc)
for match_id, start, end in matches:
    matched_span = doc[start:end]
    print(matched_span.text)

In [None]:
# A digit token, then "fifa world cup" in any casing, then a punctuation token
pattern = (
    [{'IS_DIGIT': True}]
    + [{'LOWER': word} for word in ('fifa', 'world', 'cup')]
    + [{'IS_PUNCT': True}]
)
doc = nlp("2018 FIFA World Cup: France won!")

In [None]:
# Start from a fresh matcher with its own key: adding to the existing matcher
# under 'IPHONE_PATTERN' again would keep the earlier iPhone pattern active
# and pile up duplicate patterns every time this cell is re-run.
matcher = Matcher(nlp.vocab)
matcher.add('FIFA_WORLD_CUP_PATTERN', None, pattern)

# Print the text of every matched span
matches = matcher(doc)
for match_id, start, end in matches:
    # Get the matched span
    matched_span = doc[start:end]
    print(matched_span.text)

In [None]:
# A verb with lemma "love" followed by a noun
pattern = [
    {'LEMMA': 'love', 'POS': 'VERB'},
    {'POS': 'NOUN'}
]
doc = nlp("I loved dogs but now I love cats more.")

# Fresh matcher and a descriptive key, so patterns registered by earlier cells
# under 'IPHONE_PATTERN' are not evaluated alongside this one.
matcher = Matcher(nlp.vocab)
matcher.add('LOVE_NOUN_PATTERN', None, pattern)

matches = matcher(doc)
for match_id, start, end in matches:
    # Get the matched span
    matched_span = doc[start:end]
    print(matched_span.text)

In [None]:
# A form of "buy", an optional determiner (e.g. "a", "the"), then a noun
pattern = [
    {'LEMMA': 'buy'},
    {'POS': 'DET', 'OP': '?'},  # optional: match 0 or 1 times
    {'POS': 'NOUN'}
]
doc = nlp("I bought a car")

# Fresh matcher and a descriptive key, so patterns registered by earlier cells
# under 'IPHONE_PATTERN' are not evaluated alongside this one.
matcher = Matcher(nlp.vocab)
matcher.add('BUY_NOUN_PATTERN', None, pattern)

matches = matcher(doc)
for match_id, start, end in matches:
    # Get the matched span
    matched_span = doc[start:end]
    print(matched_span.text)

In [316]:

import spacy
from spacy.matcher import Matcher

nlp = spacy.load("en_core_web_sm")
doc = nlp("New release date iphone X leaked as Apple reveals pre-orders by mistake")

# The matcher shares the pipeline's vocabulary
matcher = Matcher(nlp.vocab)

# Case-insensitive pattern: a token that lowercases to "iphone", then one that lowercases to "x"
pattern = [{"lower": word} for word in ("iphone", "x")]
matcher.add("IPHONE_X_PATTERN", None, pattern)

# Run the matcher and list the text of every matched span
matches = matcher(doc)
matched_texts = [doc[start:end].text for match_id, start, end in matches]
print("Matches:", matched_texts)

Matches: ['iphone X']


In [None]:
import spacy
from spacy.matcher import Matcher

nlp = spacy.load("en_core_web_sm")
matcher = Matcher(nlp.vocab)

# Sample text mentioning several iOS versions
ios_text = (
    "After making the iOS update you won't notice a radical system-wide "
    "redesign: nothing like the aesthetic upheaval we got with iOS 7. Most of "
    "iOS 11's furniture remains the same as in iOS 10. But you will discover "
    "some tweaks once you delve a little deeper."
)
doc = nlp(ios_text)

# Full iOS versions ("iOS 7", "iOS 11", "iOS 10"): the token "iOS" followed by a digit token
pattern = [{"TEXT": "iOS"}, {"IS_DIGIT": True}]
matcher.add("IOS_VERSION_PATTERN", None, pattern)

# Apply the matcher, then report the number of hits and each matched span
matches = matcher(doc)
print("Total matches found:", len(matches))
for match_id, start, end in matches:
    print("Match found:", doc[start:end].text)

In [None]:
matches

In [None]:
import spacy
from spacy.matcher import Matcher

nlp = spacy.load("en_core_web_sm")
matcher = Matcher(nlp.vocab)

# Sample forum-style text about downloading software
download_text = (
    "i downloaded Fortnite on my laptop and can't open the game at all. Help? "
    "so when I was downloading Minecraft, I got the Windows version where it "
    "is the '.zip' folder and I used the default program to unpack it... do "
    "I also need to download Winzip?"
)
doc = nlp(download_text)

# Any form of "download" (matched by lemma) followed by a proper noun
pattern = [{"LEMMA": "download"}, {"POS": "PROPN"}]
matcher.add("DOWNLOAD_THINGS_PATTERN", None, pattern)

# Apply the matcher, then report the number of hits and each matched span
matches = matcher(doc)
print("Total matches found:", len(matches))
for match_id, start, end in matches:
    print("Match found:", doc[start:end].text)
    

In [3]:
import spacy
from spacy.matcher import Matcher

nlp = spacy.load("en_core_web_sm")
matcher = Matcher(nlp.vocab)

# Sample product-description text
features_text = (
    "Features of the app include a beautiful design, smart search, automatic "
    "labels and optional voice responses."
)
doc = nlp(features_text)

# An adjective followed by one noun, plus an optional second noun
pattern = [{"POS": "ADJ"}, {"POS": "NOUN"}, {"POS": "NOUN", "OP": "?"}]
matcher.add("ADJ_NOUN_PATTERN", None, pattern)

# Apply the matcher, then report the number of hits and each matched span
matches = matcher(doc)
print("Total matches found:", len(matches))
for match_id, start, end in matches:
    print("Match found:", doc[start:end].text)
    
    

Total matches found: 5
Match found: beautiful design
Match found: smart search
Match found: automatic labels
Match found: optional voice
Match found: optional voice responses


In [207]:
nytimes_articles.keys()

dict_keys(['2020/01/08 | https://www.nytimes.com/2020/01/08/health/china-pneumonia-outbreak-virus.html', '2020/01/10 | https://www.nytimes.com/2020/01/10/world/asia/china-virus-wuhan-death.html', '2020/01/15 | https://www.nytimes.com/2020/01/15/world/asia/coronavirus-japan-china.html', '2020/01/17 | https://www.nytimes.com/2020/01/17/health/china-coronavirus-airport-screening.html', 'interactiv | https://www.nytimes.com/interactive/2020/01/17/well/live/17healthquiz-01172020.html', '2020/01/18 | https://www.nytimes.com/2020/01/18/world/asia/china-virus-wuhan-coronavirus.html', '2020/01/20 | https://www.nytimes.com/2020/01/20/world/asia/coronavirus-china-symptoms.html', '2020/01/20 | https://www.nytimes.com/2020/01/20/briefing/virginia-davos-coco-gauff.html', '2020/01/20 | https://www.nytimes.com/2020/01/20/briefing/chinese-virus-boeing-australia-hail.html', 'article/wh | https://www.nytimes.com/article/what-is-coronavirus.html', 'interactiv | https://www.nytimes.com/interactive/2020/01/

In [208]:
# Sample key in the "<10-character prefix> | <url>" layout, used to try out the slice offset
blah = ' | '.join((
    '2020/01/08',
    'https://www.nytimes.com/2020/01/08/health/china-pneumonia-outbreak-virus.html',
))

In [211]:
blah[13:]

'https://www.nytimes.com/2020/01/08/health/china-pneumonia-outbreak-virus.html'

In [212]:
# Keys look like "<prefix> | <url>"; keep only the URL part, in key order.
# Splitting on the separator instead of slicing at the fixed offset 13 keeps
# this correct when the prefix is not exactly 10 characters long (for example
# the "no_date" prefix that this notebook later writes back to the same file).
# A key without the separator is kept unchanged.
urls = [key.split(' | ', 1)[-1] for key in nytimes_articles]

In [214]:
len(nytimes_articles)

1519

In [251]:
urls

['https://www.nytimes.com/2020/01/08/health/china-pneumonia-outbreak-virus.html',
 'https://www.nytimes.com/2020/01/10/world/asia/china-virus-wuhan-death.html',
 'https://www.nytimes.com/2020/01/15/world/asia/coronavirus-japan-china.html',
 'https://www.nytimes.com/2020/01/17/health/china-coronavirus-airport-screening.html',
 'https://www.nytimes.com/interactive/2020/01/17/well/live/17healthquiz-01172020.html',
 'https://www.nytimes.com/2020/01/18/world/asia/china-virus-wuhan-coronavirus.html',
 'https://www.nytimes.com/2020/01/20/world/asia/coronavirus-china-symptoms.html',
 'https://www.nytimes.com/2020/01/20/briefing/virginia-davos-coco-gauff.html',
 'https://www.nytimes.com/2020/01/20/briefing/chinese-virus-boeing-australia-hail.html',
 'https://www.nytimes.com/article/what-is-coronavirus.html',
 'https://www.nytimes.com/interactive/2020/01/21/world/asia/china-coronavirus-maps.html',
 'https://www.nytimes.com/2020/01/21/health/cdc-coronavirus.html',
 'https://www.nytimes.com/2020/0

In [247]:
# Copy of urls with a trailing '&' appended to every entry (used as an end-of-string marker)
urls_new = [thing + '&' for thing in urls]

In [248]:
urls_new

['https://www.nytimes.com/2020/01/08/health/china-pneumonia-outbreak-virus.html&',
 'https://www.nytimes.com/2020/01/10/world/asia/china-virus-wuhan-death.html&',
 'https://www.nytimes.com/2020/01/15/world/asia/coronavirus-japan-china.html&',
 'https://www.nytimes.com/2020/01/17/health/china-coronavirus-airport-screening.html&',
 'https://www.nytimes.com/interactive/2020/01/17/well/live/17healthquiz-01172020.html&',
 'https://www.nytimes.com/2020/01/18/world/asia/china-virus-wuhan-coronavirus.html&',
 'https://www.nytimes.com/2020/01/20/world/asia/coronavirus-china-symptoms.html&',
 'https://www.nytimes.com/2020/01/20/briefing/virginia-davos-coco-gauff.html&',
 'https://www.nytimes.com/2020/01/20/briefing/chinese-virus-boeing-australia-hail.html&',
 'https://www.nytimes.com/article/what-is-coronavirus.html&',
 'https://www.nytimes.com/interactive/2020/01/21/world/asia/china-coronavirus-maps.html&',
 'https://www.nytimes.com/2020/01/21/health/cdc-coronavirus.html&',
 'https://www.nytime

In [249]:
import re

# A publication date appears in the URL path as /YYYY/MM/DD/
_DATE_IN_URL = re.compile(r'/(\d{4}/\d{2}/\d{2})/')

# Exactly one entry per URL, so new_dates always stays aligned with urls.
# Searching for the full date shape (rather than taking 10 characters from the
# first '2') avoids picking up an unrelated '2' and never emits two entries for
# one URL. URLs without a date get the placeholder 'no_date'.
new_dates = []
for url in urls_new:
    date_match = _DATE_IN_URL.search(url)
    new_dates.append(date_match.group(1) if date_match else 'no_date')
            

In [252]:
# Small scratch dict for checking what dict.values() returns
x = dict([(31, 'pizza'), (32, 'ice_cream')])

In [254]:
x.values()

dict_values(['pizza', 'ice_cream'])

In [256]:
# Re-key the articles as "<date> | <url>". This relies on new_dates and urls
# being in the same order as nytimes_articles' values.
master = {
    f'{new_dates[i]} | {urls[i]}': value
    for i, value in enumerate(nytimes_articles.values())
}

In [258]:
# Saves the re-keyed NYT data. THE GOOD ONE.
# Note: this is the same path the notebook loads at the top, so the original
# file content is replaced by `master`.
with open('nytimes_articles.json', 'w') as out_file:
    out_file.write(json.dumps(master, indent=4))

In [215]:
urls

['https://www.nytimes.com/2020/01/08/health/china-pneumonia-outbreak-virus.html',
 'https://www.nytimes.com/2020/01/10/world/asia/china-virus-wuhan-death.html',
 'https://www.nytimes.com/2020/01/15/world/asia/coronavirus-japan-china.html',
 'https://www.nytimes.com/2020/01/17/health/china-coronavirus-airport-screening.html',
 'https://www.nytimes.com/interactive/2020/01/17/well/live/17healthquiz-01172020.html',
 'https://www.nytimes.com/2020/01/18/world/asia/china-virus-wuhan-coronavirus.html',
 'https://www.nytimes.com/2020/01/20/world/asia/coronavirus-china-symptoms.html',
 'https://www.nytimes.com/2020/01/20/briefing/virginia-davos-coco-gauff.html',
 'https://www.nytimes.com/2020/01/20/briefing/chinese-virus-boeing-australia-hail.html',
 'https://www.nytimes.com/article/what-is-coronavirus.html',
 'https://www.nytimes.com/interactive/2020/01/21/world/asia/china-coronavirus-maps.html',
 'https://www.nytimes.com/2020/01/21/health/cdc-coronavirus.html',
 'https://www.nytimes.com/2020/0

In [None]:
for url in urls:
    

# Using spaCy to count city mentions in the NYT coronavirus articles

In [38]:
import json

# Load the city data; its keys are used as city names further down.
# Explicit UTF-8 so decoding does not depend on the platform's default locale.
with open('cities_full.json', encoding='utf-8') as json_file:
    cities_and_countries = json.load(json_file)
    

In [132]:
# Lowercased keys of cities_and_countries, used as the list of city phrases to search for
cities = [key.lower() for key in cities_and_countries]

In [162]:
def create_matcher(syns, label):
    """
    Build a case-insensitive PhraseMatcher for a list of phrases.

    :param syns: iterable of phrase strings to match
    :param label: key under which all phrases are registered on the matcher
    :return: a ``PhraseMatcher`` on the global ``nlp`` vocab that compares
        tokens on their ``LOWER`` attribute
    """
    # Imported locally: no visible cell of this notebook imports PhraseMatcher
    from spacy.matcher import PhraseMatcher

    # make_doc only tokenizes, which is all a LOWER-based phrase pattern needs;
    # it avoids running the rest of the pipeline over every phrase
    phrase_docs = [nlp.make_doc(phrase) for phrase in syns]
    matcher = PhraseMatcher(nlp.vocab, attr="LOWER")
    matcher.add(label, None, *phrase_docs)
    return matcher

In [154]:
def get_sentences_from_doc(doc):
    """
    Return the text of every sentence in ``doc`` as a list of strings.

    :param doc: object exposing ``.sents``; a falsy value (e.g. ``None``) gives ``[]``
    :return: list with one ``.text`` string per sentence, in document order
    """
    return [sentence.text for sentence in doc.sents] if doc else []

In [296]:
def match_doc_with_sentences(doc, matcher, debug=False, unique_sentences=False):
    """
    Run a matcher over a doc and re-parse the sentences that contain a match.

    Every match contributes the text of its enclosing sentence (``span.sent``),
    so by default a sentence holding several matches appears several times in
    the result. The collected sentence texts are joined with single spaces and
    passed through the global ``nlp`` pipeline to build the returned doc.

    :param doc: parsed document to search; ``None`` yields ``None``
    :param matcher: callable returning ``(match_id, start, end)`` triples for ``doc``
    :param debug: when true, print the match count and every matched sentence
        wrapped at 100 columns
    :param unique_sentences: when true, keep each sentence only once no matter
        how many matches it contains (default keeps one copy per match)
    :return: a new doc built from the matched sentences, or ``None`` when
        nothing matched
    """
    if doc is None:
        return None

    matches = matcher(doc)
    if debug:
        import textwrap
        print("Total matches found:", len(matches))

    matched_sentences = []
    seen_bounds = set()  # (start, end) token offsets of sentences already kept
    for match_id, start, end in matches:
        span = doc[start:end]  # Matched span
        sent = span.sent
        if unique_sentences:
            bounds = (sent.start, sent.end)
            if bounds in seen_bounds:
                continue
            seen_bounds.add(bounds)
        matched_sentences.append(sent.text)
        if debug:
            print("Match found:", span.text)
            for line in textwrap.wrap(sent.text, width=100):
                print(line)

    if not matched_sentences:
        return None
    return nlp(" ".join(matched_sentences))


In [297]:
# Global tally, starts empty; keys and counts are added while articles are processed
match_counts = dict()

In [298]:
def match_doc_with_counts(doc, matcher, date):
    """
    Count matcher hits in a doc, keyed by date and lowercased matched text.

    Each match increments ``match_counts[(date, matched_text.lower())]`` in the
    module-level ``match_counts`` dict. Nothing is returned: the tally in
    ``match_counts`` is the only result.

    :param doc: parsed document to search; ``None`` is treated as "no matches"
    :param matcher: callable returning ``(match_id, start, end)`` triples for ``doc``
    :param date: label stored as the first element of every count key
    :return: None
    """
    global match_counts

    # Nothing to count when the previous stage produced no document
    if doc is None:
        return None

    for match_id, start, end in matcher(doc):
        key = (date, doc[start:end].text.lower())
        match_counts[key] = match_counts.get(key, 0) + 1
    return None


In [299]:
# Matchers already built, keyed by (vocab identity, label, phrases)
_MATCHER_CACHE = {}


def _get_cached_matcher(terms, label):
    """Return the matcher for ``label``/``terms``, calling create_matcher only on first use."""
    cache_key = (id(nlp.vocab), label, tuple(terms))
    if cache_key not in _MATCHER_CACHE:
        _MATCHER_CACHE[cache_key] = create_matcher(terms, label)
    return _MATCHER_CACHE[cache_key]


def search_article(article, date):
    """
    Tally city mentions that occur in disease-related sentences of one article.

    The article is parsed with the global ``nlp``; sentences containing
    "coronavirus", "virus" or "disease" are extracted with
    ``match_doc_with_sentences``, and the phrases in the global ``cities`` list
    are then counted inside those sentences by ``match_doc_with_counts``, which
    receives ``date`` for its count keys.

    Both matchers are built once and reused across calls instead of being
    rebuilt (including every city phrase) for each article.

    :param article: article text
    :param date: date label forwarded to ``match_doc_with_counts``
    :return: sentence texts of whatever ``match_doc_with_counts`` returns, or
        ``None`` when that is falsy. As written in this notebook that function
        returns nothing, so the result here is ``None``.
    """
    doc = nlp(article)

    disease_matcher = _get_cached_matcher(["coronavirus", "virus", "disease"], "DISEASE")
    # The "DEATHS" label is only a registration key; the phrases are city names
    city_matcher = _get_cached_matcher(cities, "DEATHS")

    docs_match_disease = match_doc_with_sentences(doc, disease_matcher)
    if docs_match_disease is None:
        # No disease-related sentence, so there is nothing to count cities in
        return None

    docs_match_disease_and_death = match_doc_with_counts(docs_match_disease, city_matcher, date)
    if docs_match_disease_and_death:
        return get_sentences_from_doc(docs_match_disease_and_death)
    return None


In [106]:
import textwrap

In [308]:
# Run the city-mention search over every article in `master` (the NYT data).
# Start from an empty tally so re-running this cell does not double count.
match_counts.clear()
for counter, key in enumerate(master):
    # Keys are "<date> | <url>". Take everything before the separator, so the
    # undated prefix is stored as "no_date" and not as "no_date | " (which
    # the fixed slice key[0:10] produced).
    search_article(master[key], key.split(' | ', 1)[0])
    # Progress indicator: print every 10th article index
    if counter % 10 == 0:
        print(counter)

0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440
450
460
470
480
490
500
510
520
530
540
550
560
570
580
590
600
610
620
630
640
650
660
670
680
690
700
710
720
730
740
750
760
770
780
790
800
810
820
830
840
850
860
870
880
890
900
910
920
930
940
950
960
970
980
990
1000
1010
1020
1030
1040
1050
1060
1070
1080
1090
1100
1110
1120
1130
1140
1150
1160
1170
1180
1190
1200
1210
1220
1230
1240
1250
1260
1270
1280
1290
1300
1310
1320
1330
1340
1350
1360
1370
1380
1390
1400
1410
1420
1430
1440
1450
1460
1470
1480
1490
1500
1510


In [300]:
#tests out the program
#x = 'Coronavirus shanghai Wuhan wuhan'
#answer=search_article(x, '2020/01/31')

In [309]:
# JSON object keys must be strings, so flatten every (date, city) tuple key
# into "date | city" before saving the counts
match_counts_json = {
    f"{dte} | {city}": total
    for (dte, city), total in match_counts.items()
}
with open('nytimes_city_counts.json', 'w') as fp:
    json.dump(match_counts_json, fp, indent=4)

In [306]:
# NOTE(review): this reset was executed as In [306], i.e. before the full run
# (In [308]) and the JSON export (In [309]). In top-to-bottom order it sits
# after them, so running the cells in file order empties match_counts right
# before the per-city aggregation that follows.
match_counts = {}

In [311]:
# Collapse the (date, city) counts into one total per city, summed over all dates
cities_concatonated = {}
for (dt, cty), mentions in match_counts.items():
    cities_concatonated[cty] = cities_concatonated.get(cty, 0) + mentions

In [314]:
cities_concatonated

{'wuhan': 1609,
 'yi': 7,
 'hong kong': 303,
 'xinhua': 21,
 'mile': 1,
 'she': 170,
 'incheon': 3,
 'xi': 129,
 'jinping': 54,
 'zhong': 8,
 'shanghai': 127,
 'beijing': 236,
 'li': 97,
 'singapore': 124,
 'macau': 62,
 'handan': 2,
 'huanan': 5,
 'lin': 6,
 'jing': 1,
 'tianjin': 14,
 'nanjing': 13,
 'wei': 13,
 'ji': 3,
 'huanggang': 5,
 'ezhou': 2,
 'yu': 4,
 'shibing': 1,
 'kunshan': 2,
 'date': 31,
 'qingyuan': 1,
 'chengdu': 11,
 'weixin': 1,
 'orange': 19,
 'kobe': 9,
 'nanshan': 2,
 'tokyo': 47,
 'xianning': 1,
 'tangshan': 1,
 'wen': 5,
 'nara': 1,
 'jiao': 2,
 'seoul': 26,
 'daxing': 3,
 'guide': 12,
 'bangkok': 8,
 'munich': 9,
 'melbourne': 7,
 'kato': 6,
 'toyota': 14,
 'anan': 1,
 'nakhon pathom': 3,
 'frankfurt': 7,
 'chongli': 6,
 'dongguan': 5,
 'boxing': 4,
 'foshan': 5,
 'osaka': 5,
 'chongqing': 5,
 'yokohama': 37,
 'ulsan': 2,
 'guangzhou': 25,
 'kuala lumpur': 3,
 'yanqing': 1,
 'hangzhou': 1,
 'dongfeng': 6,
 'sydney': 12,
 'obama': 2,
 'brisbane': 9,
 'berlin':

In [317]:
import pandas as pd

In [318]:
list(cities_concatonated.keys())

['wuhan',
 'yi',
 'hong kong',
 'xinhua',
 'mile',
 'she',
 'incheon',
 'xi',
 'jinping',
 'zhong',
 'shanghai',
 'beijing',
 'li',
 'singapore',
 'macau',
 'handan',
 'huanan',
 'lin',
 'jing',
 'tianjin',
 'nanjing',
 'wei',
 'ji',
 'huanggang',
 'ezhou',
 'yu',
 'shibing',
 'kunshan',
 'date',
 'qingyuan',
 'chengdu',
 'weixin',
 'orange',
 'kobe',
 'nanshan',
 'tokyo',
 'xianning',
 'tangshan',
 'wen',
 'nara',
 'jiao',
 'seoul',
 'daxing',
 'guide',
 'bangkok',
 'munich',
 'melbourne',
 'kato',
 'toyota',
 'anan',
 'nakhon pathom',
 'frankfurt',
 'chongli',
 'dongguan',
 'boxing',
 'foshan',
 'osaka',
 'chongqing',
 'yokohama',
 'ulsan',
 'guangzhou',
 'kuala lumpur',
 'yanqing',
 'hangzhou',
 'dongfeng',
 'sydney',
 'obama',
 'brisbane',
 'berlin',
 'yanting',
 'takeo',
 'xinjiang',
 'nakano',
 'dalian',
 'song',
 'dong',
 'qingdao',
 'victoria',
 'huangmei county',
 'zhongshan',
 'phuket',
 'shenyang',
 'xian',
 'tang',
 'wuxi',
 'shenzhen',
 'hobart',
 'yao',
 'jiujiang',
 'lin

In [324]:
# Two-column table: one row per matched city with its total number of mentions
cities_pd = {
    'city': [*cities_concatonated],
    'counts': [*cities_concatonated.values()],
}
cities_df = pd.DataFrame(cities_pd)

In [325]:
cities_df

Unnamed: 0,city,counts
0,wuhan,1609
1,yi,7
2,hong kong,303
3,xinhua,21
4,mile,1
5,she,170
6,incheon,3
7,xi,129
8,jinping,54
9,zhong,8


In [346]:
import json

# Load the city coordinates; keys are used as city names and values as coordinates below.
# Explicit UTF-8 so decoding does not depend on the platform's default locale.
with open('city_coords_full.json', encoding='utf-8') as json_file:
    lat_lon = json.load(json_file)

In [354]:
# Lowercased city names from lat_lon, so they line up with the lowercased match texts
cities_lower = [name.lower() for name in lat_lon]

In [355]:

# Split every coordinate entry into its first and second element.
# NOTE(review): assumes each value is a (latitude, longitude) pair - confirm against the JSON file
lat = [coords[0] for coords in lat_lon.values()]
lon = [coords[1] for coords in lat_lon.values()]

# One row per city: lowercased name plus its two coordinates
cities_lat_lon = {'city': cities_lower, 'lat': lat, 'lon': lon}
cities_lat_lon_df = pd.DataFrame(cities_lat_lon)


In [356]:
cities_lat_lon_df

Unnamed: 0,city,lat,lon
0,beijing,39.906217,116.391276
1,lhasa,29.655389,91.170474
2,alashankou,45.169797,82.565370
3,yining/qulja,43.905203,81.274780
4,aksu,41.166667,80.261243
5,altay,47.825756,88.129170
6,alxa left banner,38.835858,105.662277
7,amdo,32.267615,91.681071
8,anguo,38.427748,115.330380
9,ankang,32.716518,108.991299


In [357]:
# Inner join on the city name: keeps only matched cities that also have coordinates
result = cities_df.merge(cities_lat_lon_df, how='inner', on='city')

In [365]:
result.head()

Unnamed: 0,city,counts,lat,lon
0,wuhan,1609,30.595105,114.299935
1,yi,7,39.352737,115.497201
2,hong kong,303,22.279328,114.162813
3,xinhua,21,33.738937,113.280364
4,mile,1,24.413921,103.412702


In [373]:
sum(list(result['counts']))

3530

In [363]:
list(result['counts'])[0]

1609

In [None]:
def 

In [369]:
# Repeat each city's coordinates once per mention, so the heatmap weight of a
# city is proportional to its count.
# Walking the three columns together avoids rebuilding list(result[...]) for
# every single repetition, which the index-based version did.
lat_final = []
lon_final = []
for lat_value, lon_value, count in zip(result['lat'], result['lon'], result['counts']):
    lat_final.extend([lat_value] * count)
    lon_final.extend([lon_value] * count)


In [370]:
len(lat_final)

3530

In [375]:
import pandas as pd
import gmplot
from IPython.display import display

In [376]:
# Initial map view: centre latitude, centre longitude and zoom level
MAP_CENTER_LAT = 30.595105
MAP_CENTER_LON = 114.299935
MAP_ZOOM = 10
gmap = gmplot.GoogleMapPlotter(MAP_CENTER_LAT, MAP_CENTER_LON, MAP_ZOOM)

# Add the repeated coordinates as a heatmap layer
gmap.heatmap(lat_final, lon_final)

# Write the map to a standalone HTML file
gmap.draw("my_heatmap.html")