# Mapping Terms Application

This notebook uses the [Chronicling America API](https://chroniclingamerica.loc.gov/about/api/) to map how often a search term appears in newspapers, by city and year, over a given span of years.

In [6]:
import pandas as pd
import requests
from geopy import geocoders
from geopy.extra.rate_limiter import RateLimiter
import plotly.offline as pyo
import plotly.graph_objs as go
import time

# Set up Plotly's offline notebook mode so the pyo.iplot(...) calls below render
# inline. NOTE(review): connected=True is understood to load plotly.js from the
# CDN instead of embedding it in the notebook — confirm against the Plotly docs.
pyo.init_notebook_mode(connected=True)

In [9]:
# --- Search parameters -------------------------------------------------------
searchTerm = "free+love"  # phrase to search for, already URL-encoded
beginYear = 1875
endYear = 1880
pg = 1       # first results page to request
endpg = 2    # last results page to request; caps the download while exploring

# --- Outputs used by later cells ---------------------------------------------
allData = []  # one (date, citystate) frame per results page; concatenated below
locs = {}     # "City, State" -> (latitude, longitude); (None, None) if unresolved

# The geocoder and its rate limiter do not depend on the page, so build them once.
# NOTE(review): the GeoNames username is hardcoded; consider moving it to an
# environment variable before sharing this notebook.
gn = geocoders.GeoNames(username='abwatkins')
geocode = RateLimiter(gn.geocode, min_delay_seconds=1)

# Search endpoint template (other supported query parameters include andtext,
# ortext, year, proxText and proximityValue):
# http://chroniclingamerica.loc.gov/search/pages/results/?andtext={searchTerms}&page={startPage?}&...&format=json
while True:
    url = "http://chroniclingamerica.loc.gov/search/pages/results/?phrasetext={0}&dateFilterType=yearRange&date1={1}&date2={2}&page={3}&format=json".format(searchTerm,beginYear,endYear,pg)
    print(url)
    call = requests.get(url)
    call.raise_for_status()  # fail loudly on HTTP errors instead of on .json()
    data = call.json()
    df = pd.DataFrame(data["items"])
    if df.empty:
        # No results on this page: nothing to extract and nothing further to fetch.
        break

    # Only the city and date are needed for this application.
    nf = pd.DataFrame()
    nf['date'] = pd.to_datetime(df['date'])
    # 'city' and 'state' are indexed with [0], i.e. the first listed value is used.
    nf['citystate'] = df['city'].map(lambda a: a[0]) + ', ' + df['state'].map(lambda a: a[0])

    # Geocode each place at most once across all pages.
    for place in nf['citystate'].unique():
        if place not in locs:
            loc = geocode(place)
            # geocode() yields None when nothing matches (or the rate limiter
            # swallowed an error); record the miss instead of crashing so the
            # coordinate lookups in later cells still find a key.
            if loc is None:
                locs[place] = (None, None)
            else:
                locs[place] = (loc.latitude, loc.longitude)

    allData.append(nf)

    # Stop at the last available result or at the configured page cap.
    if data["endIndex"] >= data["totalItems"] or pg >= endpg:
        break
    pg += 1

if allData:
    # ignore_index avoids repeated row labels (each page is numbered from 0).
    allData = pd.concat(allData, ignore_index=True)
else:
    # Keep the expected columns/dtypes even when the search returned nothing.
    allData = pd.DataFrame({'date': pd.to_datetime([]),
                            'citystate': pd.Series(dtype='object')})
#allData.head(25)        

  


http://chroniclingamerica.loc.gov/search/pages/results/?phrasetext=free+love&dateFilterType=yearRange&date1=1875&date2=1880&page=1&format=json
{'Saint Paul, Minnesota': (44.94441, -93.09327), 'Portland, Oregon': (45.52345, -122.67621), 'Indianapolis, Indiana': (39.76838, -86.15804), 'New York, New York': (40.71427, -74.00597), 'Washington, District of Columbia': (38.89511, -77.03637), 'Alexandria, District of Columbia': (49.09944, -123.90139), 'Columbia, South Carolina': (34.00071, -81.03481), 'Bellevue, Louisiana': (32.6432, -93.48017), 'Idaho City, Idaho': (43.6135, -116.20345), 'Selma, Alabama': (32.40736, -87.0211), 'Wisconsin Rapids, Wisconsin': (44.38358, -89.81735), 'Watertown, Wisconsin': (43.19472, -88.72899), 'Gallipolis, Ohio': (38.8098, -82.20237), 'Helena, Montana': (46.59271, -112.03611), 'Worthington, Minnesota': (43.61996, -95.5964), 'New Orleans, Louisiana': (29.95465, -90.07507), 'Chicago, Illinois': (41.85003, -87.65005)}
http://chroniclingamerica.loc.gov/search/page

In [55]:
# Cross-tabulate publication year against city to get a mention count for every
# (year, city) pair, then reshape to one row per pair with the count in 'Freq'.
ct = (
    pd.crosstab(allData['date'].dt.year, allData['citystate'])
    .stack()
    .reset_index()
    .rename(columns={0: 'Freq'})
)

# Drop pairs with no mentions before attaching coordinates. The .copy() makes
# ct an independent frame, so the column assignments here and in later cells do
# not write into a filtered slice (which triggers SettingWithCopyWarning).
ct = ct[ct['Freq'] != 0].copy()

# Attach the coordinates geocoded earlier (locs maps "City, State" -> (lat, lon)).
ct['lats'] = ct['citystate'].map(lambda g: locs[g][0])
ct['lons'] = ct['citystate'].map(lambda g: locs[g][1])

In [56]:
# Bubble map with a year slider: one Scattergeo trace per year, only one trace
# visible at a time.
# lots taken from https://plot.ly/python/bubble-maps/

# Hover label for each bubble. assign() rebinds ct to a new frame instead of
# writing a column into what may be a filtered slice of another frame.
ct = ct.assign(
    text=ct['citystate']+' '+ct['date'].astype(str)+'<br>Mentions: '+ct['Freq'].astype(str)
)

data = []
for i, yr in enumerate(ct['date'].unique()):
    ct_sub = ct[ct['date']==yr]
    data.append(go.Scattergeo(
        # Only the first year starts visible; the slider toggles the rest.
        # Setting this here (rather than via fig.data[0] afterwards) also
        # avoids an IndexError when there are no traces at all.
        visible=(i == 0),
        locationmode = 'USA-states',
        lon=ct_sub['lons'],
        lat=ct_sub['lats'],
        text=ct_sub['text'],
        marker = dict(
            size=ct_sub['Freq']*100,  # scaled so small counts are still visible
            sizemode = 'area'
        ),
        name = str(yr)
    ))

layout = dict(
    title = 'Searching for {0}'.format(searchTerm),
    showlegend = True,
    geo = dict(
        scope = 'usa',
        showland = True,
        landcolor = 'rgb(217, 217, 217)',
    )
)
fig = go.Figure(data = data, layout=layout)

# One slider step per trace: each step shows exactly its own trace.
n_traces = len(fig.data)
steps = []
for i, trace in enumerate(fig.data):
    visibility = [False] * n_traces
    visibility[i] = True  # Toggle i'th trace to "visible"
    steps.append(dict(
        method="restyle",
        args=["visible", visibility],
        label = trace.name
    ))

sliders = [dict(
    active=0,
    currentvalue={"prefix": "year: "},
    pad={"t": 50},
    steps= steps
)]

fig.layout['sliders']=sliders

pyo.iplot(fig)

In [57]:
# Bubble map with every year drawn at once; individual years are toggled from
# the legend.
# lots taken from https://plot.ly/python/bubble-maps/

# Keep only (year, city) pairs that were actually mentioned and build the hover
# label in one chained expression. The original filtered ct and then assigned a
# column into the filtered result, which triggers SettingWithCopyWarning;
# assign() returns a fresh frame instead.
ct = ct[ct['Freq']!=0]  # don't want to map 0 counts
ct = ct.assign(
    text=ct['citystate']+' '+ct['date'].astype(str)+'<br>Mentions: '+ct['Freq'].astype(str)
)

# One trace per year, all left at the default visibility.
data = []
for yr in ct['date'].unique():
    ct_sub = ct[ct['date']==yr]
    data.append(go.Scattergeo(
        locationmode = 'USA-states',
        lon=ct_sub['lons'],
        lat=ct_sub['lats'],
        text=ct_sub['text'],
        marker = dict(
            size=ct_sub['Freq']*100,  # scaled so small counts are still visible
            sizemode = 'area'
        ),
        name = str(yr)
    ))

layout = dict(
    title = 'Searching for {0}<br>(Click legend to toggle traces)'.format(searchTerm),
    showlegend = True,
    geo = dict(
        scope = 'usa',
        showland = True,
        landcolor = 'rgb(217, 217, 217)',
    )
)
fig = go.Figure(data = data, layout=layout)

pyo.iplot(fig)