## Mapping the New York Times Data

A first attempt to pull and plot the Covid-19 data collected by the New York Times.
See [the Times GitHub repository](https://github.com/nytimes/covid-19-data).

The plot is rendered using a [plotly](https://plotly.com/python/) style that is reverse-engineered to resemble the [maps published on the Times site](https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html). Hovering over a county shows its most recent case and death counts. An interface is also provided that lets a user type in an address and see the most recent Covid-19 statistics for the US county containing that address. The idea is to allow people to see what's going on near family and friends without having to know which county they live in. Addresses are resolved using a geocoding API from [HERE.com](https://www.here.com/). The result looks something like this:

![example image](plot-example.png)

Next up will be timelines and forecasts of the cases and deaths over the coming days, incorporating some basic statistical modelling. 


In [1]:
import datetime
import json
import os

import dateutil.parser
import herepy
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# The HERE geocoding key is taken from the HERE_API_KEY environment variable when it is set.
# FIXME(security): the fallback literal below is a credential committed to source control.
# Rotate it and delete the fallback once every environment provides HERE_API_KEY.
HERE_API_KEY = os.environ.get('HERE_API_KEY', 'VbY-MyI6ZT9U8h-Y5GP5W1YaOzQuvNnL4aSTulNEyEQ')
geocoderApi = herepy.GeocoderApi(HERE_API_KEY)

# County- and state-level tables from the New York Times repository.
# `fips` is read as a string column rather than being parsed as a number.
df_counties = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv", dtype={"fips": str})
df_states = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv", dtype={"fips": str})


In [2]:
# Only the last expression of a cell is displayed, so this bare reference to df_states shows nothing.
df_states
# Last expression of the cell: this is the table rendered as the cell output.
df_counties

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061,1,0
1,2020-01-22,Snohomish,Washington,53061,1,0
2,2020-01-23,Snohomish,Washington,53061,1,0
3,2020-01-24,Cook,Illinois,17031,1,0
4,2020-01-24,Snohomish,Washington,53061,1,0
...,...,...,...,...,...,...
21794,2020-03-30,Sheridan,Wyoming,56033,8,0
21795,2020-03-30,Sublette,Wyoming,56035,1,0
21796,2020-03-30,Sweetwater,Wyoming,56037,2,0
21797,2020-03-30,Teton,Wyoming,56039,17,0


In [3]:
# The `date` column holds ISO "YYYY-MM-DD" strings, so the lexicographic maximum is also the
# latest date; only that single string needs to be parsed instead of every row.
last_date = dateutil.parser.parse(df_counties['date'].max())
# Same format as the `date` column, used to select the latest rows.
most_recent_date = last_date.strftime("%Y-%m-%d")
# Human-readable form, used in figure titles.
most_recent_date_long = last_date.strftime("%A %B %d, %Y")
most_recent_date_long

'Monday March 30, 2020'

In [4]:
# Rows for the latest reporting date, largest case counts first.
# reset_index(drop=True) renumbers the rows without turning the old index into a column, which
# avoids a follow-up drop() call (DataFrame.drop no longer accepts a positional axis in pandas 2.0).
df_recent = (
    df_counties[df_counties['date'] == most_recent_date]
    .sort_values('cases', ascending=False)
    .reset_index(drop=True)
)
df_recent

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-03-30,New York City,New York,,38087,914
1,2020-03-30,Westchester,New York,36119,9326,19
2,2020-03-30,Nassau,New York,36059,7344,48
3,2020-03-30,Suffolk,New York,36103,5791,44
4,2020-03-30,Unknown,New Jersey,,3840,7
...,...,...,...,...,...,...
2081,2020-03-30,Jo Daviess,Illinois,17085,1,0
2082,2020-03-30,Ray,Missouri,29177,1,0
2083,2020-03-30,Barnwell,South Carolina,45011,1,0
2084,2020-03-30,Franklin,Illinois,17055,1,0


In [5]:

def compute_lat_lon(df, geocoder):
    """Geocode every row of ``df`` that has a string FIPS code.

    Parameters
    ----------
    df : mapping of column name -> sequence (e.g. a pandas DataFrame)
        Must provide 'county', 'state', 'fips', 'cases' and 'deaths' columns of equal length.
    geocoder : object
        Must expose ``free_form(query)`` returning a response object with an
        ``as_json_string()`` method (the interface used by this function).

    Returns
    -------
    (lat, lon) : tuple of two lists
        One entry per row of ``df``. Rows whose ``fips`` value is not a string (for example a
        missing value) are not geocoded and keep the placeholder 0.0.

    Raises
    ------
    KeyError, IndexError
        If a geocoder response does not contain a first result with a display position.
    """
    county = list(df['county'])
    state = list(df['state'])
    fips = list(df['fips'])
    cases = list(df['cases'])
    deaths = list(df['deaths'])
    lat = [0.0] * len(county)
    lon = [0.0] * len(county)

    num_to_print = 20
    # Progress is reported for the first rows and then roughly every 5% of the table.
    # max(1, ...) keeps the modulus valid when the table has fewer than 20 rows.
    report_every = max(1, len(county) // 20)
    for i in range(len(county)):
        if isinstance(fips[i], str):
            response = geocoder.free_form('%s, %s' % (county[i], state[i]))
            # The response is JSON text from a remote service: decode it with json.loads.
            # eval() would execute it as code and fails on JSON's true/false/null literals.
            res = json.loads(response.as_json_string())
            position = res['Response']['View'][0]['Result'][0]['Location']['DisplayPosition']
            lat[i] = position['Latitude']
            lon[i] = position['Longitude']
            if i <= num_to_print or i % report_every == 0:
                print("%s, %s: cases=%d, deaths=%d, lat=%f, lon=%f" % \
                        (county[i], state[i], cases[i], deaths[i], lat[i], lon[i]))
                if i >= num_to_print:
                    print("...")
    return (lat, lon)


def lat_lon_of_address(addr):
    """Return ``(latitude, longitude)`` of the first geocoder match for ``addr``.

    The lookup goes through the module-level ``geocoderApi``.

    Raises ``KeyError`` or ``IndexError`` if the response does not contain a first result with a
    display position.
    """
    response = geocoderApi.free_form(addr)
    # The response is JSON text from a remote service: decode it with json.loads.
    # eval() would execute it as code and fails on JSON's true/false/null literals.
    res = json.loads(response.as_json_string())
    position = res['Response']['View'][0]['Result'][0]['Location']['DisplayPosition']
    return (position['Latitude'], position['Longitude'])


def county_state_of_address(addr):
    """Return ``(county, state)`` names of the first geocoder match for ``addr``.

    The lookup goes through the module-level ``geocoderApi``. The names are read from the
    ``AdditionalData`` list of the matched address.

    Raises ``KeyError`` or ``IndexError`` if the response has no first result or the expected
    entries are missing.
    """
    response = geocoderApi.free_form(addr)
    # The response is JSON text from a remote service: decode it with json.loads.
    # eval() would execute it as code and fails on JSON's true/false/null literals.
    res = json.loads(response.as_json_string())
    additional = res['Response']['View'][0]['Result'][0]['Location']['Address']['AdditionalData']

    # Prefer lookup by entry key so the result does not depend on the order of the list.
    # NOTE(review): 'StateName'/'CountyName' are the keys expected from the HERE geocoder -- confirm
    # against the API reference. When a key is absent, fall back to the positional layout
    # (state as the second entry, county as the third).
    by_key = {entry.get('key'): entry['value'] for entry in additional}
    state = by_key['StateName'] if 'StateName' in by_key else additional[1]['value']
    county = by_key['CountyName'] if 'CountyName' in by_key else additional[2]['value']
    return (county, state)


In [6]:
# Set to True to re-query the geocoder for every county and rewrite the local geo-counties.csv.
recompute_geocodes = False
if recompute_geocodes:
    lat, lon = compute_lat_lon(df_recent, geocoderApi)
    # Build the lookup table from a new frame so that df_recent itself does not gain lat/lon
    # columns (it is merged with df_geo in a later cell). Columns are dropped by name because
    # DataFrame.drop no longer accepts a positional axis in pandas 2.0.
    lat_lon = (
        df_recent
        .assign(lat=lat, lon=lon)
        .drop(columns=['cases', 'deaths', 'date'])
    )
    lat_lon.to_csv('geo-counties.csv', header=True, index=False)

# The county coordinates used by the rest of the notebook always come from the published copy.
df_geo = pd.read_csv("https://raw.githubusercontent.com/jdlafferty/covid-19/master/geo-counties.csv", dtype={"fips": str})
df_geo.head(20)

Unnamed: 0,county,state,fips,lat,lon
0,New York City,New York,,40.71455,-74.00714
1,Westchester,New York,36119.0,41.11909,-73.7887
2,Nassau,New York,36059.0,42.51642,-73.61127
3,Suffolk,New York,36103.0,40.96009,-72.83434
4,Cook,Illinois,17031.0,41.81392,-87.61546
5,King,Washington,53033.0,47.43248,-121.9594
6,Unknown,New Jersey,,0.0,0.0
7,Wayne,Michigan,26163.0,42.28515,-83.38361
8,Los Angeles,California,6037.0,34.05361,-118.2455
9,Bergen,New Jersey,34003.0,40.94757,-74.02765


In [7]:
# Preview the ten counties with the most cases on the latest date (df_recent is sorted by cases, descending).
df_recent.head(10)

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-03-30,New York City,New York,,38087,914
1,2020-03-30,Westchester,New York,36119.0,9326,19
2,2020-03-30,Nassau,New York,36059.0,7344,48
3,2020-03-30,Suffolk,New York,36103.0,5791,44
4,2020-03-30,Unknown,New Jersey,,3840,7
5,2020-03-30,Cook,Illinois,17031.0,3727,44
6,2020-03-30,Wayne,Michigan,26163.0,3195,83
7,2020-03-30,Rockland,New York,36087.0,2511,8
8,2020-03-30,Bergen,New Jersey,34003.0,2482,41
9,2020-03-30,Los Angeles,California,6037.0,2474,44


In [8]:
# Attach the county coordinates to the latest counts. No join keys are given, so the merge
# uses the columns the two frames have in common.
df_recent = df_recent.merge(df_geo)
df_recent.head(10)

Unnamed: 0,date,county,state,fips,cases,deaths,lat,lon
0,2020-03-30,New York City,New York,,38087,914,40.71455,-74.00714
1,2020-03-30,Westchester,New York,36119.0,9326,19,41.11909,-73.7887
2,2020-03-30,Nassau,New York,36059.0,7344,48,42.51642,-73.61127
3,2020-03-30,Suffolk,New York,36103.0,5791,44,40.96009,-72.83434
4,2020-03-30,Unknown,New Jersey,,3840,7,0.0,0.0
5,2020-03-30,Cook,Illinois,17031.0,3727,44,41.81392,-87.61546
6,2020-03-30,Wayne,Michigan,26163.0,3195,83,42.28515,-83.38361
7,2020-03-30,Rockland,New York,36087.0,2511,8,41.89899,-74.83049
8,2020-03-30,Bergen,New Jersey,34003.0,2482,41,40.94757,-74.02765
9,2020-03-30,Los Angeles,California,6037.0,2474,44,34.05361,-118.2455


In [9]:
def render_map(show=True, min_cases=1, scale=3.0):
    """Build, and optionally display, the bubble map of the latest county counts.

    Reads the module-level ``df_recent`` and, as a side effect, stores the hover label of every
    row in ``df_recent['text']``.

    Parameters
    ----------
    show : bool
        When true, display the figure with scroll-zoom disabled.
    min_cases : int
        Counties with fewer cases than this are left off the map.
    scale : float
        Divisor applied to the case count to obtain the marker size (``sizemode='area'``).

    Returns
    -------
    The plotly figure.
    """
    # Hover label: "<county>, <state>" on the first line, the counts on the second.
    df_recent['text'] = (
        df_recent['county'] + ', ' + df_recent['state'] + '<br>'
        + df_recent['cases'].astype(str) + ' cases, '
        + df_recent['deaths'].astype(str) + ' deaths'
    )

    # Rows labelled 'Unknown' are never plotted, whatever their case count.
    plotted = df_recent[(df_recent['cases'] >= min_cases) & (df_recent['county'] != 'Unknown')]

    bubble_marker = dict(
        size=plotted['cases'] / scale,
        color='rgba(255, 0, 0, 0.2)',
        line_color='black',
        line_width=0.5,
        sizemode='area',
    )
    bubbles = go.Scattergeo(
        locationmode='USA-states',
        lon=plotted['lon'],
        lat=plotted['lat'],
        text=plotted['text'],
        name='',
        marker=bubble_marker,
    )

    fig = go.Figure()
    fig.add_trace(bubbles)
    fig.update_layout(
        width=1000,
        height=700,
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
        showlegend=False,
        geo=dict(scope='usa', landcolor='rgb(230, 230, 230)'),
    )

    if show:
        fig.show(config={'scrollZoom': False})

    return fig
    
    
def render_map_with_address(addr=None, show=True, scale=3.0):
    """Draw the county case map with an extra green marker at a geocoded address.

    The marker's hover text shows the address and the latest counts of its county, taken from
    the ``text`` column that ``render_map`` writes into the module-level ``df_recent``.

    Parameters
    ----------
    addr : str
        Free-form address passed to the geocoder. Required, despite the ``None`` default.
    show : bool
        When true, display the figure with scroll-zoom disabled.
    scale : float
        Marker-size divisor. It is forwarded to ``render_map`` so that the county bubbles and
        the address marker (drawn with size ``100/scale``) use the same scale.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``addr`` is empty or ``None``.
    """
    if not addr:
        raise ValueError("render_map_with_address requires a non-empty address")

    fig = render_map(show=False, scale=scale)

    this_lat, this_lon = lat_lon_of_address(addr)
    this_county, this_state = county_state_of_address(addr)
    county_record = df_recent[(df_recent['county']==this_county) & (df_recent['state']==this_state)]
    if len(county_record) > 0:
        county_text = county_record['text'].iloc[0]
    else:
        # The county has no row for the latest date: label the marker instead of failing.
        county_text = '%s, %s<br>no data reported' % (this_county, this_state)
    this_text = '%s<br>County: %s' % (addr, county_text)

    fig.add_trace(go.Scattergeo(
        locationmode = 'USA-states',
        lon = [this_lon],
        lat = [this_lat],
        text = [this_text],
        name = '',
        marker = dict(
            size = 100/scale,
            color = 'rgba(0,255,0,0.2)',
            line_color='black',
            line_width=0.5,
            sizemode = 'area'
        ),
    ))

    # Size, margins and geo styling are already set by render_map; only the attribution is added.
    fig.update_layout(
        title={
            'text': "Data from The New York Times<br>https://github.com/nytimes/covid-19-data<br>%s" % most_recent_date_long,
            'y':0.05,
            'x':0.85,
            'xanchor': 'left',
            'yanchor': 'bottom'},
        font=dict(
            family="Times New Roman",
            size=6,
            color="#7f7f7f")
    )

    if show:
        fig.show(config={'scrollZoom': False})
    



In [210]:
# Example: the county case map with a marker at Seattle (the address is resolved through the geocoder).
render_map_with_address('seattle, washington')

In [18]:
# Scratch lookup: every row of the county-level table for the county containing one address.
addr = "100 River Street, Guilford CT"
this_lat, this_lon = lat_lon_of_address(addr)
this_county, this_state = county_state_of_address(addr)
df_county = df_counties.loc[
    df_counties['county'].eq(this_county) & df_counties['state'].eq(this_state)
]
df_county

Unnamed: 0,date,county,state,fips,cases,deaths
1994,2020-03-14,New Haven,Connecticut,9009,1,0
2376,2020-03-15,New Haven,Connecticut,9009,3,0
2811,2020-03-16,New Haven,Connecticut,9009,4,0
3293,2020-03-17,New Haven,Connecticut,9009,8,0
3849,2020-03-18,New Haven,Connecticut,9009,10,0
4512,2020-03-19,New Haven,Connecticut,9009,24,0
5290,2020-03-20,New Haven,Connecticut,9009,24,0
6217,2020-03-21,New Haven,Connecticut,9009,24,0
7253,2020-03-22,New Haven,Connecticut,9009,29,0
8399,2020-03-23,New Haven,Connecticut,9009,41,0


In [28]:
# Bar chart of the 'cases' column by date for the county selected in the previous cell.
# `deaths` is extracted alongside but is not plotted here.
dates, cases, deaths = (list(df_county[column]) for column in ('date', 'cases', 'deaths'))
fig = go.Figure([go.Bar(x=dates, y=cases)])
fig.show()


In [163]:
def histogram_for_address(addr,max_to_show=20):
    """Show a stacked bar chart of deaths and cases for the county containing ``addr``.

    NOTE: a later cell of this notebook defines ``histogram_for_address`` again; once that cell
    has run, this version is shadowed.

    Parameters
    ----------
    addr : str
        Free-form address; its county and state are resolved through the geocoder.
    max_to_show : int
        Only the last ``max_to_show`` rows of the county's history are plotted.

    Raises
    ------
    ValueError
        If the county-level table has no rows for the resolved county.
    """
    # Only the county and state names are needed; the coordinates are not used here, so no
    # separate latitude/longitude request is made.
    this_county, this_state = county_state_of_address(addr)
    df_county = df_counties[(df_counties['county']==this_county) & (df_counties['state']==this_state)]
    df_county = df_county.tail(max_to_show)
    if df_county.empty:
        # Without this check the figure width below would be 0, which plotly rejects.
        raise ValueError('No data found for %s, %s (address: %s)' % (this_county, this_state, addr))

    dates = list(df_county['date'])
    cases = list(df_county['cases'])
    deaths = list(df_county['deaths'])
    fig = go.Figure(data=[
        go.Bar(name='deaths', x=dates, y=deaths),
        go.Bar(name='cases', x=dates, y=cases)
    ])
    fig.update_layout(
        height = 200,
        width = len(dates)*15,  # 15 pixels per plotted date
        margin={"r":0,"t":0,"l":0,"b":0},
        barmode='stack',
        showlegend=True,
    )
    fig.show()

# Example: stacked deaths/cases chart for the county containing this address.
histogram_for_address('100 River St, Guilford, CT')

In [195]:
def histogram_for_address(addr,max_to_show=20):
    """Show separate case and death bar charts for the county containing ``addr``.

    This redefines the ``histogram_for_address`` from the earlier cell: cases and deaths are
    drawn as two figures instead of one stacked chart, and a caption is printed underneath.

    Parameters
    ----------
    addr : str
        Free-form address; its county and state are resolved through the geocoder.
    max_to_show : int
        Only the last ``max_to_show`` rows of the county's history are plotted.

    Raises
    ------
    ValueError
        If the county-level table has no rows for the resolved county.
    """
    # Only the county and state names are needed; the coordinates are not used here, so no
    # separate latitude/longitude request is made.
    this_county, this_state = county_state_of_address(addr)
    df_county = df_counties[(df_counties['county']==this_county) & (df_counties['state']==this_state)]
    df_county = df_county.tail(max_to_show)
    if df_county.empty:
        # Without this check the figure width below would be 0, which plotly rejects.
        raise ValueError('No data found for %s, %s (address: %s)' % (this_county, this_state, addr))

    # The caption is printed as plain text, so it uses a newline rather than an HTML <br>.
    this_text = '%s\nCounty: %s, %s' % (addr, this_county, this_state)

    dates = list(df_county['date'])
    cases = list(df_county['cases'])
    deaths = list(df_county['deaths'])
    num_entries = len(dates)

    def _bar_figure(name, values, color, height):
        """Build one single-series bar chart over ``dates`` with the shared layout."""
        figure = go.Figure(data=[
            go.Bar(name=name, x=dates, y=values, marker={'color': color})
        ])
        figure.update_layout(
            height = height,
            width = num_entries*15,  # 15 pixels per plotted date
            margin={"r":0,"t":0,"l":0,"b":0},
            barmode='stack',
            showlegend=False,
            yaxis=dict(tickformat=',d'),
            font=dict(
                family="Times New Roman",
                size=10,
                color="#7f7f7f"),
        )
        return figure

    _bar_figure('cases', cases, 'rgba(255,0,0,0.5)', 200).show()
    _bar_figure('deaths', deaths, 'rgba(55,55,55,0.5)', 100).show()
    print(this_text)

# Example: case and death charts, followed by a printed caption, for the county containing this address.
histogram_for_address('100 River St, Guilford, CT')

100 River St, Guilford, CT<br>County: New Haven, Connecticut


In [223]:
def map_and_histogram_for_address(addr, scale=3.0, max_to_show=20):
    """Build the case map and the case/death bar charts for the county containing ``addr``.

    Nothing is displayed; the figures are returned for the caller to show.

    Parameters
    ----------
    addr : str
        Free-form address; its coordinates, county and state are resolved through the geocoder.
    scale : float
        Marker-size divisor. It is forwarded to ``render_map`` so that the county bubbles and
        the address marker (drawn with size ``100/scale``) use the same scale.
    max_to_show : int
        Only the last ``max_to_show`` rows of the county's history are charted. This is an
        explicit parameter so the function does not depend on a global of the same name.

    Returns
    -------
    (fig, case_hist, death_hist, this_text)
        The map figure, the two bar-chart figures, and a caption of the form
        ``'<addr><br>County: <county>, <state>'``.

    Raises
    ------
    ValueError
        If the county-level table has no rows for the resolved county.
    """
    this_lat, this_lon = lat_lon_of_address(addr)
    this_county, this_state = county_state_of_address(addr)

    df_county = df_counties[(df_counties['county']==this_county) & (df_counties['state']==this_state)]
    df_county = df_county.tail(max_to_show)
    if df_county.empty:
        # Without this check the chart width below would be 0, which plotly rejects.
        raise ValueError('No data found for %s, %s (address: %s)' % (this_county, this_state, addr))

    # --- map with a marker at the address -------------------------------------------------
    # render_map also fills df_recent['text'], which is read for the hover text below.
    fig = render_map(show=False, scale=scale)

    county_record = df_recent[(df_recent['county']==this_county) & (df_recent['state']==this_state)]
    if len(county_record) > 0:
        county_text = county_record['text'].iloc[0]
    else:
        # The county has no row for the latest date: label the marker instead of failing.
        county_text = '%s, %s<br>no data reported' % (this_county, this_state)
    hover_text = '%s<br>County: %s' % (addr, county_text)

    fig.add_trace(go.Scattergeo(
        locationmode = 'USA-states',
        lon = [this_lon],
        lat = [this_lat],
        text = [hover_text],
        name = '',
        marker = dict(
            size = 100/scale,
            color = 'rgba(0,255,0,0.2)',
            line_color='black',
            line_width=0.5,
            sizemode = 'area'
        ),
    ))

    # Size, margins and geo styling are already set by render_map; only the attribution is added.
    fig.update_layout(
        title={
            'text': "Data from The New York Times<br>https://github.com/nytimes/covid-19-data<br>%s" % most_recent_date_long,
            'y':0.05,
            'x':0.85,
            'xanchor': 'left',
            'yanchor': 'bottom'},
        font=dict(
            family="Times New Roman",
            size=6,
            color="#7f7f7f")
    )

    # --- bar charts of the county's recent history ----------------------------------------
    this_text = '%s<br>County: %s, %s' % (addr, this_county, this_state)

    dates = list(df_county['date'])
    cases = list(df_county['cases'])
    deaths = list(df_county['deaths'])
    num_entries = len(dates)

    def _bar_figure(name, values, color, height):
        """Build one single-series bar chart over ``dates`` with the shared layout."""
        figure = go.Figure(data=[
            go.Bar(name=name, x=dates, y=values, marker={'color': color})
        ])
        figure.update_layout(
            height = height,
            width = num_entries*15,  # 15 pixels per plotted date
            margin={"r":0,"t":0,"l":0,"b":0},
            barmode='stack',
            showlegend=False,
            yaxis=dict(tickformat=',d'),
            font=dict(
                family="Times New Roman",
                size=10,
                color="#7f7f7f"),
        )
        return figure

    case_hist = _bar_figure('cases', cases, 'rgba(255,0,0,0.5)', 200)
    death_hist = _bar_figure('deaths', deaths, 'rgba(55,55,55,0.5)', 100)

    return (fig, case_hist, death_hist, this_text)
    


In [226]:
def show_map_and_histograms(addr):
    """Display the case map, the case and death bar charts, and a caption for ``addr``.

    The figures and caption come from ``map_and_histogram_for_address``. The map is shown with
    scroll-zoom disabled.
    """
    # `county_map` rather than `map`, so the builtin of that name is not shadowed.
    county_map, case_hist, death_hist, text = map_and_histogram_for_address(addr)
    county_map.show(config={'scrollZoom': False})
    case_hist.show()
    death_hist.show()
    # The caption uses an HTML <br>; print() emits plain text, so turn it into a real line break.
    print(text.replace('<br>', '\n'))
    
# Example: map, bar charts and caption for the county containing Chicago.
show_map_and_histograms('Chicago, IL')

Chicago, IL<br>County: Cook, Illinois


In [197]:
# Scratch check of the rows behind the bar charts: the last `max_to_show` rows of the
# county-level table for the county containing one address.
addr = 'Chicago, IL'
max_to_show = 20
this_lat, this_lon = lat_lon_of_address(addr)
this_county, this_state = county_state_of_address(addr)
df_county = df_counties.loc[
    df_counties['county'].eq(this_county) & df_counties['state'].eq(this_state)
].tail(max_to_show)
df_county

Unnamed: 0,date,county,state,fips,cases,deaths
1228,2020-03-11,Cook,Illinois,17031,22,0
1442,2020-03-12,Cook,Illinois,17031,27,0
1713,2020-03-13,Cook,Illinois,17031,40,0
2039,2020-03-14,Cook,Illinois,17031,51,0
2430,2020-03-15,Cook,Illinois,17031,76,0
2871,2020-03-16,Cook,Illinois,17031,76,0
3360,2020-03-17,Cook,Illinois,17031,107,1
3923,2020-03-18,Cook,Illinois,17031,179,1
4599,2020-03-19,Cook,Illinois,17031,279,2
5404,2020-03-20,Cook,Illinois,17031,412,3
