## Mapping the New York Times Data

This notebook is a first attempt to pull and plot the Covid-19 data collected by the New York Times.
See [The Times github repository](https://github.com/nytimes/covid-19-data).

I render the plot using a [plotly](https://plotly.com/python/) style that is very similar to the one used in the [maps published on the Times site](https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html). I add an interface that lets a user type in an address and shows the most recent Covid-19 statistics for the US county containing that address. The idea is to let people see what is happening near family and friends without having to know which county they live in. This uses a geocoding API from [HERE.com](https://www.here.com/).

Next, I'd like to add timelines and forecasts of the cases and deaths over the coming days, incorporating some basic statistical modelling. 

3-20-20


In [1]:
import datetime
import json
import os

import dateutil.parser
import herepy
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Prefer a key supplied through the HERE_API_KEY environment variable so the
# credential does not have to live in the notebook.
# NOTE(review): the literal fallback keeps the notebook running unchanged, but
# it is a credential committed alongside the code; it should be revoked and
# removed once HERE_API_KEY is configured.
HERE_API_KEY = os.environ.get('HERE_API_KEY', 'VbY-MyI6ZT9U8h-Y5GP5W1YaOzQuvNnL4aSTulNEyEQ')
geocoderApi = herepy.GeocoderApi(HERE_API_KEY)

# County- and state-level tables from the New York Times repository.
# FIPS codes are read as strings so leading zeros (e.g. "06059") survive.
NYT_DATA_URL = "https://raw.githubusercontent.com/nytimes/covid-19-data/master"
df_counties = pd.read_csv(NYT_DATA_URL + "/us-counties.csv", dtype={"fips": str})
df_states = pd.read_csv(NYT_DATA_URL + "/us-states.csv", dtype={"fips": str})


In [2]:
# A bare expression is only rendered when it is the last line of a cell, so
# the state table is shown explicitly; both previews are limited to a few rows.
display(df_states.head(10))
df_counties.head(10)

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061,1,0
1,2020-01-22,Snohomish,Washington,53061,1,0
2,2020-01-23,Snohomish,Washington,53061,1,0
3,2020-01-24,Cook,Illinois,17031,1,0
4,2020-01-24,Snohomish,Washington,53061,1,0
5,2020-01-25,Orange,California,06059,1,0
6,2020-01-25,Cook,Illinois,17031,1,0
7,2020-01-25,Snohomish,Washington,53061,1,0
8,2020-01-26,Maricopa,Arizona,04013,1,0
9,2020-01-26,Los Angeles,California,06037,1,0


In [3]:
# Parse the whole date column in one vectorised call instead of running
# dateutil on every row in a Python loop, then keep the latest day.
last_date = pd.to_datetime(df_counties['date']).max()
# Same YYYY-MM-DD text as the 'date' column, so it can be compared to it directly.
most_recent_date = last_date.strftime("%Y-%m-%d")
# Human-readable form, used later in the figure title.
most_recent_date_long = last_date.strftime("%A %B %d, %Y")
most_recent_date_long

'Saturday March 28, 2020'

In [4]:
# Rows for the latest reporting day only, largest case counts first.
df_recent = (
    df_counties[df_counties['date'] == most_recent_date]
    .sort_values('cases', ascending=False)
    # drop=True discards the old index directly; the previous
    # reset_index().drop('index', 1) passed `axis` positionally, which
    # pandas 2.0 and later reject with a TypeError.
    .reset_index(drop=True)
)
df_recent

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-03-28,New York City,New York,,30766,672
1,2020-03-28,Westchester,New York,36119,7875,10
2,2020-03-28,Nassau,New York,36059,5537,35
3,2020-03-28,Suffolk,New York,36103,4138,37
4,2020-03-28,Cook,Illinois,17031,2613,28
5,2020-03-28,Unknown,New Jersey,,2478,7
6,2020-03-28,Wayne,Michigan,26163,2316,46
7,2020-03-28,King,Washington,53033,2079,138
8,2020-03-28,Rockland,New York,36087,1896,8
9,2020-03-28,Bergen,New Jersey,34003,1838,35


In [5]:

def compute_lat_lon(df, geocoder):
    """Geocode each county row and return parallel latitude/longitude lists.

    Parameters
    ----------
    df : table-like
        Must provide 'county', 'state', 'fips', 'cases' and 'deaths' columns
        through ``df[name]``.
    geocoder : object
        Exposes ``free_form(text)`` returning an object with
        ``as_json_string()``.

    Returns
    -------
    tuple of (list, list)
        ``(lat, lon)``, one entry per row. Rows whose FIPS code is not a
        string, and rows for which the geocoder returns no usable result,
        keep the placeholder value 0.0.
    """
    counties = list(df['county'])
    states = list(df['state'])
    fips_codes = list(df['fips'])
    case_counts = list(df['cases'])
    death_counts = list(df['deaths'])

    n_rows = len(counties)
    lat = [0.0] * n_rows
    lon = [0.0] * n_rows

    num_to_print = 20
    # Progress is printed for the first rows and then roughly every 5% of the
    # table. max(1, ...) keeps the step non-zero: with fewer than 20 rows the
    # step used to be 0 and the modulo raised ZeroDivisionError.
    print_every = max(1, n_rows // 20)

    for i in range(n_rows):
        # Rows without a FIPS code (missing value, so not a str) are skipped.
        if not isinstance(fips_codes[i], str):
            continue

        response = geocoder.free_form('%s, %s' % (counties[i], states[i]))
        # The payload is JSON text: parse it as data. eval() would execute it
        # as Python source and fails on JSON literals (true / false / null).
        res = json.loads(response.as_json_string())
        try:
            position = res['Response']['View'][0]['Result'][0]['Location']['DisplayPosition']
            lat[i], lon[i] = position['Latitude'], position['Longitude']
        except (KeyError, IndexError):
            # One county without a match should not abort the whole batch.
            print("%s, %s: no geocoding result, leaving lat/lon at 0.0" % (counties[i], states[i]))
            continue

        if i <= num_to_print or i % print_every == 0:
            print("%s, %s: cases=%d, deaths=%d, lat=%f, lon=%f" %
                  (counties[i], states[i], case_counts[i], death_counts[i], lat[i], lon[i]))
            if i >= num_to_print:
                print("...")
    return (lat, lon)


def lat_lon_of_address(addr, geocoder=None):
    """Geocode a free-form address and return its (latitude, longitude).

    Parameters
    ----------
    addr : str
        Address text passed to the geocoder's ``free_form`` method.
    geocoder : object, optional
        Exposes ``free_form(text)`` returning an object with
        ``as_json_string()``. Defaults to the notebook-level ``geocoderApi``.

    Returns
    -------
    tuple
        ``(lat, lon)`` read from the first result's ``DisplayPosition``.

    Raises
    ------
    KeyError, IndexError
        If the response does not contain a result in the expected structure.
    """
    if geocoder is None:
        geocoder = geocoderApi
    response = geocoder.free_form(addr)
    # Parse the JSON text as data; eval() would execute it as Python source
    # and fails on JSON literals such as true / false / null.
    res = json.loads(response.as_json_string())
    position = res['Response']['View'][0]['Result'][0]['Location']['DisplayPosition']
    return (position['Latitude'], position['Longitude'])


def county_state_of_address(addr, geocoder=None):
    """Geocode a free-form address and return its (county, state) names.

    Parameters
    ----------
    addr : str
        Address text passed to the geocoder's ``free_form`` method.
    geocoder : object, optional
        Exposes ``free_form(text)`` returning an object with
        ``as_json_string()``. Defaults to the notebook-level ``geocoderApi``.

    Returns
    -------
    tuple
        ``(county, state)`` read from the first result's
        ``Address.AdditionalData`` list.

    Raises
    ------
    KeyError, IndexError
        If the response does not contain a result in the expected structure.
    """
    if geocoder is None:
        geocoder = geocoderApi
    response = geocoder.free_form(addr)
    # Parse the JSON text as data; eval() would execute it as Python source
    # and fails on JSON literals such as true / false / null.
    res = json.loads(response.as_json_string())
    extra = res['Response']['View'][0]['Result'][0]['Location']['Address']['AdditionalData']

    # Look entries up by their 'key' label rather than by list position, so a
    # reordered or shortened list does not silently swap the fields.
    # NOTE(review): the 'StateName' / 'CountyName' labels should be confirmed
    # against the HERE Geocoder response documentation; when they are absent
    # the original positions (1 = state, 2 = county) are used.
    by_key = {item.get('key'): item.get('value') for item in extra}
    state = by_key['StateName'] if 'StateName' in by_key else extra[1]['value']
    county = by_key['CountyName'] if 'CountyName' in by_key else extra[2]['value']
    return (county, state)


In [6]:
# compute_lat_lon issues one geocoder request per county that has a FIPS code,
# so the coordinates are cached in a CSV and only rebuilt on demand.
recompute_geocodes = False
geo_cache_path = './geo-counties.csv'
if recompute_geocodes:
    lat, lon = compute_lat_lon(df_recent, geocoderApi)
    # Build the cache table from a fresh selection instead of aliasing
    # df_recent: writing lat/lon onto df_recent itself would make the later
    # key-less merge also join on float coordinates that went through a CSV
    # round trip. Selecting columns by name also avoids drop(label, 1), whose
    # positional axis argument pandas 2.0 and later reject.
    lat_lon = df_recent[['county', 'state', 'fips']].assign(lat=lat, lon=lon)
    lat_lon.to_csv(geo_cache_path, header=True, index=False)

# Always read back from the cache so both paths produce df_geo the same way.
df_geo = pd.read_csv(geo_cache_path, dtype={"fips": str})
df_geo.head(20)

Unnamed: 0,county,state,fips,lat,lon
0,New York City,New York,,40.71455,-74.00714
1,Westchester,New York,36119.0,41.11909,-73.7887
2,Nassau,New York,36059.0,42.51642,-73.61127
3,Suffolk,New York,36103.0,40.96009,-72.83434
4,Cook,Illinois,17031.0,41.81392,-87.61546
5,King,Washington,53033.0,47.43248,-121.9594
6,Unknown,New Jersey,,0.0,0.0
7,Wayne,Michigan,26163.0,42.28515,-83.38361
8,Los Angeles,California,6037.0,34.05361,-118.2455
9,Bergen,New Jersey,34003.0,40.94757,-74.02765


In [7]:
# Preview the ten rows with the highest case counts (df_recent is sorted by
# 'cases' in descending order) before the coordinates are merged in.
df_recent.head(10)

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-03-28,New York City,New York,,30766,672
1,2020-03-28,Westchester,New York,36119.0,7875,10
2,2020-03-28,Nassau,New York,36059.0,5537,35
3,2020-03-28,Suffolk,New York,36103.0,4138,37
4,2020-03-28,Cook,Illinois,17031.0,2613,28
5,2020-03-28,Unknown,New Jersey,,2478,7
6,2020-03-28,Wayne,Michigan,26163.0,2316,46
7,2020-03-28,King,Washington,53033.0,2079,138
8,2020-03-28,Rockland,New York,36087.0,1896,8
9,2020-03-28,Bergen,New Jersey,34003.0,1838,35


In [8]:
# Attach the cached coordinates to the latest counts. No join key is given,
# so pandas joins on every column name the two frames have in common.
df_recent = df_recent.merge(df_geo)
df_recent.head(10)

Unnamed: 0,date,county,state,fips,cases,deaths,lat,lon
0,2020-03-28,New York City,New York,,30766,672,40.71455,-74.00714
1,2020-03-28,Westchester,New York,36119.0,7875,10,41.11909,-73.7887
2,2020-03-28,Nassau,New York,36059.0,5537,35,42.51642,-73.61127
3,2020-03-28,Suffolk,New York,36103.0,4138,37,40.96009,-72.83434
4,2020-03-28,Cook,Illinois,17031.0,2613,28,41.81392,-87.61546
5,2020-03-28,Unknown,New Jersey,,2478,7,0.0,0.0
6,2020-03-28,Wayne,Michigan,26163.0,2316,46,42.28515,-83.38361
7,2020-03-28,King,Washington,53033.0,2079,138,47.43248,-121.9594
8,2020-03-28,Rockland,New York,36087.0,1896,8,41.89899,-74.83049
9,2020-03-28,Bergen,New Jersey,34003.0,1838,35,40.94757,-74.02765


In [9]:
def render_map(show=True, min_cases=1, scale=3.0):
    """Build a US bubble map with one red circle per county in df_recent.

    Parameters
    ----------
    show : bool
        When true, the figure is displayed before it is returned.
    min_cases : int
        Counties with fewer cases than this are left off the map.
    scale : float
        Divisor applied to the case count to obtain the marker size.

    Returns
    -------
    plotly.graph_objects.Figure

    Side effect: a 'text' hover-label column is written onto the
    notebook-level df_recent; render_map_with_address reads that column after
    calling this function.
    """
    # Hover label: "<county>, <state><br><cases> cases, <deaths> deaths".
    df_recent['text'] = (
        df_recent['county'] + ', ' + df_recent['state'] + '<br>'
        + df_recent['cases'].astype(str) + ' cases, '
        + df_recent['deaths'].astype(str) + ' deaths'
    )

    # Rows labelled 'Unknown' are excluded along with counties below the threshold.
    keep = (df_recent['cases'] >= min_cases) & (df_recent['county'] != 'Unknown')
    visible = df_recent[keep]

    bubbles = go.Scattergeo(
        locationmode='USA-states',
        lon=visible['lon'],
        lat=visible['lat'],
        text=visible['text'],
        name='',
        marker=dict(
            size=visible['cases'] / scale,
            color='rgba(255, 0, 0, 0.2)',
            line=dict(color='black', width=0.5),
            sizemode='area',
        ),
    )

    fig = go.Figure()
    fig.add_trace(bubbles)
    fig.update_layout(
        width=1000,
        height=700,
        margin=dict(r=0, t=0, l=0, b=0),
        showlegend=False,
        geo=dict(
            scope='usa',
            landcolor='rgb(230, 230, 230)',
        ),
    )

    if show:
        fig.show(config={'scrollZoom': False})

    return fig
    
    
def render_map_with_address(addr=None, show=True, scale=3.0):
    """Draw the county bubble map plus a green marker for a given address.

    The address is geocoded, its county is looked up in df_recent, and the
    marker's hover text shows the address followed by that county's label.

    Parameters
    ----------
    addr : str
        Free-form address to geocode. Required, despite the ``None`` default.
    show : bool
        When true, the finished figure is displayed.
    scale : float
        Marker size divisor, applied to the county bubbles and to the
        address marker alike.

    Returns
    -------
    None
        The figure is deliberately not returned: as the last expression of a
        notebook cell, a returned figure would be rendered a second time.

    Raises
    ------
    ValueError
        If ``addr`` is missing or empty.
    """
    if not addr:
        raise ValueError("addr must be a non-empty address string")

    # Forward `scale` so the county bubbles honour it too; previously it only
    # affected the address marker. render_map also (re)builds the 'text'
    # column on df_recent that is read below.
    fig = render_map(show=False, scale=scale)

    this_lat, this_lon = lat_lon_of_address(addr)
    this_county, this_state = county_state_of_address(addr)

    in_county = (df_recent['county'] == this_county) & (df_recent['state'] == this_state)
    county_record = df_recent[in_county]
    if county_record.empty:
        # The county has no row in the data for the latest day (or its name
        # does not match), so there is no label to reuse.
        county_text = '%s, %s<br>no cases reported' % (this_county, this_state)
    else:
        county_text = county_record['text'].iloc[0]
    this_text = '%s<br>County: %s' % (addr, county_text)

    fig.add_trace(go.Scattergeo(
        locationmode='USA-states',
        lon=[this_lon],
        lat=[this_lat],
        text=[this_text],
        name='',
        marker=dict(
            size=100 / scale,
            color='rgba(0,255,0,0.2)',
            line_color='black',
            line_width=0.5,
            sizemode='area'
        ),
    ))

    # Size, margins and geo styling were already applied by render_map, so
    # only the attribution title and font are set here.
    fig.update_layout(
        title={
            'text': "Data from The New York Times<br>https://github.com/nytimes/covid-19-data<br>%s" % most_recent_date_long,
            'y': 0.05,
            'x': 0.85,
            'xanchor': 'left',
            'yanchor': 'bottom'},
        font=dict(
            family="Times New Roman",
            size=6,
            color="#7f7f7f")
    )

    if show:
        fig.show(config={'scrollZoom': False})
    



In [11]:
# Example: render the county map with a marker at a sample address.
render_map_with_address('Emerald St, Corpus Christi, TX')