## Mapping the New York Times Data

A first attempt to pull and plot the Covid-19 data collected by the New York Times.
See [The Times GitHub repository](https://github.com/nytimes/covid-19-data).

The plot is rendered using a [plotly](https://plotly.com/python/) style that is reverse-engineered to resemble the [maps published on the Times site](https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html). Hovering over a county shows its most recent cases and deaths. An interface lets a user type in an address and see the most recent Covid-19 statistics for the US county containing that address. The idea is to let people see what's going on near family and friends without having to know what county they live in. The address lookup uses a geocoding API from [HERE.com](https://www.here.com/). The result looks something like this:

![example image](plot-example.png)

Next up will be timelines and forecasts of the cases and deaths over the coming days, incorporating some basic statistical modelling. 


In [5]:
import datetime
import json
import os

import dateutil.parser
import herepy
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Prefer a key supplied through the HERE_API_KEY environment variable so the
# notebook can be shared and re-keyed without editing the source.
# NOTE(review): the fallback literal is a credential committed to the notebook;
# rotate it and remove the fallback once the environment variable is provisioned.
geocoderApi = herepy.GeocoderApi(
    os.environ.get('HERE_API_KEY', 'VbY-MyI6ZT9U8h-Y5GP5W1YaOzQuvNnL4aSTulNEyEQ')
)

# FIPS codes are read as strings so that leading zeros (e.g. '06037') survive.
df_counties = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv", dtype={"fips": str})
df_states = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv", dtype={"fips": str})


In [6]:
# Only the last expression of a cell is rendered automatically, so the state
# table needs an explicit display() call to appear alongside the county table.
display(df_states)
df_counties

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061,1,0
1,2020-01-22,Snohomish,Washington,53061,1,0
2,2020-01-23,Snohomish,Washington,53061,1,0
3,2020-01-24,Cook,Illinois,17031,1,0
4,2020-01-24,Snohomish,Washington,53061,1,0
...,...,...,...,...,...,...
440740,2020-08-16,Sweetwater,Wyoming,56037,276,2
440741,2020-08-16,Teton,Wyoming,56039,389,1
440742,2020-08-16,Uinta,Wyoming,56041,276,2
440743,2020-08-16,Washakie,Wyoming,56043,96,5


In [7]:
# Parse the whole date column in one vectorized call instead of invoking
# dateutil.parser.parse once per row, then convert back to a plain datetime.
last_date = pd.to_datetime(df_counties['date']).max().to_pydatetime()
most_recent_date = last_date.strftime("%Y-%m-%d")
# The "%-d" strftime code (day without zero padding) is a platform extension
# that is rejected on Windows, so the day number is formatted explicitly.
most_recent_date_long = "%s %d, %s" % (
    last_date.strftime("%A %B"), last_date.day, last_date.strftime("%Y"))
most_recent_date_long

'Sunday August 16, 2020'

In [8]:
# Latest snapshot only, largest outbreaks first. reset_index(drop=True)
# renumbers the rows without materialising the old index as a column, which
# avoids drop('index', 1): the positional axis argument no longer exists in
# pandas 2.x.
df_recent = (
    df_counties[df_counties['date'] == most_recent_date]
    .sort_values('cases', ascending=False)
    .reset_index(drop=True)
)
df_recent

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-08-16,New York City,New York,,234918,23628
1,2020-08-16,Los Angeles,California,06037,221950,5254
2,2020-08-16,Miami-Dade,Florida,12086,145306,2057
3,2020-08-16,Maricopa,Arizona,04013,129385,2596
4,2020-08-16,Cook,Illinois,17031,115960,4962
...,...,...,...,...,...,...
3221,2020-08-16,Arthur,Nebraska,31005,1,0
3222,2020-08-16,Elk,Kansas,20049,1,0
3223,2020-08-16,Wheeler,Nebraska,31183,1,0
3224,2020-08-16,Powder River,Montana,30075,1,0


In [9]:

def compute_lat_lon(df, geocoder):
    """Geocode every county in ``df`` that has a FIPS code.

    Args:
        df: Table with 'county', 'state', 'fips', 'cases' and 'deaths'
            columns, one row per county.
        geocoder: Object whose ``free_form(query)`` returns a response that
            exposes ``as_json_string()`` (e.g. ``herepy.GeocoderApi``).

    Returns:
        ``(lat, lon)``: two lists aligned with the rows of ``df``. Rows whose
        FIPS value is not a string (for example a missing value) are not
        looked up and keep the placeholder 0.0.

    Raises:
        KeyError, IndexError: if a response does not contain a result in the
            expected ``Response/View/Result/Location`` layout.
    """
    county = list(df['county'])
    state = list(df['state'])
    fips = list(df['fips'])
    cases = list(df['cases'])
    deaths = list(df['deaths'])
    n = len(county)
    lat = [0.0] * n
    lon = [0.0] * n

    num_to_print = 20
    # After the first rows, report progress about every 5% of the table.
    # Clamp the step to 1: for tables shorter than 20 rows the unclamped value
    # is 0 and the modulo below would raise ZeroDivisionError.
    progress_step = max(1, n // 20)
    for i in range(n):
        if not isinstance(fips[i], str):
            continue
        response = geocoder.free_form('%s, %s' % (county[i], state[i]))
        # Parse as JSON instead of eval(): the payload comes from a web
        # service, and JSON literals such as true/false/null are not Python.
        res = json.loads(response.as_json_string())
        position = res['Response']['View'][0]['Result'][0]['Location']['DisplayPosition']
        lat[i] = position['Latitude']
        lon[i] = position['Longitude']
        if i <= num_to_print or i % progress_step == 0:
            print("%s, %s: cases=%d, deaths=%d, lat=%f, lon=%f" %
                  (county[i], state[i], cases[i], deaths[i], lat[i], lon[i]))
            if i >= num_to_print:
                print("...")
    return (lat, lon)


def lat_lon_of_address(addr):
    """Return ``(latitude, longitude)`` of the first geocoder match for ``addr``.

    The lookup goes through the notebook-level ``geocoderApi`` client.

    Raises:
        KeyError, IndexError: if the response does not contain a result in the
            expected ``Response/View/Result/Location`` layout.
    """
    response = geocoderApi.free_form(addr)
    # Parse as JSON instead of eval(): the payload comes from a web service,
    # and JSON literals such as true/false/null are not valid Python.
    res = json.loads(response.as_json_string())
    position = res['Response']['View'][0]['Result'][0]['Location']['DisplayPosition']
    return (position['Latitude'], position['Longitude'])


def county_state_of_address(addr):
    """Return ``(county, state)`` of the first geocoder match for ``addr``.

    The lookup goes through the notebook-level ``geocoderApi`` client. State
    and county are read from entries 1 and 2 of the result's
    ``Address.AdditionalData`` list.
    NOTE(review): this relies on the service always returning those entries in
    that order; confirm against the geocoder's response format.

    Raises:
        KeyError, IndexError: if the response does not contain a result in the
            expected layout.
    """
    response = geocoderApi.free_form(addr)
    # Parse as JSON instead of eval(): the payload comes from a web service,
    # and JSON literals such as true/false/null are not valid Python.
    res = json.loads(response.as_json_string())
    additional = res['Response']['View'][0]['Result'][0]['Location']['Address']['AdditionalData']
    state = additional[1]['value']
    county = additional[2]['value']
    return (county, state)


In [16]:
# Geocoding every county costs one API request per row, so it only runs on
# demand; normally the cached coordinates are downloaded instead.
recompute_geocodes = False
if recompute_geocodes:
    lat, lon = compute_lat_lon(df_recent, geocoderApi)
    # assign() builds a new frame, so df_recent itself does not silently gain
    # lat/lon columns through an alias. Columns are dropped by keyword because
    # the positional axis argument of drop() no longer exists in pandas 2.x.
    lat_lon = (
        df_recent
        .assign(lat=lat, lon=lon)
        .drop(columns=['cases', 'deaths', 'date'])
    )
    lat_lon.to_csv('geo-counties.csv', header=True, index=False)

df_geo = pd.read_csv("https://raw.githubusercontent.com/jdlafferty/covid-19/master/data/geo-counties.csv", dtype={"fips": str})
df_geo.head(20)

Unnamed: 0,county,state,fips,lat,lon
0,New York City,New York,,40.71455,-74.00714
1,Westchester,New York,36119.0,41.11909,-73.7887
2,Nassau,New York,36059.0,42.51642,-73.61127
3,Suffolk,New York,36103.0,40.96009,-72.83434
4,Cook,Illinois,17031.0,41.81392,-87.61546
5,King,Washington,53033.0,47.43248,-121.9594
6,Unknown,New Jersey,,0.0,0.0
7,Wayne,Michigan,26163.0,42.28515,-83.38361
8,Los Angeles,California,6037.0,34.05361,-118.2455
9,Bergen,New Jersey,34003.0,40.94757,-74.02765


In [17]:
# Preview the ten counties with the most cases.
# NOTE(review): the saved output of this cell includes a 'text' column, which
# is only added when render_map() runs further down, so this cell was last
# executed out of order.
df_recent.head(n=10)

Unnamed: 0,date,county,state,fips,cases,deaths,text
0,2020-08-16,New York City,New York,,234918,23628,"New York City, New York<br>234918 cases, 23628..."
1,2020-08-16,Los Angeles,California,6037.0,221950,5254,"Los Angeles, California<br>221950 cases, 5254 ..."
2,2020-08-16,Miami-Dade,Florida,12086.0,145306,2057,"Miami-Dade, Florida<br>145306 cases, 2057 deaths"
3,2020-08-16,Maricopa,Arizona,4013.0,129385,2596,"Maricopa, Arizona<br>129385 cases, 2596 deaths"
4,2020-08-16,Cook,Illinois,17031.0,115960,4962,"Cook, Illinois<br>115960 cases, 4962 deaths"
5,2020-08-16,Harris,Texas,48201.0,92253,1829,"Harris, Texas<br>92253 cases, 1829 deaths"
6,2020-08-16,Broward,Florida,12011.0,66447,980,"Broward, Florida<br>66447 cases, 980 deaths"
7,2020-08-16,Dallas,Texas,48113.0,63428,825,"Dallas, Texas<br>63428 cases, 825 deaths"
8,2020-08-16,Clark,Nevada,32003.0,52867,910,"Clark, Nevada<br>52867 cases, 910 deaths"
9,2020-08-16,Riverside,California,6065.0,46720,881,"Riverside, California<br>46720 cases, 881 deaths"


In [18]:
# Attach coordinates to the latest counts. The join keys are spelled out and
# any lat/lon left over from a previous run is dropped first, so re-running
# this cell gives the same result instead of also joining on the coordinates.
# The default inner join keeps only counties that are present in df_geo.
df_recent = pd.merge(
    df_recent.drop(columns=['lat', 'lon'], errors='ignore'),
    df_geo,
    on=['county', 'state', 'fips'],
)
df_recent.shape

(1660, 9)

In [29]:
def get_location_of_address(addr, df):
    """Geocode ``addr`` and check that its county appears in ``df``.

    The lookup goes through the notebook-level ``geocoderApi`` client.

    Args:
        addr: Free-form address string.
        df: Table with 'county' and 'state' columns to validate against.

    Returns:
        ``((lat, lon), (county, state))`` for the first geocoder match.

    Raises:
        Exception('InvalidAddress'): the lookup failed or the response did not
            have the expected layout; the underlying error is chained.
        Exception('InvalidStateCounty'): the address resolved, but ``df`` has
            no row for that county and state.
    """
    try:
        response = geocoderApi.free_form(addr)
        # Parse as JSON instead of eval(): the payload comes from a web
        # service, and JSON literals such as true/false/null are not Python.
        res = json.loads(response.as_json_string())
        location = res['Response']['View'][0]['Result'][0]['Location']
        position = location['DisplayPosition']
        (lat, lon) = (position['Latitude'], position['Longitude'])
        additional = location['Address']['AdditionalData']
        state = additional[1]['value']
        county = additional[2]['value']
    except Exception as err:
        # "except Exception" rather than a bare except, so KeyboardInterrupt
        # and SystemExit are not converted into an address error.
        raise Exception('InvalidAddress') from err

    # Checked outside the try block: previously this error was caught by the
    # handler above and re-labelled as 'InvalidAddress'.
    if not ((df['county'] == county) & (df['state'] == state)).any():
        raise Exception('InvalidStateCounty')
    return ((lat, lon), (county, state))
    

def render_map(show=True, min_cases=10, scale=30.0):
    """Build the US bubble map of county case counts.

    Reads the notebook-level ``df_recent`` frame and, as a side effect, writes
    a 'text' column (the hover label) onto it.

    Args:
        show: When true, display the figure before returning it.
        min_cases: Counties with fewer cases than this are left off the map.
        scale: Divisor applied to the case count to get the marker area.

    Returns:
        The plotly figure.
    """
    counties = df_recent

    # Hover label, e.g. "<county>, <state><br><n> cases, <m> deaths".
    counties['text'] = (
        counties['county'] + ', ' + counties['state'] + '<br>'
        + counties['cases'].astype(str) + ' cases, '
        + counties['deaths'].astype(str) + ' deaths'
    )

    # Keep counties at or above the threshold and skip the 'Unknown' rows.
    keep = (counties['cases'] >= min_cases) & (counties['county'] != 'Unknown')
    plotted = counties[keep]

    bubble_style = {
        'size': plotted['cases'] / scale,
        'color': 'rgba(255, 0, 0, 0.2)',
        'line_color': 'black',
        'line_width': 0.5,
        'sizemode': 'area',
    }
    bubbles = go.Scattergeo(
        locationmode='USA-states',
        lon=plotted['lon'],
        lat=plotted['lat'],
        text=plotted['text'],
        name='',
        marker=bubble_style,
    )

    fig = go.Figure()
    fig.add_trace(bubbles)

    layout_options = {
        'width': 1000,
        'height': 700,
        'margin': {"r": 0, "t": 0, "l": 0, "b": 0},
        'showlegend': False,
        'geo': {'scope': 'usa', 'landcolor': 'rgb(230, 230, 230)'},
    }
    fig.update_layout(**layout_options)

    if show:
        fig.show(config={'scrollZoom': False})

    return fig
    
    
def render_map_with_address(addr=None, show=True, scale=30.0):
    """Render the county map with an extra marker at ``addr``.

    The marker's hover text shows the address together with the statistics of
    the county it falls in. If the address cannot be resolved to a county in
    ``df_recent``, "Invalid address" is printed and the plain map is used.

    Args:
        addr: Free-form address string to highlight.
        show: When true, display the figure.
        scale: Divisor for marker areas, applied to both the county bubbles
            and the address marker.

    Returns:
        None when ``show`` is true (so a notebook cell does not render the map
        a second time); the plotly figure when ``show`` is false, so the
        caller can display or save it instead of it being discarded.
    """
    # Forward scale so the county bubbles and the address marker agree;
    # previously the bubbles always used render_map's default.
    fig = render_map(show=False, scale=scale)

    try:
        ((this_lat, this_lon), (this_county, this_state)) = get_location_of_address(addr, df_recent)
        county_record = df_recent[(df_recent['county'] == this_county) & (df_recent['state'] == this_state)]
        # The 'text' column is written onto df_recent by the render_map call above.
        this_text = '%s<br>County: %s' % (addr, county_record['text'].iloc[0])
        fig.add_trace(go.Scattergeo(
            locationmode='USA-states',
            lon=[this_lon],
            lat=[this_lat],
            text=[this_text],
            name='',
            marker=dict(
                size=100 / scale,
                color='rgba(0,255,0,0.2)',
                line_color='black',
                line_width=0.5,
                sizemode='area'
            ),
        ))
    except Exception:
        # "except Exception" rather than a bare except, so that interrupting
        # the kernel is not reported as a bad address.
        print("Invalid address")

    # render_map has already applied the figure size, margins and geo styling,
    # so only the attribution title and font are added here.
    fig.update_layout(
        title={
            'text': "Data from The New York Times<br>https://github.com/nytimes/covid-19-data<br>%s" % most_recent_date_long,
            'y': 0.05,
            'x': 0.85,
            'xanchor': 'left',
            'yanchor': 'bottom'},
        font=dict(
            family="Times New Roman",
            size=6,
            color="#7f7f7f")
    )

    if show:
        fig.show(config={'scrollZoom': False})
        return None
    return fig
    



In [30]:
# render_map() displays the figure itself and also returns it; the trailing
# semicolon stops the notebook from rendering the returned figure a second time.
render_map();