Required packages: pandas, numpy, bokeh, GitPython (imported as `git`)

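Additional notes: the `bokeh.sampledata` datasets used below (`us_counties`, `us_states`) are not bundled with bokeh and must be downloaded once before the map cells will run.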

In [1]:
import git
# Find the enclosing git checkout (searching upward from the current
# directory) so that data paths resolve no matter where the notebook lives.
repo = git.Repo(path="./", search_parent_directories=True)
homedir = repo.working_dir

# The trailing slash is kept so later cells can append relative paths directly.
datadir = f"{homedir}/data/"

# MTA turnstile data from http://web.mta.info/developers/turnstile.html

In [3]:
import pandas as pd
# MTA turnstile export; the DATE column shown below spans 03/28/2020-04/03/2020.
turnstile_csv = datadir + 'us/mta/turnstile_200404.txt'
df = pd.read_csv(turnstile_csv)

In [5]:
# Preview the first rows to check which columns the turnstile file provides.
df.head()

Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DIVISION,DATE,TIME,DESC,ENTRIES,EXITS
0,A002,R051,02-00-00,59 ST,NQR456W,BMT,03/28/2020,00:00:00,REGULAR,7412829,2516511
1,A002,R051,02-00-00,59 ST,NQR456W,BMT,03/28/2020,04:00:00,REGULAR,7412831,2516512
2,A002,R051,02-00-00,59 ST,NQR456W,BMT,03/28/2020,08:00:00,REGULAR,7412837,2516526
3,A002,R051,02-00-00,59 ST,NQR456W,BMT,03/28/2020,12:00:00,REGULAR,7412849,2516538
4,A002,R051,02-00-00,59 ST,NQR456W,BMT,03/28/2020,16:00:00,REGULAR,7412880,2516555


In [6]:
# Inspect the DATE column; its dtype is `object`, i.e. the dates were read as
# plain MM/DD/YYYY strings rather than parsed into datetimes.
df['DATE']

0         03/28/2020
1         03/28/2020
2         03/28/2020
3         03/28/2020
4         03/28/2020
             ...    
205661    04/03/2020
205662    04/03/2020
205663    04/03/2020
205664    04/03/2020
205665    04/03/2020
Name: DATE, Length: 205666, dtype: object

# Hospitals

In [3]:
import pandas as pd
# Hospital bed counts per county. Note that this rebinds `df`, replacing the
# turnstile frame loaded earlier.
beds_csv = datadir + 'us/hospitals/beds_by_county.csv'
df = pd.read_csv(beds_csv)

In [4]:
# Preview the first rows of the hospital-bed table (state, county, bed counts, FIPS).
df.head()

Unnamed: 0,state,county,staffed_beds,licensed_beds,icu_beds,Name,FIPS
0,AK,Anchorage,1077,1014,60,"anchorage borough, ak",2020
1,AK,Bethel,34,50,0,"bethel census area, ak",2050
2,AK,Dillingham,16,16,0,"dillingham census area, ak",2070
3,AK,Fairbanks North Star,145,152,13,"fairbanks north star borough, ak",2090
4,AK,Juneau,45,73,9,"juneau city and borough, ak",2110


In [5]:
import numpy as np

def scaler(x, upper=22917, levels=5):
    """Map a non-negative count onto an integer bin using a log scale.

    The result is ``int(levels * log(x + 1) / log(upper))``: ``x = 0`` maps to
    bin 0 and the bin number reaches ``levels`` when ``x + 1 == upper``.

    Parameters
    ----------
    x : number, or a one-element array / pandas Series
        The value to bin. Callers in this notebook pass the one-row result of
        a DataFrame filter, so a single-element container is unwrapped here
        explicitly instead of relying on ``int(Series)``.
    upper : float, default 22917
        Value of ``x + 1`` that maps to the top bin.
    levels : int, default 5
        Bin number assigned to ``upper``.

    Returns
    -------
    int
        The bin index. It is not clamped: inputs with ``x + 1 > upper`` yield
        values greater than ``levels``.

    Raises
    ------
    TypeError
        If ``x`` does not hold exactly one value (e.g. an empty selection).
    ValueError
        If the value is NaN (``int(nan)``).
    OverflowError
        If the value is infinite (``int(inf)``).
    """
    values = np.asarray(x, dtype=float).reshape(-1)
    if values.size != 1:
        # Same exception type the implicit int(<Series>) conversion raises, so
        # callers that treat TypeError as "no unique match" keep working.
        raise TypeError(
            f"scaler expects exactly one value, got {values.size}"
        )
    value = values[0]
    return int(levels * np.log(value + 1) / np.log(upper))

In [8]:
from bokeh.plotting import figure, show, output_file
from bokeh.sampledata.us_counties import data as counties
from bokeh.sampledata.us_states import data as states

# NOTE: EXCLUDED is defined but never applied below, so Alaska and Hawaii are
# drawn together with the contiguous states.
EXCLUDED = ("ak", "hi")

# Outline coordinates: one list of longitudes / latitudes per state and county,
# in the iteration order of the bokeh sample dictionaries.
state_xs = [state["lons"] for state in states.values()]
state_ys = [state["lats"] for state in states.values()]

county_xs = [county["lons"] for county in counties.values()]
county_ys = [county["lats"] for county in counties.values()]

# Light-to-dark palette indexed by the bin returned from scaler().
colors = ["#F1EEF6", "#D4B9DA", "#C994C7", "#DF65B0", "#DD1C77", "#980043"]
# Colour for counties without a usable value.
MISSING_COLOR = "black"

# (STATE, county name) -> icu_beds, built once instead of filtering `df` for
# every county. keep=False drops every pair that occurs more than once, so only
# counties with exactly one matching row are present in the lookup.
icu_beds_by_county = (
    df.drop_duplicates(subset=["state", "county"], keep=False)
      .set_index(["state", "county"])["icu_beds"]
      .to_dict()
)

# One colour per county, in the same order as county_xs / county_ys.
county_colors = []
for county in counties.values():
    key = (county["state"].upper(), county["name"])
    if key not in icu_beds_by_county:
        # No row (or an ambiguous duplicate) for this county.
        county_colors.append(MISSING_COLOR)
        continue
    try:
        idx = scaler(icu_beds_by_county[key])
    except (TypeError, ValueError, OverflowError):
        # Non-numeric, NaN or infinite value: cannot be assigned to a bin.
        county_colors.append(MISSING_COLOR)
        continue
    # Clamp to the palette so values above the scale get the darkest colour
    # instead of being mistaken for missing data.
    county_colors.append(colors[min(max(idx, 0), len(colors) - 1)])

p = figure(title="US icu beds by county", toolbar_location="left",
           plot_width=1100, plot_height=700)

# Filled county polygons, coloured by ICU-bed bin.
p.patches(county_xs, county_ys,
          fill_color=county_colors, fill_alpha=0.7,
          line_color="white", line_width=0.5)

# Transparent state polygons drawn on top to provide the state borders.
p.patches(state_xs, state_ys, fill_alpha=0.0,
          line_color="#884444", line_width=2, line_alpha=0.3)

output_file("choropleth.html", title="choropleth.py example")

show(p)

Here is a list of map counties that do not match exactly one row in the dataset (matched on state and county name):

In [9]:
# Number of rows in `df` for each (state, county) pair, computed once so the
# loop below does not have to filter the whole frame for every county.
rows_per_county = df.groupby(["state", "county"]).size().to_dict()

# Names of bokeh map counties that do not match exactly one row in `df`
# (either no row at all, or more than one).
missing = [
    county["name"]
    for county in counties.values()
    if rows_per_county.get((county["state"].upper(), county["name"]), 0) != 1
]

# Display the list announced in the text above.
missing

In [27]:
# Staffed beds divided by total population, stored in column `c`.
# NOTE(review): `total_pop` is not among the hospital columns previewed above;
# it presumably comes from the county_populations merge, which appears *below*
# this cell (execution counts: load In [13], merge In [22], this cell In [27]).
# On a fresh top-to-bottom run this line would likely raise KeyError - confirm
# and move this cell after the merge.
df['c'] = df['staffed_beds']/df['total_pop']

In [32]:
# Largest value of 100*c + 1. The number printed here is hard-coded as the
# log-scale upper bound in the `scaler` redefinition further down, so it must
# be updated by hand if the data changes.
100*max(df['c'])+1

6.7630472421469205

In [13]:
# County-level population table, joined onto the hospital data by FIPS code.
population_csv = datadir + 'us/demographics/county_populations.csv'
df_population = pd.read_csv(population_csv)

In [22]:
# Attach the df_population columns to each hospital row, matching on the FIPS
# code present in both frames. `df` is overwritten, so this cell is not
# idempotent: running it a second time merges the population columns again.
df = df.merge(df_population, on='FIPS')

In [36]:
import numpy as np

def scaler(x, upper=6.7630472421469205, levels=20, factor=100):
    """Map a small non-negative ratio onto an integer bin using a log scale.

    The result is ``int(levels * log(factor * x + 1) / log(upper))``: ``x = 0``
    maps to bin 0 and the bin number reaches ``levels`` when
    ``factor * x + 1 == upper``.

    This definition replaces the earlier ``scaler`` in this notebook; cells run
    after it use these defaults.

    Parameters
    ----------
    x : number, or a one-element array / pandas Series
        The ratio to bin. Callers in this notebook pass the one-row result of
        a DataFrame filter, so a single-element container is unwrapped here
        explicitly instead of relying on ``int(Series)``.
    upper : float, default 6.7630472421469205
        Value of ``factor * x + 1`` that maps to the top bin; the default is
        the ``100*max(df['c'])+1`` figure printed earlier in the notebook.
    levels : int, default 20
        Bin number assigned to ``upper``.
    factor : float, default 100
        Multiplier applied to ``x`` before taking the logarithm.

    Returns
    -------
    int
        The bin index. It is not clamped, so callers indexing a palette with
        fewer than ``levels + 1`` entries must bound it themselves.

    Raises
    ------
    TypeError
        If ``x`` does not hold exactly one value (e.g. an empty selection).
    ValueError
        If the value is NaN (``int(nan)``).
    OverflowError
        If the value is infinite (``int(inf)``).
    """
    values = np.asarray(x, dtype=float).reshape(-1)
    if values.size != 1:
        # Same exception type the implicit int(<Series>) conversion raises, so
        # callers that treat TypeError as "no unique match" keep working.
        raise TypeError(
            f"scaler expects exactly one value, got {values.size}"
        )
    value = values[0]
    return int(levels * np.log(factor * value + 1) / np.log(upper))

In [37]:
from bokeh.plotting import figure, show, output_file
from bokeh.sampledata.us_counties import data as counties
from bokeh.sampledata.us_states import data as states

# NOTE: EXCLUDED is defined but never applied below, so Alaska and Hawaii are
# drawn together with the contiguous states.
EXCLUDED = ("ak", "hi")

# Outline coordinates: one list of longitudes / latitudes per state and county,
# in the iteration order of the bokeh sample dictionaries.
state_xs = [state["lons"] for state in states.values()]
state_ys = [state["lats"] for state in states.values()]

county_xs = [county["lons"] for county in counties.values()]
county_ys = [county["lats"] for county in counties.values()]

# Light-to-dark palette indexed by the bin returned from scaler().
colors = ["#F1EEF6", "#D4B9DA", "#C994C7", "#DF65B0", "#DD1C77", "#980043"]
# Colour for counties without a usable value.
MISSING_COLOR = "black"

# (STATE, county name) -> c (staffed beds / population), built once instead of
# filtering `df` for every county. keep=False drops every pair that occurs more
# than once, so only counties with exactly one matching row are in the lookup.
beds_ratio_by_county = (
    df.drop_duplicates(subset=["state", "county"], keep=False)
      .set_index(["state", "county"])["c"]
      .to_dict()
)

# One colour per county, in the same order as county_xs / county_ys.
county_colors = []
for county in counties.values():
    key = (county["state"].upper(), county["name"])
    if key not in beds_ratio_by_county:
        # No row (or an ambiguous duplicate) for this county.
        county_colors.append(MISSING_COLOR)
        continue
    try:
        idx = scaler(beds_ratio_by_county[key])
    except (TypeError, ValueError, OverflowError):
        # Non-numeric, NaN or infinite ratio: cannot be assigned to a bin.
        county_colors.append(MISSING_COLOR)
        continue
    # scaler() can return bins beyond the six palette entries; clamp so high
    # ratios get the darkest colour instead of being mistaken for missing data.
    county_colors.append(colors[min(max(idx, 0), len(colors) - 1)])

p = figure(title="US staff beds per mille by county", toolbar_location="left",
           plot_width=1100, plot_height=700)

# Filled county polygons, coloured by staffed-beds-per-resident bin.
p.patches(county_xs, county_ys,
          fill_color=county_colors, fill_alpha=0.7,
          line_color="white", line_width=0.5)

# Transparent state polygons drawn on top to provide the state borders.
p.patches(state_xs, state_ys, fill_alpha=0.0,
          line_color="#884444", line_width=2, line_alpha=0.3)

output_file("choropleth.html", title="choropleth.py example")

show(p)

Here is a list of map counties that do not match exactly one row in the merged dataset (matched on state and county name):

In [103]:
# Number of rows in `df` for each (state, county) pair, computed once so the
# loop below does not have to filter the whole frame for every county.
rows_per_county = df.groupby(["state", "county"]).size().to_dict()

# Names of bokeh map counties that do not match exactly one row in `df`
# (either no row at all, or more than one).
missing = [
    county["name"]
    for county in counties.values()
    if rows_per_county.get((county["state"].upper(), county["name"]), 0) != 1
]

# Display the list announced in the text above.
missing

# Cases

In [2]:
import pandas as pd
# Confirmed-case table: one row per county, one column per date. This rebinds
# `df`, replacing the hospital frame used in the previous section.
cases_csv = datadir + 'us/covid/confirmed_cases.csv'
df = pd.read_csv(cases_csv)

In [7]:
# Show 10 random rows, limited to the first 50 columns to keep the output
# readable. The sample is unseeded, so the rows differ on every run.
df.sample(10).iloc[:,:50]

Unnamed: 0,countyFIPS,County Name,State,stateFIPS,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20
1216,24003,Anne Arundel County,MD,24,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
319,9005,Litchfield County,CT,9,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
258,8013,Boulder County,CO,8,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
492,13189,McDuffie County,GA,13,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1717,31069,Garden County,NE,31,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2625,48113,Dallas County,TX,48,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2642,48147,Fannin County,TX,48,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1253,25027,Worcester County,MA,25,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
530,13267,Tattnall County,GA,13,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1144,22023,Cameron Parish,LA,22,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
