# Download US sequential data:

In [118]:
import urllib.request
import os


# URLs
confirmed_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
deaths_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv'
counties_map_url = 'https://upload.wikimedia.org/wikipedia/commons/5/5f/USA_Counties_with_FIPS_and_names.svg'
county_population_url = 'https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/totals/co-est2019-alldata.csv'



# File names
confirmed_file_name = confirmed_url.split('/')[-1]
deaths_file_name = deaths_url.split('/')[-1]
counties_map_file_name = counties_map_url.split('/')[-1]
county_population_file_name = county_population_url.split('/')[-1]



# Folders
base_folder = os.getcwd()
download_folder = base_folder + '/data/'
map_folder = base_folder + '/map/'

if not os.path.exists(download_folder):
    os.makedirs(download_folder)
    
if not os.path.exists(map_folder):
    os.makedirs(map_folder)

    

# Communication "The Art Of Communication Is The Language Of Leadership." Wow, I'm inspired.
print('Fetching {} ...'.format(confirmed_file_name), end='')
urllib.request.urlretrieve(confirmed_url, download_folder + confirmed_file_name)
print('done')

print('Fetching {} ...'.format(deaths_file_name), end='')
urllib.request.urlretrieve(deaths_url, download_folder + deaths_file_name)
print('done')

print('Fetching {} ...'.format(counties_map_file_name), end='')
urllib.request.urlretrieve(counties_map_url, map_folder + counties_map_file_name)
print('done')

print('Fetching {} ...'.format(county_population_file_name), end='')
urllib.request.urlretrieve(county_population_url, download_folder + county_population_file_name)
print('done')


Fetching time_series_covid19_confirmed_US.csv ...done
Fetching time_series_covid19_deaths_US.csv ...done
Fetching USA_Counties_with_FIPS_and_names.svg ...done
Fetching co-est2019-alldata.csv ...done




# Import data from files
 - confirmed cases by county
 - population by county

In [119]:
import csv
import numpy as np


confirmed_file = download_folder + confirmed_file_name
population_file = download_folder + county_population_file_name

print("Opening " + confirmed_file + '\n')                        # open the confirmed cases file
confirmed_csv = open(confirmed_file, newline='')                 

print("Opening " + population_file + '\n')                       # open the population file
population_csv = open(population_file, newline='', encoding = 'ISO-8859-1')     # file contains latin chars          

covid_confirmed_us = csv.reader(confirmed_csv, delimiter=',')    # read data in
county_populations = csv.reader(population_csv, delimiter=',')

headers_confirmed = next(covid_confirmed_us)                     # iterate through the headers file, but save it
headers_population = next(county_populations)

row_length_confirmed = len(headers_confirmed)                    # number of items in a row from each dataset
row_length_population = len(headers_population)


# format for a county row: [Population, day0, day1, day2, ...]
# row number == county FIPS id
# Note: FIPS id is form SSCCC where S is state num, C is county num
#       The covid confirmed dataset includes territories (SS = 00)
#       and cruise ships (SS = 99)
#       The population dataset only includes the 50 states -> 'Murica proper ;) 
confirmed_data = np.zeros((100000,num_days+1), int)              # initialize numpy datasets



# fill in dataset with confirmed cases per day per county
first_day_idx = 11                                               # index of first day in row
confirmed_FIPS_idx = 4                                           # FIPS code is a county code used to edit map
num_days = row_length_confirmed - first_day_idx                  # number of days of data 

for i, row in enumerate(covid_confirmed_us):                     # Move the data to matrix. One county per row

    try:
        idx = int(float(row[confirmed_FIPS_idx]))                # get the index from the confirmed dataset row
        confirmed_data[idx][1:] = row[first_day_idx:]            # store the data for each data
    except:
        print("Can't find FIPS data for {}".format(row[10]))

        
        
# fill in dataset with populations per county
pop_2019_idx = 18

for i, row in enumerate(county_populations):
    
    try:
        idx = int(row[3])*1000 + int(row[4])
        #print(idx)
    except:
        print("Can't find FIPS data for {}, {}".format(row[5], row[6]))
    
    #print("{}: {}".format(idx, int(row[pop_2019_idx])))
    pop = int(row[pop_2019_idx])
    confirmed_data[idx][0] = pop
    

print(confirmed_data[66])    # Puerto Rico
print(confirmed_data[1001])  # Autauga, Alabama
print(confirmed_data[8041])  # El Paso, Colorado #represent
print(confirmed_data[99999]) # Diamond Princess Cruise ship


Opening /Users/ethanblagg/SynologyDrive/Documents/Projects/YT/Covid/data/time_series_covid19_confirmed_US.csv

Opening /Users/ethanblagg/SynologyDrive/Documents/Projects/YT/Covid/data/co-est2019-alldata.csv

Can't find FIPS data for Dukes and Nantucket,Massachusetts,US
Can't find FIPS data for Kansas City,Missouri,US
[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   3   3   5  12  14  15  27  29  32  37  45  51  55  56  58  69  77
  82  84  93 112 113 121 121 128 130]
[55869     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     

In [None]:
# old

import csv
import numpy as np


file = download_folder + confirmed_file_name
print("Opening " + file + '\n')


# open the csv file
with open(file, newline='') as csvfile:
    
    covid_confirmed_us = csv.reader(csvfile, delimiter=',') # read data in
    
    
    headers_confirmed = next(covid_confirmed_us)        # iterate through the headers file, but save it
    row_length = len(headers_confirmed)
    
    
    # get headers before first day of data (11)
    #for i, val in enumerate(headers):
    #    print("{}: {}".format(i,val))
    
    
    first_day_idx = 11                                  # index of first day in row
    FIPS_idx = 4                                        # FIPS code is a county code used to edit map
    num_days = row_length-first_day_idx                 # number of days of data     
    confirmed_data = np.zeros((100000,num_days), int) # rows = counties,  cols = [id, day0, day1...]
    
    
    # Move the data to matrix. One county per row
    for i, row in enumerate(covid_confirmed_us):
        
        #temp_row = np.zeros((1, num_days), int)
        
        try:
            idx = int(float(row[FIPS_idx]))                # place county id in 0th index
            #temp_row[0][:] = row[first_day_idx:]                         # copy days data to temp, after county id
            confirmed_data[idx][:] = row[first_day_idx:]
        except:
            print("Can't find FIPS data for {}".format(row[10]))
        
        
           
    
    print(confirmed_data[66])                                             # confirm everything looks a-ok
    print(confirmed_data[99999])



# Map of absolute cases

In [120]:
from bs4 import BeautifulSoup
import time

#out_file = 'Covid_confirmed_map_' + time.strftime("%y-%m-%d") + ".svg"
out_file = 'We\'re_all_gonna_die_' + time.strftime("%y-%m-%d") + ".svg"

svg = open(map_folder + counties_map_file_name, 'r').read()    # Load the SVG map
soup = BeautifulSoup(svg)                                      # Load into Beautiful Soup
paths = soup.findAll('path')                                   # Find counties


# display rules for each county. Note fill value will be added later, according to data
path_style = 'font-size:12px;fill-rule:nonzero;stroke:#FFFFFF;stroke-opacity:1;stroke-width:0.1;stroke-miterlimit:4;stroke-dasharray:none;stroke-linecap:butt;marker-start:none;stroke-linejoin:bevel;fill:'


for p in paths:
     
    if p['id'] not in ["State_Lines", "separator"]:
        
        try:
            idx = int(p['id'].split("_")[-1])
            #print(idx)
        except:
            continue
        
        
        color = "#d0d0d0"                                              # default if no data
        
        colors = ["#66ffff","#66ff66","#ffff66","#ffb366","#ff6666"]   # alarming, yet calming
        #colors = ["#b3ffff","#ccffcc","#ffff99","#ffcc99","#ff9999"]  # wussy
        #colors = ["#00FFFF","#00FF00","#FFFF00","#FF7F00","#FF0000"]  # bold
        
        
        if (confirmed_data[idx][num_days-1] < 1):
            color = colors[0]
            
        elif (confirmed_data[idx][num_days-1] < 10):
            color = colors[1]
            
        elif (confirmed_data[idx][num_days-1] < 100):
            color = colors[2]
            
        elif (confirmed_data[idx][num_days-1] < 1000):
            color = colors[3]
            
        else:
            color = colors[4]
                                     
        p['style'] = path_style + color

        
#print(soup.prettify())


f = open(map_folder + out_file, 'w')
f.write(soup.prettify())
f.close()


# Map of % of population with the covid

In [141]:
#out_file = 'Covid_confirmed_map_' + time.strftime("%y-%m-%d") + ".svg"
out_file = 'Your_mom_' + time.strftime("%y-%m-%d") + ".svg"

svg = open(map_folder + counties_map_file_name, 'r').read()    # Load the SVG map
soup = BeautifulSoup(svg)                                      # Load into Beautiful Soup
paths = soup.findAll('path')                                   # Find counties


# display rules for each county. Note fill value will be added later, according to data
path_style = 'font-size:12px;fill-rule:nonzero;stroke:#FFFFFF;stroke-opacity:1;stroke-width:0.1;stroke-miterlimit:4;stroke-dasharray:none;stroke-linecap:butt;marker-start:none;stroke-linejoin:bevel;fill:'


min_percent = 1
average_percent = 0
max_percent = 0
summ = 0
numm = 0
distribution = [0,0,0,0,0]

for p in paths:
     
    if p['id'] not in ["State_Lines", "separator"]:
        
        try:
            idx = int(p['id'].split("_")[-1])
            #print(idx)
        except:
            continue
        
        
        color = "#d0d0d0"                                              # default if no data
        
        colors = ["#66ffff","#66ff66","#ffff66","#ffb366","#ff6666"]   # alarming, yet calming
        #colors = ["#b3ffff","#ccffcc","#ffff99","#ffcc99","#ff9999"]  # wussy
        #colors = ["#00FFFF","#00FF00","#FFFF00","#FF7F00","#FF0000"]  # bold
        percents = [1e-5, 1e-4, 1e-3, 1e-2]
        
        
        percent_with_the_rona = float(confirmed_data[idx][num_days-1] / confirmed_data[idx][0])
        
        if ((percent_with_the_rona > 10e-6) & (percent_with_the_rona < min_percent)):
            min_percent = percent_with_the_rona         # update min percent
        
        if (percent_with_the_rona > max_percent):       # update max percent
            max_percent = percent_with_the_rona
            
        summ = summ + confirmed_data[idx][num_days-1]   # update average percent
        numm = numm + confirmed_data[idx][0]
        
            
        if (percent_with_the_rona < percents[0]):
            color = colors[0]
            distribution[0] += 1
            
        elif (percent_with_the_rona < percents[1]):
            color = colors[1]
            distribution[1] += 1
            
        elif (percent_with_the_rona < percents[2]):
            color = colors[2]
            distribution[2] += 1
            
        elif (percent_with_the_rona < percents[3]):
            color = colors[3]
            distribution[3] += 1
            
        else:
            color = colors[4]
            distribution[4] += 1
                                     
        p['style'] = path_style + color


average_percent = summ / numm
print("Min percent: {}".format(100*min_percent))
print("Avg percent: {}".format(100*average_percent))
print("Max percent: {}".format(100*max_percent))

for i,_ in enumerate(percents):
    print("Counties under {}%: {}".format(100*percents[i], distribution[i]))

          
print("Counties over {}%: {}".format(100*percents[-1], distribution[-1]) )



#print(soup.prettify())


f = open(map_folder + out_file, 'w')
f.write(soup.prettify())
f.close()





Min percent: 0.0015282341254680219
Avg percent: 0.1385546741841456
Max percent: 5.3433830292268825
Counties under 0.001%: 582
Counties under 0.01%: 329
Counties under 0.1%: 1900
Counties under 1.0%: 312
Counties over 1.0%: 18
