In [1]:
import numpy as np
import pandas as pd
import shapefile as shp
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Apply the seaborn theme (white grid, pastel palette, short colour codes) to all figures.
sns.set(style="whitegrid", palette="pastel", color_codes=True)
# Set the default figure size through the matplotlib module that seaborn exposes as `sns.mpl`.
sns.mpl.rc("figure", figsize=(10,6))

In [3]:
# Location of the state shapefile, relative to the notebook's working directory.
shp_path = "./us_data/tl_2017_us_state.shp"
# Open the shapefile with pyshp (imported as `shp`); later cells read `sf` as a module-level global.
sf = shp.Reader(shp_path)

In [4]:
# Sanity check: number of shapes stored in the shapefile.
print(len(sf.shapes()))

56


In [5]:
def read_shapefile(sf):
    """Convert a shapefile reader into a pandas DataFrame.

    One row is produced per record.  The columns are the reader's field names
    with the first field entry dropped, followed by a ``coords`` column that
    holds each shape's list of points.
    """
    # The first entry of ``sf.fields`` is skipped; the rest name the record columns.
    column_names = [field[0] for field in sf.fields[1:]]
    attribute_table = pd.DataFrame(data=sf.records(), columns=column_names)
    outlines = [shape.points for shape in sf.shapes()]
    return attribute_table.assign(coords=outlines)

In [6]:
# Build the attribute table once; the lookup helpers in later cells read `df` as a module-level global.
df = read_shapefile(sf)
# (rows, columns) of the resulting table.
print(df.shape)

(56, 15)


In [7]:
# Display the table returned by read_shapefile.
df

Unnamed: 0,REGION,DIVISION,STATEFP,STATENS,GEOID,STUSPS,NAME,LSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,coords
0,3,5,54,1779805,54,WV,West Virginia,0,G4000,A,62265662566,489840834,38.6472854,-80.6183274,"[(-81.747254, 39.095379), (-81.746354, 39.0965..."
1,3,5,12,294478,12,FL,Florida,0,G4000,A,138911437206,31398800291,28.4574302,-82.4091478,"[(-82.987477, 24.625379), (-82.987477, 24.6254..."
2,2,3,17,1779784,17,IL,Illinois,0,G4000,A,143784114293,6211277447,40.1028754,-89.1526108,"[(-91.185295, 40.637803), (-91.1751, 40.643027..."
3,2,4,27,662849,27,MN,Minnesota,0,G4000,A,206229176104,18944967530,46.3158148,-94.1996628,"[(-96.784381, 46.630503999999995), (-96.784337..."
4,3,5,24,1714934,24,MD,Maryland,0,G4000,A,25150696145,6980371026,38.9466584,-76.6744939,"[(-77.45880799999999, 39.22027), (-77.45866099..."
5,1,1,44,1219835,44,RI,Rhode Island,0,G4000,A,2677997539,1323452846,41.5974187,-71.5272723,"[(-71.67880699999999, 41.158912), (-71.676264,..."
6,4,8,16,1779783,16,ID,Idaho,0,G4000,A,214048160737,2393355752,44.3484222,-114.5588538,"[(-116.899711, 44.840609), (-116.89967, 44.840..."
7,1,1,33,1779794,33,NH,New Hampshire,0,G4000,A,23187445452,1028643155,43.6726907,-71.5843145,"[(-72.329899, 43.600214), (-72.329838, 43.6002..."
8,3,5,37,1027616,37,NC,North Carolina,0,G4000,A,125919712692,13470113896,35.53971,-79.1308636,"[(-82.416738, 36.072827), (-82.416601, 36.0730..."
9,1,1,50,1779802,50,VT,Vermont,0,G4000,A,23873457570,1031134839,44.0604795,-72.6733274,"[(-73.313283, 44.26413), (-73.312741, 44.26541..."


In [8]:
def fip_to_state(fip):
    """Return the row index in the module-level ``df`` whose GEOID equals a FIPS code.

    Despite the name, the result is the DataFrame index label of the matching
    row, not a state name.

    Parameters
    ----------
    fip : int or str
        FIPS code.  It is converted to a string and left-padded with zeros to
        two characters, so ``6`` and ``"6"`` both match the GEOID ``"06"``.

    Returns
    -------
    The index label of the first row of ``df`` whose ``GEOID`` matches.

    Raises
    ------
    IndexError
        If no row of ``df`` has that GEOID.
    """
    geoid = str(fip).zfill(2)
    matches = df[df.GEOID == geoid].index
    if len(matches) == 0:
        # Same exception type the bare ``.values[0]`` lookup raised, with a usable message.
        raise IndexError(f"no row with GEOID {geoid!r} in the shapefile table")
    return matches.values[0]

In [9]:
def state_to_id(state):
    """Return the index label of the first row of the module-level ``df`` whose NAME equals ``state``.

    An IndexError propagates when no row matches.
    """
    name_matches = df[df['NAME'] == state]
    return name_matches.index.values[0]

In [10]:
def plot_shape(id, s=None):
    """Plot the outline of a single shape from the module-level reader ``sf``.

    Parameters
    ----------
    id : int
        Index of the shape, passed to ``sf.shape``.
    s : optional
        Label drawn at the mean of the outline's coordinates.  Nothing is
        drawn when it is None.

    Returns
    -------
    tuple
        ``(x0, y0)``: the mean x and mean y of the shape's points, i.e. the
        position used for the label.
    """
    plt.figure()
    ax = plt.axes()
    ax.set_aspect('equal')

    shape_ex = sf.shape(id)
    x_lon = np.array([point[0] for point in shape_ex.points], dtype=float)
    y_lat = np.array([point[1] for point in shape_ex.points], dtype=float)
    plt.plot(x_lon, y_lat)

    # The function returned x0/y0 without ever computing them (NameError on
    # every call); derive them as the mean of the outline coordinates, the same
    # way the sibling plotting functions place their labels.
    x0 = np.mean(x_lon)
    y0 = np.mean(y_lat)
    if s is not None:
        plt.text(x0, y0, s, fontsize=10)

    # Clamp the x-axis to the first and third entries of the shape's bounding box.
    plt.xlim(shape_ex.bbox[0], shape_ex.bbox[2])
    return x0, y0

In [11]:
def plot(title, ids, sf, x_lim=None, y_lim=None, figsize=(11,9), color='r', no_print=()):
    """Draw every shape outline in black and fill the selected shapes.

    Parameters
    ----------
    title :
        Figure title.
    ids : iterable of int
        Shape indices to fill; each is also written as a text label at the
        mean of that shape's coordinates.
    sf :
        Shapefile reader providing ``shapes()`` and ``shape(i)``.
    x_lim, y_lim : optional
        Axis limits; applied only when both are given.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    color :
        Fill colour for the selected shapes.
    no_print : container of int, optional
        Shape indices whose outline is skipped.  The function used to read an
        undefined name ``no_print`` and raised NameError; it is now an explicit
        keyword that defaults to skipping nothing.
    """
    # One figure only: a preceding plt.figure() call created a second, empty
    # figure that was never used or closed.
    fig, ax = plt.subplots(figsize=figsize)
    fig.suptitle(title, fontsize=16)

    # Outlines of all shapes except the explicitly skipped ones.
    for s_id, shape in enumerate(sf.shapes()):
        if s_id in no_print:
            continue
        x = [point[0] for point in shape.points]
        y = [point[1] for point in shape.points]
        ax.plot(x, y, 'k')

    # Filled, labelled shapes.
    for id in ids:
        points = sf.shape(id).points
        x_lon = [point[0] for point in points]
        y_lat = [point[1] for point in points]
        ax.fill(x_lon, y_lat, color)
        ax.text(np.mean(x_lon), np.mean(y_lat), id, fontsize=10)

    # Identity checks instead of `!= None` combined with bitwise `&`, which
    # misbehaves when the limits are arrays.
    if x_lim is not None and y_lim is not None:
        ax.set_xlim(x_lim)
        ax.set_ylim(y_lim)

In [12]:
def remove_dups(data):
    """Return the items of ``data`` with repeats dropped, keeping first-seen order.

    Membership is tested by equality against the items collected so far, so
    unhashable items are accepted.
    """
    unique_items = []
    for item in data:
        if item not in unique_items:
            unique_items.append(item)
    return unique_items

In [13]:
def lists_to_listi(ls):
    """Return a new list holding ``int(element)`` for every element of ``ls``."""
    return [int(element) for element in ls]

In [14]:
from colour import Color
def colors_calc(data, c1, c2):
    """Assign a hex colour to every distinct integer value found in ``data``.

    The values of ``data`` are truncated with ``int``.  The distinct results,
    in ascending order, are paired one-to-one with a gradient that starts at
    ``c2`` and ends at ``c1`` and has one step per distinct value.

    Returns a dict ``{int value: hex colour string}``.  The key ``0`` is always
    set to white (``"#ffffff"``), overriding any gradient colour it received.
    """
    # Distinct integer levels, smallest first.
    levels = sorted({int(value) for value in data.values()})
    gradient = list(Color(c2).range_to(Color(c1), len(levels)))

    palette = {level: shade.hex for level, shade in zip(levels, gradient)}
    palette[0] = "#ffffff"
    # To inspect the gradient visually, run:
    #     sns.palplot(list(palette.values()), 0.1 * len(levels))
    return palette

In [15]:
def sid_to_s(sid):
    """Return the NAME value stored at index ``sid`` of the module-level ``df``."""
    return df['NAME'][sid]

In [16]:
def plot_states_data(sf, title, states, data, print_id=False, save_fig=True):
    """Resolve state names to shape indices and draw the shaded map.

    A colour table is built from ``data`` with ``colors_calc`` (gradient between
    "green" and "red").  Every name in ``states`` that is present in the
    shapefile table and not in the exclusion list is looked up, and the
    resulting indices are handed to ``plot_map_fill_multiple_ids_tone``.
    """
    palette = colors_calc(data, "green", "red")
    table = read_shapefile(sf)

    # Names left out of the filled map.
    excluded = ["Hawaii", "Guam", "Puerto Rico", "Alaska", "Northern Mariana Islands", "Virgin Islands", "American Samoa"]

    shape_ids = []
    for position in range(len(states)):
        name = states[position]
        rows = table[table.NAME == name]
        if rows.empty or name in excluded:
            continue
        shape_ids.append(rows.index.values[0])

    plot_map_fill_multiple_ids_tone(
        sf, title, shape_ids, data, print_id, palette,
        x_lim=None, y_lim=None, savefigb=save_fig,
    )

In [17]:
def plot_map_fill_multiple_ids_tone(sf, title, state, data, print_id, colors, x_lim=None, y_lim=None, figsize=(11,9), savefigb=True):
    """Draw a map with selected shapes filled by value and all outlines on top.

    Parameters
    ----------
    sf :
        Shapefile reader providing ``shape(i)`` and ``shapes()``.
    title :
        Its first ten characters (``str(title)[:10]``) are used both in the
        figure title and as the saved file name.  It is also compared with
        ``get_date('2020-04-05')``; on equality the outlines are drawn in red
        instead of black.
    state : sequence of int
        Shape indices to fill.  Each index is translated to a name with
        ``sid_to_s`` and that name is used as the key into ``data``.
    data : mapping
        ``{state name: value}``; the value is truncated with ``int``.
    print_id :
        Unless it compares equal to False, the integer value is written at the
        mean of the shape's coordinates.
    colors : mapping
        ``{int value: fill colour}``.
    x_lim, y_lim : optional
        Axis limits; applied only when both are given.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    savefigb : bool
        When true the figure is written to ``./photos/<first ten chars of title>``
        and then closed.

    Notes
    -----
    * The limits/save/close step used to sit inside the outline loop, so the
      figure was re-saved and closed once per shape; it now runs once, after
      all outlines are drawn.
    * A preceding ``plt.figure()`` call that only produced an empty, never
      closed figure has been dropped.
    * The branch that drew blue outlines depended on a variable that was
      always None, so it could never run and has been removed.
    """
    fig, ax = plt.subplots(figsize=figsize, dpi=75)
    fig.suptitle(str(title)[:10] + ' GDP per Case', fontsize=16)
    ax.set_aspect('equal')

    # Filled shapes, coloured by their integer value.
    for shape_id in state:
        state_name = sid_to_s(shape_id)
        cases = int(data[state_name])

        points = sf.shape(shape_id).points
        x_lon = [point[0] for point in points]
        y_lat = [point[1] for point in points]
        ax.fill(x_lon, y_lat, colors[cases])

        # Kept as `!= False` on purpose: values such as None still enable labels.
        if print_id != False:
            ax.text(np.mean(x_lon), np.mean(y_lat), str(cases),
                    fontsize='smaller', va='center', ha='center')

    # Shape indices whose outline is not drawn.
    # NOTE(review): presumably the non-contiguous states/territories of this
    # particular shapefile; verify against the table if the file changes.
    no_print = {40, 31, 41, 35, 36, 49, 34}

    # The colour is the same for every outline, so decide it once.
    outline_color = '#ff0000' if title == get_date('2020-04-05') else 'k'
    for s_id, shape in enumerate(sf.shapes()):
        if s_id in no_print:
            continue
        x = [point[0] for point in shape.points]
        y = [point[1] for point in shape.points]
        ax.plot(x, y, c=outline_color)

    # Identity checks instead of `!= None` combined with bitwise `&`.
    if x_lim is not None and y_lim is not None:
        ax.set_xlim(x_lim)
        ax.set_ylim(y_lim)

    if savefigb:
        fig.savefig('./photos/' + str(title)[:10])
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

        

In [18]:
import csv
import datetime

In [19]:
def get_date(s):
    """Parse a ``YYYY-MM-DD`` string and return the matching ``datetime.datetime``."""
    day_format = '%Y-%m-%d'
    parsed = datetime.datetime.strptime(s, day_format)
    return parsed

In [20]:
def get_date_from_covid(ind, file='us-states-covid.csv'):
    """Return the ``ind``-th distinct day (0-based) found in a COVID CSV file.

    The first column of every row is parsed with ``get_date``.  A row starts a
    new day when its date is later than the latest date seen so far; the scan
    starts from 2019-12-30, so rows dated on or before that day never count.

    Parameters
    ----------
    ind : int
        Zero-based position of the wanted day.
    file : str
        Path of the CSV file.  Rows whose first cell is ``'date'`` (the header)
        and empty rows are skipped.

    Returns
    -------
    datetime.datetime or None
        The requested day, or None when the file holds fewer than ``ind + 1``
        distinct days.
    """
    # Named `prev_date` rather than `pd`, which hid the pandas alias inside this function.
    prev_date = get_date('2019-12-30')
    day_index = 0
    # newline='' is what the csv module expects for files handed to csv.reader.
    with open(file, newline='') as cf:
        for row in csv.reader(cf, delimiter=','):
            if not row or row[0] == 'date':
                continue
            date = get_date(row[0])
            if date > prev_date:
                if day_index == ind:
                    return date
                day_index += 1
                prev_date = date
    return None

In [21]:
def parse_covid(file):
    """Group the rows of a COVID CSV file by day.

    The first row is treated as the header and printed.  For every later row,
    column 0 is parsed with ``get_date`` and columns 1 and 3 are kept as a
    ``[r[1], r[3]]`` pair.  Rows must be ordered by non-decreasing date and be
    dated after 2019-12-30.

    Parameters
    ----------
    file : str
        Path of the CSV file.

    Returns
    -------
    list
        One inner list per distinct day, in file order, each holding the
        ``[r[1], r[3]]`` pairs of that day.

    Raises
    ------
    ValueError
        If a row is dated earlier than the row before it (or not after
        2019-12-30 at the start of the file).  Previously this printed
        "ERROR" and called ``exit(0)``, which terminates the interpreter
        with a success status instead of reporting the bad input.
    """
    dates = []
    prev_date = get_date('2019-12-30')
    lc = 0
    # newline='' is what the csv module expects for files handed to csv.reader.
    with open(file, newline='') as cf:
        for r in csv.reader(cf, delimiter=','):
            if lc == 0:
                # Header row: shown for reference only.
                print(f'{r}')
            elif r:  # empty rows are counted but carry no data
                date = get_date(r[0])
                if date > prev_date:
                    # First row of a new day.
                    dates.append([])
                    prev_date = date
                elif date < prev_date or not dates:
                    raise ValueError(
                        f'row {lc + 1} of {file!r} is out of order: '
                        f'{r[0]} follows {prev_date:%Y-%m-%d}'
                    )
                dates[-1].append([r[1], r[3]])
            lc += 1
    print(f'Processed {lc} lines')
    return dates

In [22]:
def parse_gdp(file):
    """Read a CSV file into a ``{first column: second column}`` dict.

    The first row is skipped as a header.  Values are returned as the strings
    found in the file; a later row with the same key replaces an earlier one.
    """
    with open(file) as handle:
        reader = csv.reader(handle, delimiter=',')
        next(reader, None)  # discard the header row, if any
        return {row[0]: row[1] for row in reader}

In [23]:
def get_gdp(state):
    """Return the GDP entry for ``state`` from 'gdp-q4-2019.csv', or 1 when the state is not listed.

    The file is parsed with ``parse_gdp`` on every call.
    """
    gdp_by_state = parse_gdp('gdp-q4-2019.csv')
    return gdp_by_state.get(state, 1)

In [24]:
# Parse the case file into one list per day, each holding [state, cases] pairs.
cases = parse_covid('us-states-covid.csv')

['date', 'state', 'fips', 'cases', 'deaths']
Processed 2722 lines


In [25]:
def create_dict(cases):
    """Compute GDP divided by case count for each entry of one day.

    Parameters
    ----------
    cases : iterable
        Entries whose item 0 is the state name and item 1 the case count
        (anything ``float`` accepts).

    Returns
    -------
    dict
        ``{state: float(gdp) / float(count)}`` where ``gdp`` comes from
        ``get_gdp(state)``.  States whose count is zero map to 1.
    """
    ratios = {}
    # The loop variables no longer reuse the name of the `cases` parameter.
    for entry in cases:
        state, count = entry[0], entry[1]
        gdp = get_gdp(state)
        # Compare numerically: the old check only recognised the exact string
        # "0", so 0, "0.0" or "00" ended in ZeroDivisionError.
        count_value = float(count)
        if count_value != 0:
            ratios[state] = float(gdp) / count_value
        else:
            ratios[state] = 1
    return ratios

In [26]:
def parse_date_entry(ca):
    """Split one day's entries into two parallel lists.

    Returns ``(states, cases)`` where ``states`` holds item 0 of every entry
    and ``cases`` holds item 1 of every entry converted with ``int``.
    """
    entries = list(ca)
    state_names = [entry[0] for entry in entries]
    case_counts = [int(entry[1]) for entry in entries]
    return state_names, case_counts

In [27]:
def show_covid(i, _cases):
    """Print and plot the GDP-per-case figures for day ``i``.

    ``_cases[i]`` is one day's list of entries.  The state names are taken
    from it with ``parse_date_entry``, the ratios are computed with
    ``create_dict``, the day's date is looked up with ``get_date_from_covid``,
    and the result is printed and passed to ``plot_states_data`` using the
    module-level reader ``sf``.
    """
    day_entries = _cases[i]
    state_names = parse_date_entry(day_entries)[0]
    gdp_per_case = create_dict(day_entries)
    day = get_date_from_covid(i)
    print(gdp_per_case)
    plot_states_data(sf, day, state_names, gdp_per_case, True, save_fig=True)

In [28]:
import time

# Time the rendering of the most recent day.  perf_counter() is a monotonic,
# high-resolution clock meant for measuring intervals, unlike the wall-clock
# time.time(), which can jump when the system clock is adjusted.
start_time = time.perf_counter()
show_covid(len(cases) - 1, cases)
print("--- %s seconds ---" % (time.perf_counter() - start_time))

{'Alabama': 46.09176841276093, 'Alaska': 174.79310344827587, 'American Samoa': 1, 'Arizona': 73.56279620853081, 'Arkansas': 68.62252663622526, 'California': 94.0065855531274, 'Colorado': 39.220957846823666, 'Connecticut': 14.584153419126924, 'Delaware': 27.83606557377049, 'District of Columbia': 50.64263751281175, 'Florida': 41.086062846580404, 'Georgia': 33.8986827126362, 'Guam': 0.0012330456226880395, 'Hawaii': 170.47750865051904, 'Idaho': 50.22283272283272, 'Illinois': 28.847054716262537, 'Indiana': 32.66583946602773, 'Iowa': 62.10141732283464, 'Kansas': 86.51058591826687, 'Kentucky': 71.33245901639344, 'Louisiana': 10.889817722138401, 'Maine': 78.21828571428571, 'Maryland': 31.738672902660042, 'Massachusetts': 15.241227959538884, 'Michigan': 17.181914993579102, 'Minnesota': 156.23765182186236, 'Mississippi': 26.690824468085108, 'Missouri': 58.00172205958326, 'Montana': 122.28175519630484, 'Nebraska': 78.33616504854369, 'Nevada': 47.103394255874676, 'New Hampshire': 62.0843123704215

<Figure size 792x648 with 0 Axes>