In [22]:
import pandas as pd
import subprocess
import datetime
from io import StringIO

result = subprocess.run(
    ['git', 
     'log', 
     '--date=iso',
     '--date-order', 
     '--reverse', 
     '--pretty="%H,%cd"', 
     '--',
     'tests-by-zcta.csv'], capture_output=True)

lines = [l.strip('"') for l in result.stdout.decode("utf-8").split("\n")][:-1]
commits = [(c[0], datetime.datetime.strptime(c[1], '%Y-%m-%d %H:%M:%S -0400')) for c in [c.split(',') for c in lines]]

In [23]:
# clean up by using only 'latest' date/time when there are multiple for a single day
date_set = {}
for sha, d in commits:
    if sha == "80df5dcbc293e03fe7af61683f4f6d3c1b940d5d":
        # the dataset for 5/18 was checked in on 5/19, so adjust the date for this log entry
        date_set[datetime.date(2020, 5, 18)] = sha
    else:
        date_set[d.date()] = sha

li = []
for d, sha in date_set.items():
    print(d, sha)
    command = ['git',
         'show',
         "{}:tests-by-zcta.csv".format(sha)]
    
    r = subprocess.run(command, capture_output=True)

    data = pd.read_csv(StringIO(r.stdout.decode("utf-8")))
    data = data.dropna()
    data['Date'] = pd.to_datetime(d)
    data['MODZCTA'] = data['MODZCTA'].astype(int)
    data.rename({'MODZCTA': 'Zipcode'}, axis='columns', inplace=True)
    # I removed the April 26th because it has bad data
    data = data.query('Date != "2020-04-26"')
    data = data.set_index('Date')
    li.append(data)

df = pd.concat(li)

2020-04-01 097cbd70aa00eb635b17b177bc4546b2fce21895
2020-04-03 0074809280d3f9ae0bd09ca62629fb21243ffc72
2020-04-04 0ae531d56696b7dfa01d1d1ad6286d7ae03350c7
2020-04-05 98a7fd1c5eccdae11d604dd98b2c4a2eafef059b
2020-04-07 55495966af131723fdbd1a4357c1f84adea03982
2020-04-08 e19db289166f73282d39dfcef0d47a324d654c07
2020-04-09 e1f1d9a63fac772e26a45220d3c8199a75938656
2020-04-10 3fdd59a195bff5c4473a2086093ed656702d6569
2020-04-11 8542fbf18049d804eb8de7594123c13e533d1a42
2020-04-12 d34e6aab1e0dd0e0125e74519489e7893d33c9dd
2020-04-13 1dc35df3a8d1c19587cf2cfe72567594ae079650
2020-04-14 b2104e26d781f6ebf4eff31afcd93cce887ff79b
2020-04-15 6c9e9954c58ddf7556fe52937c487af29bf6ceb4
2020-04-16 b08d47482771bd35b3f25106dfc076e0a2649d29
2020-04-17 21916256325a11aae77bbe69029085f43592f2d1
2020-04-18 d3a8994716870cfdbd6cc2fb356c31588446fc25
2020-04-19 498c34f8534bc865b15dd09e9b560bd457ef5b9b
2020-04-20 0a74f0850087758da3579886ae8b7365e182ed9e
2020-04-21 bbbb0f31f7a6f62b64012e8c0c5cb2d3495e6670
2020-04-22 9

In [28]:
# zipcodes = [10001]
zipcodes = [10001, 10018, 10016, 10010, 10011]
# zipcodes = [11203, 11239, 10001]

if len(zipcodes) == 0:
    filtered = df
if len(zipcodes) == 1:
    filtered = df.query(f'Zipcode == {zipcodes[0]} & Date != "2020-04-26"')
else:
    zipcode_string = ",".join(map(str, zipcodes))
    filtered = df.query(f'Zipcode in ({zipcode_string}) & Date != "2020-04-26"')

filtered

Unnamed: 0_level_0,Zipcode,Positive,Total,zcta_cum.perc_pos
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [25]:
import plotly.graph_objects as go

for zipcode, data in filtered.groupby('Zipcode'):
    fig = go.Figure(
        data = [
            go.Bar(name='Positive', x=data.index, y=data.Positive, marker_color='rgb(28,164,252)'),
            go.Bar(name='Total', x=data.index, y=data.Total, marker_color='rgb(101,214,67)')
        ],
        layout=go.Layout(
            title=go.layout.Title(text=f'Testing: {zipcode}')
        )
    )
    fig.update_layout(barmode='stack')
    fig.show()

In [26]:
for zipcode, data in filtered.groupby('Zipcode'):
    fig = go.Figure(
        data = [
            go.Bar(name='Positive', x=data.index, y=data['zcta_cum.perc_pos'], marker_color='rgb(28,164,252)'),
        ],
        layout=go.Layout(
            title=go.layout.Title(text=f'% Positive: {zipcode}')
        )
    )
    fig.update_layout(barmode='stack')
    fig.show()

In [27]:
import plotly.express as px
import numpy as np
import time
import matplotlib.dates as mdates

for zipcode, data in filtered.groupby('Zipcode'):
    positive_rate = data.Positive.rolling(2).apply(lambda x: x.iloc[1] - x.iloc[0])[1:]
    testing_rate = data.Total.rolling(2).apply(lambda x:x.iloc[1] - x.iloc[0])[1:]
    
    z_positive_rate = np.polyfit(mdates.date2num(positive_rate.index.date), positive_rate, 3)
    f_positive_rate = np.poly1d(z_positive_rate)
    
    z_testing_rate = np.polyfit(mdates.date2num(testing_rate.index.date), testing_rate, 3)
    f_testing_rate = np.poly1d(z_testing_rate)

    fig = go.Figure(
        data=[
            go.Bar(name="Positive Rate", x=positive_rate.index.date, y=positive_rate, marker_color='rgb(28,164,252)'),
            go.Bar(name="Testing Rate", x=testing_rate.index.date, y=testing_rate, marker_color='rgb(101,214,67)')
        ],
        layout=go.Layout(
            title=go.layout.Title(text=f'Trends: {zipcode}')
        )
    )
    
    fig.add_trace(
        go.Line(name="Positive trend", x=positive_rate.index.date, y=f_positive_rate(mdates.date2num(positive_rate.index)), marker_color='rgb(61,182,252)')
    )
    fig.add_trace(
        go.Line(name="Testing trend", x=testing_rate.index.date, y=f_testing_rate(mdates.date2num(testing_rate.index.date)), marker_color='rgb(119,253,79)')
    )
    
    fig.show()





Polyfit may be poorly conditioned


Polyfit may be poorly conditioned


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.





Polyfit may be poorly conditioned


Polyfit may be poorly conditioned




Polyfit may be poorly conditioned


Polyfit may be poorly conditioned




Polyfit may be poorly conditioned


Polyfit may be poorly conditioned




Polyfit may be poorly conditioned


Polyfit may be poorly conditioned

