In [1]:
#https://data.acgov.org/datasets/AC-HCSA::alameda-county-covid-19-cases-and-deaths-over-time-1/data
import pandas as pd
import numpy as np

In [2]:
# Load the raw Alameda County cases-and-deaths time series from the project's input folder.
dfcov_source = pd.read_csv(
    filepath_or_buffer='1_data_input/Alameda_County_COVID-19_Cases_and_Deaths_Over_Time.csv'
)

In [3]:
# Convert columns to appropriate data types.
# 'Date' is parsed into pandas datetimes so the later cells can filter rows by date.
dfcov_source['Date'] = dfcov_source['Date'].pipe(pd.to_datetime)



In [4]:
# Select columns of interest.
# Berkeley and Alameda County have separate local health jurisdictions (LHJ), which is why
# the source data is split between Bk and AC. The sum of the two LHJ counts gives the
# county-wide totals for cases and deaths.
# NOTE(review): this cell originally mentioned a U chart on total cases and deaths, but the
# later cells build an I-MR chart on total cases only -- confirm which is intended.
dfcov_select = dfcov_source[
    ['Date', 'BkLHJ_Cases', 'BkLHJ_Deaths', 'ACLHJ_Cases', 'ACLHJ_Deaths']
].assign(
    total_cases=lambda lhj: lhj['BkLHJ_Cases'] + lhj['ACLHJ_Cases'],
    total_deaths=lambda lhj: lhj['BkLHJ_Deaths'] + lhj['ACLHJ_Deaths'],
)

# Keep only the data after the Bay Area shelter-in-place order (dated 2020-03-19 here),
# assuming a stable period starts during Phase I after shelter in place.
# NOTE(review): the comparison is strict, yet the 2020-03-19 row (stamped 07:00 UTC) is
# still kept -- presumably the bare date string compares as midnight. Confirm that
# including the order date itself is intended.
dfcov_filter = dfcov_select[dfcov_select['Date'] > '2020-03-19']

# Only the date and the county-wide case count are carried forward.
dfcov = dfcov_filter.loc[:, ['Date', 'total_cases']].copy()





In [5]:
# Selecting the control chart to use.
# My initial guess was a C chart; however, that would require assuming the number of
# samples is fixed, which is a heavy assumption.
# After some research, I decided to use an I-MR chart, since subgroupings are never defined.

# Moving range = absolute change in total cases from the previous row.
# The first row has no predecessor, so its moving range is NaN.
dfcov['moving_range'] = dfcov['total_cases'].diff(periods=1).abs()
dfcov.tail(5)



Unnamed: 0,Date,total_cases,moving_range
66,2020-05-04 07:00:00+00:00,35,10.0
67,2020-05-05 07:00:00+00:00,45,10.0
68,2020-05-06 07:00:00+00:00,55,10.0
69,2020-05-07 07:00:00+00:00,43,12.0
70,2020-05-08 07:00:00+00:00,59,16.0


In [6]:
# Calculate the control limits and lock them to the data available when this analysis was
# created: only rows dated before 2020-05-07 feed the baseline.
dfcov_locklimits = dfcov[dfcov['Date'] < '2020-05-07'].copy()

# Control-chart constants for a moving range spanning N = 2 consecutive observations.
N, E2, D4, D3 = 2, 2.66, 3.27, 0

# Centre lines. mean() skips NaN, so the first row's undefined moving range is not counted.
X_BAR = dfcov_locklimits['total_cases'].mean()
MR_BAR = dfcov_locklimits['moving_range'].mean()

# Individuals (I) chart limits: centre line +/- E2 * average moving range.
# NOTE(review): nothing floors I_LCL at zero; confirm that is acceptable if a future
# baseline ever produces a negative lower limit for count data.
I_UCL, I_LCL = X_BAR + E2*MR_BAR, X_BAR - E2*MR_BAR

# Moving-range (MR) chart limits; with D3 = 0 the lower limit is always zero.
MR_UCL, MR_LCL = D4*MR_BAR, D3*MR_BAR


In [7]:
# Build the frame used to visualize the control charts: a copy of the daily data with the
# locked limits and centre lines repeated on every row as constant columns.
dfcov_cc = dfcov.assign(
    I_UCL=I_UCL,
    I_LCL=I_LCL,
    MR_UCL=MR_UCL,
    MR_LCL=MR_LCL,
    X_BAR=X_BAR,
    MR_BAR=MR_BAR,
)


In [8]:
# Flag points that are out of control on each chart.
def _control_flag(observed, lower_limit, upper_limit):
    """Label each value 'out of control' when strictly outside its limits, else 'in control'.

    NaN values fail both comparisons and are therefore labelled 'in control'.
    """
    outside_limits = (observed > upper_limit) | (observed < lower_limit)
    return np.where(outside_limits, 'out of control', 'in control')


dfcov_cc['I_control_flag'] = _control_flag(
    dfcov_cc['total_cases'], dfcov_cc['I_LCL'], dfcov_cc['I_UCL']
)
# The first row's moving range is NaN, so it is reported as 'in control'.
dfcov_cc['MR_control_flag'] = _control_flag(
    dfcov_cc['moving_range'], dfcov_cc['MR_LCL'], dfcov_cc['MR_UCL']
)


In [9]:
# Persist the control-chart table to the output folder. The row index is written as the
# first (unnamed) column because to_csv keeps the index by default.
dfcov_cc.to_csv(path_or_buf='2_data_output/ac_covid_control_chart.csv')

In [11]:
# Display the final control-chart table: daily totals, moving ranges, the locked limits and
# centre lines, and the in/out-of-control flag for each chart.
dfcov_cc

Unnamed: 0,Date,total_cases,moving_range,I_UCL,I_LCL,MR_UCL,MR_LCL,X_BAR,MR_BAR,I_control_flag,MR_control_flag
20,2020-03-19 07:00:00+00:00,8,,74.271539,2.340706,44.213125,0.0,38.306122,13.520833,in control,in control
21,2020-03-20 07:00:00+00:00,21,13.0,74.271539,2.340706,44.213125,0.0,38.306122,13.520833,in control,in control
22,2020-03-21 07:00:00+00:00,17,4.0,74.271539,2.340706,44.213125,0.0,38.306122,13.520833,in control,in control
23,2020-03-22 07:00:00+00:00,28,11.0,74.271539,2.340706,44.213125,0.0,38.306122,13.520833,in control,in control
24,2020-03-23 07:00:00+00:00,24,4.0,74.271539,2.340706,44.213125,0.0,38.306122,13.520833,in control,in control
25,2020-03-24 07:00:00+00:00,20,4.0,74.271539,2.340706,44.213125,0.0,38.306122,13.520833,in control,in control
26,2020-03-25 07:00:00+00:00,38,18.0,74.271539,2.340706,44.213125,0.0,38.306122,13.520833,in control,in control
27,2020-03-26 07:00:00+00:00,39,1.0,74.271539,2.340706,44.213125,0.0,38.306122,13.520833,in control,in control
28,2020-03-27 07:00:00+00:00,37,2.0,74.271539,2.340706,44.213125,0.0,38.306122,13.520833,in control,in control
29,2020-03-28 07:00:00+00:00,21,16.0,74.271539,2.340706,44.213125,0.0,38.306122,13.520833,in control,in control
