In [1]:
#https://data.acgov.org/datasets/AC-HCSA::alameda-county-covid-19-cases-and-deaths-over-time-1/data
import pandas as pd
import numpy as np

In [2]:
# Raw county export (one row per date, separate Berkeley / Alameda LHJ columns).
covid_csv_path = '1_data_input/Alameda_County_COVID-19_Cases_and_Deaths_Over_Time.csv'
dfcov_source = pd.read_csv(covid_csv_path)

In [3]:
#### Convert columns to appropriate data types
# Parse the textual 'Date' column into pandas datetimes so it can be compared
# against date cut-offs later in the notebook.
parsed_dates = pd.to_datetime(dfcov_source['Date'])
dfcov_source['Date'] = parsed_dates



In [4]:
#### Select columns of interest
# Berkeley and Alameda have separate local health jurisdictions (LHJ), which is
# why the data is split between Bk and AC columns.
# The sum of the two LHJ counts gives the total Alameda County count.
# We are interested in using a U chart on the count of total cases and deaths
# in Alameda County.
lhj_columns = ['Date', 'BkLHJ_Cases', 'BkLHJ_Deaths', 'ACLHJ_Cases', 'ACLHJ_Deaths']
dfcov_select = dfcov_source[lhj_columns].copy()
dfcov_select['total_cases'] = dfcov_select['BkLHJ_Cases'].add(dfcov_select['ACLHJ_Cases'])
dfcov_select['total_deaths'] = dfcov_select['BkLHJ_Deaths'].add(dfcov_select['ACLHJ_Deaths'])

# Keep only data following the Bay Area shelter-in-place order (2020-03-19);
# assume a stable period starts during Phase I after shelter in place.
# NOTE: the comparison is a strict `>` against the bare date, so rows stamped
# later on 2020-03-19 (e.g. 07:00) still pass the filter.
after_shelter_order = dfcov_select['Date'] > '2020-03-19'
dfcov_filter = dfcov_select.loc[after_shelter_order]

# Only the date and the combined case count are carried forward.
dfcov = dfcov_filter[['Date', 'total_cases']].copy()





In [20]:
### Selecting the control chart to use
# My initial guess was to use a C chart; however, that would require assuming
# the number of samples is fixed, which is a heavy assumption.
# After some research, I decided to use an I-MR chart, since subgroupings are
# never defined for this data.

# Moving range = absolute change in total cases from the previous row
# (the first row has no predecessor, so its moving range is NaN).
dfcov['moving_range'] = dfcov['total_cases'].diff().abs()
dfcov.head(5)



Unnamed: 0,Date,total_cases,moving_range
20,2020-03-19 07:00:00+00:00,8,
21,2020-03-20 07:00:00+00:00,21,13.0
22,2020-03-21 07:00:00+00:00,17,4.0
23,2020-03-22 07:00:00+00:00,28,11.0
24,2020-03-23 07:00:00+00:00,24,4.0


In [21]:
### Calculate control limits and lock the limits on the date this was created
# Baseline rows used to fix the limits: everything dated before 2020-05-07.
# This is held in its own variable; writing `dfcov.locklimits = ...` would only
# hang an ad-hoc attribute on the DataFrame, a pattern pandas warns about.
dfcov_locklimits = dfcov[dfcov['Date'] < '2020-05-07'].copy()

# Centre lines over the baseline; .mean() skips the NaN moving range of the
# first row.
MR_BAR = dfcov_locklimits['moving_range'].mean()
X_BAR = dfcov_locklimits['total_cases'].mean()

# I-MR chart constants for a moving range spanning N = 2 observations.
N = 2
E2 = 2.66
D4 = 3.27
D3 = 0

# Individuals (I) chart limits: centre line +/- E2 * average moving range.
I_UCL = X_BAR + E2*MR_BAR
I_LCL = X_BAR - E2*MR_BAR

# Moving-range (MR) chart limits; with D3 = 0 the lower limit is 0.
MR_UCL = D4*MR_BAR
MR_LCL = D3*MR_BAR


In [22]:
# Build the frame used to visualize the control charts: the daily data plus one
# constant column per control limit / centre line (same value on every row).
dfcov_cc = dfcov.assign(
    I_UCL=I_UCL,
    I_LCL=I_LCL,
    MR_UCL=MR_UCL,
    MR_LCL=MR_LCL,
    X_BAR=X_BAR,
    MR_BAR=MR_BAR,
)


In [23]:
# Flag points that fall outside the control limits on either chart.
# Rows where the comparison is undefined (e.g. NaN moving range) evaluate to
# False on both sides and are therefore labelled 'in control'.
cases_above = dfcov_cc['total_cases'].gt(dfcov_cc['I_UCL'])
cases_below = dfcov_cc['total_cases'].lt(dfcov_cc['I_LCL'])
dfcov_cc['I_control_flag'] = np.where(cases_above | cases_below, 'out of control', 'in control')

range_above = dfcov_cc['moving_range'].gt(dfcov_cc['MR_UCL'])
range_below = dfcov_cc['moving_range'].lt(dfcov_cc['MR_LCL'])
dfcov_cc['MR_control_flag'] = np.where(range_above | range_below, 'out of control', 'in control')


In [24]:
# Persist the control-chart table (index included) for downstream use.
control_chart_csv_path = '2_data_output/ac_covid_control_chart.csv'
dfcov_cc.to_csv(control_chart_csv_path)