In [1]:
import numpy as np, matplotlib.pyplot as plt, pandas as pd
pd.set_option('display.max_rows', 8)
!date

import methods

%load_ext autoreload
%autoreload 2

Mon May 17 12:32:40 PDT 2021


# Calculations for abstract

## Load data

In [2]:
# Load the analysis inputs through the project helper. Later cells index the
# result with the keys 'df', 'start_date' and 'end_date'.
# NOTE(review): the saved output shows a fragment of a warning
# (`mask |= (ar1 == a)`) emitted somewhere inside this call — presumably a
# NumPy/pandas FutureWarning; confirm it is benign.
data = methods.load_data()

  mask |= (ar1 == a)


In [3]:
# Boolean mask over data['df'] selecting the rows whose date falls inside the
# study window; between() includes both endpoints, matching `>=` and `<=`.
all_rows = data['df'].date.between(data['start_date'], data['end_date'])

# Number of rows in the window. Series.sum() counts the True entries in one
# vectorized pass; the builtin sum() looped over the mask element by element
# in Python (over 1.8 million True entries in the saved run).
# int(...) keeps the displayed value a plain Python integer.
int(all_rows.sum())

1822662

In [4]:
# Narrow the study window to rows flagged test_required == 1, then let the
# project helper build the analysis frame with 'hcw' as the exposure and
# 'test_positive' as the outcome.
test_was_required = data['df'].test_required == 1
rows = all_rows & test_was_required

df = methods.subset_data(
    data,
    rows,
    exposure='hcw',
    outcome='test_positive',
)

# Size of the analysis frame.
len(df)

43430

In [5]:
# Latest date present in the loaded frame. In the saved run this is 2021-02-10,
# which is later than data['end_date'] as printed by the text cells below
# (October 20, 2020) — the frame extends beyond the study window.
data['df'].date.max()

Timestamp('2021-02-10 00:00:00')

## Calculate relative risk with uncertainty

In [6]:
%%time

# set random seed for reproducibility
np.random.seed(12345)
rr_draws = methods.sample_rr_draws(df)
rr_draws

CPU times: user 58 s, sys: 718 ms, total: 58.7 s
Wall time: 58.7 s


point_est    0.731588
draw_0       0.746737
draw_1       0.759099
draw_2       0.759189
               ...   
draw_996     0.727963
draw_997     0.740747
draw_998     0.719303
draw_999     0.751744
Length: 1001, dtype: float64

In [7]:
# Collapse the draws into the three numbers quoted by the text cells below,
# where they are printed as the estimate and the bounds of a "95% UI".
# NOTE(review): how my_summarize derives them (mean vs. point_est, which
# percentiles) is not visible here — confirm in methods.
rr_mean, rr_lb, rr_ub = methods.my_summarize(rr_draws)

# Text for Key Points > Findings:

In [8]:
# Key Points > Findings sentence.
# NOTE(review): this wording says "incidence rate ratio", while the otherwise
# identical Findings cell further down says "prevalence ratio" — confirm which
# term is intended for cross-sectional data.
end_date_text = data["end_date"].strftime(methods.date_fmt)
rr_text = f'{rr_mean:.2f} (95% UI {rr_lb:.2f} to {rr_ub:.2f})'

findings_text = (
    f'Findings: We estimate that, on {end_date_text}, in the United States, '
    f'there was a relative COVID incidence rate ratio of {rr_text} '
    'between healthcare workers and non-healthcare workers.\n'
)
print(findings_text)

Findings: We estimate that, on October 20, 2020, in the United States, there was a relative COVID incidence rate ratio of 0.73 (95% UI 0.68 to 0.80) between healthcare workers and non-healthcare workers.



# Text for Abstract > Design:

In [9]:
# Abstract > Design sentence: number of survey rows in the study window plus
# the window's first and last dates.
# NOTE(review): the figure is a count of rows in the window; whether each row
# is a distinct Facebook user is not visible here — confirm.
# all_rows.sum() counts the True entries in one vectorized pass instead of
# looping over the mask with the builtin sum(); the printed text is unchanged.
print('Analysis of cross-sectional data from a daily, web-based survey '
      f'of {int(all_rows.sum()):,.0f} Facebook users from '
      f'{data["start_date"].strftime(methods.date_fmt)} to {data["end_date"].strftime(methods.date_fmt)}.')

Analysis of cross-sectional data from a daily, web-based survey of 1,822,662 Facebook users from September 8, 2020 to October 20, 2020.


# Text for Abstract > Results:

In [10]:
# Abstract > Results sentence (relative risk with its uncertainty interval).
# NOTE(review): says "incidence ratio"; later cells say "prevalence ratio" —
# confirm which term is intended.
end_date_text = data["end_date"].strftime(methods.date_fmt)
rr_text = f'{rr_mean:.2f} (95% UI {rr_lb:.2f} to {rr_ub:.2f})'

print(
    f'On {end_date_text}, in the United States, '
    f'there was a relative COVID incidence ratio of {rr_text} '
    'between healthcare workers and workers in non-healthcare occupations.'
)

On October 20, 2020, in the United States, there was a relative COVID incidence ratio of 0.73 (95% UI 0.68 to 0.80) between healthcare workers and workers in non-healthcare occupations.


In [11]:
# Abstract > Results sentence with raw counts: number and share of positive
# outcomes among exposed (exposure == 1) and unexposed (exposure == 0) rows.
# NOTE(review): the ratio is printed with one decimal here ("0.7 (95% UI 0.7 to
# 0.8)" in the saved run) but with two decimals in every other cell — confirm
# the intended precision.
hcw_outcome = df[df.exposure == 1].outcome
non_hcw_outcome = df[df.exposure == 0].outcome

print(
    f'Among HCWs, {hcw_outcome.sum():,.0f} reported a positive result to a routine COVID test'
    f' ({hcw_outcome.mean() * 100:,.1f}%),'
    f' while among non-HCWs, {non_hcw_outcome.sum():,.0f} reported a positive test result'
    f' ({non_hcw_outcome.mean() * 100:,.1f}%),'
    f' for a relative COVID prevalence ratio of {rr_mean:.1f} (95% UI {rr_lb:.1f} to {rr_ub:.1f}).'
)

Among HCWs, 588 reported a positive result to a routine COVID test (4.0%), while among non-HCWs, 1,557 reported a positive test result (5.4%), for a relative COVID prevalence ratio of 0.7 (95% UI 0.7 to 0.8).


In [12]:
# Key Points > Findings sentence, "prevalence ratio" wording.
# NOTE(review): apart from that term this duplicates the earlier Findings cell
# ("incidence rate ratio") — confirm which one should be kept.
end_date_text = data["end_date"].strftime(methods.date_fmt)
rr_text = f'{rr_mean:.2f} (95% UI {rr_lb:.2f} to {rr_ub:.2f})'

findings_text = (
    f'Findings: We estimate that, on {end_date_text}, in the United States, '
    f'there was a relative COVID prevalence ratio of {rr_text} '
    'between healthcare workers and non-healthcare workers.\n'
)
print(findings_text)

Findings: We estimate that, on October 20, 2020, in the United States, there was a relative COVID prevalence ratio of 0.73 (95% UI 0.68 to 0.80) between healthcare workers and non-healthcare workers.



In [13]:
# NOTE(review): dead cell — everything below is commented out, and the names it
# uses (data_start_date, most_recent_data_date, date_fmt) are not defined in
# the visible notebook. The "Abstract > Design" cell above prints a similar
# sentence from data[...] and methods.date_fmt; consider deleting this cell.
# # Design
# print(f'Analysis of cross-sectional data from a daily, web-based survey '
#       f'of {sum(rows):,.0f} Facebook users from '
#       f'{data_start_date.strftime(date_fmt)} to {most_recent_data_date.strftime(date_fmt)}.')

In [14]:
# Results sentence, "prevalence ratio" wording: the estimate and its 95% UI on
# the last day of the study window.
end_date_text = data["end_date"].strftime(methods.date_fmt)
rr_text = f'{rr_mean:.2f} (95% UI {rr_lb:.2f} to {rr_ub:.2f})'

results_text = (
    f'On {end_date_text}, in the United States, '
    f'there was a relative COVID prevalence ratio of {rr_text} '
    'between healthcare workers and non-healthcare workers.\n'
)
print(results_text)

On October 20, 2020, in the United States, there was a relative COVID prevalence ratio of 0.73 (95% UI 0.68 to 0.80) between healthcare workers and non-healthcare workers.

