In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the Israeli MoH COVID-19 lab-test export.
# Source: https://data.gov.il/dataset/covid-19
df = pd.read_csv(
    'corona_lab_tests_ver007.csv',
    encoding='latin-1',
    sep=",",
)

# Hebrew result label -> short English code.
# The first four keys are the labels as proper Hebrew text.  The last four
# appear to be the same labels as they look when the Hebrew bytes are decoded
# as latin-1 (which is how the file is read above).
# Any label not listed here becomes NaN in df['result'].
RESULT_CODES = {
    'חיובי': 'pos',
    'שלילי': 'neg',
    'בעבודה': 'wip',
    'לא ודאי': 'uncertain',
    'çéåáé': 'pos',
    'ùìéìé': 'neg',
    'áòáåãä': 'wip',
    'ìà åãàé': 'uncertain',
}
df['result'] = df['corona_result'].map(RESULT_CODES)

# Both date columns are stored as month/day/year strings.
for date_col in ('test_date', 'result_date'):
    df[date_col] = pd.to_datetime(df[date_col], format="%m/%d/%Y")

In [None]:
# Distribution of test results per lab, drawn as stacked horizontal bars.
#
# Note on lab identity: lab #3 is believed to be Hadassah hospital.
# https://all-world.news/53180/ reports that Hadassah had conducted 40K tests
# by 24 April, i.e. 20% of the total for Israel.  Lab #3 is the biggest by
# volume (almost twice the next one) and, in the data released on 24 April,
# had conducted about 36K tests, which is close enough.
m = df

# count() tallies non-null cells per column for every (lab, result) pair;
# unstack() then spreads the result codes out into columns.
counts_per_lab_result = m.groupby(['lab_id', 'result']).count()
p = counts_per_lab_result.unstack()

# Any column would serve as the tally; 'test_date' is the one plotted.
p['test_date'].plot(kind='barh', stacked=True)
plt.show()

In [None]:
# Number of tests per lab over time, one line per lab.
m = df

# Only keep labs whose overall row count exceeds 15000.
lab_counts = m['lab_id'].value_counts()
biglabs = lab_counts[lab_counts.gt(15000)].index.tolist()
m = m.loc[m['lab_id'].isin(biglabs)]

# Non-null cell counts per (test date, lab), with labs spread into columns.
daily_counts = m.groupby(['test_date', 'lab_id']).count()
p = daily_counts.unstack()

# The plotted tally is the number of rows with a non-null 'result_date'.
p['result_date'].plot(linewidth=2)
plt.show()

In [None]:
# Mean number of days from test to result, per lab and test date.
#
# State left behind for the cells below:
#   * df gains a 'test duration' column (float days),
#   * m holds only the rows of labs with more than 10000 tests,
#   * p holds the per-(test_date, lab_id) means with labs as columns.

# Whole days between test and result.  'test_date' and 'result_date' are
# already datetime64 (converted when the file was loaded), so no re-parsing is
# needed.  Subtracting the Series and using .dt.days keeps pandas' handling of
# missing dates (NaT -> NaN); casting the raw .values arrays through NumPy
# bypasses that handling, so a missing date may not come out as NaN.
# The column is written to df explicitly rather than through an alias of it.
df['test duration'] = (df['result_date'] - df['test_date']).dt.days.astype('float')

# Only keep labs whose overall row count exceeds 10000.
lab_counts = df['lab_id'].value_counts()
biglabs = lab_counts[lab_counts > 10000].index.tolist()
m = df[df['lab_id'].isin(biglabs)]

# numeric_only=True: the frame still carries text columns (e.g. 'result'),
# which pandas >= 2.0 refuses to average instead of silently dropping them.
p = m.groupby(['test_date', 'lab_id']).mean(numeric_only=True).unstack()

ax = p['test duration'].plot(linewidth=2)
ax.set_ylabel('mean days to test result')
ax.set_xlabel('test date')
plt.show()

In [None]:
# Spread of the daily mean test duration for each lab, split by calendar week.
# Relies on `m` from the previous cell: rows of the high-volume labs, carrying
# the 'test duration' column.

# numeric_only=True: text columns (e.g. 'result') cannot be averaged, and
# pandas >= 2.0 raises on them instead of silently dropping them.
q = m.groupby(['test_date', 'lab_id']).mean(numeric_only=True)

# Expose the grouping keys as ordinary columns so seaborn can address them.
q['lab_id'] = q.index.get_level_values('lab_id')
# DatetimeIndex.week was removed in pandas 2.0; isocalendar().week is its
# replacement (ISO 8601 week number).  isocalendar() returns a frame indexed by
# date, so convert to a plain array: the values are then assigned by position
# instead of being aligned against q's (test_date, lab_id) MultiIndex.
test_dates = q.index.get_level_values('test_date')
q['week'] = test_dates.isocalendar().week.to_numpy(dtype='int64')

# A bare `plt.figure` (no call) did nothing and has been dropped;
# plt.subplots() creates the figure.
g, ax = plt.subplots(1, 1, figsize=(20, 10))
sns.violinplot(x="lab_id", y="test duration", inner="points", hue="week", data=q, ax=ax)
sns.despine(trim=True)
plt.show()

In [None]:
# Spread of the daily mean test duration by calendar week, without splitting
# by lab id (each point is still one (test_date, lab_id) mean).
# Relies on `m` from the "mean days to test result" cell: rows of the
# high-volume labs, carrying the 'test duration' column.

# numeric_only=True: text columns (e.g. 'result') cannot be averaged, and
# pandas >= 2.0 raises on them instead of silently dropping them.
q = m.groupby(['test_date', 'lab_id']).mean(numeric_only=True)

q['lab_id'] = q.index.get_level_values('lab_id')
# DatetimeIndex.week was removed in pandas 2.0; isocalendar().week is its
# replacement (ISO 8601 week number).  isocalendar() returns a frame indexed by
# date, so convert to a plain array: the values are then assigned by position
# instead of being aligned against q's (test_date, lab_id) MultiIndex.
test_dates = q.index.get_level_values('test_date')
q['week'] = test_dates.isocalendar().week.to_numpy(dtype='int64')

# A bare `plt.figure` (no call) did nothing and has been dropped;
# plt.subplots() creates the figure.
g, ax = plt.subplots(1, 1, figsize=(20, 10))
sns.violinplot(x="week", y="test duration", inner="points", data=q, ax=ax)
sns.despine(trim=True)
plt.show()