In [3]:
import pandas as pd
import numpy as np

In [4]:
cm = pd.read_csv('data/COVID 19 Containment measures data.csv',parse_dates=['Date Start','Date end intended']).dropna(subset=['Country'])
completeness = pd.read_csv('data/Dataset completeness.csv')

In [5]:
to_include = completeness[~pd.isna(completeness['Complete up to date'])]['Country'].append(cm[cm['Country'].str.startswith('US')]['Country'])
cm = cm[cm['Country'].isin(to_include)]

In [8]:
MAX_COLS = {
    'isolation':{
        'contact isolation - symptoms':1,
        'cohort isolation - symptoms':1,
        'cluster isolation - symptoms':2,
        'blanket isolation - symptoms':3
    },
    'quarantine':{
        'contact isolation - no symptoms':1,
        'cohort isolation - no symptoms':2,
        'cluster isolation - no symptoms':3,
        'blanket curfew - no symptoms':4,
        'blanket isolation - no symptoms':5,
        'natural village quarantine': 6
    },
    'movement':{
        'domestic traveller quarantine':1,
        'domestic travel limitation': 1,
        'domestic travel ban':2,
        'total vehicle ban':2
    },
    'workplace':{
        'general nonessential business suspension':1,
        'limited nonessential business suspension':0.5,
        'closure of gathering places':0.5,
        'remote work':0.5
    },
    'travel':{
        'international traveller screening - risk countries':1,
        'international traveller screening - all countries':2,
        'international traveller quarantine - risk countries':3,
        'international traveller quarantine - all countries':4,
        'international travel ban - risk countries':5,
        'international travel ban - all countries':6
    },
    'testing':{'testing numbers total':np.nan},
    'tracing':{'contacts traced total':np.nan},
    'mask-wearing':{'public mask wearing data':np.nan},
    'hand-washing':{'public handwashing data':np.nan}
    
}

MIN_COLS = {
    'gatherings':['indoor gatherings banned',
                        'outdoor gatherings banned']
}

CUMSUM_COLS = {
    'healthcare_specialisation':['clinic specialisation',
                                'case transport',
                                'quarantine zone',
                                'hospital specialisation',
                                'healthcare entry screening',
                                'remote medical treatment',
                                'visiting in hospital banned'],
    'public_education':['risk communication',
                                      'community engagement',
                                      'coronavirus education activities',
                                      'phone line'],
    'easing_stay_at_home':['unemployment benefits extension',
                                    'eviction moratorium',
                                    'isolation allowance',
                                    'compulsory isolation'],
    'cleaning':['public transport cleaning',
                      'public facility cleaning'],
    'hygiene_misc':['funeral hygiene',
                                     'cash cleaning',
                                     'cash banned'],
    'public_interaction':['handshakes banned',
                                    'social distancing advice',
                                    'stay home advice',
                                    'space minimum',
                                    'outdoor person density',
                                    'indoor person density',
                                    'public venue screening',
                                    'handwashing encouragement',
                                    'public mask encouragement',
                                    'public mask supply',
                                    'public mask and hygiene supply',
                                    'public hand sanitizer supply'],
    'schools':['school closure',
                     'university closure',
                     'nursery school closure',
                     'remote schooling',
                     'public transport stopped'],
    'activity':['activity cancellation - other',
                            'sports cancellation',
                            'religious activity cancellation',
                            'religious activity limitations',
                            'weddings canceled',
                            'very large event cancellation or postponement',
                            'cultural activity limitation',
                            'remote cultural content',
                            'restaurant limitations',
                            'closure of gathering places'],
    'resumption':['public transport resumed',
                 'activity resumed',
                 'business resumed'],
    'diagnostic_criteria':['diagnostic criteria loosened', 'diagnostic criteria tightened']    
}

TEST_COLS = {    
    'testing_criteria':{'test all':1,
                       'test symptomatic':0.5,
                       'cluster testing':0.3,
                       'test contacts':0.1,
                       'test cohorts':0.2,
                       'test travellers':0.1,
                       'test medical staff':0.1,
                       'test vulnerable':0.1}
}

def default_values(kw):
    for k, v in {**MAX_COLS,**TEST_COLS}.items():
        if (kw in v) and (v[kw]!=np.nan):
            return v[kw]
    return np.nan

def keywords(kws_quants):
    res =  pd.DataFrame([(i,j[1]) 
                         for j in kws_quants.values 
                         for i in str(j[0]).split(', ')],
                        columns=['Keywords','Quantity'])
    res['Quantity'] = res['Keywords'].apply(default_values).fillna(res['Quantity'])
    return res

def sum_kws(kws_quants,tags):
    return pd.Series(kws_quants['Keywords'].unique()).isin(tags).sum()

def max_kws(kws_quants, tags):
    return kws_quants[kws_quants['Keywords'].isin(tags)]['Quantity'].max()

def min_kws(kws_quants,tags):
    return kws_quants[kws_quants['Keywords'].isin(tags)]['Quantity'].min()

def test_kws(kws_quants,tags):
    if 'test all' in kws_quants['Keywords']:
        return 1
    elif 'test symptomatic' in kws_quants['Keywords']:
        return 0.5
    else:
        return kws_quants[kws_quants['Keywords'].isin(tags)]['Quantity'].sum()


jdict = {**MAX_COLS,**MIN_COLS,**CUMSUM_COLS,**TEST_COLS}


In [9]:
index = {}

for cat, tags in MAX_COLS.items():
    for tag, value in tags.items():
        index[tag] = {"type": "min", "cat": cat, "value": value}

for cat, tags in MIN_COLS.items():
    for tag, value in tags:
        index[tag] = {"type": "min", "cat": cat, "value": value}

for cat, tags in CUMSUM_COLS.items():
    for tag in tags:
        index[tag] = {"type": "cum", "cat": cat, "value": value}

ValueError: too many values to unpack (expected 2)

In [7]:
def do_country(df):
    exp = pd.DataFrame([(id,keyword) 
        for id, item in df.iterrows()
        for keyword in str(item["Keywords"]).split(', ')],
        columns=['ID', 'Keyword']).set_index("ID")

    df = df.join(exp).set_index("Keyword")

    out = {}

    for cat, tags in MAX_COLS.items():
        thing = df.loc[df.index.isin(tags.keys())]
        out[cat] = thing

    return out

g = cm.sort_values("Date Start").groupby("Country")
df = g.get_group("Italy");
print(df.columns)
for key, thing in do_country(df).items():
    print(key)
    print(thing[["Date Start", "Description of measure implemented"]].to_records())
    print()

Index(['ID', 'Applies To', 'Country', 'Date Start', 'Date end intended',
       'Description of measure implemented', 'Exceptions', 'Implementing City',
       'Implementing State/Province', 'Keywords', 'Quantity', 'Source',
       'Target city', 'Target country', 'Target region', 'Target state'],
      dtype='object')
isolation
[]

quarantine
[('blanket curfew - no symptoms', '2020-03-08T00:00:00.000000000', 'avoiding moves from place of residence')
 ('cluster isolation - no symptoms', '2020-02-22T00:00:00.000000000', 'On 22 February, the government announced a new decree imposing the quarantine of more than 50,000 people from 11 different municipalities in Northern Italy. The quarantine zones are called the Red Zones and the areas in Lombardy and Veneto outside of them are called the Yellow Zones.[150] Penalties for violations range from a €206 fine to three months of imprisonment.[151] The Italian military and law enforcement agencies were instructed to secure and implement the lock

In [68]:
data_dict = {}

g = cm.sort_values("Date Start").groupby("Country")
for name, df in g:
    do_country()

Unnamed: 0,ID,Applies To,Country,Date Start,Date end intended,Description of measure implemented,Exceptions,Implementing City,Implementing State/Province,Keywords,Quantity,Source,Target city,Target country,Target region,Target state,Keyword
217,76,,Italy,2020-03-12,2020-03-25,"closure of shops (supermarkets, grocery, pharm...","supermarkets, grocery, pharmacy",,,general nonessential business suspension,,https://www.mzv.cz/rome/cz/viza_a_konzularni_i...,,,,,general nonessential business suspension
218,77,,Italy,2020-03-12,2020-03-25,closures of companies,"banks, transportations, financial and insuranc...",,,general nonessential business suspension,,https://www.mzv.cz/rome/cz/viza_a_konzularni_i...,,,,,general nonessential business suspension
219,78,,Italy,2020-03-08,NaT,avoiding moves from place of residence,,,,blanket curfew - no symptoms,,https://www.mzv.cz/rome/cz/viza_a_konzularni_i...,,,,,blanket curfew - no symptoms
220,80,,Italy,2020-03-08,2020-04-04,"stop sport activities (gyms, swimming pools, t...",,,,sports cancellation,,https://www.mzv.cz/rome/cz/viza_a_konzularni_i...,,,,,sports cancellation
221,79,,Italy,2020-03-08,2020-04-04,"stop cultural activities (cinemas, theaters, m...",,,,cultural activity limitation,,https://www.mzv.cz/rome/cz/viza_a_konzularni_i...,,,,,cultural activity limitation
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
783,748,,Italy,2020-03-07,NaT,"Shopping centres had to be closed on weekends,...",,,,limited nonessential business suspension,,https://en.wikipedia.org/wiki/2020_coronavirus...,,,,,limited nonessential business suspension
784,751,,Italy,2020-03-08,NaT,Riots broke out in many penitentiaries through...,,,,"prisons closed to visitors, riots",,https://en.wikipedia.org/wiki/2020_coronavirus...,,,,,prisons closed to visitors
784,751,,Italy,2020-03-08,NaT,Riots broke out in many penitentiaries through...,,,,"prisons closed to visitors, riots",,https://en.wikipedia.org/wiki/2020_coronavirus...,,,,,riots
788,747,,Italy,2020-03-07,NaT,The decree also established the closure of all...,,,,sports cancellation,,https://en.wikipedia.org/wiki/2020_coronavirus...,,,,,sports cancellation
