In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
data_file = "./survey_data/COVID-19_protection_measure.csv"

In [6]:
data = pd.read_csv(data_file)

In [7]:
def child_child(data, x):
    return list(data[data["Age:"] == "Under 18"][x].dropna())
    
def adult_adult(data, x):
    return list(data[data["Age:"].isin(["Over 18 but under 60", "Over 60"])][x].dropna())
    
def child_adult(data, x):
    return list(data[data["Age:"] == "Under 18"][x].dropna())
    
def adult_child(data, x):
    return list(data[data["Age:"].isin(["Over 18 but under 60", "Over 60"])][x].dropna())

In [8]:
def contact_matrix(data, x, y):
    c_c = np.median(child_child(data, x))
    c_a = np.median(child_adult(data, y))
    a_a = np.median(adult_adult(data, y))
    a_c = np.median(adult_child(data, x))
    return [[c_c,c_a],[a_c,a_a]]

In [9]:
def attendance(main_counts, main_times, other_counts, other_times):

    df = pd.DataFrame({'times': list(main_times+other_times), 'counts': list(main_counts)+list(other_counts)}).groupby('times').sum()
    times = list(df.index)
    counts = list(df.counts)
    
    assert len(times) == len(counts)
    counts = np.array(counts)
    times = np.array(times)
    total = []
    for idx, i in enumerate(counts):
        for j in range(i):
            total.append(float(times[idx]))
    return np.mean(total), np.median(total)

## Overview

In [10]:
print ("There are {} participants".format(len(data)))

There are 309 participants


In [11]:
np.unique(data["Sex:"], return_counts=True)

(array(['Female', 'Male', 'Other'], dtype=object), array([152, 155,   2]))

In [12]:
np.unique(data["Age:"], return_counts=True)

(array(['Over 18 but under 60', 'Over 60', 'Under 18'], dtype=object),
 array([115, 102,  92]))

In [13]:
np.unique(data["Disability:"], return_counts=True)

(array(['No', 'Yes'], dtype=object), array([261,  48]))

## Face mask

In [14]:
np.unique(data["Do you have access to a face mask?"], return_counts=True)

(array(['No', 'Yes'], dtype=object), array([ 58, 251]))

In [15]:
np.unique(data[data["Do you have access to a face mask?"] == "No"]["Sex:"], return_counts=True)

(array(['Female', 'Male'], dtype=object), array([31, 27]))

In [16]:
np.unique(data[data["Do you have access to a face mask?"] == "No"]["Age:"], return_counts=True)

(array(['Over 18 but under 60', 'Over 60', 'Under 18'], dtype=object),
 array([15, 26, 17]))

In [17]:
np.unique(data[data["Do you have access to a face mask?"] == "No"]["Disability:"], return_counts=True)
## Note: need to scale

(array(['No', 'Yes'], dtype=object), array([45, 13]))

## Learning centers

In [18]:
np.unique(data["When the learning centers were open, did you attend any formal education?"], return_counts=True)

(array(['No', 'Yes'], dtype=object), array([193, 116]))

In [19]:
np.unique(data[(data["When the learning centers were open, did you attend any formal education?"] == "Yes") & (data["Sex:"] == "Male")]["Age:"], return_counts=True)

(array(['Over 18 but under 60', 'Over 60', 'Under 18'], dtype=object),
 array([22,  3, 37]))

In [20]:
np.unique(data[(data["When the learning centers were open, did you attend any formal education?"] == "Yes") & (data["Sex:"] == "Female")]["Age:"], return_counts=True)

(array(['Over 18 but under 60', 'Over 60', 'Under 18'], dtype=object),
 array([13,  4, 35]))

In [21]:
np.unique(data[data["When the learning centers were open, did you attend any formal education?"] == "Yes"]["Disability:"], return_counts=True)
## Note: need to scale

(array(['No', 'Yes'], dtype=object), array([108,   8]))

In [22]:
main_times, main_counts = np.unique(data["When you attended formal education, how much time do you spend there?"].dropna(), return_counts=True)
main_times

array(['1 hour', '1 hour and 30 minutes', '2 hours', 'other'],
      dtype=object)

In [23]:
main_times = [60,90,120]
main_counts = main_counts[:-1]

In [24]:
other_times, other_counts = np.unique(data["Please specify"].dropna(), return_counts=True)
other_times

array(['3 hours', '3 hrs', '30 minutes ', '30minutes', '35 minutes ', '4',
       '4 hours', '40 minutes ', '50 minutes '], dtype=object)

In [25]:
other_times = [180,180,30,30,35,240,240,40,50]

In [26]:
lc_attendance_mean, lc_attendance_median = attendance(main_counts, main_times, other_counts, other_times)

In [27]:
lc_attendance_mean/60., lc_attendance_median/60.

(1.5610632183908044, 1.5)

In [28]:
lc_contact = contact_matrix(
    data = data,
    x = "When you attended formal education, approximately how many children do you come into contact with (for example, talk to)?",
    y = "When you attended formal education, approximately how many adults do you come into contact with (for example, talk to)?"
)

In [29]:
lc_contact

[[10.0, 2.0], [5.0, 3.5]]

## Food distribution center

In [30]:
np.unique(data[(data["Do you ever go to a food distribution center?"] == "Yes") & (data["Sex:"] == "Male")]["Age:"], return_counts=True)

(array(['Over 18 but under 60', 'Over 60', 'Under 18'], dtype=object),
 array([54, 29, 14]))

In [31]:
np.unique(data[(data["Do you ever go to a food distribution center?"] == "Yes") & (data["Sex:"] == "Female")]["Age:"], return_counts=True)

(array(['Over 18 but under 60', 'Over 60', 'Under 18'], dtype=object),
 array([33, 20,  5]))

In [32]:
main_times, main_counts = np.unique(data["When you go to a food distribution center, how much time do you spend there?"].dropna(), return_counts=True)
main_times

array(['0 minutes', '1 hour', '1 hour and 30 minutes', '2 hours', 'other'],
      dtype=object)

In [33]:
main_times = [60,90,120]
main_counts = main_counts[1:-1]

In [34]:
other_times, other_counts = np.unique(data["Please specify.1"].dropna(), return_counts=True)
other_times

array(['10', '10 minutes', '10 minutes ', '15', '15 minute ', '15min',
       '20', '20 minutes', '20 minutes ', '20/30minutes', '20min',
       '3 hour ', '3 hours', '30', '30 Min', '30 min', '30 minutes',
       '30 minutes ', '30 to 45 min', '30min', '4 hour', '4 hour ',
       '50 minutes ', 'Half an hour', 'Half an hour.'], dtype=object)

In [35]:
other_times = [10,10,10,15,15,15,20,20,20,25,20,180,180,30,30,30,30,30,37.5,30,240,240,50,30,30]

In [36]:
dc_attendance_mean, dc_attendance_median = attendance(main_counts, main_times, other_counts, other_times)

In [37]:
dc_attendance_mean/60., dc_attendance_median/60.

(1.1765350877192982, 1.0)

In [38]:
dc_contact = contact_matrix(
    data = data,
    x = "When you go to a food distribution center, approximately how many children do you come into contact with at the center (for example, talk to)?",
    y = "When you go to a food distribution center, approximately how many adults do you come into contact with at the center (for example, talk to)?"
)

In [39]:
dc_contact

[[5.0, 9.0], [2.0, 11.0]]

## E-voucher outlet

In [40]:
np.unique(data[(data["Do you ever go to an e-voucher outlet?"] == "Yes") & (data["Sex:"] == "Male")]["Age:"], return_counts=True)

(array(['Over 18 but under 60', 'Over 60', 'Under 18'], dtype=object),
 array([41, 22,  8]))

In [41]:
np.unique(data[(data["Do you ever go to an e-voucher outlet?"] == "Yes") & (data["Sex:"] == "Female")]["Age:"], return_counts=True)

(array(['Over 18 but under 60', 'Over 60', 'Under 18'], dtype=object),
 array([24, 16,  5]))

In [42]:
main_times, main_counts = np.unique(data["When you go to a food distribution center, how much time do you spend there?"].dropna(), return_counts=True)
main_times

array(['0 minutes', '1 hour', '1 hour and 30 minutes', '2 hours', 'other'],
      dtype=object)

In [43]:
main_times = [60,90,120]
main_counts = main_counts[1:-1]

In [44]:
other_times, other_counts = np.unique(data["Please specify.2"].dropna(), return_counts=True)
other_times

array(['10', '10 minute', '10 minutes', '10 minutes ', '10 to 20 min',
       '12minutes', '15', '15 minutes', '15 minutes ', '15min',
       '15minutes', '15sec', '2 hour ', '20', '20 min', '20 minutes',
       '20 minutes ', '20min', '20minutes', '23', '25mi', '30',
       '30 minutes', '30 minutes ', '30 to 45 min', '30min', '30minutes',
       '40minutes', '5 minute', '5 minutes', '5 to 10 min'], dtype=object)

In [45]:
other_times = [10,10,10,10,15,12,15,15,15,15,15,15,120,20,20,20,20,20,20,23,25,30,30,30,37.5,30,30,40,5,5,7.5]

In [46]:
voucher_attendance_mean, voucher_attendance_median = attendance(main_counts, main_times, other_counts, other_times)

In [47]:
voucher_attendance_mean/60., voucher_attendance_median/60.

(0.9916173570019724, 1.0)

In [48]:
voucher_contact = contact_matrix(
    data = data,
    x = "When you go to an outlet, approximately how many children do you come into contact with at the outlet (for example, talk to)?",
    y = "When you go to an outlet, approximately how many adults do you come into contact with at the outlet (for example, talk to)?",
)

In [49]:
voucher_contact

[[0.0, 6.0], [0.0, 9.0]]

## Community center

In [50]:
np.unique(data[(data["Do you ever go to a community center?"] == "Yes") & (data["Sex:"] == "Male")]["Age:"], return_counts=True)

(array(['Over 18 but under 60', 'Over 60', 'Under 18'], dtype=object),
 array([41, 25, 33]))

In [51]:
np.unique(data[(data["Do you ever go to a community center?"] == "Yes") & (data["Sex:"] == "Female")]["Age:"], return_counts=True)

(array(['Over 18 but under 60', 'Over 60', 'Under 18'], dtype=object),
 array([32, 27, 27]))

In [52]:
main_times, main_counts = np.unique(data["When you go to a community center, how much time do you spend there?"].dropna(), return_counts=True)
main_times

array(['1 hour', '1 hour and 30 minutes', '2 hours', 'other'],
      dtype=object)

In [53]:
main_times = [60,90,120]
main_counts = main_counts[:-1]

In [54]:
other_times, other_counts = np.unique(data["Please specify.3"].dropna(), return_counts=True)
other_times

array(['10 min', '15 minutes', '2.5 hours', '20', '20 minutes ',
       '25 minutes ', '3 hours', '30', '30 min', '30 minutes',
       '30 minutes ', '30 to 45 min', '30mins', '3o minutes', '4 hour',
       '4 hour ', '4 hours', '40min', '45 min', '50 minutes ',
       'half an hour', 'half an hour '], dtype=object)

In [55]:
other_times = [10,15,150,20,20,25,180,30,30,30,30,37.5,30,30,240,240,240,40,45,50,30,30]

In [56]:
community_attendance_mean, community_attendance_median  = attendance(main_counts, main_times, other_counts, other_times)

In [57]:
community_attendance_mean/60., community_attendance_median/60.

(1.249099099099099, 1.0)

In [58]:
community_contact = contact_matrix(
    data = data,
    x = "When you go to a community center, approximately how many children do you come into contact with at the center (for example, talk to)?",
    y = "When you go to a community center, approximately how many adults do you come into contact with at the center (for example, talk to)?"
)

In [59]:
community_contact

[[7.0, 4.5], [4.0, 8.0]]

## Religious center

In [60]:
np.unique(data[(data["Do you ever go to a religious meeting?"] == "Yes") & (data["Sex:"] == "Male")]["Age:"], return_counts=True)

(array(['Over 18 but under 60', 'Over 60', 'Under 18'], dtype=object),
 array([40, 41, 26]))

In [61]:
np.unique(data[(data["Do you ever go to a religious meeting?"] == "Yes") & (data["Sex:"] == "Female")]["Age:"], return_counts=True)

(array(['Over 18 but under 60', 'Over 60', 'Under 18'], dtype=object),
 array([37, 34, 18]))

In [62]:
main_times, main_counts = np.unique(data["When you go to a religious meeting, how much time do you spend there?"].dropna(), return_counts=True)
main_times

array(['0 minutes', '1 hour', '1 hour and 30 minutes', '2 hours', 'other'],
      dtype=object)

In [63]:
main_times = [60,90,120]
main_counts = main_counts[1:-1]

In [64]:
other_times, other_counts = np.unique(data["Please specify.4"].dropna(), return_counts=True)
other_times

array(['20', '20 Minutes ', '20minit', '20minutes', '25 minutes',
       '3 hours', '30', '30 min', '30 minutes', '30 minutes ',
       '30 to 45 min', '30min', '30minutes', '34minutes', '35minutes',
       '40 minutes', '40 minutes ', '45', '45 min'], dtype=object)

In [65]:
other_times = [20,20,20,20,25,180,30,30,30,30,37.5,30,30,34,35,40,40,45,45]

In [66]:
religious_attendance_mean, religious_attendance_median = attendance(main_counts, main_times, other_counts, other_times)

In [67]:
religious_attendance_mean/60., religious_attendance_median/60.

(1.1891156462585035, 1.0)

In [68]:
religious_contact = contact_matrix(
    data = data,
    x = "When you go to a religious meeting, approximately how many children do you come into contact with at the meeting (for example, talk to)?",
    y = "When you go to religious meeting, approximately how many adults do you come into contact with at the meeting (for example, talk to)?"
)

In [69]:
religious_contact

[[5.0, 9.0], [2.0, 10.0]]

## Hand pump and latrine

In [70]:
main_times, main_counts = np.unique(data["When you go to a water pump or latrine, how much time do you spend there?"].dropna(), return_counts=True)
main_times

array(['0 minutes', '1 hour', '1 hour and 30 minutes', '2 hours', 'other'],
      dtype=object)

In [71]:
main_times = [60,120,180]
main_counts = main_counts[1:-1]

In [72]:
other_times, other_counts = np.unique(data["Please specify.5"].dropna(), return_counts=True)
other_times

array(['10', '10  minutes ', '10 Minutes', '10 min', '10 minute',
       '10 minutes', '10 minutes ', '10 to 15 minutes ', '10 to 20 min',
       '10minutes', '12 minutes', '12 minutes ', '15', '15 minute',
       '15 minute \n', '15 minutes', '15 minutes ', '15min', '15mins',
       '15minutes', '17 minutes', '18 minutes ', '2 minutes',
       '2 to 3 min', '2 to 3 min ', '2.5 hours', '20', '20 min',
       '20 minute ', '20 minutes', '20 minutes ', '20 to 25 minutes ',
       '20min', '20mins in water pump and 15min in latrine', '20minutes',
       '24', '25 minute', '25 minutes', '25 minutes ', '25min',
       '29 minutes', '29minutes', '3 to 5 min', '30', '30 Minutes ',
       '30 minute ', '30 minutes', '30 mnt',
       '30mins in water pumps and 15min in latrine',
       '30mins in water pumps and 15mins in latrin',
       '30mins water pumps and 15mins in latrine', '30minutes',
       '35 minute ', '3min', '3–5 minutes ', '40 minutes', '40mnt',
       '45minutes', '5 Minutes', '

In [73]:
other_times = [10,10,10,10,10,10,10,12.5,15,10,12,12,15,15,15,15,15,15,15,15,17,18,2,2.5,2.5,150,20,20,20,20,20,22.5,20,17.5,20,24,25,25,25,25,29,29,4,30,30,30,30,30,22.5,22.5,22.5,30,35,3,4,40,40,45,5,5,7.5,7.5,50,7,30]

In [74]:
other_counts = other_counts[:-3]

In [75]:
pump_latrine_attendance_mean, pump_latrine_attendance_median = attendance(main_counts, main_times, other_counts, other_times)

In [76]:
pump_latrine_attendance_mean/60., pump_latrine_attendance_median/60. 

(0.7948412698412698, 0.5)

In [77]:
pump_latrine_contact = contact_matrix(
    data = data,
    x = "When you go to a water pump or latrine, approximately how many children do you come into contact with(for example, talk to)?",
    y = "When you go to water pump or latrine, approximately how many adults do you come into contact with(for example, talk to)?"
)

In [78]:
pump_latrine_contact

[[4.0, 3.0], [3.0, 5.0]]