In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math 
import random
from scipy import optimize
from scipy.special import factorial

In [2]:
from utils import read_age_gender

In [3]:
#fix seed in testing environment
# np.random.seed(seed=42) #fix random seed for reproducibility

In [4]:
################################ Camp Structure ################################

Nb = 8100          # Number of people in isoboxes.
mub = 10           # Isoboxes mean occupancy (people).
hb = Nb / mub      # Number of isoboxes. 
iba = 0.5          # Proportion of area covered by isoboxes.

Nt = 10600         # Number of people in tents.
mut = 4            # Tents occupancy of (people).
ht = 10600 / mut   # Number of tents.

fblocks = np.array([1,1])   # initial sectoring. Divide camp into (nxn) grid, each with its own food line.
N = Nb + Nt                 # Total population.


################################ Emperical Age and Sex Distribution ################################
age_and_gender = read_age_gender(N)


################################ Transmission parameters ################################

# Infection
twh = 0.5   # Probability of infecting each person in your household per day.
aip = 0.1   # Probability of infecting each person you meet per meeting (Fang et al.)
tr = 1      # Initial transmission reduction (relative to assumed per contact transmission rate, outside household only).


################################ Other parameters ################################


siprob = 0        # Probability of spotting symptoms, per person per day.
clearday = 7      # Days in quarantine after no virus shedding (i.e., recovery).
pac = 0.179       # Permanently asymptomatic cases (Mizumoto et al 2020 Eurosurveillance).
ss = 0.20         # Realtive strength of interaction between different ethnicities.


################################ Initial movement parameters ################################

# Note that the initial assumption is that
# everyone uses the larger radius some proportion of the time, which is
# __NOT__ the same as assuming that some people always use the larger radius,
# Nonetheless, I am setting the proportion equal to the number of males age 10-50 in the population.

lr1 = 0.02       # Smaller movement radius. Range around their household during lockdown or females and individuals age < 10.
lr2 = 0.1        # Larger movement radius. ie. Pople who violate lockdown enforcement or males over age 10.
lrtol = 0.02     # Scale interactions - two people with completely overlapping rages with this radius interact once per day



In [5]:
from abm import form_population_matrix

In [6]:
pop_matrix=form_population_matrix(N,hb,Nb,ht,Nt,pac,age_and_gender)

In [64]:
#col1
def create_household_column(num_hh_type1,num_ppl_type1,num_hh_type2,num_ppl_type2):
    ppl_hh_index_draw= np.concatenate((np.ceil(num_hh_type1*np.random.uniform(0,1,num_ppl_type1)), 
                       num_hh_type1+np.ceil(num_hh_type2*np.random.uniform(0,1,num_ppl_type2)))) 
    hh_index,ppl_to_hh_index = np.unique(ppl_hh_index_draw, return_inverse=True)  # ui - indices from the unique sorted array that would reconstruct rN
    assert hh_index[ppl_to_hh_index].all() == ppl_hh_index_draw.all()
    return np.sort(ppl_to_hh_index)
household_column=create_household_column(hb,Nb,ht,Nt)

In [37]:
#col2
def create_diseasestate_column(num_ppl,seed=1):
    initial_diseasestate=np.zeros(num_ppl)
    initial_diseasestate[np.random.choice(num_ppl, seed)] = 1
    return initial_diseasestate
disease_column=create_diseasestate_column(N)

In [38]:
#col3
def create_daystosymptoms_column(num_ppl):
    #weibull distribution parameters following (Backer et al. 2020 Eurosurveillance)
    k = (2.3/6.4)**(-1.086)
    L = 6.4 / (math.gamma(1 + 1/k))
    return np.array([random.weibullvariate(L,k) for ppl in np.arange(num_ppl)])
dsymptom_column=create_daystosymptoms_column(N)

In [39]:
#col4
def create_daycount_column(num_ppl):
    return np.zeros(num_ppl)
daycount_column=create_daycount_column(N)

In [40]:
#col5
def create_asymp_column(num_ppl,asymp_rate,age_column=None,num_ppl_chro=300):
    """
    num_ppl_pre: number of people with chronic diseases (pre-exisitng medical conditions) meaning they won't be asymptomatically infected
    """
    if age_column is not None:
        pass
    else:
        return np.random.uniform(0,1,N)<asymp_rate*(N/(N-num_ppl_chro))
asymp_column=create_asymp_column(N,pac)


In [41]:
sum(asymp_column)

3482

In [42]:
#col6-7 this might be unique to different camps
def create_age_column(age_data):
    return age_data
def create_gender_column(gender_data):
    return gender_data
age_column=create_age_column(age_and_gender[:,0])
gender_column=create_gender_column(age_and_gender[:,1])

In [43]:
#column8 might be unique to different camps this requires fitting a regression to age and chronic conditions
def create_chronic_column(num_ppl,age_column,num_ppl_chro=300):
    myfunction = lambda x: np.absolute(num_ppl_chro-np.sum((1+np.exp(-(x-11.69+.2191*age_column-0.001461*age_column**2))**(-1))))-num_ppl
    xopt = optimize.fsolve(myfunction, x0=[2])
    rchron = (1+np.exp(-(xopt-11.69+.2191*age_column-0.001461*age_column**2)))**(-1)
    chroncases = (np.random.uniform(np.min(rchron),1,num_ppl) < rchron)
    return chroncases
chronic_column=create_chronic_column(N,age_column)
    

In [44]:
def adjust_asymp_with_chronic(asymp_column,chronic_column):
    new_asymp_column=asymp_column.copy()
    new_asymp_column[chronic_column==1]=0
    return new_asymp_column
new_asymp_column=adjust_asymp_with_chronic(asymp_column,chronic_column)

In [45]:
sum(new_asymp_column)

3433

In [46]:
np.logical_and([gender_column==1], [age_column>=10])

array([[False,  True,  True, ..., False, False, False]])

In [50]:
def create_wanderer_column(gender_column,age_column):
    """
    Male of age greater than 10 are the wanderers in the camp
    """
    return np.logical_and([gender_column==1], [age_column>=10]).transpose()
wanderer_column=create_wanderer_column(gender_column,age_column)

In [65]:
pop_matrix=np.column_stack((household_column,disease_column,dsymptom_column,daycount_column,new_asymp_column,age_column,gender_column,chronic_column,wanderer_column))

In [87]:
assert pop_matrix.shape==(N, 9)

In [55]:
################################ Create population matrix (pop) ################################

# Columns:
# 0. Home number
# 1. Disease state: 0 = susceptible, 1 = exposed, 2 = presymptomatic, 3 = symptomatic, 4 = mild, 5 = severe, 6 = recovered. Similar states in quarentine are the same plus seven. In other words, this is a categorical variable with values between 0 and 13 inclusive.
# 2. Days to symptoms for this person
# 3. Days passed in current state
# 4. Whether this person will be asymptomatic
# 5. Age
# 6. Male: = 1 if male.
# 7. Chronic: = 1 if chronic disease?
# 8. Wanderer (Uses the larger radius).
# 
# pop is $N \times 9$

#  #### Columns 0-1 (House number and disease state)
# 
# - Randomly assign each person to a household (rN).
# - Draw a sample from uniform distribution between 1-810 (hb). Meaning we chose a random isobox.
# - Then repeat that 8100 (Nb) times. AKA assign each person to an isobox.
# - Do the same for the tent group.
# - Indices (ui) of the unique array that reconstruct the input array (rN).
# - The number of inidces should equal total population (N).
# - Note: Household column is not the huse itself but the index of that house hold.
# - Start each person as susceptible (col 1 = 0).
# - Randomly assign one person to be exposed to the virus (pop[someindex,1]=1).

rN = np.concatenate((np.ceil(hb*np.random.uniform(0,1,Nb)), hb+np.ceil(ht*np.random.uniform(0,1,Nt)))) 
U,ui = np.unique(rN, return_inverse=True)  # ui - indices from the unique sorted array that would reconstruct rN
assert U[ui].all() == rN.all()

pop_2 = np.column_stack((np.sort(ui), np.zeros((ui.shape)))) # Size N x 2 (18700 x 2).
assert pop_2.shape==(18700, 2)

pop_2[np.random.randint(0,N),1] = 1


# #### Columns 2-4 (symptoms)
# - Col 2: Days to first symptoms (if they develop symptoms) for each person, following (Backer et al. 2020 Eurosurveillance)
# - Col 3: Days passed in current state, 0 for now.
# - Col 4: Whether this person will be asymptomatic.

k = (2.3/6.4)**(-1.086)
L = 6.4 / (math.gamma(1 + 1/k))
pop_5 = np.column_stack((pop_2,
                         np.array([random.weibullvariate(L,k) for sample in np.arange(N)]),
                         np.zeros((N,1)), np.random.uniform(0,1,N)<pac*(N/(N-300))))

assert pop_5.shape==(18700, 5)


# #### Columns 5-6 (Age and sex)
# - Get N random samples from the distribution in the observed data with repetition.
# - Assign age and sex following the observed distribution in the camp.
# 
# I believe I treated the age and sex distributions as separate whereas I think I should treat
# the age and sex for a give individual together (if that makes sense?).
# i.e. I got the sex and age samples on their own where as the original Matlab code gets them
# from the age-sex pairs: age_and_sex(randsample(size(age_and_sex,1),N,true),:)
# 
# However, the distributions seem equal in the original program and here.
# 
# Billy: I have editted here by drawing samples from rows 



pop_7 = np.column_stack((pop_5,
                       age_and_gender[:,0],
                       age_and_gender[:,1]))
assert pop_7.shape==(18700, 7)


# #### Column 7-8 (Chronic States and wanderers)
# - Matching the relationship between age and chronic states in the data.
# - Males over the age of ten are wanderers.
# 
# There is a bug somewhere in the lambda function that makes the minimum (xopt) is too small compared to the original program (~1.62) which causes (rchron) to be 1 for all individuals ... which then makes the asymptomatic column all equal to zero.
# 
# Would it be sensible to put an upper limit on the age when defining wanderers? ie. 10 < age < 60
# 
# Billy: I am guessing the original equation is trying to find the xopt that makes myfunction-N equals to 0 according to the plot below because otherwise fmin will take the value xopt way to the left. But yeah I am confused in how this equation is produced here - we are aiming to assing 300 people or so with chronic conditions right?
# 
# Gonzalo: Fixed. We get ~300 counts.


myfunction = lambda x: np.absolute(300-np.sum((1+np.exp(-(x-11.69+.2191*pop_7[:,5]-0.001461*pop_7[:,5]**2))**(-1))))-N
xopt = optimize.fsolve(myfunction, x0=[2])
print(xopt)

x_s = np.linspace(-5, 5, 201)
y = [myfunction(x) for x in x_s]

rchron = (1+np.exp(-(xopt-11.69+.2191*pop_7[:,5]-0.001461*pop_7[:,5]**2)))**(-1)
chroncases = (np.random.uniform(np.min(rchron),1,N) < rchron) 

pop_8 = np.column_stack( (pop_7, chroncases))
assert pop_8.shape==(18700, 8)

# Ensure that people with chronic conditions are not asymptomatic, and correct number of asymptomatics above.
# In other words, if chronic conditions = 1 then not asymptomatic.
pop_8[ pop_8[:,7] == 1,4 ] = 0 
pop_9 = np.column_stack( (pop_8, np.logical_and([pop_8[:,6] == 1], [10 <= pop_8[:,5]]).transpose() ) ) # Wanderers
assert pop_9.shape==(18700, 9)

[1.55266208]
