In [6]:
import pandas as pd
import numpy as np
import networkx as nx
from common import create_triangle_list

In [7]:
# Load the two contact tables: contacts within a household (df1) and
# contacts across households (df2).
df1 = pd.read_csv("../RawData/kenya-households/scc2034_kilifi_all_contacts_within_households.csv")
df2 = pd.read_csv("../RawData/kenya-households/scc2034_kilifi_all_contacts_across_households.csv")

# Tag every contact with a "<day> - <hour>" label. The label is built
# column-wise rather than with a row-by-row apply: same strings for integer
# day/hour columns, without a Python-level call per row.
for contacts in (df1, df2):
    contacts['time'] = contacts['day'].astype(str) + " - " + contacts['hour'].astype(str)

# Stack both tables. Row labels from each file are kept as-is, so index
# values repeat in the combined frame.
df = pd.concat([df1, df2])

In [8]:
# Distinct time labels of the across-household contacts, in order of first appearance.
df2times = df2['time'].unique()

In [9]:
## Keep only contacts whose time label also occurs in the across-household
## table (df2times).
## NOTE(review): this does not check that the within-household table has data
## at those time labels as well -- confirm that is the intended filter.
in_across_times = df['time'].isin(df2times)
df = df.loc[in_across_times]

In [10]:
# Accumulators, one record per structure and time stamp:
#   elist -> [node_lo, node_hi, timeval]        undirected contact edges
#   tlist -> [<triangle nodes...>, timeval]     triangles from create_triangle_list
#   slist -> [<sorted clique nodes...>, timeval] maximal cliques (variable length)
# NOTE(review): nodes are identified by m1/m2 alone. If member ids repeat
# across households (h1/h2), distinct people would be merged into one node --
# confirm ids are unique across households.
elist = []
tlist = []
slist = []

# Split the contacts by time label once, instead of re-scanning the whole
# frame with a boolean mask for every time stamp.
contacts_by_time = {label: frame for label, frame in df.groupby('time', sort=False)}
no_contacts = df.iloc[0:0]  # stand-in for a time label with no rows in df

for time_label in df2times:
    # "<day> - <hour>" -> single integer code day*100 + hour
    # (assumes hour < 100 so codes stay unambiguous -- TODO confirm).
    day, hour = (int(part) for part in time_label.split("-"))
    timeval = 100 * day + hour

    curr_df = contacts_by_time.get(time_label, no_contacts)[['m1', 'm2']]

    # Store each edge with the smaller id first so (a, b) and (b, a) coincide.
    m1 = curr_df['m1'].to_numpy()
    m2 = curr_df['m2'].to_numpy()
    lows = np.minimum(m1, m2).tolist()
    highs = np.maximum(m1, m2).tolist()
    elist.extend([lo, hi, timeval] for lo, hi in zip(lows, highs))

    # Contact graph of this time stamp; repeated contacts collapse to one edge.
    G = nx.from_pandas_edgelist(curr_df, 'm1', 'm2')

    # Each triangle is expected to expose .tolist() (array-like).
    for tri in create_triangle_list(G):
        tlist.append(tri.tolist() + [timeval])

    # Maximal cliques of the graph, nodes sorted within each clique.
    for clique in nx.find_cliques(G):
        slist.append(sorted(clique) + [timeval])

In [11]:
# Drop the time column and keep each distinct edge once.
e_df = (
    pd.DataFrame(elist, columns=['node_1', 'node_2', 't'])
    .loc[:, ['node_1', 'node_2']]
    .drop_duplicates()
)

# Same treatment for triangles.
t_df = (
    pd.DataFrame(tlist, columns=['node_1', 'node_2', 'node_3', 't'])
    .loc[:, ['node_1', 'node_2', 'node_3']]
    .drop_duplicates()
)

e_df.to_csv('../Data/cont-village/edges.csv', index=False)
t_df.to_csv('../Data/cont-village/triangles.csv', index=False)

# Simplices keep their time code (last field) and are written in time order,
# one comma-separated record per line; rows have varying length, so they are
# written by hand rather than through a DataFrame.
slist.sort(key=lambda record: record[-1])
with open('../Data/cont-village/simplices.csv', 'w') as out:
    out.writelines(",".join(map(str, record)) + "\n" for record in slist)

In [12]:
# Build the node id -> group table and write it out with integer group codes.
# NOTE(review): every group_type writes to the same labels.csv, so adding a
# second entry to this list would overwrite the first file.
for group_type in ['h']:
    # Interleave (m1, <g>1), (m2, <g>2) row by row. Feeding that sequence to
    # dict() keeps ids in first-seen order and lets the last-seen group win,
    # exactly like assigning into a dict inside a row-wise loop, without
    # iterrows.
    # NOTE(review): an id seen with two different groups is silently
    # overwritten -- confirm m1/m2 are unique across groups.
    member_ids = df[['m1', 'm2']].to_numpy().ravel().tolist()
    member_groups = df[[f'{group_type}1', f'{group_type}2']].to_numpy().ravel().tolist()
    group_by_id = dict(zip(member_ids, member_groups))

    label_df = pd.DataFrame(list(group_by_id.items()), columns=['id', 'group'])

    # Integer code per group, numbered in order of first appearance.
    all_labels = list(pd.unique(label_df['group']))
    label_dict = {name: code for code, name in enumerate(all_labels)}
    label_df['group_code'] = label_df['group'].map(label_dict)
    label_df[['id', 'group_code']].to_csv("../Data/cont-village/labels.csv", index=False)

In [19]:
# Inspect all contacts whose first individual is member 3 of household 'L'.
df.loc[df['h1'].eq('L') & df['m1'].eq(3)]

Unnamed: 0,h1,m1,h2,m2,age1,age2,sex1,sex2,duration,day,hour,time
14993,L,3,L,7,3,3,F,F,20,1,14,1 - 14
15051,L,3,L,1,3,0,F,M,20,1,14,1 - 14
15425,L,3,L,7,3,3,F,F,40,2,12,2 - 12
15428,L,3,L,7,3,3,F,F,20,2,12,2 - 12
15432,L,3,L,7,3,3,F,F,20,2,12,2 - 12
...,...,...,...,...,...,...,...,...,...,...,...,...
137,L,3,E,13,3,3,F,F,20,2,12,2 - 12
138,L,3,E,13,3,3,F,F,20,2,12,2 - 12
139,L,3,E,13,3,3,F,F,20,2,12,2 - 12
140,L,3,E,6,3,3,F,M,20,2,12,2 - 12


In [14]:
# Display the label table: node id, its group label, and the integer group code.
label_df

Unnamed: 0,id,group,group_code
0,2,E,0
1,4,E,0
2,16,E,0
3,27,E,0
4,13,E,0
5,11,E,0
6,20,E,0
7,25,E,0
8,5,F,1
9,15,E,0
