In [1]:
import sys
import os
import pandas as pd
import pickle
from collections import Counter, OrderedDict
import numpy as np

sys.path.append("../code/")
from data_analysis import groups_at_time_t, group_size_dist
from data_analysis import get_transition_matrix, transition_matrix_to_df
from data_analysis import get_group_durations
from data_analysis import get_group_times, get_dis_agg_matrices, get_full_dis_agg_matrices, dis_agg_matrix_to_df
from data_analysis import get_group_similarity
from data_analysis import measure_social_memory, get_interevent_times, get_node_trajectory
from data_analysis import get_probs_leaving_group
from utils import get_Hs_from_groups_dict, get_cumulative_Gs_from_Hs, reduce_number_of_points

In [2]:
#Setting up directories for outputs
DIRs_TO_CREATE = ["results/Confs"]

for directory in DIRs_TO_CREATE:
    #exist_ok=True makes the creation idempotent and avoids the
    #check-then-create race of os.path.exists followed by os.makedirs
    os.makedirs(directory, exist_ok=True)

In [2]:
#Name of the dataset and list of the contexts processed by every cell below
dataset = "Confs"
contexts = ["conf%d"%edition for edition in range(16, 20)]

## Extracting group interactions

In [3]:
#Input: processed data of the selected dataset
IN_PATH = '../data-processed/%s/'%dataset

#Output: folder storing the results for the selected dataset
OUT_PATH = '../data-analysis/results/%s/'%dataset
#exist_ok=True avoids the check-then-create race of os.path.exists + os.makedirs
os.makedirs(OUT_PATH, exist_ok=True)

for context in contexts:
    print(context)

    #Reading the processed file of this context
    IN_FNAME = "%s_processed.csv.gz"%context
    df = pd.read_csv(IN_PATH+IN_FNAME)

    #One entry per distinct timestamp, holding the output of groups_at_time_t
    groups_at_t_dict = {}
    for timestamp in df['timestamp'].unique():
        groups_at_t_dict[timestamp] = groups_at_time_t(df, timestamp, dataset=dataset)

    #Saving (the context manager closes the file even if pickling fails)
    OUT_FNAME = "groups_at_t_%s.p"%context
    with open(OUT_PATH+OUT_FNAME, "wb") as fout:
        pickle.dump(groups_at_t_dict, fout)

conf16
conf17
conf18
conf19


## Computing group size distributions

In [4]:
#Input
IN_PATH = '../data-analysis/results/%s/'%dataset
#Output
OUT_PATH = '../data-analysis/results/%s/'%dataset
#exist_ok=True avoids the check-then-create race of os.path.exists + os.makedirs
os.makedirs(OUT_PATH, exist_ok=True)

for context in contexts:
    print(context)
    #Reading (the context manager guarantees the file handle is closed)
    FNAME = "groups_at_t_%s.p"%context
    with open(IN_PATH+FNAME, "rb") as fin:
        groups_at_t_dict = pickle.load(fin)

    #Computing group size distribution
    ks, Pks = group_size_dist(groups_at_t_dict)

    #Saving as a two-column table (k, Pk)
    gsize_df = pd.DataFrame({'k':ks,'Pk':Pks})
    FNAME = "Pk_%s.csv"%context
    gsize_df.to_csv(OUT_PATH+FNAME, header=True, index=False)

conf16
conf17
conf18
conf19


## Transition matrices

In [5]:
#Input
IN_PATH = '../data-analysis/results/%s/'%dataset
#Output
OUT_PATH = '../data-analysis/results/%s/'%dataset

for context in contexts:
    print(context)
    #Reading (the context manager guarantees the file handle is closed)
    FNAME = "groups_at_t_%s.p"%context
    with open(IN_PATH+FNAME, "rb") as fin:
        groups_at_t_dict = pickle.load(fin)
    #Converting to xgi object
    Hs = get_Hs_from_groups_dict(groups_at_t_dict)
    #Computing transition matrix
    #NOTE(review): the recorded output shows a warning raised at
    #`matrix / row_sums[:, np.newaxis]` when normed=True; presumably some rows
    #sum to zero (division by zero) - confirm the resulting entries are handled
    T = get_transition_matrix(Hs, max_k = 20, normed=True)
    #Converting it to a dataframe
    df_T = transition_matrix_to_df(T)
    #Saving
    OUT_FNAME = "T_%s.csv"%context
    df_T.to_csv(OUT_PATH+OUT_FNAME, header=True, index=False)

conf16


  new_matrix = matrix / row_sums[:, np.newaxis]


conf17


  new_matrix = matrix / row_sums[:, np.newaxis]


conf18


  new_matrix = matrix / row_sums[:, np.newaxis]


conf19


  new_matrix = matrix / row_sums[:, np.newaxis]


## Group duration distributions

In [6]:
#Input
IN_PATH = '../data-analysis/results/%s/'%dataset
#Output
OUT_PATH = '../data-analysis/results/%s/'%dataset

for context in contexts:
    print(context)
    #Reading (the context manager guarantees the file handle is closed)
    FNAME = "groups_at_t_%s.p"%context
    with open(IN_PATH+FNAME, "rb") as fin:
        groups_at_t_dict = pickle.load(fin)
    #Computing group durations
    durations = get_group_durations(groups_at_t_dict)
    #Saving (closing the file ensures the pickle is fully flushed to disk)
    OUT_FNAME = "gdurations_%s.p"%context
    with open(OUT_PATH+OUT_FNAME, "wb") as fout:
        pickle.dump(durations, fout)

conf16
conf17
conf18
conf19


## Group aggregation and disaggregation matrices


First, I compute the group times: for each group, I store its members together with the times at which the group is created and destroyed.

In [7]:
#Input
IN_PATH = '../data-analysis/results/%s/'%dataset
#Output
OUT_PATH = '../data-analysis/results/%s/'%dataset

for context in contexts:
    print(context)
    #Reading (the context manager guarantees the file handle is closed)
    FNAME = "groups_at_t_%s.p"%context
    with open(IN_PATH+FNAME, "rb") as fin:
        groups_at_t_dict = pickle.load(fin)
    print("Read. Computing groups and time...")
    #Computing times of groups start and end
    groups_and_times = get_group_times(groups_at_t_dict)
    print("Groups and times computed. Saving...")
    #Saving (closing the file ensures the pickle is fully flushed to disk)
    OUT_FNAME = "group_times_%s.p"%context
    with open(OUT_PATH+OUT_FNAME, "wb") as fout:
        pickle.dump(groups_and_times, fout)

conf16
Read. Computing groups and time...
Groups and times computed. Saving...
conf17
Read. Computing groups and time...
Groups and times computed. Saving...
conf18
Read. Computing groups and time...
Groups and times computed. Saving...
conf19
Read. Computing groups and time...
Groups and times computed. Saving...


Computing the matrices associated with the size of the largest sub-group joining or leaving a group

In [4]:
#Input
IN_PATH = '../data-analysis/results/%s/'%dataset
#Output
OUT_PATH = '../data-analysis/results/%s/'%dataset

for context in contexts:
    print(context)
    #Reading groups and times (context managers close the file handles)
    FNAME = "groups_at_t_%s.p"%context
    with open(IN_PATH+FNAME, "rb") as fin:
        groups_at_t_dict = pickle.load(fin)
    #Reading times of groups start and end
    FNAME = "group_times_%s.p"%context
    with open(IN_PATH+FNAME, "rb") as fin:
        groups_and_times = pickle.load(fin)
    print("Groups read. Computing matrices...")
    #Computing dis- and aggregation matrices
    #NOTE(review): the recorded output shows a warning raised at
    #`matrix / row_sums[:, np.newaxis]` when normed=True; presumably some rows
    #sum to zero (division by zero) - confirm the resulting entries are handled
    D, A = get_dis_agg_matrices(groups_at_t_dict, groups_and_times, max_k = 15, normed=True)
    #Converting them to dataframes
    df_D = dis_agg_matrix_to_df(D)
    df_A = dis_agg_matrix_to_df(A)
    print("Done. Saving...")
    #Saving
    OUT_FNAME = "D_%s.csv"%context
    df_D.to_csv(OUT_PATH+OUT_FNAME, header=True, index=False)
    OUT_FNAME = "A_%s.csv"%context
    df_A.to_csv(OUT_PATH+OUT_FNAME, header=True, index=False)

conf16
Groups read. Computing matrices...


  new_matrix = matrix / row_sums[:, np.newaxis]


Done. Saving...
conf17
Groups read. Computing matrices...


  new_matrix = matrix / row_sums[:, np.newaxis]


Done. Saving...
conf18
Groups read. Computing matrices...


  new_matrix = matrix / row_sums[:, np.newaxis]


Done. Saving...
conf19
Groups read. Computing matrices...
Done. Saving...


  new_matrix = matrix / row_sums[:, np.newaxis]


## Checking multi-membership

In [9]:
#Counter is already imported in the first cell of the notebook,
#so it is not re-imported here
IN_PATH = '../data-analysis/results/%s/'%dataset

#Degree counts of every context, keyed by context name
deg_count_collection = {}

for context in contexts:
    #Reading (the context manager guarantees the file handle is closed)
    FNAME = "groups_at_t_%s.p"%context
    with open(IN_PATH+FNAME, "rb") as fin:
        groups_at_t_dict = pickle.load(fin)

    Hs = get_Hs_from_groups_dict(groups_at_t_dict)

    #I store here the degree of all nodes at all times
    flatten_degrees = [k for t, H in Hs.items() for n, k in H.degree().items()]

    #Degree count: how many (node, time) entries show each degree value
    deg_count = Counter(flatten_degrees)
    deg_count_collection[context] = deg_count

In [10]:
#Printing, for each context, the fraction of stored degree values equal to 1
for context in contexts:
    #Indexing the Counter directly: a missing key counts as 0 instead of
    #raising the KeyError that a plain (Ordered)dict lookup would raise
    dc = deg_count_collection[context]
    total = sum(dc.values())
    #An empty count has no defined fraction: report nan rather than crashing
    print(context, dc[1]/total if total else float("nan"))

conf16 0.6124441051368742
conf17 0.7598521979448185
conf18 0.7018039980497318
conf19 0.7751829826166514


## Measuring social memory

In [11]:
#Input
IN_PATH = '../data-analysis/results/%s/'%dataset
#Output
OUT_PATH = '../data-analysis/results/%s/'%dataset

for context in contexts:
    print(context)
    #Reading (context managers guarantee the file handles are closed)
    FNAME = "groups_at_t_%s.p"%context
    with open(IN_PATH+FNAME, "rb") as fin:
        groups_at_t_dict = pickle.load(fin)
    #Converting to xgi object
    Hs = get_Hs_from_groups_dict(groups_at_t_dict)
    print("Hypergraphs read.")
    #Reading times of groups start and end
    FNAME = "group_times_%s.p"%context
    with open(IN_PATH+FNAME, "rb") as fin:
        groups_and_times = pickle.load(fin)
    print("Groups and times read.")
    #Computing the cumulative networks of contacts
    Gs = get_cumulative_Gs_from_Hs(Hs)
    print("Cumulative contact graphs computed.")
    #Measuring 'social memory' dataframe
    memory_df = measure_social_memory(Hs, groups_at_t_dict, Gs, groups_and_times)
    print("Social memory dataframe computed. Saving...")
    #Saving as a gzip-compressed csv
    OUT_FNAME = "social_memory_%s.csv.gz"%context
    memory_df.to_csv(OUT_PATH+OUT_FNAME, header=True, index=False, compression="gzip")

conf16
Hypergraphs read.
Groups and times read.
Cumulative contact graphs computed.
Social memory dataframe computed. Saving...
conf17
Hypergraphs read.
Groups and times read.
Cumulative contact graphs computed.
Social memory dataframe computed. Saving...
conf18
Hypergraphs read.
Groups and times read.
Cumulative contact graphs computed.
Social memory dataframe computed. Saving...
conf19
Hypergraphs read.
Groups and times read.
Cumulative contact graphs computed.
Social memory dataframe computed. Saving...


## Computing inter-event times

In [12]:
#Input
IN_PATH = '../data-analysis/results/%s/'%dataset
#Output
OUT_PATH = '../data-analysis/results/%s/'%dataset

for context in contexts:
    print(context)
    #Reading times of groups start and end (the context manager closes the file)
    FNAME = "group_times_%s.p"%context
    with open(IN_PATH+FNAME, "rb") as fin:
        groups_and_times = pickle.load(fin)
    print("Groups and times read.")
    #Computing inter-event times
    interevent_times = get_interevent_times(groups_and_times)
    print("Interevent times computed. Saving...")
    #Saving (closing the file ensures the pickle is fully flushed to disk)
    OUT_FNAME = "interevent_times_%s.p"%context
    with open(OUT_PATH+OUT_FNAME, "wb") as fout:
        pickle.dump(interevent_times, fout)

conf16
Groups and times read.
Interevent times computed. Saving...
conf17
Groups and times read.
Interevent times computed. Saving...
conf18
Groups and times read.
Interevent times computed. Saving...
conf19
Groups and times read.
Interevent times computed. Saving...


## Computing trajectories across group sizes

In [13]:
#Input
IN_PATH = '../data-analysis/results/%s/'%dataset
#Output
OUT_PATH = '../data-analysis/results/%s/'%dataset

for context in contexts:
    print(context)
    #Reading (the context manager guarantees the file handle is closed)
    FNAME = "groups_at_t_%s.p"%context
    with open(IN_PATH+FNAME, "rb") as fin:
        groups_at_t_dict = pickle.load(fin)
    #Converting to xgi object
    Hs = get_Hs_from_groups_dict(groups_at_t_dict)
    print("Hypergraphs read.")
    #Computing the trajectory matrix and the associated index-to-node mapping
    Traj, index_to_node = get_node_trajectory(Hs)
    print("Trajectory matrix computed.")
    #Saving the matrix
    OUT_FNAME = "trajectories_matrix_%s.p"%context
    with open(OUT_PATH+OUT_FNAME, "wb") as fout:
        pickle.dump(Traj, fout)
    #Saving the index-to-node mapping
    #NOTE(review): unlike the other outputs, this name has no "_" before the
    #context (e.g. "trajectories_matrix_i2nconf16.p"); kept unchanged because
    #code reading this file may rely on it - confirm before renaming
    OUT_FNAME = "trajectories_matrix_i2n%s.p"%context
    with open(OUT_PATH+OUT_FNAME, "wb") as fout:
        pickle.dump(index_to_node, fout)

conf16
Hypergraphs read.
Trajectory matrix computed.
conf17
Hypergraphs read.
Trajectory matrix computed.
conf18
Hypergraphs read.
Trajectory matrix computed.
conf19
Hypergraphs read.
Trajectory matrix computed.


## Computing the probabilities of leaving groups (to inform our Logistic function)
I need to compute the probability $p_k(\tau)$ that a node leaves a group of size $k$ after a residence time of $\tau$ timesteps in that group. This yields one $p_k(\tau)$ curve for each value of $k$.

In [4]:
#Residence times (in timesteps) passed to get_probs_leaving_group: 1, 2, ..., 999
taus = np.arange(1, 1000)
#Group sizes k passed to get_probs_leaving_group: 1, 2, ..., 10
gsizes = list(range(1, 11))

In [5]:
#Input and output folders do not depend on the context: defined once, outside the loop
#Input
IN_PATH = '../data-analysis/results/%s/'%dataset
#Output
OUT_PATH = '../data-analysis/results/%s/'%dataset

for context in contexts:
    print(context)

    #Loading group_duration (the context manager guarantees the file is closed)
    IN_FNAME = 'gdurations_%s.p'%context
    with open(IN_PATH+IN_FNAME, "rb") as fin:
        durations = pickle.load(fin)
    print("Computing probabilities...")
    #Probabilities computed for the group sizes in gsizes and the residence times in taus
    prob_by_size = get_probs_leaving_group(durations, gsizes, taus)
    print("Done. Saving...")
    #Saving (closing the file ensures the pickle is fully flushed to disk)
    OUT_FNAME = "Prob_leaving_group_sizek_after_tau_%s.p"%context
    with open(OUT_PATH+OUT_FNAME, "wb") as fout:
        pickle.dump(prob_by_size, fout)

conf16
Computing probabilities...
Done. Saving...
conf17
Computing probabilities...
Done. Saving...
conf18
Computing probabilities...
Done. Saving...
conf19
Computing probabilities...
Done. Saving...
