# Import Modules

In [1]:
import itertools
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import torch
import scipy.sparse as sp
import torch.nn as nn
from node2vec import Node2Vec
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score
import dgl

  from .autonotebook import tqdm as notebook_tqdm


# Import Data

In [6]:
# Load the tab-separated 2020-08 extract and discard the columns that are not used below.
# (The 2020-07 extract lives next to it: '../RVF_ATX_PID_HZ-2020-07.tsv'.)
df = pd.read_csv('../RVF_ATX_PID_HZ-2020-08.tsv', sep='\t').drop(
    columns=['local_date', 'local_hour', 'gender', 'age', 'full_panel_reweighted_sag_score', 'home_zip']
)

# Keep only rows whose 'dwell' value is at least this threshold.
# 0 is the setting in use; 60 was the alternative tried before.
dwell_time_threshold = 0
df = df.loc[df['dwell'] >= dwell_time_threshold]

# Preview the first rows of the filtered frame.
df.head()

Unnamed: 0,persistentid,venueid,utc_date,utc_hour,dwell
0,2691ab5b6c9ba0ffe1fd7d09a623db93430c26f0b018f8...,40b52f80f964a52051001fe3,2020-08-15,18,8939
1,41d579df6be987aeaee224fc657e7f4e9fe9cff6e12403...,40b52f80f964a52051001fe3,2020-08-15,21,185
2,5cd56741546376e88b3e6d826d58eeaf5bafd33dcfc7f4...,40b52f80f964a52051001fe3,2020-08-23,19,2526
3,7918df0b005f4fb5b1ec9707f89da2477788aabf4a268d...,40b52f80f964a52051001fe3,2020-08-25,20,331
4,11ab84bc51dd4b8591715a5bd8818289b51eb5c56e65f7...,40b52f80f964a52051001fe3,2020-08-30,21,805


# Generate 31 Contact networks, one for each day

In [7]:
def add_edges_within_group(group, network):
    """Append to ``network`` one row per pair of rows in ``group`` with different ids.

    Every unordered pair of row positions (earlier row first) is visited; a pair
    is recorded only when the two ``persistentid`` values differ, so repeated ids
    inside a group never produce a self-loop. The same pair of ids may be recorded
    more than once when an id occurs on several rows.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to pair up. Only the ``persistentid`` column is read.
    network : pandas.DataFrame
        Edge table, modified in place. Each recorded pair is written as
        ``[node1, node2]`` at the row label ``len(network)``.

    Returns
    -------
    None
    """
    # Fewer than two rows means there is nothing to pair.
    if len(group) < 2:
        return

    ids = group['persistentid'].to_numpy()
    # combinations() walks the positions in the same (i, j) order, i < j,
    # as a nested index loop would.
    for first_id, second_id in itertools.combinations(ids, 2):
        if first_id != second_id:
            network.loc[len(network)] = [first_id, second_id]
            

# Build one contact graph per day of 2020-08 and save each one as a GEXF file.
# Within a day, two persistentids are connected when they both have a row for the
# same venueid in the same utc_hour.
output_dir = "contact_graphs_no_dwell_threshold"
# nx.write_gexf does not create missing folders, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

for i in range(31):
    # date = '2020-07-' + str(i+1).zfill(2)
    date = '2020-08-' + str(i+1).zfill(2)

    # filter the data for date
    df_date_filtered = df[df['utc_date'] == date]

    # group the data by venue and hour
    df_date_filtered_grouped_by_venue = df_date_filtered.groupby(['venueid', 'utc_hour'])

    # Collect the day's edges in a plain list and build the edge table once.
    # Appending to a DataFrame row by row (as add_edges_within_group does via
    # network.loc[len(network)]) re-allocates on every insert and gets slow as
    # the table grows. The pairs are generated in the same order: groups in
    # groupby order, and within a group every pair of rows (earlier row first)
    # whose ids differ.
    edges = []
    for _, ids_in_group in df_date_filtered_grouped_by_venue['persistentid']:
        for node1, node2 in itertools.combinations(ids_in_group.tolist(), 2):
            if node1 != node2:
                edges.append((node1, node2))

    network = pd.DataFrame(edges, columns=['node1', 'node2'])
    G = nx.from_pandas_edgelist(network, 'node1', 'node2')

    # save the graph G as a .gexf file
    target_file_location = f"{output_dir}/{date}.gexf"
    nx.write_gexf(G, target_file_location)


# def add_edges_within_group(group, G):
#     # group is a dataframe
#     # G is a graph
#     # add edges between all pairs of nodes in the group
#     num_rows = group.shape[0]
#     if num_rows == 1:
#         return
#     for i in range(num_rows):
#         for j in range(i+1, num_rows):
#             node1 = group.iloc[i]['persistentid']
#             node2 = group.iloc[j]['persistentid']
#             # if G.has_edge(node1, node2):
#             #     G[node1][node2]['weight'] += 1
#             # else:
#             #     G.add_edge(node1, node2, weight=1)
#             if node1 != node2:
#                 G.add_edge(node1, node2)
            

# for i in range(31):
#     date = '2020-07-' + str(i+1).zfill(2)
#     # print(date)

#     # filter the data for date
#     df_date_filtered = df[df['utc_date'] == date]
#     # print(len(df_date_filtered))

#     # group the data by venue_id
#     df_date_filtered_grouped_by_venue = df_date_filtered.groupby(['venueid', 'utc_hour'])

#     G = nx.Graph()
#     df_date_filtered_grouped_by_venue.apply(add_edges_within_group, G=G)

#     # save the graph G as a .gexf file
#     target_file_location = f"contact_graphs/{date}.gexf"
#     nx.write_gexf(G, target_file_location)



In [4]:
# Report the size of the graph currently bound to G: edge count first, then node count.
# NOTE(review): this cell's execution count (In [4]) is lower than that of the
# graph-building cell (In [7]), so the numbers shown may come from an earlier run —
# re-run this cell after rebuilding the graphs to confirm.
print(G.number_of_edges(), G.number_of_nodes(), sep='\n')

4838
4102
