In [1]:
import sys
if ".." not in sys.path:
    sys.path.insert(0, "..")

In [2]:
import numpy as np
import pandas as pd
import os
from os.path import join
from datetime import date, timedelta

In [3]:
def calculate_network_density(adj_matrix):
    return np.sum(adj_matrix) / (adj_matrix.shape[0] * (adj_matrix.shape[0] - 1))

In [4]:
def calculate_global_cc(adj_matrix):
    number_of_variables = adj_matrix.shape[0]
    #How many triplets/3 nodes that are connected to each other/ the total number of nodes that are connected to one another
    # Assume non-changing network, thus all nodes have all other nodes as possible neighbours
    #we count all of the nodes that are summed/triplets of nodes that can theoretically be connected with each other, network density is from a single node perspective. How
    #network density/total number of edges(constant denominator)
    nCr = number_of_variables * (number_of_variables - 1) / 2

    c_t_vector = list()
    for node_index in range(number_of_variables):
        luk = np.array(adj_matrix[node_index, :] != 1)
        neighbor_node_indices = np.arange(number_of_variables)[luk]

        ix_luk_grid = np.ix_(neighbor_node_indices, neighbor_node_indices)
        sub_matrix = adj_matrix[ix_luk_grid]

        pairs_among_neighbors = np.sum(sub_matrix) / 2
        c_t = pairs_among_neighbors / nCr

        c_t_vector.append(c_t)
    c_t_vector = np.array(c_t_vector)

    C_t = np.sum( np.multiply(nCr, c_t_vector) ) / ( nCr * len(c_t_vector) )
    return C_t

# For normal/untreated network

In [7]:
# for threshold in [0.4, 0.5, 0.6]:
threshold = 0.4
is_srd = False
start_date = date.fromisoformat("2020-04-15")
end_date   = date.fromisoformat("2021-03-16")

# netstats_df = pd.DataFrame(columns = ['date', 'density', 'global_cc', 'assortativity'])
netstats_df = pd.DataFrame(columns = ['date', 'density', 'global_cc'])

current_date = start_date
while current_date <= end_date:
    filepath = join('..', 'construct_network', 'data', 'processed','adjacency_matrices' f"threshold-{threshold}", f'{current_date}.pkl')
        
    filepath = os.path.abspath(filepath)
    print(f"{current_date=}")

    df = pd.read_pickle(f'../construct_network/data/processed/RSV_adjacency_matrices/threshold-{threshold}/{current_date}.pkl')

    # G = nx.from_numpy_matrix(df.to_numpy())
    # D_t = nx.function.density(G)
    D_t = calculate_network_density(df.to_numpy())
    GC_t= calculate_global_cc(df.to_numpy())

    print(end_date, D_t, GC_t)

    new_df = pd.DataFrame({
        "date": current_date,
        "density": D_t, 
        "global_cc": GC_t,
    }, index=[0])

    netstats_df = pd.concat([
        netstats_df, 
        new_df
    ],
    ignore_index=True)
    print(f"{netstats_df.shape=}")

    current_date += timedelta(days = 1)

netstats_df.to_csv(f'./processed/RSV-netstats-threshold-{threshold}.csv', index = False)

current_date=datetime.date(2020, 4, 15)
2021-03-16 0.4 0.1796825396825397
netstats_df.shape=(1, 3)
current_date=datetime.date(2020, 4, 16)
2021-03-16 0.38095238095238093 0.16825396825396827
netstats_df.shape=(2, 3)
current_date=datetime.date(2020, 4, 17)
2021-03-16 0.3333333333333333 0.1473015873015873
netstats_df.shape=(3, 3)
current_date=datetime.date(2020, 4, 18)
2021-03-16 0.3238095238095238 0.1403174603174603
netstats_df.shape=(4, 3)
current_date=datetime.date(2020, 4, 19)
2021-03-16 0.2571428571428571 0.13142857142857142
netstats_df.shape=(5, 3)
current_date=datetime.date(2020, 4, 20)
2021-03-16 0.19047619047619047 0.10857142857142857
netstats_df.shape=(6, 3)
current_date=datetime.date(2020, 4, 21)
2021-03-16 0.19047619047619047 0.10857142857142857
netstats_df.shape=(7, 3)
current_date=datetime.date(2020, 4, 22)
2021-03-16 0.19047619047619047 0.1180952380952381
netstats_df.shape=(8, 3)
current_date=datetime.date(2020, 4, 23)
2021-03-16 0.1523809523809524 0.10095238095238095
netst

  netstats_df = pd.concat([


2021-03-16 0.10476190476190476 0.07555555555555556
netstats_df.shape=(18, 3)
current_date=datetime.date(2020, 5, 3)
2021-03-16 0.09523809523809523 0.06603174603174604
netstats_df.shape=(19, 3)
current_date=datetime.date(2020, 5, 4)
2021-03-16 0.09523809523809523 0.06603174603174604
netstats_df.shape=(20, 3)
current_date=datetime.date(2020, 5, 5)
2021-03-16 0.10476190476190476 0.07301587301587302
netstats_df.shape=(21, 3)
current_date=datetime.date(2020, 5, 6)
2021-03-16 0.10476190476190476 0.07301587301587302
netstats_df.shape=(22, 3)
current_date=datetime.date(2020, 5, 7)
2021-03-16 0.11428571428571428 0.07682539682539682
netstats_df.shape=(23, 3)
current_date=datetime.date(2020, 5, 8)
2021-03-16 0.10476190476190476 0.06984126984126984
netstats_df.shape=(24, 3)
current_date=datetime.date(2020, 5, 9)
2021-03-16 0.10476190476190476 0.07238095238095238
netstats_df.shape=(25, 3)
current_date=datetime.date(2020, 5, 10)
2021-03-16 0.09523809523809523 0.0673015873015873
netstats_df.shape=(26

# For SRD network

In [36]:
# for threshold in [0.4, 0.5, 0.6]:
threshold = 0.5
is_srd = True
start_date = date.fromisoformat("2020-01-31")
end_date   = date.fromisoformat("2022-02-03")

# netstats_df = pd.DataFrame(columns = ['date', 'density', 'global_cc', 'assortativity'])
netstats_df = pd.DataFrame(columns = ['date', 'density', 'global_cc'])

current_date = start_date
while current_date <= end_date:

    filepath = join('..', '..', 'data', 'processed', 'adjacency_matrices', f"threshold-{threshold}", f'{current_date}.pkl')
    if is_srd:
        filepath = join('..', '..', 'data', 'processed', 'adjacency_matrices', f"threshold-{threshold}-srd", f'{current_date}.pkl')
        
    filepath = os.path.abspath(filepath)
    print(f"{current_date=}")

    df = pd.read_pickle(filepath)

    G = nx.from_numpy_matrix(df.to_numpy())
    D_t = nx.function.density(G)
    # C_t = nx.degree_assortativity_coefficient(G)
    # T_t = nx.transitivity(G)

    new_df = pd.DataFrame({
        "date": current_date,
        "density": D_t, 
        "global_cc": T_t,
        "assortativity": C_t,
    }, index=[0])

    netstats_df = pd.concat([
        netstats_df, 
        new_df
    ],
    ignore_index=True)
    print(f"{netstats_df.shape=}")

    current_date += timedelta(days = 1)

netstats_df.to_csv(f'../../data/processed/netstats/netstats-threshold-{threshold}-srd.csv', index = False)

current_date=datetime.date(2020, 1, 31)
netstats_df.shape=(1, 4)
current_date=datetime.date(2020, 2, 1)
netstats_df.shape=(2, 4)
current_date=datetime.date(2020, 2, 2)
netstats_df.shape=(3, 4)
current_date=datetime.date(2020, 2, 3)
netstats_df.shape=(4, 4)
current_date=datetime.date(2020, 2, 4)
netstats_df.shape=(5, 4)
current_date=datetime.date(2020, 2, 5)
netstats_df.shape=(6, 4)
current_date=datetime.date(2020, 2, 6)
netstats_df.shape=(7, 4)
current_date=datetime.date(2020, 2, 7)
netstats_df.shape=(8, 4)
current_date=datetime.date(2020, 2, 8)
netstats_df.shape=(9, 4)
current_date=datetime.date(2020, 2, 9)
netstats_df.shape=(10, 4)
current_date=datetime.date(2020, 2, 10)
netstats_df.shape=(11, 4)
current_date=datetime.date(2020, 2, 11)
netstats_df.shape=(12, 4)
current_date=datetime.date(2020, 2, 12)
netstats_df.shape=(13, 4)
current_date=datetime.date(2020, 2, 13)
netstats_df.shape=(14, 4)
current_date=datetime.date(2020, 2, 14)
netstats_df.shape=(15, 4)
current_date=datetime.date(2