In [44]:
import sys
if ".." not in sys.path:
    sys.path.insert(0, "..")

In [45]:
import numpy as np
import pandas as pd
import os
from os.path import join
from datetime import date, timedelta

In [46]:
def calculate_network_density(adj_matrix):
    return np.sum(adj_matrix) / (adj_matrix.shape[0] * (adj_matrix.shape[0] - 1))

In [37]:
def calculate_global_cc(adj_matrix):
    number_of_variables = adj_matrix.shape[0]
    #How many triplets/3 nodes that are connected to each other/ the total number of nodes that are connected to one another
    # Assume non-changing network, thus all nodes have all other nodes as possible neighbours
    #we count all of the nodes that are summed/triplets of nodes that can theoretically be connected with each other, network density is from a single node perspective. How
    #network density/total number of edges(constant denominator)
    nCr = number_of_variables * (number_of_variables - 1) / 2

    c_t_vector = list()
    for node_index in range(number_of_variables):
        luk = np.array(adj_matrix[node_index, :] != 1)
        neighbor_node_indices = np.arange(number_of_variables)[luk]

        ix_luk_grid = np.ix_(neighbor_node_indices, neighbor_node_indices)
        sub_matrix = adj_matrix[ix_luk_grid]

        pairs_among_neighbors = np.sum(sub_matrix) / 2
        c_t = pairs_among_neighbors / nCr

        c_t_vector.append(c_t)
    c_t_vector = np.array(c_t_vector)

    C_t = np.sum( np.multiply(nCr, c_t_vector) ) / ( nCr * len(c_t_vector) )
    return C_t

# For normal/untreated network

In [49]:
# for threshold in [0.4, 0.5, 0.6]:
threshold = 0.6
is_srd = False
start_date = date.fromisoformat("2020-04-15")
end_date   = date.fromisoformat("2021-03-16")

# netstats_df = pd.DataFrame(columns = ['date', 'density', 'global_cc', 'assortativity'])
netstats_df = pd.DataFrame(columns = ['date', 'density', 'global_cc'])

current_date = start_date
while current_date <= end_date:
    filepath = join('..', 'construct_network', 'data', 'processed','d_corr_adjacency_matrices' f"threshold-{threshold}", f'{current_date}.pkl')
        
    filepath = os.path.abspath(filepath)
    print(f"{current_date=}")

    df = pd.read_pickle(f'../construct_network/data/processed/d_corr_adjacency_matrices/threshold-{threshold}/{current_date}.pkl')

    # G = nx.from_numpy_matrix(df.to_numpy())
    # D_t = nx.function.density(G)
    D_t = calculate_network_density(df.to_numpy())
    GC_t= calculate_global_cc(df.to_numpy())

    print(end_date, D_t, GC_t)

    new_df = pd.DataFrame({
        "date": current_date,
        "density": D_t, 
        "global_cc": GC_t,
    }, index=[0])

    netstats_df = pd.concat([
        netstats_df, 
        new_df
    ],
    ignore_index=True)
    print(f"{netstats_df.shape=}")

    current_date += timedelta(days = 1)

netstats_df.to_csv(f'./processed/d_corr-netstats-threshold-{threshold}.csv', index = False)

current_date=datetime.date(2020, 4, 15)
2021-03-16 0.4142857142857143 0.15555555555555556
netstats_df.shape=(1, 3)
current_date=datetime.date(2020, 4, 16)
2021-03-16 0.40476190476190477 0.15619047619047619
netstats_df.shape=(2, 3)
current_date=datetime.date(2020, 4, 17)
2021-03-16 0.2904761904761905 0.15301587301587302
netstats_df.shape=(3, 3)
current_date=datetime.date(2020, 4, 18)
2021-03-16 0.22380952380952382 0.1434920634920635
netstats_df.shape=(4, 3)
current_date=datetime.date(2020, 4, 19)
2021-03-16 0.19523809523809524 0.14793650793650795
netstats_df.shape=(5, 3)
current_date=datetime.date(2020, 4, 20)
2021-03-16 0.24285714285714285 0.1676190476190476
netstats_df.shape=(6, 3)
current_date=datetime.date(2020, 4, 21)
2021-03-16 0.22380952380952382 0.16
netstats_df.shape=(7, 3)
current_date=datetime.date(2020, 4, 22)
2021-03-16 0.23333333333333334 0.1580952380952381
netstats_df.shape=(8, 3)
current_date=datetime.date(2020, 4, 23)
2021-03-16 0.20476190476190476 0.15047619047619049
n

  netstats_df = pd.concat([


2021-03-16 0.14761904761904762 0.11682539682539683
netstats_df.shape=(46, 3)
current_date=datetime.date(2020, 5, 31)
2021-03-16 0.11904761904761904 0.09968253968253968
netstats_df.shape=(47, 3)
current_date=datetime.date(2020, 6, 1)
2021-03-16 0.10952380952380952 0.09333333333333334
netstats_df.shape=(48, 3)
current_date=datetime.date(2020, 6, 2)
2021-03-16 0.10952380952380952 0.09333333333333334
netstats_df.shape=(49, 3)
current_date=datetime.date(2020, 6, 3)
2021-03-16 0.11904761904761904 0.10031746031746032
netstats_df.shape=(50, 3)
current_date=datetime.date(2020, 6, 4)
2021-03-16 0.12857142857142856 0.10603174603174603
netstats_df.shape=(51, 3)
current_date=datetime.date(2020, 6, 5)
2021-03-16 0.12857142857142856 0.10603174603174603
netstats_df.shape=(52, 3)
current_date=datetime.date(2020, 6, 6)
2021-03-16 0.14761904761904762 0.11555555555555555
netstats_df.shape=(53, 3)
current_date=datetime.date(2020, 6, 7)
2021-03-16 0.12857142857142856 0.10603174603174603
netstats_df.shape=(5

# For SRD network

In [8]:
# for threshold in [0.4, 0.5, 0.6]:
threshold = 0.5
is_srd = True
start_date = date.fromisoformat("2020-01-31")
end_date   = date.fromisoformat("2022-02-03")

# netstats_df = pd.DataFrame(columns = ['date', 'density', 'global_cc', 'assortativity'])
netstats_df = pd.DataFrame(columns = ['date', 'density', 'global_cc'])

current_date = start_date
while current_date <= end_date:

    filepath = join('..', '..', 'data', 'processed', 'd_corr_RSV_adjacency_matrices', f"threshold-{threshold}", f'{current_date}.pkl')
    if is_srd:
        filepath = join('..', '..', 'data', 'processed', 'd_corr_RSV_adjacency_matrices', f"threshold-{threshold}-srd", f'{current_date}.pkl')
        
    filepath = os.path.abspath(filepath)
    print(f"{current_date=}")

    df = pd.read_pickle(filepath)

    G = nx.from_numpy_matrix(df.to_numpy())
    D_t = nx.function.density(G)
    # C_t = nx.degree_assortativity_coefficient(G)
    # T_t = nx.transitivity(G)

    new_df = pd.DataFrame({
        "date": current_date,
        "density": D_t, 
        "global_cc": T_t,
        "assortativity": C_t,
    }, index=[0])

    netstats_df = pd.concat([
        netstats_df, 
        new_df
    ],
    ignore_index=True)
    print(f"{netstats_df.shape=}")

    current_date += timedelta(days = 1)

netstats_df.to_csv(f'../../data/processed/netstats/RSV-netstats-threshold-{threshold}-srd.csv', index = False)

current_date=datetime.date(2020, 1, 31)


FileNotFoundError: [Errno 2] No such file or directory: 'c:\\Users\\Cheska\\Documents\\GitHub\\-Thesis-Infodemiology-GT\\covid-19-google-trends-network\\data\\processed\\d_corr_RSV_adjacency_matrices\\threshold-0.5-srd\\2020-01-31.pkl'