# Average centrality_correlations across snapshots and analyse the result

In [None]:
from pathlib import Path

import pandas as pd
import ydata_profiling

## Load data

In [None]:
# Name of the network variant; it selects the analysis/<network_type>/ directory.
network_type = "tl123_edges"

# Per-snapshot EDA tables, keyed by the snapshot number as a string ("1", "2", "3").
edas = {}

for i in range(1, 4):
    eda_path = Path(f"analysis/{network_type}/{i}_eda.csv")
    eda = pd.read_csv(eda_path, index_col=0)
    # Turn the index (first CSV column) into a regular "node" column. Naming the
    # axis first works whether or not that CSV column has a header; renaming a
    # literal "index" column would silently do nothing when it has one.
    eda = eda.rename_axis("node").reset_index()
    # Constant marker; summing it per node later yields the number of snapshots
    # the node occurs in.
    eda["snap_present"] = 1
    edas[str(i)] = eda


## Average results

In [None]:
# Stack every loaded snapshot into one frame. The outer index level labels the
# rows with their snapshot ("snapshot1", "snapshot2", ...), following the
# insertion order of `edas`, so the list of snapshots is not hard-coded here.
merged_df = pd.concat(
    list(edas.values()),
    keys=[f"snapshot{snap}" for snap in edas],
)

In [None]:
# Collect the (node, gender) columns of every snapshot
_ga = [eda[["node", "metric_Gender"]] for eda in edas.values()]

# Stack them, order by node, keep each distinct (node, gender) pair once and
# index the result by node.
# NOTE(review): a node whose "metric_Gender" differs between snapshots keeps one
# row per distinct value - confirm whether that can occur in the data.
genders_aggregated = pd.concat(_ga, axis=0)
genders_aggregated = genders_aggregated.sort_values("node")
genders_aggregated = genders_aggregated.drop_duplicates()
genders_aggregated = genders_aggregated.reset_index(drop=True).set_index("node")

genders_aggregated

In [None]:
# Only numerical columns can be averaged
numerical_columns = merged_df.select_dtypes(include="number").columns

# Group on the full frame and pick the value columns afterwards: selecting the
# numerical columns first drops "node" whenever node labels are not numeric, and
# the groupby then fails with a KeyError. "node" is the grouping key, so it is
# excluded from the averaged columns either way.
value_columns = numerical_columns.drop("node", errors="ignore")
nodes = merged_df.groupby("node")
average_values = nodes[list(value_columns)].mean()

# Replace the averaged "snap_present" marker with its sum, i.e. the number of
# snapshots that a particular node was present in
average_values["snap_present"] = nodes["snap_present"].sum()

average_values

In [None]:
# Attach the aggregated gender of each node to its averaged metrics;
# rows of `average_values` are kept and matched on the node label.
average_values_with_sex = average_values.join(
    other=genders_aggregated,
    on="node",
    how="left",
)

## Perform analysis

In [None]:
# Build the profiling report for the averaged table. Type inference is switched
# off so the column dtypes of the DataFrame are used as they are, and the
# "auto", Pearson and Spearman correlations are all requested explicitly.
report = ydata_profiling.ProfileReport(
    average_values_with_sex,
    title="EDA of averaged snapshots",  # plain literal: there is nothing to interpolate
    infer_dtypes=False,
    explorative=True,
    correlations={
        "auto": {"calculate": True},
        "pearson": {"calculate": True},
        "spearman": {"calculate": True},
    },
)

## Save output

In [None]:
# Write the averaged table first, then the HTML profiling report, both next to
# the per-snapshot inputs in analysis/<network_type>/.
average_values_with_sex.to_csv(
    path_or_buf=f"analysis/{network_type}/avg_eda.csv",
)
report.to_file(
    output_file=f"analysis/{network_type}/avg_eda.html",
)