In [None]:
import copy
from pathlib import Path

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import ydata_profiling

## Select paths and load data

In [None]:
# Output directory for the generated reports. Missing parent directories are
# created as needed, and an already existing directory is not an error.
reports_path = Path("analysis")
reports_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Snapshot identifier; it selects which edge list / node table pair is loaded
# and is embedded in the name and title of the generated report.
snapshot = 3
# Edge list and node table of the selected snapshot.
net_path = Path(f"networks/{snapshot}_ego_edges.csv")
nodes_path = Path(f"networks/{snapshot}_nodes.csv")

In [None]:
# Load the node table; the first CSV column is used as the index.
nodes_df = pd.read_csv(filepath_or_buffer=nodes_path, index_col=0)
nodes_df.head(n=5)

In [None]:
# Load the edge list (first CSV column is the index) and build a directed graph
# from its "source"/"target" columns, carrying "weight" over as an edge attribute.
_edges = pd.read_csv(net_path, index_col=0)
net = nx.from_pandas_edgelist(
    _edges,
    source="source",
    target="target",
    edge_attr="weight",
    create_using=nx.DiGraph,
)

In [None]:
# Quick size check: (number of edges, number of nodes).
net.number_of_edges(), net.number_of_nodes()

In [None]:
# spring_layout starts from random node positions; a fixed seed keeps the
# drawing identical across "Restart Kernel -> Run All" executions.
nx.draw_networkx(net, pos=nx.spring_layout(net, seed=42))

## Centrality computations

In [None]:
# Each entry is a single-item dict {column_name: {node: value}}; a later cell
# flattens the list into one DataFrame with one column per metric.
lst_metrics = []

# Copy of the graph with every edge attribute removed, used for the
# unweighted PageRank below.
_unweighted_net = copy.deepcopy(net)
for _, _, edge_attrs in _unweighted_net.edges(data=True):
    edge_attrs.clear()

# Degree: weighted variants sum the "weight" attribute, plain variants count edges.
lst_metrics.append({"in_degree_weighted": dict(net.in_degree(weight="weight"))})
lst_metrics.append({"in_degree": dict(net.in_degree())})

lst_metrics.append({"out_degree_weighted": dict(net.out_degree(weight="weight"))})
lst_metrics.append({"out_degree": dict(net.out_degree())})

# NOTE(review): networkx uses `weight` as a path length (distance) when computing
# shortest paths for betweenness -- confirm that matches the meaning of the
# "weight" column in the edge list (tie strength would need inverting).
lst_metrics.append({"betweenness_weighted": nx.betweenness_centrality(net, weight="weight")})
lst_metrics.append({"betweenness": nx.betweenness_centrality(net)})

# No `distance` argument is passed, so closeness is based on hop counts.
lst_metrics.append({"closeness": nx.closeness_centrality(net)})

# Column name fixed: it was previously misspelled as "pagerank_weighhted".
lst_metrics.append({"pagerank_weighted": nx.pagerank(net, weight="weight")})

lst_metrics.append({"pagerank": nx.pagerank(_unweighted_net)})

# voterank returns an ordered list of nodes; store each node's position in it
# (0 = first). Nodes absent from that list get no entry here.
lst_metrics.append({"voterank": {n: idx for idx, n in enumerate(nx.voterank(net))}})

In [None]:
# Flatten the list of single-metric dicts into one {column_name: {node: value}}
# mapping, then build a DataFrame with one column per metric.
dict_metrics = {}
for metric_entry in lst_metrics:
    dict_metrics.update(metric_entry)
df_metrics = pd.DataFrame(dict_metrics)

df_metrics.head()

## Merge with node attributes

In [None]:
# Restrict the node table to the three attribute columns under investigation.
investigated_node_attrs = nodes_df.loc[
    :,
    [
        "metric_postsojournOPI",
        "metric_presojournOPI",
        "metric_level gained",
    ],
]

investigated_node_attrs.head()

In [None]:
# Index-on-index inner join: only nodes present in both the attribute table
# and the metrics table are kept.
merged_df = investigated_node_attrs.merge(
    df_metrics,
    how="inner",
    left_index=True,
    right_index=True,
)

merged_df.head()

## Compute correlations

In [None]:
# Build the profiling report for the merged table and write it as HTML into
# the reports directory, named after the snapshot.
report = ydata_profiling.ProfileReport(
    merged_df,
    title=f"EDA of snapshot {snapshot}",
)
report.to_file(reports_path.joinpath(f"{snapshot}_eda.html"))