### Imports
---

In [1]:
import pandas as pd
import networkx as nx
from pyvis import network
import matplotlib.pyplot as plt

#utils
from src.utils.dataset import get_dataset
from src.utils.visualization import generate_network

### Dataframe preparation
---

In [2]:
transactions_df = get_dataset().head(500)

In [3]:
transactions_df.head(2)

Unnamed: 0,sender,receiver,amount
0,6456,9069,465.05
1,7516,9543,564.64


In [4]:
transactions_df.shape

(500, 3)

In [5]:
relationships_df = transactions_df.groupby(["sender", "receiver"], as_index=False).agg({"amount":  "sum"})
relationships_df.columns = ['sender','receiver','sum']
relationships_df = relationships_df.rename(columns={"sum": "value"})

In [6]:
relationships_df["title"] = relationships_df.apply(lambda df: f"from: {df.sender}\nto: {df.receiver}\namount (BRL): {df.value}", axis=1)

In [7]:
relationships_df.head(2)

Unnamed: 0,sender,receiver,value,title
0,14,2909,80.91,from: 14.0\nto: 2909.0\namount (BRL): 80.91
1,14,7000,80.91,from: 14.0\nto: 7000.0\namount (BRL): 80.91


### Network building
---

In [11]:
relationship_network = nx.from_pandas_edgelist(relationships_df, source="sender", target="receiver", edge_attr=["title","value"])

In [12]:
degree_dict_G = dict(relationship_network.degree)
nx.set_node_attributes(relationship_network, degree_dict_G, "value")

In [13]:
title_dict_G = {}
for node in list(relationship_network.nodes):
    title_dict_G[node] = f"id: {node}"
nx.set_node_attributes(relationship_network, title_dict_G, "title")

In [14]:
generate_network(relationship_network, "pyvis_network.html", physics=False)

pyvis_network.html


### Setting groups
---

In [11]:
group_dict_H = {}
for group, nodes in enumerate(sorted(list(nx.connected_components(relationship_network)), key=len, reverse=True), start=1):
    for node in nodes:
        group_dict_H[node] = group

In [12]:
groups = pd.DataFrame({"source":group_dict_H.keys(), "group":group_dict_H.values()}).sort_values(by=["group","source"])
group_summary = groups.groupby("group", as_index=False).agg({"source":"count"}).rename(columns={"source":"num_of_nodes"})

In [13]:
nx.set_node_attributes(relationship_network, group_dict_H, "group")

degree_dict_H = dict(relationship_network.degree)
nx.set_node_attributes(relationship_network, degree_dict_H, "value")

neighbor_dict_H = {}
for node in list(relationship_network.nodes):
    neighbor_dict_H[node] = ','.join(str(relationship_network.neighbors(node)))

title_dict_H = {}
for node in list(relationship_network.nodes):
    title_dict_H[node] = f"id: {node}\ngroup: {group_dict_H[node]}\ndegree: {degree_dict_H[node]}"
nx.set_node_attributes(relationship_network, title_dict_H, "title")

In [None]:
generate_network(relationship_network, "pyvis_network.html", physics=True)