# 2. Análise inicial

## 2.1. Imports

In [1]:
import os
import random

from pathlib import Path

import duckdb
import igraph as ig
import pandas as pd

from dotenv import load_dotenv

from event import Event

load_dotenv()

# Project root; every other path in this notebook is derived from it.
PROJECT_DIR = Path("~/tramita").expanduser()
# NOTE: when SILVER_DUCKDB_PATH is unset, the "" default makes DB_PATH equal to
# PROJECT_DIR itself (a directory), not a database file.
DB_PATH = PROJECT_DIR / os.getenv("SILVER_DUCKDB_PATH", "")

# Output directories. parents=True lets a fresh checkout (no "data" directory
# yet) run this cell instead of failing with FileNotFoundError.
OUT_DIR = PROJECT_DIR / "data" / "gold"
OUT_DIR.mkdir(parents=True, exist_ok=True)
ACCESS_DIR = OUT_DIR / "accessory_data"
ACCESS_DIR.mkdir(parents=True, exist_ok=True)

# Node/edge tables, available in both Parquet and CSV form.
NODES_PATH_PARQUET = OUT_DIR / "nodes.parquet"
EDGES_PATH_PARQUET = OUT_DIR / "edges.parquet"
NODES_PATH_CSV = OUT_DIR / "nodes.csv"
EDGES_PATH_CSV = OUT_DIR / "edges.csv"

## 2.2. Instanciamento do grafo

In [2]:
# Load the edge list and expose its endpoints under the "from"/"to" names
# that the graph-building cells below read.
edges_df = (
    pd.read_csv(EDGES_PATH_CSV)
    .rename(columns={"source": "from", "target": "to"})
)
edges_df.head()

Unnamed: 0,from,to,etype
0,CD:160655,CP:538196,autoria
1,CD:141488,CP:559138,autoria
2,CD:73584,CP:593065,autoria
3,CD:160518,CP:601739,autoria
4,CD:151208,CP:614512,autoria


In [3]:

# Load the node table; the "tag" column becomes "name" so it lines up with
# the vertex "name" attribute used when the graph is built.
nodes_df = (
    pd.read_csv(NODES_PATH_CSV)
    .rename(columns={"tag": "name"})
)
nodes_df.head()

Unnamed: 0,name,label,partido,type
0,CP:2187087,PL 5029/2019,,Proposicao
1,CP:2190408,PL 2/2019,,Proposicao
2,CP:2190417,PL 10/2019,,Proposicao
3,CP:2190423,PL 15/2019,,Proposicao
4,CP:2190450,PL 21/2019,,Proposicao


In [4]:
# One (from, to) pair per row of edges_df, in row order.
edge_tuples = list(
    edges_df[["from", "to"]].itertuples(index=False, name=None)
)

# Build an undirected graph from the pairs; each endpoint identifier is
# stored in the vertex attribute "name".
g = ig.Graph.TupleList(
    edge_tuples,
    vertex_name_attr="name",
    directed=False,
)

In [5]:
# Copy every node column except "name" onto the graph as a vertex attribute.
# The name-indexed lookup is built once (it was rebuilt per column before) and
# rows are reordered to follow the graph's vertex order; .loc raises KeyError
# if a vertex name is absent from nodes_df.
node_attrs = nodes_df.set_index("name").loc[g.vs["name"]]
for col in node_attrs.columns:
    g.vs[col] = node_attrs[col].tolist()

In [6]:
# Every edges_df column other than the two endpoints becomes an edge
# attribute, assigned positionally (row i -> edge i).
edge_attr_cols = [c for c in edges_df.columns if c not in ("from", "to")]
for attr in edge_attr_cols:
    g.es[attr] = edges_df[attr].tolist()

In [7]:
# Print igraph's textual summary of the graph (header line plus the list of
# vertex/edge attributes) as a sanity check that the attributes were attached.
print(g.summary())

IGRAPH UN-T 30938 51547 -- 
+ attr: label (v), name (v), partido (v), type (v), etype (e)


In [8]:
# Global graph statistics.
summary = g.summary()
vcount = g.vcount()
ecount = g.ecount()
density = g.density()

# Connected components and a table of their sizes, largest first.
components = g.components()
sizes = components.sizes()
component_sizes = (
    pd.DataFrame({"component_id": range(len(sizes)), "size": sizes})
    .sort_values("size", ascending=False)
    .reset_index(drop=True)
)

print(f"|V| = {vcount}")
print(f"|E| = {ecount}")
print(f"Densidade: {density}")
print(f"{len(components)} componentes conexos.")
component_sizes

|V| = 30938
|E| = 51547
Densidade: 0.00010771171924045933
102 componentes conexos.


Unnamed: 0,component_id,size
0,0,28735
1,5,1374
2,6,60
3,62,45
4,13,42
...,...,...
97,81,2
98,70,2
99,93,2
100,87,2


In [9]:
# Seed for Python's global RNG, which python-igraph uses as its random source
# by default. Without it the Leiden partition (and so community_id) changes
# from run to run.
COMMUNITY_SEED = 42

# Connected-component id of every vertex, indexed by igraph vertex id.
component_membership = components.membership
comp_id_map = dict(enumerate(component_membership))

degree_all = g.degree()
# NOTE(review): the graph has many connected components, and in the stored
# outputs below the vertices outside component 0 score 0.0. Treat eigenvector
# rankings as meaningful only inside the dominant component.
eigenvector = g.eigenvector_centrality()

# Placeholder; not read by any cell visible in this part of the notebook.
community_method = None
random.seed(COMMUNITY_SEED)
cl = g.community_leiden(objective_function="modularity")
membership = cl.membership

# One row per vertex, in igraph vertex order.
vertex_metrics = pd.DataFrame({
    "id":           g.vs["name"],
    "type":         g.vs["type"],
    "label":        g.vs["label"],
    "degree":       degree_all,
    "eigenvector":  eigenvector,
    "community_id": membership,
    "component_id": component_membership,
})

  eigenvector  = g.eigenvector_centrality()


In [10]:
def top_n(df: pd.DataFrame, col: str, n: int = 10, node_types: list | None = None) -> pd.DataFrame:
    sub = df if node_types is None else df[df["type"].isin(node_types)]
    return sub.sort_values(col, ascending=False).head(n).reset_index(drop=True)


In [11]:
# Top-10 by eigenvector centrality, one table per node type, bound to the
# four names in the same order as the type tuple.
(
    top10_deps_eigen,
    top10_sens_eigen,
    top10_orgs_eigen,
    top10_ents_eigen,
) = (
    top_n(vertex_metrics, "eigenvector", 10, node_types=[node_type])
    for node_type in ("Deputado", "Senador", "Orgao", "Ente")
)


In [12]:
# Display the ten "Deputado" vertices with the highest eigenvector centrality.
top10_deps_eigen

Unnamed: 0,id,type,label,degree,eigenvector,community_id,component_id
0,CD:204467,Deputado,ROSA NEIDE SANDES DE ALMEIDA,235,1.0,2,0
1,CD:178970,Deputado,JOAO SOMARIVA DANIEL,259,0.99099,2,0
2,CD:178986,Deputado,NILTO IGNACIO TATTO,253,0.986401,2,0
3,CD:74160,Deputado,PATRUS ANANIAS DE SOUZA,199,0.983884,2,0
4,CD:160535,Deputado,DIONILSO MATEUS MARCON,183,0.977833,2,0
5,CD:204393,Deputado,ANTONIO RIBEIRO,202,0.953757,2,0
6,CD:160610,Deputado,VALMIR CARLOS DA ASSUNÇÃO,195,0.9506,2,0
7,CD:204555,Deputado,JOSÉ RICARDO WENDLING,186,0.944689,2,0
8,CD:204480,Deputado,ROGÉRIO CORREIA DE MOURA BAPTISTA,191,0.938111,2,0
9,CD:204370,Deputado,CELIO ALVES DE MOURA,158,0.927276,2,0


In [13]:
# Display the ten "Senador" vertices with the highest eigenvector centrality.
top10_sens_eigen

Unnamed: 0,id,type,label,degree,eigenvector,community_id,component_id
0,SS:4639,Senador,JOSÉ ROBERTO OLIVEIRA FARO,3,0.01063,14,0
1,SS:5386,Senador,MARIA AUXILIADORA SEABRA REZENDE,4,0.000388,14,0
2,SS:5672,Senador,ALAN RICK MIRANDA,9,0.000213,14,0
3,SS:5793,Senador,HIRAN MANUEL GONÇALVES DA SILVA,3,0.000161,14,0
4,SS:5352,Senador,ROGÉRIO CARVALHO SANTOS,97,0.000107,14,0
5,SS:5902,Senador,EDUARDO OVÍDIO BORGES DE VELLOSO VIANNA,3,6.7e-05,14,0
6,SS:4642,Senador,EFRAIM DE ARAÚJO MORAIS FILHO,8,2.8e-05,14,0
7,SS:5736,Senador,TEREZA CRISTINA CORREA DA COSTA DIAS,8,5e-06,14,0
8,SS:5502,Senador,FRANCISCO PLÍNIO VALÉRIO TOMAZ,64,4e-06,14,0
9,SS:4770,Senador,IZALCI LUCAS FERREIRA,63,3e-06,14,0


In [14]:
# Display the ten "Orgao" vertices with the highest eigenvector centrality.
# NOTE(review): in the stored output several rows score exactly 0.0 and sit
# outside component 0, so their relative order carries no information.
top10_orgs_eigen

Unnamed: 0,id,type,label,degree,eigenvector,community_id,component_id
0,CO:100292,Orgao,CÂMARA DOS DEPUTADOS,2,2.018846e-05,1,0
1,CO:78,Orgao,SENADO FEDERAL,894,3.695125e-06,14,0
2,CO:4,Orgao,MESA DIRETORA DA CÂMARA DOS DEPUTADOS,2,8.684532e-09,1,0
3,CO:538490,Orgao,COMISSÃO ESPECIAL DESTINADA A ACOMPANHAR AS AÇ...,1,4.341332e-09,1,0
4,CO:6066,Orgao,COMISSÃO DE TURISMO,1,4.341332e-09,1,0
5,CO:2014,Orgao,COMISSÃO DE SAÚDE,8,0.0,106,18
6,CO:81,Orgao,SUPERIOR TRIBUNAL DE JUSTIÇA,4,0.0,92,11
7,CO:5438,Orgao,COMISSÃO DE LEGISLAÇÃO PARTICIPATIVA,41,0.0,95,13
8,CO:101347,Orgao,PROCURADORIA-GERAL DA REPÚBLICA,2,0.0,99,15
9,CO:253,Orgao,PODER EXECUTIVO,591,0.0,62,5


In [15]:
# Display the ten "Ente" vertices with the highest eigenvector centrality.
# NOTE(review): in the stored output several rows score exactly 0.0 and sit
# outside component 0, so their relative order carries no information.
top10_ents_eigen

Unnamed: 0,id,type,label,degree,eigenvector,community_id,component_id
0,SE:2,Ente,CÂMARA DOS DEPUTADOS,366,0.001376565,1,0
1,SE:1,Ente,SENADO FEDERAL,2,5.420371e-08,14,0
2,SE:7352398,Ente,CPI DA PANDEMIA,5,3.499044e-11,14,0
3,SE:3947422,Ente,COMISSÃO DE DIREITOS HUMANOS E LEGISLAÇÃO PART...,6,2.333195e-11,14,0
4,SE:3927825,Ente,COMISSÃO DE MEIO AMBIENTE,2,2.331192e-11,14,0
5,SE:55226,Ente,COMISSÃO DIRETORA,2,2.331192e-11,14,0
6,SE:9999990,Ente,SUPERIOR TRIBUNAL DE JUSTIÇA,3,0.0,92,11
7,SE:55126,Ente,PRESIDÊNCIA DA REPÚBLICA,615,0.0,62,5
8,SE:55143,Ente,TRIBUNAL DE JUSTIÇA DO DISTRITO FEDERAL E TERR...,4,0.0,137,34
9,SE:7352682,Ente,FORUM NACIONAL DE COMITÊS HIDROGRÁFICAS BRASIL,1,0.0,62,5
