# 2. Análise inicial

## 2.1. Imports

In [1]:
import os
import random
from pathlib import Path

import duckdb
import igraph as ig
import pandas as pd
from dotenv import load_dotenv

from event import Event

# Load environment variables (e.g. SILVER_DUCKDB_PATH) before they are read below.
load_dotenv()

# Project root; every other path in this notebook is derived from it.
PROJECT_DIR = Path("~/tramita").expanduser()
# NOTE(review): when SILVER_DUCKDB_PATH is unset, the "" default makes DB_PATH
# equal to PROJECT_DIR itself (a directory) — confirm this fallback is intended.
DB_PATH = PROJECT_DIR / os.getenv("SILVER_DUCKDB_PATH", "")
OUT_DIR = PROJECT_DIR / "data" / "gold"
# parents=True also creates missing intermediate directories (e.g. "data"),
# so this cell no longer raises FileNotFoundError on a fresh checkout.
OUT_DIR.mkdir(parents=True, exist_ok=True)
ACCESS_DIR = OUT_DIR / "accessory_data"
ACCESS_DIR.mkdir(parents=True, exist_ok=True)

# Locations of the node/edge tables, in both Parquet and CSV form.
NODES_PATH_PARQUET = OUT_DIR / "nodes.parquet"
EDGES_PATH_PARQUET = OUT_DIR / "edges.parquet"
NODES_PATH_CSV = OUT_DIR / "nodes.csv"
EDGES_PATH_CSV = OUT_DIR / "edges.csv"

## 2.2. Instanciamento do grafo

In [2]:
# Read the edge list from CSV and rename the endpoint columns
# 'source'/'target' to 'from'/'to', the names the following cells use.
edges_df = (
    pd.read_csv(EDGES_PATH_CSV)
    .rename(columns={'source': 'from', 'target': 'to'})
)
edges_df.head()

Unnamed: 0,from,to,etype
0,CD:160655,CP:538196,autoria
1,CD:141488,CP:559138,autoria
2,CD:73584,CP:593065,autoria
3,CD:160518,CP:601739,autoria
4,CD:151208,CP:614512,autoria


In [3]:

# Read the node table from CSV; the 'tag' column is renamed to 'name' so it
# matches the vertex attribute used when the graph is built.
nodes_df = (
    pd.read_csv(NODES_PATH_CSV)
    .rename(columns={"tag": "name"})
)
nodes_df.head()

Unnamed: 0,name,label,partido,type
0,CP:2187087,PL 5029/2019,,Proposicao
1,CP:2190408,PL 2/2019,,Proposicao
2,CP:2190417,PL 10/2019,,Proposicao
3,CP:2190423,PL 15/2019,,Proposicao
4,CP:2190450,PL 21/2019,,Proposicao


In [4]:
# One (from, to) pair per row of the edge table, in row order.
edge_tuples = [
    (src, dst) for src, dst in zip(edges_df['from'], edges_df['to'])
]
# Undirected graph whose vertices are created from the endpoint values and
# stored under the "name" vertex attribute.
g = ig.Graph.TupleList(edge_tuples, directed=False, vertex_name_attr="name")

In [5]:
# Index the node table by vertex name and align it to the graph's vertex order
# once, instead of rebuilding the index for every column inside the loop.
# .loc raises KeyError if any graph vertex is missing from nodes_df.
node_attrs = nodes_df.set_index("name").loc[g.vs["name"]]

# Copy every remaining node column onto the graph as a vertex attribute.
for col in node_attrs.columns:
    g.vs[col] = node_attrs[col].tolist()

In [6]:
# Copy every edge-table column except the endpoints onto the graph's edges.
# NOTE(review): assumes edge i of g corresponds to row i of edges_df — confirm.
for col in edges_df.columns:
    if col in ("from", "to"):
        continue
    g.es[col] = edges_df[col].tolist()

In [7]:
# Print igraph's text summary of the graph (vertex/edge counts and attribute names).
print(g.summary())

IGRAPH UN-T 30938 51532 -- 
+ attr: label (v), name (v), partido (v), type (v), etype (e)


In [8]:
# Headline statistics of the graph.
summary = g.summary()
vcount = g.vcount()
ecount = g.ecount()
density = g.density()

# Connected components, plus a table of their sizes ordered largest first.
components = g.components()
component_sizes = (
    pd.DataFrame({
        "component_id": range(len(components)),
        "size": components.sizes(),
    })
    .sort_values("size", ascending=False)
    .reset_index(drop=True)
)

print(f"|V| = {vcount}")
print(f"|E| = {ecount}")
print(f"Densidade: {density}")
print(f"{len(components)} componentes conexos.")
component_sizes

|V| = 30938
|E| = 51532
Densidade: 0.00010768037550001651
105 componentes conexos.


Unnamed: 0,component_id,size
0,0,28726
1,6,1374
2,7,60
3,65,45
4,14,42
...,...,...
100,84,2
101,73,2
102,96,2
103,90,2


In [9]:
# Fixed seed so the stochastic Leiden partition below is reproducible across runs.
COMMUNITY_SEED = 42

# Vertex index -> connected-component id, taken directly from the clustering's
# membership vector instead of a hand-written nested loop.
comp_id_map = dict(enumerate(components.membership))

degree_all   = g.degree()
# NOTE(review): the graph is disconnected (105 components in the recorded run),
# and the recorded scores are exactly 0.0 for vertices outside component 0 —
# eigenvector centrality is therefore not comparable across components.
eigenvector  = g.eigenvector_centrality()
community_method = None  # NOTE(review): never read in the visible cells — confirm it is still needed
# python-igraph draws its random numbers from Python's `random` module by
# default, so seeding it here makes community_leiden deterministic.
random.seed(COMMUNITY_SEED)
cl = g.community_leiden(objective_function="modularity")
membership = cl.membership

# One row per vertex: identity columns plus the metrics computed above.
vertex_metrics = pd.DataFrame({
    "id":           g.vs["name"],
    "type":         g.vs["type"],
    "label":        g.vs["label"],
    "degree":       degree_all,
    "eigenvector":  eigenvector,
    "community_id": membership,
    "component_id": components.membership,
})

In [10]:
def top_n(df: pd.DataFrame, col: str, n: int = 10, node_types: list | None = None) -> pd.DataFrame:
    sub = df if node_types is None else df[df["type"].isin(node_types)]
    return sub.sort_values(col, ascending=False).head(n).reset_index(drop=True)


In [11]:
# Top-10 tables by eigenvector centrality, one per node type, unpacked in the
# same order as the type names listed below.
top10_deps_eigen, top10_sens_eigen, top10_orgs_eigen, top10_ents_eigen = (
    top_n(vertex_metrics, "eigenvector", 10, node_types=[node_type])
    for node_type in ("Deputado", "Senador", "Orgao", "Ente")
)


In [12]:
# Show the ten "Deputado" vertices with the highest eigenvector centrality.
top10_deps_eigen

Unnamed: 0,id,type,label,degree,eigenvector,community_id,component_id
0,CD:204467,Deputado,ROSA NEIDE SANDES DE ALMEIDA,235,1.0,2,0
1,CD:178970,Deputado,JOAO SOMARIVA DANIEL,259,0.99099,2,0
2,CD:178986,Deputado,NILTO IGNACIO TATTO,253,0.986401,2,0
3,CD:74160,Deputado,PATRUS ANANIAS DE SOUZA,199,0.983884,2,0
4,CD:160535,Deputado,DIONILSO MATEUS MARCON,183,0.977833,2,0
5,CD:204393,Deputado,ANTONIO RIBEIRO,202,0.953757,2,0
6,CD:160610,Deputado,VALMIR CARLOS DA ASSUNÇÃO,195,0.9506,2,0
7,CD:204555,Deputado,JOSÉ RICARDO WENDLING,186,0.94469,2,0
8,CD:204480,Deputado,ROGÉRIO CORREIA DE MOURA BAPTISTA,191,0.938111,2,0
9,CD:204370,Deputado,CELIO ALVES DE MOURA,158,0.927276,2,0


In [13]:
# Show the ten "Senador" vertices with the highest eigenvector centrality.
# NOTE(review): in the recorded output these scores are ~1e-10, effectively zero
# next to the deputies' scores (~0.9-1.0) — the ordering among senators may not
# be meaningful; confirm before interpreting it.
top10_sens_eigen

Unnamed: 0,id,type,label,degree,eigenvector,community_id,component_id
0,SS:345,Senador,Flávio Arns,77,5.360081e-10,62,0
1,SS:5748,Senador,Veneziano Vital do Rêgo,80,5.005303e-10,62,0
2,SS:825,Senador,Paulo Paim,112,4.554665e-10,62,0
3,SS:5895,Senador,Jorge Kajuru,79,4.082226e-10,62,0
4,SS:22,Senador,Esperidião Amin,62,3.710263e-10,62,0
5,SS:5985,Senador,Nelsinho Trad,57,3.61957e-10,62,0
6,SS:5979,Senador,Leila Barros,67,3.558566e-10,62,0
7,SS:1173,Senador,Wellington Fagundes,44,3.553413e-10,62,0
8,SS:5953,Senador,Fabiano Contarato,63,3.498572e-10,62,0
9,SS:475,Senador,Confúcio Moura,70,3.220067e-10,62,0


In [14]:
# Show the ten "Orgao" vertices with the highest eigenvector centrality.
# NOTE(review): in the recorded output, rows outside component 0 score exactly
# 0.0, so the order among those rows does not reflect centrality.
top10_orgs_eigen

Unnamed: 0,id,type,label,degree,eigenvector,community_id,component_id
0,CO:100292,Orgao,CÂMARA DOS DEPUTADOS,2,2.018856e-05,7,0
1,CO:78,Orgao,Senado Federal,894,3.588246e-06,62,0
2,CO:4,Orgao,Mesa Diretora da Câmara dos Deputados,2,8.684608e-09,7,0
3,CO:6066,Orgao,Comissão de Turismo,1,4.34137e-09,7,0
4,CO:538490,Orgao,Comissão especial destinada a acompanhar as aç...,1,4.34137e-09,7,0
5,CO:2014,Orgao,Comissão de Saúde,8,0.0,93,19
6,CO:81,Orgao,Superior Tribunal de Justiça,4,0.0,81,12
7,CO:5438,Orgao,Comissão de Legislação Participativa,41,0.0,83,14
8,CO:101347,Orgao,Procuradoria-Geral da República,2,0.0,88,16
9,CO:253,Orgao,Poder Executivo,591,0.0,61,6


In [15]:
# Show the ten "Ente" vertices with the highest eigenvector centrality.
# NOTE(review): in the recorded output, rows outside component 0 score exactly
# 0.0, so the order among those rows does not reflect centrality.
top10_ents_eigen

Unnamed: 0,id,type,label,degree,eigenvector,community_id,component_id
0,SE:2,Ente,Câmara dos Deputados,366,0.001376569,7,0
1,SE:1,Ente,Senado Federal,2,5.263602e-08,62,0
2,SE:7352398,Ente,CPI da Pandemia,5,3.397862e-11,62,0
3,SE:3947422,Ente,Comissão de Direitos Humanos e Legislação Part...,6,2.265728e-11,62,0
4,SE:55226,Ente,Comissão Diretora,2,2.26378e-11,62,0
5,SE:3927825,Ente,Comissão de Meio Ambiente,2,2.26378e-11,62,0
6,SE:9999990,Ente,Superior Tribunal de Justiça,3,0.0,81,12
7,SE:55126,Ente,Presidência da República,615,0.0,61,6
8,SE:55143,Ente,Tribunal de Justiça do Distrito Federal e Terr...,4,0.0,115,36
9,SE:7352682,Ente,Forum Nacional de Comitês Hidrográficas Brasil,1,0.0,61,6
