# 5. Análise de comunidades por casa legislativa

## 5.1. Preparação

### 5.1.1. Imports

In [3]:

import os

from pathlib import Path
from collections import defaultdict
import itertools
# import duckdb
import numpy as np
import pandas as pd
import igraph as ig

from event import Event

from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the environment
# before the os.getenv() lookup below.
load_dotenv()
# Keep DataFrame reprs in cell outputs short.
pd.set_option('display.max_rows', 20)

# Project root; every path below is derived from it.
PROJECT_DIR = Path("~/tramita").expanduser()
# NOTE(review): when SILVER_DUCKDB_PATH is unset, the "" default makes DB_PATH
# equal to PROJECT_DIR itself (a directory) — confirm this fallback is intended.
DB_PATH = PROJECT_DIR / os.getenv("SILVER_DUCKDB_PATH", "")
OUT_DIR = PROJECT_DIR / "data" / "gold"
# parents=True: on a fresh checkout PROJECT_DIR / "data" may not exist yet, and
# mkdir() without it raises FileNotFoundError for a missing parent directory.
OUT_DIR.mkdir(parents=True, exist_ok=True)
ACCESS_DIR = OUT_DIR / "accessory_data"
ACCESS_DIR.mkdir(parents=True, exist_ok=True)

# Full graph exports (Parquet and CSV variants).
NODES_PATH_PARQUET = OUT_DIR / "nodes.parquet"
EDGES_PATH_PARQUET = OUT_DIR / "edges.parquet"
NODES_PATH_CSV = OUT_DIR / "nodes.csv"
EDGES_PATH_CSV = OUT_DIR / "edges.csv"

# Collaboration graph exports.
COLLAB_NODES_CSV = OUT_DIR / "nodes_collab.csv"
COLLAB_EDGES_CSV = OUT_DIR / "edges_collab.csv"

### 5.1.2. Funções auxiliares

In [4]:
def build_graph(node_df: pd.DataFrame, edge_df: pd.DataFrame) -> ig.Graph:
    """
    Build an undirected igraph.Graph from a node table and an edge table.

    Vertices are created from the endpoints listed in ``edge_df``; a row of
    ``node_df`` whose name appears in no edge is therefore not part of the
    graph (prune such rows first if the two must stay in sync).

    Args:
        node_df: Must have a 'name' column holding unique identifiers. Every
            other column becomes a vertex attribute.
        edge_df: Must have 'from' and 'to' columns holding node names. Every
            other column becomes an edge attribute, in row order.

    Returns:
        The graph, with vertex attribute 'name' plus the absorbed attributes.

    Raises:
        ValueError: If ``node_df['name']`` contains duplicates.
        KeyError: If an edge endpoint is missing from ``node_df['name']``
            (raised by the label lookup below).
    """
    # Duplicate names would make the label lookup return more rows than there
    # are vertices, so attribute values would no longer line up with vertices.
    if node_df["name"].duplicated().any():
        raise ValueError("node_df['name'] must contain unique identifiers")

    edge_tuples = list(zip(edge_df['from'], edge_df['to']))
    # NOTE(review): weights=True is kept from the original call, but the tuples
    # carry no third element; the actual weights presumably come from a
    # 'weight' column of edge_df copied in the edge-attribute loop below.
    g = ig.Graph.TupleList(
        edge_tuples,
        directed=False,
        vertex_name_attr="name",
        weights=True,
    )

    # Index the node table once and reorder it to the graph's vertex order,
    # instead of rebuilding the index for every attribute column.
    node_attrs = node_df.set_index("name").loc[g.vs["name"]]
    for col in node_attrs.columns:
        g.vs[col] = node_attrs[col].tolist()

    # Edge attributes are assigned positionally, in edge_df row order.
    for col in edge_df.columns:
        if col not in ("from", "to"):
            g.es[col] = edge_df[col].tolist()

    return g


def prune_graph(
    node_df: pd.DataFrame,
    edge_df: pd.DataFrame,
    tag_col: str = "name",
    from_col: str = "from",
    to_col: str = "to",
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Iteratively drop orphan nodes and dangling edges until nothing changes.

    Each pass first keeps only the nodes that are an endpoint of some edge,
    then keeps only the edges whose two endpoints are both still present.
    Passes repeat until neither table shrinks. The number of dropped rows is
    printed before returning.

    Args:
        node_df: Node table; ``tag_col`` holds the node identifiers.
        edge_df: Edge table; ``from_col`` / ``to_col`` hold the endpoints.
        tag_col: Name of the identifier column in ``node_df``.
        from_col: Name of the source column in ``edge_df``.
        to_col: Name of the target column in ``edge_df``.

    Returns:
        The filtered ``(node_df, edge_df)`` pair. The inputs are not modified.
    """
    initial_node_count, initial_edge_count = len(node_df), len(edge_df)

    while True:
        nodes_before, edges_before = len(node_df), len(edge_df)

        # Nodes survive when they occur as either endpoint of some edge.
        tags = node_df[tag_col]
        is_endpoint = tags.isin(edge_df[from_col]) | tags.isin(edge_df[to_col])
        node_df = node_df[is_endpoint]

        # Edges survive when both of their endpoints are surviving nodes.
        surviving_tags = node_df[tag_col]
        both_known = (
            edge_df[from_col].isin(surviving_tags)
            & edge_df[to_col].isin(surviving_tags)
        )
        edge_df = edge_df[both_known]

        # Something shrank in this pass: run another one.
        if (len(node_df), len(edge_df)) != (nodes_before, edges_before):
            continue

        print(f"{initial_node_count - nodes_before} node(s) dropped.")
        print(f"{initial_edge_count - edges_before} edges(s) dropped.")
        return node_df, edge_df
        
def gini_normalized(
    node_df: pd.DataFrame,
    community_column: str = "leiden_mod",
    group_column: str = "cod_partido",
) -> pd.Series:
    """
    Measure how evenly each group's members are spread over the communities.

    For every value of ``group_column`` the members are counted per community
    and turned into shares that sum to 1. With K communities, the Gini
    coefficient of those K shares is ``sum_ij |p_i - p_j| / (2 K)``; its
    maximum (everything in one community) is ``(K - 1) / K``. The function
    returns ``1 - K / (K - 1) * gini``, i.e. one minus the normalized Gini:
    0.0 when a group sits entirely in one community, 1.0 when it is split
    evenly across all communities.

    NOTE(review): despite the name, the returned value is the complement of
    the normalized Gini coefficient — confirm this is what callers expect.

    Args:
        node_df: Node table with one row per member.
        community_column: Column holding the community label of each row.
        group_column: Column holding the group (by default the party code).

    Returns:
        A float Series indexed by group. When fewer than two communities
        exist the normalization is undefined and every entry is NaN.
    """
    # Rows: groups, columns: communities, values: member counts.
    counts = pd.crosstab(node_df[group_column], node_df[community_column])
    n_communities = counts.shape[1]

    # K / (K - 1) divides by zero for a single community; report "undefined"
    # instead of raising.
    if n_communities < 2:
        return pd.Series(np.nan, index=counts.index, dtype=float)

    shares = counts.div(counts.sum(axis=1), axis=0).to_numpy(dtype=float)

    # Sum of |p_i - p_j| over all ordered community pairs, per group, via
    # broadcasting (groups x K x K) instead of a Python double loop.
    pairwise_sum = np.abs(shares[:, :, None] - shares[:, None, :]).sum(axis=(1, 2))
    gini = pairwise_sum / (2 * n_communities)

    evenness = 1 - n_communities / (n_communities - 1) * gini
    return pd.Series(evenness, index=counts.index)

def add_leiden_column(node_df: pd.DataFrame, g: "ig.Graph", seed: "int | None" = None) -> pd.DataFrame:
    """
    Run Leiden community detection on ``g`` and attach the result to ``node_df``.

    The partition maximises modularity and uses the 'weight' edge attribute.
    The membership is stored on the graph as vertex attribute 'leiden_mod'
    (``g`` is modified in place) and returned as a 'leiden_mod' column on a
    new copy of ``node_df``, matched through the 'name' column. Names absent
    from the graph get NaN.

    An existing 'leiden_mod' column is overwritten rather than causing an
    overlapping-column error, so the calling cell can be re-run safely.

    Args:
        node_df: Node table with a 'name' column.
        g: Graph whose vertices carry a 'name' attribute and whose edges
            carry a 'weight' attribute.
        seed: Optional seed for reproducible communities. When given, Python's
            global ``random`` module is seeded before the detection runs.
            NOTE(review): this relies on igraph using Python's ``random``
            module as its random number generator (its default) — confirm if
            a custom generator was installed.

    Returns:
        A new DataFrame equal to ``node_df`` plus the 'leiden_mod' column.
    """
    if seed is not None:
        import random

        random.seed(seed)

    membership = g.community_leiden(
        objective_function="modularity", weights="weight"
    ).membership
    g.vs["leiden_mod"] = membership

    # Map by name instead of joining: assign() replaces an existing
    # 'leiden_mod' column, whereas join() raises on overlapping columns.
    community_by_name = dict(zip(g.vs["name"], membership))
    return node_df.assign(leiden_mod=node_df["name"].map(community_by_name))

### 5.1.3. Carga dos dados

In [5]:
# Node and edge tables of the collaboration graph, read from pickle files in
# the current working directory (relative paths, not OUT_DIR).
# NOTE(review): presumably written by an earlier notebook of this series —
# confirm. Unpickling can execute arbitrary code, so only load files you
# produced yourself.
collab_cut_node_df = pd.read_pickle("collab_cut_node_df.pkl")
collab_cut_edge_df = pd.read_pickle("collab_cut_edge_df.pkl")

## 5.2. Análise

### 5.2.1. Detecção de comunidades por casa legislativa

Vamos separar os nós e as arestas do grafo entre Câmara e Senado.

In [10]:
# One node table per chamber, selected by the 'type' column. The
# 'leiden_mod_all' column is dropped from both tables.
house_node_df = (
    collab_cut_node_df
    .loc[lambda df: df["type"] == "Deputado"]
    .drop(columns="leiden_mod_all")
    .copy()
)
senate_node_df = (
    collab_cut_node_df
    .loc[lambda df: df["type"] == "Senador"]
    .drop(columns="leiden_mod_all")
    .copy()
)

In [11]:
# One edge table per chamber, selected by the prefix of the 'from' endpoint
# only ("CD" for the House, "SS" for the Senate); the 'to' endpoint is not
# checked here.
house_edge_df = (
    collab_cut_edge_df
    .loc[lambda df: df["from"].str.startswith("CD")]
    .copy()
)
senate_edge_df = (
    collab_cut_edge_df
    .loc[lambda df: df["from"].str.startswith("SS")]
    .copy()
)

In [12]:
# Drop House nodes without any edge and edges whose endpoints are not both
# House nodes (see prune_graph); both names are rebound to the pruned tables.
house_node_df, house_edge_df = prune_graph(house_node_df, house_edge_df)

4 node(s) dropped.
0 edges(s) dropped.


In [13]:
# Drop Senate nodes without any edge and edges whose endpoints are not both
# Senate nodes (see prune_graph); both names are rebound to the pruned tables.
senate_node_df, senate_edge_df = prune_graph(senate_node_df, senate_edge_df)

8 node(s) dropped.
0 edges(s) dropped.


In [14]:
# Build the House collaboration graph, then attach each node's Leiden
# (modularity) community as column 'leiden_mod'.
# NOTE(review): no random seed is set in this cell, so community labels may
# differ between runs — confirm whether reproducibility matters here.
house_g = build_graph(house_node_df, house_edge_df)
house_node_df = add_leiden_column(house_node_df, house_g)

In [15]:
# Build the Senate collaboration graph, then attach each node's Leiden
# (modularity) community as column 'leiden_mod'.
# NOTE(review): no random seed is set in this cell, so community labels may
# differ between runs — confirm whether reproducibility matters here.
senate_g = build_graph(senate_node_df, senate_edge_df)
senate_node_df = add_leiden_column(senate_node_df, senate_g)

In [16]:
# Inspect the Senate node table with its new 'leiden_mod' column; the number
# of displayed rows is limited by the 'display.max_rows' option set above.
senate_node_df

Unnamed: 0,name,label,partido,type,cod_partido,uf,ideol_partido,regiao,leiden_mod
27550,SS:5537,DÁRIO ELIAS BERGER,PSB/SC,Senador,PSB,SC,centre-left,sul,2
27551,SS:5639,GUARACY BATISTA DA SILVEIRA,PP/TO,Senador,PP,TO,right,norte,5
27552,SS:5942,MARCOS RIBEIRO DO VAL,PODEMOS/ES,Senador,PODE,ES,centre-right,sudeste,3
27554,SS:6027,ANTÔNIO MECIAS PEREIRA DE JESUS,REPUBLICANOS/RR,Senador,REPUBLICANOS,RR,right,norte,0
27555,SS:6340,JAIME MAXIMINO BAGATTOLI,PL/RO,Senador,PL,RO,far-right,norte,5
...,...,...,...,...,...,...,...,...,...
27671,SS:5899,VANDERLAN VIEIRA CARDOSO,PSD/GO,Senador,PSD,GO,centre,centro-oeste,5
27672,SS:5936,CARLOS FRANCISCO PORTINHO,PL/RJ,Senador,PL,RJ,far-right,sudeste,2
27673,SS:5959,EANN STYVENSON VALENTIM MENDES,PSDB/RN,Senador,PSDB,RN,centre,nordeste,1
27674,SS:6008,ALEXANDRE LUIZ GIORDANO,MDB/SP,Senador,MDB,SP,centre,sudeste,5


In [17]:
# Export both graphs as GraphML. The paths are relative, so the files land in
# the current working directory rather than in OUT_DIR.
house_g.write_graphml("house_collab.graphml")
senate_g.write_graphml("senate_collab.graphml")