In [1]:
import networkx as nx
import numpy as np
import pandas as pd
import pickle
from src.network_builder.pre_processing import pre_processing

In [2]:
# Load the three raw CSV inputs: enriched congresspeople records, the
# proposals table and the proposal-author table.
congresspeople, proposals, authors = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/enriched_congresspeople.csv',
        'data/proposals/proposals.csv',
        'data/authors/authors.csv',
    )
)

In [3]:
# Keep only the author rows whose type is 'deputados'.
authors = authors.loc[authors['type'] == 'deputados']

# Run the project pre-processing step, then narrow the frame down to the
# identifier columns plus the attributes used further down.
congresspeople = pre_processing(congresspeople).loc[:, [
    'id', 'idLegislatura', 'election_year', 'nomeEleitoral',
    'education', 'gender', 'siglaUf', 'siglaPartido',
    'region', 'occupation', 'marital_status', 'ethnicity', 'age',
]]


In [4]:
# Build, for every year, a mapping {proposal id: [author ids]}.
# Proposals and their authors keep the order in which they appear in `authors`.
# No filtering by number of co-authors is applied here.
authors_dict = {}

for year in range(2000, 2024):
    authors_year = authors[authors['year'] == year]
    # One groupby per year replaces the row-by-row iterrows() walk;
    # sort=False keeps proposals in first-appearance order.
    # NOTE(review): groupby skips rows whose idProposicao is missing.
    authors_dict[year] = {
        proposal_id: author_ids.tolist()
        for proposal_id, author_ids
        in authors_year.groupby('idProposicao', sort=False)['id']
    }

In [6]:
# Columns of `congresspeople` that are passed to NetworkBuilder as node attributes.
features = [
    'education',
    'gender',
    'siglaUf',
    'siglaPartido',
    'region',
    'occupation',
    'marital_status',
    'ethnicity',
    'age',
]
congresspeople

Unnamed: 0,id,idLegislatura,election_year,nomeEleitoral,education,gender,siglaUf,siglaPartido,region,occupation,marital_status,ethnicity,age
0,65551,52,2002,WAGNER LAGO,SUPERIOR COMPLETO,M,MA,PDT,Nordeste,judge/lawyer/prosecutor,married,,58
1,139285,57,2022,Lídice da Mata,SUPERIOR COMPLETO,F,BA,PSB,Nordeste,politician,divorced,PARDA,66
2,139285,53,2006,Lídice da Mata,SUPERIOR COMPLETO,F,BA,PSB,Nordeste,politician,divorced,PARDA,50
3,139285,56,2018,Lídice da Mata,SUPERIOR COMPLETO,F,BA,PSB,Nordeste,politician,divorced,PARDA,62
4,196357,55,2014,DEJORGE PATRÍCIO,ENSINO MÉDIO COMPLETO,M,RJ,PRB,Sudeste,politician,single,PARDA,40
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4409,163831,54,2010,FERNANDO JORDÃO,SUPERIOR COMPLETO,M,RJ,PMDB,Sudeste,businessperson/entrepreneur,married,,58
4410,73720,55,2014,CESAR SOUZA,ENSINO MÉDIO INCOMPLETO,M,SC,PSD,Sul,businessperson/entrepreneur,divorced,BRANCA,57
4411,180214,57,2022,Pastor Diniz,SUPERIOR COMPLETO,M,RR,UNIÃO,Norte,health professional,married,PARDA,42
4412,172029,54,2010,FRANCISCO CHAGAS,SUPERIOR COMPLETO,M,SP,PT,Sudeste,politician,divorced,BRANCA,54


In [20]:
class NetworkBuilder():
    '''
    Build a co-authorship network of congresspeople.

    Nodes are congressperson ids carrying the selected feature columns as node
    attributes. An undirected edge links two congresspeople who appear together
    in the author list of a proposal; the edge ``weight`` counts how many of the
    given author lists contain both of them. The finished graph is pickled to
    ``data/networks/{period}network.gpickle`` as part of construction.

    Parameters
    ----------
    congresspeople : pandas.DataFrame
        Must contain an ``id`` column and every column named in ``features``.
    coauthors : dict
        Maps a proposal id to the list of author ids of that proposal.
    features : list of str
        Columns of ``congresspeople`` stored as node attributes.
    period : object
        Label interpolated into the output file name.
    '''

    def __init__(self, congresspeople, coauthors, features, period):
        self.G = nx.Graph()
        # Only co-authorship edges are added; feature-based relations
        # (see createRelation) are not turned into edges here.
        self.add_nodes_attributes(congresspeople, features)
        self.add_edges_proposals(coauthors)
        self.save_graph(self.G, f'data/networks/{period}network.gpickle')

    def save_graph(self, G, path):
        '''
        Pickle the graph ``G`` to the file at ``path``.

        The target directory must already exist.
        '''
        with open(path, 'wb') as f:
            pickle.dump(G, f)

    @staticmethod
    def createRelation(df, target_column):
        '''
        Pair up the ids of all rows that share a value in ``target_column``.

        For every distinct non-missing value of ``target_column`` each
        unordered pair of rows holding that value is emitted exactly once, as
        ``(id of the later row, id of the earlier row)``.

        Parameters
        ----------
        df : pandas.DataFrame
            Must contain an ``id`` column and ``target_column``.
        target_column : str
            Column whose equal values define a relation.

        Returns
        -------
        list of tuple
            The id pairs, grouped by value in first-appearance order.
        '''
        relations = []
        # sort=False keeps the groups in first-appearance order; rows with a
        # missing value in target_column belong to no group.
        for _, group_ids in df.groupby(target_column, sort=False)['id']:
            ids = group_ids.tolist()
            for position, earlier in enumerate(ids):
                # Pair each row with every row after it, not with the first
                # row of the group.
                for later in ids[position + 1:]:
                    relations.append((later, earlier))
        return relations

    def add_nodes(self, congresspeople):
        '''Add one attribute-less node per ``id`` found in ``congresspeople``.'''
        for _, person in congresspeople.iterrows():
            self.G.add_node(person['id'])

    def add_nodes_attributes(self, congresspeople, features):
        '''
        Add one node per ``id`` with the ``features`` columns as node attributes.

        If an id occurs in several rows, the attributes of the last row win.
        '''
        for _, person in congresspeople.iterrows():
            self.G.add_node(person['id'], **person[features].to_dict())

    def add_edges_proposals(self, authors_dict):
        '''
        Add or strengthen an edge for every pair of co-authors.

        ``authors_dict`` maps a proposal id to the list of its author ids. Each
        pair of positions in such a list adds 1 to the ``weight`` of the edge
        between the two authors, creating the edge on first encounter.

        NOTE(review): author ids that were never added as nodes are created
        by ``add_edge`` without attributes, and an id listed twice in one
        proposal yields a self-loop - confirm whether the inputs rule this out.
        '''
        for authors in authors_dict.values():
            for position, first in enumerate(authors):
                for second in authors[position + 1:]:
                    if self.G.has_edge(first, second):
                        self.G[first][second]['weight'] += 1
                    else:
                        self.G.add_edge(first, second, weight=1)

    def getGraph(self):
        '''Return the underlying ``networkx.Graph``.'''
        return self.G

In [None]:
# Build one co-authorship network per calendar year found in `authors_dict`.
networks_yearly = {}
for year in authors_dict.keys():
    print(year)
    # Four-year windows starting in 1999 map to consecutive legislature ids
    # (1999-2002 -> 51, 2003-2006 -> 52, ...).
    idLegislatura = (year - 1999) // 4 + 51
    congress = congresspeople[congresspeople['idLegislatura'] == idLegislatura]

    # The loop previously stopped here, leaving `networks_yearly` empty.
    # NetworkBuilder also pickles each graph to data/networks/{year}network.gpickle.
    network = NetworkBuilder(congress, authors_dict[year], features, year)
    networks_yearly[year] = network.getGraph()

In [None]:
# Election year associated with each legislature id; a term is taken to span
# the four calendar years that follow it.
election_year = {57: 2022, 56: 2018, 55: 2014,
                 54: 2010, 53: 2006, 52: 2002,
                 51: 1998}

# Build one co-authorship network per legislature.
networks_termly = {}
for idLegislatura in range(51, 58):
    print(idLegislatura)
    congress = congresspeople[congresspeople['idLegislatura'] == idLegislatura]

    # Use this legislature's own term, not a fixed one.
    first_year = election_year[idLegislatura] + 1
    years = list(range(first_year, first_year + 4))

    # Merge the per-year author lists of the term. Years without an entry in
    # `authors_dict` (e.g. 1999 or 2024 onwards) contribute nothing.
    # NOTE(review): a proposal id present in more than one year keeps only the
    # author list of the latest year - confirm ids are unique across years.
    authors_dict_term = {}
    for year in years:
        authors_dict_term.update(authors_dict.get(year, {}))

    network = NetworkBuilder(congress, authors_dict_term, features, idLegislatura)
    networks_termly[idLegislatura] = network.getGraph()