   # 1.1 Imports and constants

In [37]:
import json   # reads scrapped data
import pandas as pd # helps with file management and visualization
from uuid import uuid4 # creates new uuids for Posts

PROV = 'sen'

JSON_PATH =  'datasets/senado/json/'

AGENTS_PATH = 'datasets/slp/agents/'
MEMBERSHIPS_PATH = 'datasets/slp/memberships/'
ORGANIZATIONS_PATH = 'datasets/slp/organizations/'
POSTS_PATH = 'datasets/slp/posts/'

FILE_SUFFIX = '55_56'
SENADO_URI = '81311052-e5b6-46fe-87ba-83865fa0ffb0' 


 # 1.2 Getting the parties

In [38]:
filename = 'parties.csv'
file_path = '{:}{:}'.format(ORGANIZATIONS_PATH, filename)
df_parties = pd.read_csv(file_path, sep= ';', encoding= 'utf-8', index_col=None)
df_parties.set_index('sigla', inplace=True)


parties_d = df_parties['slp:resource_uri'].to_dict()                        
print({k:parties_d[k]
    for k in ['PT', 'PMDB', 'DEM']})

{'PT': '7b2138a6-df61-11e7-baf1-c82a144c0a85', 'PMDB': '7b210cfa-df61-11e7-9864-c82a144c0a85', 'DEM': '7b20ed7e-df61-11e7-bb65-c82a144c0a85'}


# 2. Reads json and stores in an array of dicts

In [39]:
filename = 'senator_with_memberships-{:}.json'.format(FILE_SUFFIX) 
file_path = '{:}{:}'.format(JSON_PATH, filename)
with open(file_path, mode='r') as f:
    senatorsstr = f.read()
f.close()

senators_with_memberships = json.loads(senatorsstr)

# 3. Iterates saving the agents

In [40]:
mapping = {'skos:prefLabel': 'sen:CodigoParlamentar',
  'foaf:name':'sen:NomeCompletoParlamentar',
  'rdfs:label':'sen:NomeParlamentar',
  'agent_resource_uri':'slp:resource_uri'           
}

agents = [] 
for swm_d in senators_with_memberships:
    agent_d = {mapping[k]: swm_d.get(k, None) for k in mapping}
    if agent_d:
        agents.append(agent_d)

filename = 'senators-{:}-{:}.csv'.format(PROV, FILE_SUFFIX) 
file_path = '{:}{:}'.format(AGENTS_PATH, filename)
df = pd.DataFrame.from_dict(agents)    
df.to_csv(file_path, sep=';', encoding='utf-8', index=None)
df.head()

Unnamed: 0,sen:CodigoParlamentar,sen:NomeCompletoParlamentar,sen:NomeParlamentar,slp:resource_uri
0,5573,Abel Rebouças São José,Abel Rebouças,828a0e78-b457-47de-8112-a7cce8d24563
1,739,Ciro Nogueira Lima Filho,Ciro Nogueira,59888f36-89fc-40e9-adf7-e95bb4906eb2
2,5108,José Aparecido dos Santos,Cidinho Santos,94be5f0e-2cc9-488b-bb32-335a44eb4f1b
3,5136,Cesar Antonio de Souza,Cesar Antonio de Souza,40260ab9-9261-46b9-8ab3-3c569dfa8b24
4,5623,Christopher Belchior Goulart,Christopher Goulart,ad36f1e7-f24a-4f51-9f44-c45468e0d4a1


 # 4.1 Iterates saving memberships to the senate

In [41]:
mapping = {
    'skos:prefLabel': 'sen:CodigoMandato',
    'natureza': 'sen:UfParlamentar',
    'legislatura': 'NumeroLegislatura', 
    'startDate': 'DataInicio', 
    'finishDate': 'DataFim',
    'membership_resource_uri':'slp:resource_uri',
    'role_resource_uri':'org:role'
}

memberships = [] 
for swm_d in senators_with_memberships:
    terms = swm_d['terms']
    if terms:
        for term_d in terms:
            memberships_d = {mapping[k]: term_d.get(k, None) for k in mapping}
            if memberships_d:
                memberships.append(memberships_d)


filename = 'memberships_with_senate-{:}-{:}.csv'.format(PROV, FILE_SUFFIX) 
file_path = '{:}{:}'.format(MEMBERSHIPS_PATH, filename)
df = pd.DataFrame.from_dict(memberships)    
df.to_csv(file_path, sep=';', encoding='utf-8', index=None)
df.head()            

Unnamed: 0,DataFim,DataInicio,NumeroLegislatura,org:role,sen:CodigoMandato,sen:UfParlamentar,slp:resource_uri
0,2019-01-31,2015-02-01,55,d69fcc38-21a6-41ee-8c0f-90be24aff805,492,BA,61e28c9c-c93b-40fc-a73d-3a963632df24
1,2023-01-31,2019-02-01,56,d69fcc38-21a6-41ee-8c0f-90be24aff805,492,BA,669716c0-cf09-4d77-896b-8497c3943b34
2,2015-01-31,2011-02-01,54,d69fcc38-21a6-41ee-8c0f-90be24aff805,458,PI,869d897d-3ab6-448a-ba58-8c18a2aa4421
3,2019-01-31,2015-02-01,55,d69fcc38-21a6-41ee-8c0f-90be24aff805,458,PI,155310d6-40c8-4c04-9234-6f312193b877
4,2015-01-31,2011-02-01,54,d69fcc38-21a6-41ee-8c0f-90be24aff805,456,MT,3e9c9056-8f75-4d44-b5a3-da45f12caa33


 # 4.2 Saves Posts postedIn Senado

In [42]:
df = df['org:role'].to_frame()
df['slp:resource_uri'] =[str(uuid4()) 
                            for _ in range(len(memberships))] 

filename = 'posts_senado-{:}-{:}.csv'.format(PROV,FILE_SUFFIX)
file_path = '{:}{:}'.format(POSTS_PATH, filename)
df.to_csv(file_path, sep=';', encoding='utf-8', index=None)
df.head()            

Unnamed: 0,org:role,slp:resource_uri
0,d69fcc38-21a6-41ee-8c0f-90be24aff805,664a811a-8e13-444b-91c4-0a7ccb108588
1,d69fcc38-21a6-41ee-8c0f-90be24aff805,eb6e9f82-c30f-471f-b30d-5f781153a5c0
2,d69fcc38-21a6-41ee-8c0f-90be24aff805,31fb0de2-bdf8-4c37-a1e5-8b9a9191d53e
3,d69fcc38-21a6-41ee-8c0f-90be24aff805,86e50ea4-88bb-491c-94e5-31e6a4d56fe2
4,d69fcc38-21a6-41ee-8c0f-90be24aff805,e5c9a3af-b8e5-4e47-84bb-ce109ab001c8


 # 5.1 Iterates saving party affiliations

In [43]:
mapping = {
    'sigla': 'sen:SiglaPartido',
    'startDate': 'sen:DataFiliacao' ,
    'finishDate': 'sen:DataDesfiliacao',
    'role_resource_uri': 'org:role',
    'resource_uri': 'slp:resource_uri'
}

memberships = [] 
for swm_d in senators_with_memberships:
    affiliations = swm_d['affiliations']
    if affiliations:
        for affiliation_d in affiliations:
            memberships_d = {mapping[k]: affiliation_d.get(k, None) for k in mapping}
            if memberships_d:
                memberships.append(memberships_d)


filename = 'memberships_with_parties-{:}-{:}.csv'.format(PROV, FILE_SUFFIX) 
file_path = '{:}{:}'.format(MEMBERSHIPS_PATH, filename)
df = pd.DataFrame.from_dict(memberships)    
df.to_csv(file_path, sep=';', encoding='utf-8', index=None)
df.head()            

Unnamed: 0,org:role,sen:DataDesfiliacao,sen:DataFiliacao,sen:SiglaPartido,slp:resource_uri
0,ebf7f8f0-1b0a-4662-9ed8-c9bb19936ec0,,2015-02-01,PDT,0577ae6b-9fa4-4ff0-ac19-9dd5cd3598d1
1,ebf7f8f0-1b0a-4662-9ed8-c9bb19936ec0,,2004-02-14,PP,b2ec603a-26b8-4e59-8a59-624d2f0973ac
2,ebf7f8f0-1b0a-4662-9ed8-c9bb19936ec0,2004-02-13,1999-02-01,PFL,3cf1400f-9fe7-4552-99ac-d0a4a52a1510
3,ebf7f8f0-1b0a-4662-9ed8-c9bb19936ec0,,2011-02-03,PR,da9f5ea7-2636-4240-af94-94e0df597ae9
4,ebf7f8f0-1b0a-4662-9ed8-c9bb19936ec0,,2011-02-03,DEM,89a85f62-3dce-499c-a72a-0917f5bf5240


 # 5.2 Saves Posts postIn Party

In [46]:
posts = []
for membership in memberships:
    sigla = membership['sen:SiglaPartido']
    if not(sigla == 'S/Partido'):
        post_d = {
        'slp:resource_uri': str(uuid4()),
        'org:role': membership['org:role'],
        'org:postIn': parties_d[membership['sen:SiglaPartido']]          
        }          
        posts.append(post_d)

filename = 'posts_parties-{:}-{:}.csv'.format(PROV, FILE_SUFFIX)
file_path = '{:}{:}'.format(POSTS_PATH, filename)
df.to_csv(file_path, sep=';', encoding='utf-8', index=None)
df.head()                

Unnamed: 0,org:role,sen:DataDesfiliacao,sen:DataFiliacao,sen:SiglaPartido,slp:resource_uri
0,ebf7f8f0-1b0a-4662-9ed8-c9bb19936ec0,,2015-02-01,PDT,0577ae6b-9fa4-4ff0-ac19-9dd5cd3598d1
1,ebf7f8f0-1b0a-4662-9ed8-c9bb19936ec0,,2004-02-14,PP,b2ec603a-26b8-4e59-8a59-624d2f0973ac
2,ebf7f8f0-1b0a-4662-9ed8-c9bb19936ec0,2004-02-13,1999-02-01,PFL,3cf1400f-9fe7-4552-99ac-d0a4a52a1510
3,ebf7f8f0-1b0a-4662-9ed8-c9bb19936ec0,,2011-02-03,PR,da9f5ea7-2636-4240-af94-94e0df597ae9
4,ebf7f8f0-1b0a-4662-9ed8-c9bb19936ec0,,2011-02-03,DEM,89a85f62-3dce-499c-a72a-0917f5bf5240
