In [1]:
from source.read_data import MockDB
import pandas as pd
from pprint import pprint
from collections import OrderedDict

In [2]:
pd.set_option('display.max_columns', None)

In [3]:
db = MockDB()

In [4]:
schema = db.schema

In [5]:
schema_legislacao = {key : val for key, val in schema.items() if 'legislation' in key}

In [6]:
pprint(schema_legislacao)

{'legislation_annotations': ['id',
                             'quote',
                             'ranges',
                             'text',
                             'legislation_draft_version_id',
                             'author_id',
                             'hidden_at',
                             'created_at',
                             'updated_at',
                             'comments_count',
                             'range_start',
                             'range_start_offset',
                             'range_end',
                             'range_end_offset',
                             'context',
                             None,
                             None,
                             None,
                             None,
                             None,
                             None,
                             None],
 'legislation_answers': ['id',
                         'legislation_question_id',
                  

In [7]:
schema_legislacao.keys()

dict_keys(['legislation_processes', 'legislation_questions', 'legislation_draft_versions', 'legislation_process_translations', 'legislation_evaluations', 'legislation_question_options', 'legislation_answers', 'legislation_topics', 'legislation_assessments', 'legislation_draft_version_translations', 'legislation_topic_levels', 'legislation_annotations', 'legislation_proposals', 'legislation_question_option_translations', 'legislation_question_translations', 'legislation_topic_votes'])

In [8]:
pprint(schema_legislacao['legislation_questions'])

['id',
 'legislation_process_id',
 'answers_count',
 'hidden_at',
 'created_at',
 'updated_at',
 'comments_count',
 'author_id']


In [9]:
pprint(schema_legislacao['legislation_answers'])

['id',
 'legislation_question_id',
 'legislation_question_option_id',
 'user_id',
 'hidden_at',
 'created_at',
 'updated_at']


In [10]:
pprint(schema_legislacao['legislation_processes'])

['id',
 'start_date',
 'end_date',
 'debate_start_date',
 'debate_end_date',
 'draft_publication_date',
 'allegations_start_date',
 'allegations_end_date',
 'result_publication_date',
 'hidden_at',
 'created_at',
 'updated_at',
 'debate_phase_enabled',
 'allegations_phase_enabled',
 'draft_publication_enabled',
 'result_publication_enabled',
 'published',
 'proposals_phase_start_date',
 'proposals_phase_end_date',
 'proposals_phase_enabled',
 'proposals_description',
 'draft_start_date',
 'draft_end_date',
 'draft_phase_enabled',
 'homepage_enabled',
 'background_color',
 'font_color',
 'topics_phase_start_date',
 'topics_phase_end_date',
 'topics_phase_enabled']


In [11]:
def processes_with_name(db):
    
    processess = db['legislation_processes']
    names = db['legislation_process_translations']
    
    merged = pd.merge(
                    names, 
                    processess,
                    left_on = 'legislation_process_id',
                    right_on = 'id',
                    how='inner',
                    suffixes = ('_process_names', '_process')
                     )
    
    return merged

In [12]:
def filter_processes_columns(processes_with_names):
    
    columns = [
        'legislation_process_id',
        'title',
        'summary',
        'start_date',
        'end_date'
    ]
    
    
    return processes_with_names[columns].copy()

In [13]:
processes = filter_processes_columns(
                processes_with_name(db)
            )

In [14]:
def legis_topics_by_process(db, processes):
    
    topics = db['legislation_topics']
    
    
    merged = pd.merge(
                    topics, 
                    processes,
                    on = 'legislation_process_id',
                    how='inner',
                    suffixes = ('_topic', '_process')
                     )
    return merged

In [15]:
def votes_by_topic(db, topics):
    
    votes = db['legislation_topic_votes']
    
    merged = pd.merge(
                    topics, 
                    votes,
                    left_on = 'id',
                    right_on = 'legislation_topic_id',
                    how='inner',
                    suffixes = ('_topic', '_votes')
                     )
    return merged

In [16]:
def proposals_by_process(db, processes):
    
    proposals = db['legislation_proposals']
    
    merged = pd.merge(
                    proposals, 
                    processes,
                    on = 'legislation_process_id',
                    how='inner',
                    suffixes = ('_proposal', '_process')
                     )
    return merged

In [17]:
def proposals_data_set(db, processes):
    
    proposals = proposals_by_process(db, processes)
    
    #verificar depois se acrescento votos
    renamed_columns = OrderedDict({
        'legislation_process_id' : 'process_id',
        'title_process' : 'process_title',
        'title_process' : 'process_summary',
        'start_date' : 'process_start_date',
        'end_date': 'process_end_date',
        'id' : 'proposal_id',
        'title_proposal' : 'proposal_title',
        'summary_proposal' : 'proposal_summary',
        'description' : 'proposal_description',
        'author_id' : 'proposal_author_id'
        
    })
    
    proposals = proposals.rename(renamed_columns, axis = 1)
    proposals = proposals[renamed_columns.values()].copy()
    
    return proposals

In [18]:
def questions_by_process(db, processess):
    
    questions = db['legislation_questions']
    
    merged = pd.merge(
                    questions, 
                    processess,
                    on = 'legislation_process_id',
                    how='inner',
                    suffixes = ('_questions', '_process')
                     )
    return merged

In [19]:
def answers_by_question(db, questions):
    
    answers = db['legislation_answers']
    
    merged = pd.merge(
                    questions, 
                    answers,
                    left_on = 'id',
                    right_on = 'legislation_question_id',
                    how='inner',
                    suffixes = ('_questions', '_answers')
                     )
    return merged

In [20]:
def answers_data_set(db, processes):
    
    questions = questions_by_process(db, processes)
    answers = answers_by_question(db, questions)
    
    renamed_columns = OrderedDict({
        'legislation_process_id' : 'process_id',
        'title' : 'process_title',
        'summary' : 'process_summary',
        'start_date' : 'process_start_date',
        'end_date': 'process_end_date',
        'id_questions' : 'question_id',
        'answers_count' : 'question_answers_count',
        'id_answers' : 'answer_id',
        'author_id' : 'question_author_id',
        'legislation_question_option_id' : 'question_option_id',
        'user_id' : 'answer_author_id'
        
    })
    
    answers = answers.rename(renamed_columns, axis = 1)
    answers = answers[renamed_columns.values()].copy()
    
    return answers

In [21]:
answers = answers_data_set(db, processes)

In [22]:
proposals = proposals_data_set(db, processes)

In [23]:
proposals.keys()

Index(['process_id', 'process_summary', 'process_start_date',
       'process_end_date', 'proposal_id', 'proposal_title', 'proposal_summary',
       'proposal_description', 'proposal_author_id'],
      dtype='object')

In [24]:
topics = legis_topics_by_process(db, processes)