In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
def get_candidates_data(url, timeout=30):
    """Download a per-precinct results feed and return it in long format.

    The document at ``url`` is parsed with BeautifulSoup's ``lxml`` parser.
    Every ``<group>`` element is read as one precinct whose display name comes
    from ``<description><es>``.  Every ``<option>`` inside a group is read as
    one candidate: ``<name><es>`` (name), ``<pe><es>`` (party) and ``<votes>``.
    Options lacking any of those elements are skipped.

    Parameters
    ----------
    url : str
        Address of the XML feed to download.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled server cannot block the
        notebook indefinitely.  Defaults to 30 seconds.

    Returns
    -------
    pandas.DataFrame
        Columns ``precinto``, ``votos``, ``persona`` and ``partido``.  There is
        one row per (precinct, candidate) combination; a candidate that does
        not appear in a given precinct still gets a row, with a missing
        ``votos``.  Vote counts are kept as the text found in the feed.  When
        no candidate could be read at all, an empty frame with the same
        columns is returned.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    AttributeError
        If a ``<group>`` has no ``<description><es>`` element.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    precintos_info = {}
    # Remembers which (name, party) pair each combined column label stands for,
    # so the label never has to be split apart again after the reshape.
    labels = {}

    for precinto in soup.find_all('group'):
        pre_name = precinto.find('description').find('es').text
        votes_by_label = {}

        for candidate in precinto.find_all('option'):
            try:
                candidate_name = candidate.find('name').find('es').text
                candidate_party = candidate.find('pe').find('es').text
                candidate_votes = candidate.find('votes').text
            except AttributeError:
                # find() returned None: this option is missing a name, party
                # or vote element, so it is not a complete candidate entry.
                continue

            # Every candidate, including the first one of the precinct, is
            # stored under the same "name, party" label.
            label = f'{candidate_name}, {candidate_party}'
            labels[label] = (candidate_name, candidate_party)
            votes_by_label[label] = candidate_votes

        # Precincts without any readable candidate are left out.
        if votes_by_label:
            precintos_info[pre_name] = votes_by_label

    columns = ['precinto', 'votos', 'persona', 'partido']
    if not precintos_info:
        return pd.DataFrame(columns=columns)

    # Wide (precinct x candidate label) -> long (one row per pair).
    df = pd.DataFrame(precintos_info).T.reset_index().melt(id_vars=['index'])

    df['candidate'] = df['variable'].map(lambda label: labels[label][0])
    df['party'] = df['variable'].map(lambda label: labels[label][1])

    df = df.drop(columns='variable')
    # Remaining order is: index, value, candidate, party.
    df.columns = columns

    return df

In [3]:
def get_precinct_info(url, base_url='http://elecciones2016.ceepur.org/Escrutinio_General_77/data/', timeout=30):
    """Collect the summary counters of every precinct listed in a feed.

    The listing at ``url`` is parsed with BeautifulSoup's ``lxml`` parser.  For
    each ``<group>`` element the precinct name is read from
    ``<description><es>`` and the name of its detail document from
    ``<name><es>``.  The detail document is then downloaded from
    ``base_url + <that name>`` and its ``<desc><es>`` labels are paired, in
    document order, with the text of its ``<qty>`` elements.

    Parameters
    ----------
    url : str
        Address of the precinct listing.
    base_url : str, optional
        Prefix prepended verbatim to each detail document name, so it must
        already end with ``/``.  Defaults to the location that was previously
        hard-coded in this function.
    timeout : float or tuple, optional
        Forwarded to every ``requests.get`` call.  Defaults to 30 seconds.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with columns ``index`` (precinct name), ``variable``
        (counter label) and ``value`` (counter text as found in the page).

    Raises
    ------
    requests.HTTPError
        If the listing or any detail document answers with an error status.
    IndexError
        If a detail document contains fewer than five ``<qty>`` elements.
    AttributeError
        If an expected element is missing from a ``<group>`` or ``<desc>``.
    """
    listing = requests.get(url, timeout=timeout)
    listing.raise_for_status()
    listing_soup = BeautifulSoup(listing.content, 'lxml')

    precintos_info = {}

    for precinto in listing_soup.find_all('group'):
        pre_name = precinto.find('description').find('es').text
        pre_url = precinto.find('name').find('es').text

        precinto_response = requests.get(f'{base_url}{pre_url}', timeout=timeout)
        precinto_response.raise_for_status()
        precinto_page = BeautifulSoup(precinto_response.content, 'lxml')

        qty = [node.text for node in precinto_page.find_all('qty')]
        # NOTE(review): the fifth <qty> is discarded before pairing, presumably
        # because it has no matching <desc> label - confirm against the feed.
        qty.pop(4)

        desc = [node.find('es').text for node in precinto_page.find_all('desc')]

        # zip() stops at the shorter list, so surplus labels or quantities
        # are silently ignored.
        precintos_info[pre_name] = dict(zip(desc, qty))

    # Wide (precinct x counter) -> long (one row per precinct/counter pair).
    return pd.DataFrame(precintos_info).T.reset_index().melt(id_vars=['index'])

### Gobernador

In [4]:
# Governor feed: the GOBERNADOR_Precintos.xml document in the
# Escrutinio_General_77 data directory of the elecciones2016 site.
gov_url = 'http://elecciones2016.ceepur.org/Escrutinio_General_77/data/GOBERNADOR_Precintos.xml'

In [6]:
# Download the governor feed and reshape it into a long DataFrame with the
# columns precinto, votos, persona and partido. Requires network access.
governor_data = get_candidates_data(gov_url)

### Comisionado

In [7]:
# Resident commissioner feed: COMISIONADO_RESIDENTE_Precintos.xml in the same
# data directory as the governor feed.
com_url = 'http://elecciones2016.ceepur.org/Escrutinio_General_77/data/COMISIONADO_RESIDENTE_Precintos.xml'

In [8]:
# Same download-and-reshape step as for the governor feed, applied to the
# resident commissioner feed. Requires network access.
com_data = get_candidates_data(com_url)

### Representante

In [9]:
# District representatives feed: REPRESENTANTES_POR_DISTRITO_Precintos.xml in
# the same data directory as the governor feed.
repr_url = 'http://elecciones2016.ceepur.org/Escrutinio_General_77/data/REPRESENTANTES_POR_DISTRITO_Precintos.xml'

In [10]:
# Same download-and-reshape step, applied to the district representatives
# feed. Requires network access.
repr_data = get_candidates_data(repr_url)

### Senadores

In [11]:
# District senators feed: SENADORES_POR_DISTRITO_Precintos.xml in the same
# data directory as the governor feed.
senator_url = 'http://elecciones2016.ceepur.org/Escrutinio_General_77/data/SENADORES_POR_DISTRITO_Precintos.xml'

In [12]:
# Same download-and-reshape step, applied to the district senators feed.
# Requires network access.
senator_data = get_candidates_data(senator_url)

In [14]:
# Write the governor results to the working directory; index=False leaves the
# DataFrame's row index out of the file.
# NOTE(review): com_data, repr_data and senator_data are not saved anywhere in
# the cells visible here - confirm whether that is intended.
governor_data.to_csv('governor_2016.csv', index=False)