# Political data extraction

In [1]:
import requests

from bs4 import BeautifulSoup as bs

import pandas as pd

In [2]:
years = ['2022', '2019', '2015', '2011', '2009', '2005', '2002', '1999', '1995']
tags = ['Resultados_por_círculo_eleitoral', 'Tabela_de_resultados_por_círculos_eleitorais', 'Tabela_de_resultados']

In [3]:
df = pd.DataFrame(columns=['year','city', 'political_party', 'votes'])

for year in years:
    print(year)
    url = f'https://pt.wikipedia.org/wiki/Elei%C3%A7%C3%B5es_legislativas_portuguesas_de_{year}'
    request = requests.get(url)
    soup = bs(request.text, 'html.parser')
    
    for tag in tags:
        s = soup.find("span", id=tag)
        if s:
            try:
                for table_index in range(len(s.find_all_next('table'))):
                    for idx in range(len(s.find_all_next('table')[table_index].find_all('td'))):
                        if s.find_all_next('table')[table_index].find_all('td')[idx].find('a'):
                            
                            city = s.find_all_next('table')[table_index].find_previous_sibling('h3').find('span', attrs={'class': 'mw-headline'}).text 
                            partido = s.find_all_next('table')[table_index].find_all('td')[idx].find('a').text
                            votos = s.find_all_next('table')[table_index].find_all('td')[idx].find_next('td').text
                            temp = {'year': year, 'city' : city, 'political_party' : partido, 'votes': float(votos.replace('\n', '').replace(' ', ''))}
                            df = pd.concat([df, pd.DataFrame(temp, index=[idx])], ignore_index=True)
            except:
                continue

2022
2019
2015
2011
2009
2005
2002
1999
1995


In [4]:
def bucketize_political_year(year):
    if year == 1995:
        return [1995, 1996, 1997, 1998, 1999, 2000, 2001]
    elif year == 2002:
        return [2002, 2003, 2004]
    elif year == 2005:
        return [2005, 2006, 2007, 2008]
    elif year == 2009:
        return [2009, 2010]
    elif year == 2011:
        return [2011, 2012, 2013, 2014]
    elif year == 2015:
        return [2015, 2016, 2017, 2018]
    elif year == 2019:
        return [2019, 2020, 2021]
    elif year == 2022:
        return [2022]

In [8]:
df['year_bucket'] = df['year'].apply(lambda val: bucketize_political_year(int(val)))
total_votes = df.groupby(['city', 'year'])['votes'].sum().reset_index(name='total_votes')
data_political_parties_p = pd.merge(df, total_votes, how='left', on=['city', 'year'])
data_political_parties_p['%'] = data_political_parties_p.apply(lambda col:  (col['votes'] / col['total_votes']) *100, axis=1)

In [10]:
data_political_parties_p.head(10)

Unnamed: 0,year,city,political_party,votes,year_bucket,total_votes,%
0,2022,Açores,Partido Socialista,36025.0,[2022],78980.0,45.612813
1,2022,Açores,Aliança Democrática,28520.0,[2022],78980.0,36.110408
2,2022,Açores,CHEGA,4986.0,[2022],78980.0,6.312991
3,2022,Açores,Bloco de Esquerda,3589.0,[2022],78980.0,4.544188
4,2022,Açores,Iniciativa Liberal,3454.0,[2022],78980.0,4.373259
5,2022,Açores,Coligação Democrática Unitária,1250.0,[2022],78980.0,1.582679
6,2022,Açores,Pessoas–Animais–Natureza,1156.0,[2022],78980.0,1.463662
7,2022,Aveiro,Partido Socialista,144044.0,[2022],347888.0,41.40528
8,2022,Aveiro,Partido Social Democrata,130192.0,[2022],347888.0,37.423539
9,2022,Aveiro,CHEGA,20546.0,[2022],347888.0,5.905924


In [11]:
data_political_parties_p.tail(10)

Unnamed: 0,year,city,political_party,votes,year_bucket,total_votes,%
1032,1995,Viseu,Coligação Democrática Unitária,3887.0,"[1995, 1996, 1997, 1998, 1999, 2000, 2001]",208261.0,1.866408
1033,1995,Europa,Partido Socialista,9067.0,"[1995, 1996, 1997, 1998, 1999, 2000, 2001]",20854.0,43.478469
1034,1995,Europa,Partido Social Democrata,8740.0,"[1995, 1996, 1997, 1998, 1999, 2000, 2001]",20854.0,41.910425
1035,1995,Europa,Coligação Democrática Unitária,1652.0,"[1995, 1996, 1997, 1998, 1999, 2000, 2001]",20854.0,7.921742
1036,1995,Europa,Partido Popular,1143.0,"[1995, 1996, 1997, 1998, 1999, 2000, 2001]",20854.0,5.480963
1037,1995,Europa,Movimento Partido da Terra,252.0,"[1995, 1996, 1997, 1998, 1999, 2000, 2001]",20854.0,1.208401
1038,1995,Fora da Europa,Partido Social Democrata,14085.0,"[1995, 1996, 1997, 1998, 1999, 2000, 2001]",17710.0,79.531338
1039,1995,Fora da Europa,Partido Socialista,2608.0,"[1995, 1996, 1997, 1998, 1999, 2000, 2001]",17710.0,14.726143
1040,1995,Fora da Europa,Partido Popular,771.0,"[1995, 1996, 1997, 1998, 1999, 2000, 2001]",17710.0,4.353473
1041,1995,Fora da Europa,Coligação Democrática Unitária,246.0,"[1995, 1996, 1997, 1998, 1999, 2000, 2001]",17710.0,1.389046


In [12]:
data_political_parties_p.to_csv('../data/scraping_political_parties.csv.gz', compression='gzip', index=False)