In [1]:
import datetime
import functools
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
def date_parser(date_str):
    """Parse a Polish long-form date string into a ``datetime.datetime``.

    The string is split on whitespace; the first three tokens are read as
    day number, month name in the genitive case (e.g. ``'stycznia'``) and
    year. Any further tokens (such as a trailing ``'r.'``) are ignored.

    Parameters
    ----------
    date_str : str
        Date text such as ``'12 stycznia 2016 r.'``.

    Returns
    -------
    datetime.datetime
        Midnight of the parsed day.

    Raises
    ------
    ValueError
        If the string has fewer than three tokens, or day/year are not
        integers, or the day is out of range for the month.
    KeyError
        If the month name is not one of the twelve Polish genitive forms.
    """
    month_dict = {'stycznia': 1,
                  'lutego': 2,
                  'marca': 3,
                  'kwietnia': 4,
                  'maja': 5,
                  'czerwca': 6,
                  'lipca': 7,
                  'sierpnia': 8,
                  'września': 9,
                  'października': 10,
                  'listopada': 11,
                  'grudnia': 12,
                  }
    day, month, year, *_ = date_str.split()
    # `pd.datetime` was only an alias of `datetime.datetime` and has been
    # removed from pandas, so the standard-library class is used directly.
    # The month lookup is case-insensitive so 'Stycznia' parses as well.
    return datetime.datetime(int(year), month_dict[month.lower()], int(day))

In [3]:
def get_1voting_list(nr_kadencji=8):
    """Return a DataFrame of votings for one parliamentary term.

    The listing page is downloaded once; both the per-row links and the
    table contents are extracted from that same HTML document.

    Parameters
    ----------
    nr_kadencji : int, default 8
        Term number placed in the ``NrKadencji`` query parameter.

    Returns
    -------
    pandas.DataFrame or None
        The first three columns of the first table on the page
        (forward-filled), plus the columns ``'Link'`` (absolute URL from the
        row's first anchor, or ``''`` when the row has none), ``'Data'``
        (the second column parsed with ``date_parser``) and
        ``'Nr kadencji'``. ``None`` is returned, after printing the reason,
        when the request fails or the status code is not 200.
    """
    # NOTE(review): the database segment is fixed to 'Sejm8.nsf' for every
    # term; confirm that the site serves (and links) other terms correctly
    # from this path.
    source_url = 'https://www.sejm.gov.pl/Sejm8.nsf/'
    req_url = source_url + f'agent.xsp?symbol=posglos&NrKadencji={nr_kadencji}'
    try:
        # A timeout keeps the notebook from hanging on an unresponsive server.
        r = requests.get(req_url, timeout=30)
    except requests.RequestException as e:
        print(e)
        return None
    if r.status_code != 200:
        print(f'Status code: {r.status_code}')
        return None

    # Decode as UTF-8, matching the encoding previously passed to read_html.
    r.encoding = 'utf-8'
    html = r.text

    soup = BeautifulSoup(html, 'lxml')
    links_list = []
    for tr in soup.table.find_all('tr'):
        a = tr.a
        links_list.append('' if a is None else source_url + a.get('href'))

    # Parse the table from the HTML already in hand instead of fetching the
    # URL a second time, so links and rows come from the same response.
    voting_df = pd.read_html(StringIO(html))[0]
    # .copy() makes the three-column slice an independent frame so the
    # column assignments below do not write into a view.
    voting_df = voting_df.ffill().iloc[:, :3].copy()
    # The first <tr> is skipped; it is assumed to be the header row that
    # read_html turns into column labels.
    voting_df['Link'] = links_list[1:]
    voting_df['Data'] = voting_df.iloc[:, 1].apply(date_parser)
    voting_df['Nr kadencji'] = nr_kadencji
    voting_df['Nr pos. Sejmu'] = voting_df['Nr pos. Sejmu'].astype('int64')
    return voting_df

In [4]:
def get_voting_list(kadencje=(9, 8, 7)):
    """Return one DataFrame with the votings of the selected parliamentary terms.

    Parameters
    ----------
    kadencje : iterable of int, default (9, 8, 7)
        Term numbers, each passed to ``get_1voting_list``; the resulting
        frames are stacked in the given order.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the per-term frames. Each frame keeps its
        own index, so index labels repeat across terms. An empty DataFrame
        is returned when no term yields data.
    """
    fetched = (get_1voting_list(kadencja) for kadencja in kadencje)
    # get_1voting_list returns None when a download fails; skip those terms
    # rather than failing the whole collection.
    frames = [frame for frame in fetched if frame is not None]
    if not frames:
        return pd.DataFrame()
    # DataFrame.append no longer exists in current pandas; a single concat
    # also avoids re-copying the accumulated frame for every term.
    return pd.concat(frames)

In [5]:
# Build the combined voting table for the default terms; this downloads the
# listing pages, so network access is required.
voting_df = get_voting_list()

In [6]:
# Save the combined table as voting_list.csv in the current working directory
# (with pandas' default settings the index is written as the first column).
voting_df.to_csv('voting_list.csv')