# Example: Fetching Different Data from the Swiss Parliament Web Service

### Import Modules

In [None]:
import requests
import pandas as pd
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

### Fetching Data from the Web Service

In [None]:
###########################################
# 01. Fetch Affair IDs by Legislative Period
###########################################
# Pages through the votes/affairs endpoint once per legislative period and
# stacks all pages into a single DataFrame (all_ids_df); every row is tagged
# with the period it was requested for.

# legislative periods of interest
legislative_periods = [49, 50, 51, 52]

# web service URL and headers
base_url = 'https://ws-old.parlament.ch/votes/affairs'
headers = {'User-Agent': 'Mozilla/5.0',  # required to mimic browser request
           'language': 'de'}

# seconds to wait for the server per request; without a timeout
# requests.get() can block indefinitely on a stalled connection
request_timeout = 30

all_dfs = []

# repeat GET requests for each legislative period
for period in legislative_periods:
    params = {
        'legislativePeriodFilter': period,
        'pageNumber': 1,
        'format': 'json',
    }

    # loop through affairs pages
    all_pages = []

    while True:
        response = requests.get(base_url, params=params, headers=headers,
                                timeout=request_timeout)
        # stop with a clear HTTPError on a 4xx/5xx answer instead of trying
        # to parse an error body as a result page
        response.raise_for_status()

        # extract JSON and convert to df
        data = response.json()
        df_page = pd.DataFrame(data)
        all_pages.append(df_page)

        # loop progression
        print(f"Period {period} - Collecting data for page {params['pageNumber']}")

        # The paging flag is read from the last record of the page. The
        # explicit comparison is deliberate: a missing flag (NaN) is truthy,
        # so a plain truthiness test would never leave the loop.
        if df_page['hasMorePages'].iloc[-1] == True:  # noqa: E712
            params['pageNumber'] += 1
        else:
            break

    # combine all pages; the paging flag is not needed afterwards
    df = pd.concat(all_pages, ignore_index=True)
    df['legislature'] = period
    df = df.drop(columns='hasMorePages')

    # append
    all_dfs.append(df)

# concatenate
all_ids_df = pd.concat(all_dfs, ignore_index=True)

In [None]:
#################################
# 02. Fetch Votes for Each Affair
#################################

# size of the thread pool used for the vote requests
max_workers = 40 # frequently used for I/O-bound tasks

# ids of the affairs whose votes are going to be requested
affair_ids = pd.read_parquet('all_affair_ids.parquet').loc[:, 'id']

# Function to fetch votes for a single affair
def fetch_votes(affair_id, timeout=30):
    """Fetch every page of votes for one affair and return them as a DataFrame.

    Args:
        affair_id: Identifier of the affair; it is interpolated into the
            request URL.
        timeout: Seconds to wait for the server on each request. Without a
            timeout a stalled connection would block its worker thread (and
            therefore the caller waiting on the future) indefinitely.

    Returns:
        A DataFrame with all pages concatenated and two extra columns,
        ``date`` and ``divisionText``, pulled out of each ``affairVotes``
        entry. Returns ``None`` if anything fails; the failure is printed.
    """
    params = {'format': 'json', 'pageNumber': 1}
    headers = {'User-Agent': 'Mozilla/5.0', 'language': 'de'}
    base_url = f'https://ws-old.parlament.ch/votes/affairs/{affair_id}'
    pages = []

    try:
        while True:
            # fetch JSON & convert it to df
            response = requests.get(base_url, params=params, headers=headers,
                                    timeout=timeout)
            response.raise_for_status()
            df_page = pd.DataFrame(response.json())
            pages.append(df_page)

            # the paging flag is read from the last affairVotes entry of the
            # page; a missing flag counts as "no more pages"
            if not df_page['affairVotes'].iloc[-1].get('hasMorePages', False):
                break
            params['pageNumber'] += 1

        # combine data from all pages
        df = pd.concat(pages, ignore_index=True)

        # extract fields to differentiate between subvotes for same affair (e.g. first vote vs final vote)
        df['date'] = df['affairVotes'].apply(lambda vote: vote.get('date'))
        df['divisionText'] = df['affairVotes'].apply(lambda vote: vote.get('divisionText'))
        return df

    # Deliberately broad: this runs inside a worker thread and a single bad
    # affair (network error, timeout, unexpected payload) should be reported
    # and skipped rather than abort the whole batch.
    except Exception as e:
        print(f"Failed to fetch votes for affair_id {affair_id}: {e}")
        return None

# Run fetch_votes for every affair id on a thread pool
all_dfs = []

with ThreadPoolExecutor(max_workers=max_workers) as executor:
    # one future per affair id, keyed by the future so the id stays attached
    futures = {
        executor.submit(fetch_votes, aid): aid
        for aid in affair_ids
    }

    # consume futures in completion order; tqdm shows the progress
    for future in tqdm(as_completed(futures), total=len(futures)):
        result = future.result()
        if result is None:
            # fetch_votes already reported the failure; nothing to keep
            continue
        all_dfs.append(result)

# stack the per-affair frames into one
all_votes_df = pd.concat(all_dfs, ignore_index=True)

In [None]:
#####################################
# 03. Get Information on Each Affair
#####################################

# size of the thread pool used for the summary requests
max_workers = 3 # if the number is too high, the request get blocked

# ids of the affairs whose summaries are going to be requested
affair_ids = pd.read_parquet('all_affair_ids.parquet').loc[:, 'id']

# Function to fetch JSON summary of single affair
def fetch_affair_summary(affair_id, timeout=30):
    """Fetch the JSON summary of one affair.

    Args:
        affair_id: Identifier of the affair; it is interpolated into the
            request URL.
        timeout: Seconds to wait for the server. Without a timeout a stalled
            connection would block its worker thread indefinitely.

    Returns:
        The decoded JSON payload, or ``None`` if the request or decoding
        failed; the failure is printed.
    """
    params = {'format': 'json', 'pageNumber': 1}
    # defined locally so the function does not depend on a module-level
    # `headers` variable having been set by another cell
    headers = {'User-Agent': 'Mozilla/5.0', 'language': 'de'}
    base_url = f'https://ws-old.parlament.ch/affairs/{affair_id}'

    try:
        # fetch JSON
        response = requests.get(base_url, params=params, headers=headers,
                                timeout=timeout)
        response.raise_for_status()
        return response.json()

    # Deliberately broad: this runs inside a worker thread and a single bad
    # affair should be reported and skipped rather than abort the batch.
    except Exception as e:
        print(f"Failed to fetch summary for affair_id {affair_id}: {e}")
        return None

# Run fetch_affair_summary for every affair id on a thread pool
all_affair_summaries = []

with ThreadPoolExecutor(max_workers=max_workers) as executor:
    # one future per affair id, keyed by the future so the id stays attached
    futures = {
        executor.submit(fetch_affair_summary, aid): aid
        for aid in affair_ids
    }

    # consume futures in completion order; tqdm shows the progress
    for future in tqdm(as_completed(futures), total=len(futures)):
        result = future.result()
        if result is None:
            # fetch_affair_summary already reported the failure
            continue
        all_affair_summaries.append(result)

# Combine the summary data of all affairs into a single df
all_affair_summaries_df = pd.DataFrame(all_affair_summaries)

In [None]:
#################################################
# 04. Get All Councillors by Legislative Periods
#################################################
# Pages through the councillors/historic endpoint once per legislative period
# and stacks all pages into a single DataFrame; every row is tagged with the
# period it was requested for.
# NOTE: the result is bound to all_ids_df, the same name section 01 uses.

# legislative periods of interest
legislative_periods = [47, 48, 49, 50, 51, 52]

# web service URL and headers
base_url = 'https://ws-old.parlament.ch/councillors/historic'
headers = {'User-Agent': 'Mozilla/5.0',  # required to mimic browser request
           'language': 'de'}

# seconds to wait for the server per request; without a timeout
# requests.get() can block indefinitely on a stalled connection
request_timeout = 30

all_dfs = []

# repeat GET requests for each legislative period
for period in legislative_periods:
    params = {
        'legislativePeriodFromFilter': period,
        'pageNumber': 1,
        'format': 'json',
    }

    # loop through councillor pages
    all_pages = []

    while True:
        response = requests.get(base_url, params=params, headers=headers,
                                timeout=request_timeout)
        # stop with a clear HTTPError on a 4xx/5xx answer instead of trying
        # to parse an error body as a result page
        response.raise_for_status()

        # extract JSON and convert to df
        data = response.json()
        df_page = pd.DataFrame(data)
        all_pages.append(df_page)

        # loop progression
        # (double-quoted f-string: reusing the same quote type inside the
        # replacement field is a SyntaxError before Python 3.12)
        print(f"Period {period} - Collecting data for page {params['pageNumber']}")

        # The paging flag is read from the last record of the page. The
        # explicit comparison is deliberate: a missing flag (NaN) is truthy,
        # so a plain truthiness test would never leave the loop.
        if df_page['hasMorePages'].iloc[-1] == True:  # noqa: E712
            params['pageNumber'] += 1
        else:
            break

    # combine all pages; the paging flag is not needed afterwards
    df = pd.concat(all_pages, ignore_index=True)
    df['legislatureFrom'] = period
    df = df.drop(columns='hasMorePages')

    # append
    all_dfs.append(df)

# concatenate
all_ids_df = pd.concat(all_dfs, ignore_index=True)

In [None]:
##################################
# 06. Fetch committee information
##################################
# Pages through the committees endpoint (current committees only), keeps the
# rows whose council abbreviation is 'NR' and exposes their ids as
# committee_ids.

# legislative periods of interest
# (not referenced by the request below; kept in case later cells read it)
legislative_periods = [52]

# web service URL and headers
base_url = 'https://ws-old.parlament.ch/committees'
headers = {'User-Agent': 'Mozilla/5.0',  # required to mimic browser request
           'language': 'de'}

# seconds to wait for the server per request; without a timeout
# requests.get() can block indefinitely on a stalled connection
request_timeout = 30

all_dfs = []

# GET request for committee ids
params = {
    'currentOnly': 'true',
    'pageNumber': 1,
    'format': 'json',
}

# loop through committee pages
all_pages = []

while True:
    response = requests.get(base_url, params=params, headers=headers,
                            timeout=request_timeout)
    # stop with a clear HTTPError on a 4xx/5xx answer instead of trying to
    # parse an error body as a result page
    response.raise_for_status()

    # extract JSON and convert to df
    data = response.json()
    df_page = pd.DataFrame(data)
    all_pages.append(df_page)

    # The paging flag is read from the last record of the page. The explicit
    # comparison is deliberate: a missing flag (NaN) is truthy, so a plain
    # truthiness test would never leave the loop.
    if df_page['hasMorePages'].iloc[-1] == True:  # noqa: E712
        params['pageNumber'] += 1
    else:
        break

# combine all pages
df = pd.concat(all_pages, ignore_index=True)

# filter to only keep national council committees:
# replace each council record by its abbreviation, then keep the 'NR' rows
df['council'] = df['council'].apply(lambda x: x['abbreviation'])
df = df[df['council'] == 'NR']

# extract ids of relevant committees
committee_ids = df['id']