In [12]:
import requests
from bs4 import BeautifulSoup


def clean_value(value, convert_func=None):
    """Extract the text of a table cell and optionally convert it.

    Args:
        value: An element exposing a ``.text`` attribute (here, a ``<td>``
            tag returned by BeautifulSoup).
        convert_func: Optional callable (e.g. ``int`` or ``float``) applied
            to the cleaned text after thousands separators are removed.

    Returns:
        ``None`` when the cell holds only a dash placeholder; otherwise the
        cleaned string, or ``convert_func`` applied to it. When a conversion
        is requested and nothing is left to convert, ``None`` is returned
        instead of raising.
    """
    cleaned = value.text.strip().strip('%')
    if cleaned in ("—", "–", "-"):
        return None
    if convert_func is None:
        return cleaned
    # Drop a trailing footnote marker such as "[a]" so int()/float() does not
    # choke on it; string-valued fields are intentionally left untouched.
    numeric_text = cleaned.split('[', 1)[0].strip().replace(',', '')
    if not numeric_text:
        return None
    return convert_func(numeric_text)


# Maps each scraped year to the list of row dicts parsed from that year's page.
# NOTE(review): the output recorded under this cell starts at 2024 while the
# range below starts at 2022 -- confirm which set of years is intended.
ALL_DATA = {}
for year in range(2022, 2008, -2):
    print(year)
    url = f"https://en.wikipedia.org/wiki/{year}_United_States_House_of_Representatives_elections"
    # A timeout keeps the cell from hanging forever on a stalled connection;
    # raise_for_status() stops us from silently parsing an HTTP error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Only the first table with class "wikitable" on the page is used.
    table = soup.find('table', {'class': 'wikitable'})
    if table is None:
        raise ValueError(f"No 'wikitable' table found at {url}")
    rows = table.find_all('tr')[2:]  # Skip header rows

    data = []
    for row in rows:
        cols = row.find_all('td')
        # Rows that do not have exactly nine <td> cells are ignored.
        if len(cols) != 9:
            continue
        data.append({
            'party': cols[1].text.strip(),
            'votes': clean_value(cols[2], int),
            'percentage': clean_value(cols[3], float),
            'change': clean_value(cols[4]),
            'seats_prev': clean_value(cols[5]),
            'seats_curr': clean_value(cols[6]),
            'seat_change': clean_value(cols[7]),
            'strength': clean_value(cols[8]),
        })

    ALL_DATA[year] = data

2024
2022
2020
2018
2016
2014
2012
2010


In [14]:
import json

# Serialise the ALL_DATA mapping to wiki_scrape.json, pretty-printed with a
# four-space indent. Opening in 'w' mode overwrites any existing file.
with open('wiki_scrape.json', mode='w') as out_file:
    json.dump(ALL_DATA, fp=out_file, indent=4)
