In [1]:
import requests

# results 2016
url = "https://en.wikipedia.org/wiki/2016_United_States_presidential_election#Results_by_state"
response = requests.get(url)
response

<Response [200]>

In [2]:
import pandas as pd
from bs4 import BeautifulSoup
soup = BeautifulSoup(response.content)

In [3]:
table1= soup.find('div', style='overflow:auto')

In [4]:
# filtering all td tags to get state & percentages as list 

table_list1 = []
for td in table1.find_all('td'):
        table_list1.append(td.text.strip())  

In [5]:
# indexing the elements

for index, element in enumerate(table_list1):
    index,element

In [6]:
# searching for elements with letters, group them into sublists with state name as first index (listname:states_data)
import re       

states_data = []
current_state = []

for element in table_list1:
    if re.search(r'[a-zA-Z]', element):  
        if current_state:
            states_data.append(current_state)
        current_state = [element]  
    else:
        current_state.append(element)
if current_state:
    states_data.append(current_state)

In [7]:
# filtering the list, create dicts with: states (index[0]) hillary votes (index[2]) donald votes (index[5])
result_votes_2016 = {}

for entry in states_data:
        state = entry[0]
        hillary_votes = entry[2]
        trump_votes = entry[5]
        
        result_votes_2016[state] = {
        "Hillary Clinton": hillary_votes,
        "Donald Trump": trump_votes
    }

#print(result_votes_2016)

In [8]:
# dataframe: states as indexes, percentages are saved as strings!

df_results_2016 = pd.DataFrame.from_dict(result_votes_2016, orient='index')
df_results_2016.index.name = 'State'
df_results_2016.columns = ['Hillary Clinton', 'Donald Trump']

In [28]:
# scraping for total percentages 2016

values = soup.find_all('th', style="text-align:right")
total_values_2016 = []

for th in values:
    text = th.get_text(strip=True)  
    if "%" in text: 
        total_values_2016.append(text)
total_values_2016 = total_values_2016[:2]

total_values_2016

['48.18%', '46.09%']

In [10]:
# adding total votes as last row 

df_results_2016 = pd.DataFrame.from_dict(result_votes_2016, orient='index')

df_results_2016.index.name = 'State'
df_results_2016.columns = ['Hillary Clinton', 'Donald Trump']
total_votes = ['48.18%', '46.09%']
df_results_2016.loc['National'] = total_values_2016

#print(df_results_2016)  

In [11]:
df_results_2016.index.value_counts().sum()

57

In [12]:
# results 2020

url = "https://en.m.wikipedia.org/wiki/2020_United_States_presidential_election#Results"
response = requests.get(url)
response

<Response [200]>

In [13]:
soup2 = BeautifulSoup(response.content)
table2 = soup2.find('div', style='overflow:auto')

In [14]:
# all percentages & states

table_list2 = []
for td in table2.find_all('td'):
        table_list2.append(td.text.strip())

In [15]:
for index, element in enumerate(table_list2):
    index, element

In [16]:
# creating sublists with state as first index
states_data2 = []
current_state2 = []

for element in table_list2:
    if re.search(r'[a-zA-Z]', element):  
        if current_state2:
            states_data2.append(current_state2)
        current_state2 = [element]  
    else:
        current_state2.append(element)
if current_state2:
    states_data2.append(current_state2)

In [17]:
#filtering state, b votes & d votes, save in dict

result_votes_2020 = {}

for entry in states_data2:
    if len(entry) > 5:
        state = entry[0]
        biden_votes = entry[2]
        trump_votes = entry[5]
        
    result_votes_2020[state] = {
        "Joseph Biden": biden_votes,
        "Donald Trump": trump_votes
    }
#print(result_votes_2020)

In [32]:
# scraping for total percentages

values2 = soup2.find_all('th', style="text-align:right")
percent_values2 = []
for th in values2:
    text = th.get_text(strip=True)  
    if "%" in text: 
        percent_values2.append(text)
total_percentage_2020 = percent_values2[:2]

total_percentage_2020

['51.31%', '46.85%']

In [19]:
df_results_2020 = pd.DataFrame.from_dict(result_votes_2020, orient='index')

df_results_2020.index.name = 'State'
df_results_2020.columns = ['Joseph Biden', 'Donald Trump']
df_results_2020 = df_results_2020.drop(['[o]', '[p]'], axis=0)

df_results_2020.loc['National'] = total_percentage_2020
#print(df_results_2020)

In [20]:
df_results_2020.value_counts().sum()

57

In [21]:
#df_results_2016.to_csv("df_results_2016.csv")

In [22]:
#df_results_2020.to_csv("df_results_2020.csv")

In [38]:
# function table scraping 

import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

def table_scraper(year):
    result_votes_year = {} 
    response = requests.get(f"https://en.wikipedia.org/wiki/{year}_United_States_presidential_election#Results_by_state")
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('div', style='overflow:auto')                                      # find table
    if table is None:
        print(f"no results found for year: {year}.")
        return None
    table_list = [td.text.strip() for td in table.find_all('td')]                        # loop trough tables list and get td tags
    states_data = []                                                                     # create list with numbers by states                                                                
    current_state = []   
    for element in table_list:
        if re.search(r'[a-zA-Z]', element):                                              # look for values with letters (states)
            if current_state:
                states_data.append(current_state)                                        # grouping states data into sublists   
            current_state = [element]  
        else:
            current_state.append(element)     
    if current_state:
        states_data.append(current_state)
    result_votes_year = {                                                              # create with dict with indexes
    entry[0]: {
        "Democratic Candidate": entry[2],
        "Republican Candidate": entry[5]
    }
    for entry in states_data if len(entry) > 5 }                                       # create data frame
    df_results_year = pd.DataFrame.from_dict(result_votes_year, orient='index')
    df_results_year.index.name = 'State'
    df_results_year.columns = ['Democratic Candidate', 'Republican Candidate']

    # comprehension to get national votes 
    df_results_year.loc['National'] = [value for value in (th.get_text(strip=True) for th in soup.find_all('th', style="text-align:right") if "%" in th.get_text(strip=True)) if value != '%'][:2]
    
    return df_results_year


In [44]:
table_scraper(2016)

Unnamed: 0_level_0,Democratic Candidate,Republican Candidate
State,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,36.57%,62.03%
[o],3947,"−120,068"
Alaska,42.77%,52.83%
[p],6904,"−36,173"
Arizona,49.36%,49.06%
Arkansas,34.78%,62.40%
California,63.48%,34.32%
Colorado,55.40%,41.90%
Connecticut,59.26%,39.19%
Delaware,58.74%,39.77%


In [41]:
table_scraper(2008)

Unnamed: 0_level_0,Democratic Candidate,Republican Candidate
State,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,813479,1266546
Alaska,123594,193841
Arizona,1034707,1230111
Arkansas,422310,638017
California,8274473,5011781
Colorado,1288633,1073629
Connecticut,997772,629428
Delaware,255459,152374
District of Columbia,245800,17367
Florida,4282074,4045624


In [None]:
# function works with "National votes" for 2016 & 2020
# without "National votes" for 2012
# also works for 2008 but takes the number of votes not percentages because the table is slightly different 