# U.S. Elections 2024

This is a scraper for (unofficial) election night results from the [New York City Board of Elections](https://enr.boenyc.gov/index.html), specifically for election districts in the South Bronx. 

In [1]:
# importing libraries
import pandas as pd
from bs4 import BeautifulSoup
import requests
from random import randrange
import time
from io import StringIO

### Assembly districts

In [2]:
# fetch URL

# main link for the Bronx assembly districts
url = "https://enr.boenyc.gov/CD26825AD2.html" 
response = requests.get(url)
response

<Response [200]>

In [3]:
soup = BeautifulSoup(response.text, "html.parser")

### Targeting links for each of the assembly districts

In [4]:
assembly_district = soup.find_all("table", class_="underline")
targets = soup.find_all("td")
len(targets)

175

In [5]:
# initializing
base_url = "https://enr.boenyc.gov/"
assembly_districts_links = [] # holds links to each assembly district
assembly_districts_num = [] # holds assembly district number

for district in targets:
    try:
        if district != None and "AD" in district.find("a").get("title"):
            num = district.find("a").get_text()
            link = base_url + district.find("a").get("href")
            assembly_districts_num.append(num)
            assembly_districts_links.append(link)
    except:
        pass

# convert to df
assembly_districts_df = pd.DataFrame(zip(assembly_districts_num, assembly_districts_links),
                  columns=["assembly_district", "link"])
assembly_districts_df

Unnamed: 0,assembly_district,link
0,AD 77,https://enr.boenyc.gov/CD26825AD772.html
1,AD 77,https://enr.boenyc.gov/CD26825AD772.html
2,AD 77,https://enr.boenyc.gov/CD26825AD772.html
3,AD 78,https://enr.boenyc.gov/CD26825AD782.html
4,AD 79,https://enr.boenyc.gov/CD26825AD792.html
5,AD 80,https://enr.boenyc.gov/CD26825AD802.html
6,AD 81,https://enr.boenyc.gov/CD26825AD812.html
7,AD 82,https://enr.boenyc.gov/CD26825AD822.html
8,AD 83,https://enr.boenyc.gov/CD26825AD832.html
9,AD 84,https://enr.boenyc.gov/CD26825AD842.html


In [6]:
assembly_districts_df.to_csv("bx-assembly districts.csv", encoding="UTF-8", index=False)

## Targeting links for the each of the electoral districts

In [7]:
# initializing headers
header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1"
}

In [8]:
# initializing a snoozer, to be safe
def snoozer(start_time, end_time):
    '''
    This function creates a snoozer that can be used when scraping.
    It requires `from random import randrange` and `import time`. 
    
    Parameters: 
    start_time (int) = start time of range, in seconds
    end_time (int) = end time of range, in seconds
    '''
    timer = randrange(start_time, end_time)
    print(f"Snoozing for {timer} seconds...")
    time.sleep(timer)

In [9]:
# downloading tables

counter = 0
errors_list = []
all_dfs = []

for ad_num, target_url in zip(assembly_districts_num, assembly_districts_links):
    counter += 1
    print(f"Fetching table {counter} out of {len(assembly_districts_num)} from {ad_num}...")
    
    # request each of the urls
    response = requests.get(target_url, headers=header)
    if 200 <= response.status_code < 400:
        try:
            # get all dfs per url
            # temp_dfs = pd.read_html(response.text)
            html_content = StringIO(response.text)
            temp_dfs = pd.read_html(html_content)

            if len(temp_dfs) > 2:
                df = temp_dfs[2]
    
                # drop unnecessary columns and rows                
                df = df.drop(columns=[2, 4, 6, 8, 10, 12])\
                .drop([0, 1])\
                .drop(df.index[-1])\
                .reset_index(drop=True)
        
                # define new columns
                df.columns = [
                    "electoral_district", "reported", "Harris-Walz_Democratic",
                    "Trump-Vance_Republican", "Trump-Vance_Conservative", 
                    "Harris-Walz_Working_Families", "write_in"
                ]

                # add new column for assembly district
                df["assembly_district"] = ad_num
   
        except Exception as e:
            print(f"Error fetching {target_url}: {e}")
            errors_list.append(e)

    all_dfs.append(df)

    if counter <= len(assembly_districts_num) - 1: # so it does not snooze when the downloads are all done
        snoozer(12, 34)
    print("") # adds a line break per item downloaded    

print("Done!")

Fetching table 1 out of 13 from AD   77...
Snoozing for 14 seconds...

Fetching table 2 out of 13 from AD   77...
Snoozing for 18 seconds...

Fetching table 3 out of 13 from AD   77...
Snoozing for 25 seconds...

Fetching table 4 out of 13 from AD   78...
Snoozing for 24 seconds...

Fetching table 5 out of 13 from AD   79...
Snoozing for 24 seconds...

Fetching table 6 out of 13 from AD   80...
Snoozing for 19 seconds...

Fetching table 7 out of 13 from AD   81...
Snoozing for 29 seconds...

Fetching table 8 out of 13 from AD   82...
Snoozing for 17 seconds...

Fetching table 9 out of 13 from AD   83...
Snoozing for 29 seconds...

Fetching table 10 out of 13 from AD   84...
Snoozing for 23 seconds...

Fetching table 11 out of 13 from AD   85...
Snoozing for 16 seconds...

Fetching table 12 out of 13 from AD   86...
Snoozing for 17 seconds...

Fetching table 13 out of 13 from AD   87...

Done!


In [10]:
len(all_dfs)

13

In [11]:
all_dfs

[   electoral_district reported Harris-Walz_Democratic Trump-Vance_Republican  \
 0                ED 1   99.00%                    371                    168   
 1                ED 2   99.00%                    503                    200   
 2                ED 3   99.00%                    471                    144   
 3                ED 4   83.33%                     94                     27   
 4                ED 5   99.00%                    470                    178   
 ..                ...      ...                    ...                    ...   
 58              ED 59   99.00%                      0                      0   
 59              ED 60   99.00%                      0                      0   
 60              ED 61   99.00%                      0                      0   
 61              ED 62   99.00%                      0                      0   
 62              ED 63   99.00%                      0                      0   
 
    Trump-Vance_Conservati

In [12]:
results = pd.concat(all_dfs, axis=0)
results

Unnamed: 0,electoral_district,reported,Harris-Walz_Democratic,Trump-Vance_Republican,Trump-Vance_Conservative,Harris-Walz_Working_Families,write_in,assembly_district
0,ED 1,99.00%,371,168,7,26,3,AD 77
1,ED 2,99.00%,503,200,7,17,6,AD 77
2,ED 3,99.00%,471,144,18,12,2,AD 77
3,ED 4,83.33%,94,27,2,6,2,AD 77
4,ED 5,99.00%,470,178,11,11,2,AD 77
...,...,...,...,...,...,...,...,...
54,ED 55,99.00%,448,166,13,18,8,AD 87
55,ED 56,99.00%,4,3,0,1,0,AD 87
56,ED 57,99.00%,0,0,0,0,0,AD 87
57,ED 58,99.00%,0,0,0,0,0,AD 87


In [13]:
results.to_csv("electoral_district_votes.csv", encoding="UTF-8", index=False)