In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service

In [2]:
# Open a Chrome session, load the vote-centre list page and keep the page
# source as reported by the browser.
driver = webdriver.Chrome()
try:
    driver.get("https://alamedacountyca.gov/rov_app/vcalist")

    webpage = driver.page_source
finally:
    # Always shut the browser down, even when loading the page fails,
    # so no Chrome/driver process is left running.
    driver.quit()

In [3]:
# Parse the captured page source into a searchable tree using Python's
# built-in HTML parser.
soup = BeautifulSoup(webpage, features='html.parser')

In [4]:
# Collect one record per table row found inside the "pollingLocationsDiv"
# span and build the DataFrame from those records in a single step.
span_holding_all_tables = soup.find('span', {"id": "pollingLocationsDiv"})
if span_holding_all_tables is None:
    # Without this guard the loop below fails with an opaque
    # "'NoneType' object has no attribute 'find_all'" error.
    raise ValueError('No <span id="pollingLocationsDiv"> element found in the page source')

records = []

# Iterate through each table within the span
for span_table in span_holding_all_tables.find_all("table"):
    tbody = span_table.find("tbody")
    if tbody is None:  # Tables without a body carry no data rows
        continue

    for row in tbody.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) < 4:  # Skip rows that do not have all four fields
            continue

        # Prefer the link text inside the address cell when a link exists;
        # otherwise fall back to the text of the cell itself.
        address_link = cells[2].find("a")
        address_cell = address_link if address_link is not None else cells[2]

        records.append({
            "ID": cells[0].text.strip(),
            "Location": cells[1].text.strip(),
            "Address": address_cell.text.strip(),
            "Voting Days": cells[3].text.strip(),
        })

# Building the frame once avoids re-copying it for every row, which is what
# calling pd.concat inside the loop did.
# NOTE: the misspelled "Locatoin" column (never populated) is kept in the
# schema on purpose: the In [8] cell drops it by that exact name. Remove the
# column here only together with that cell.
df = pd.DataFrame(
    records,
    columns=["ID", "Locatoin", "Address", "Voting Days", "Location"],
)

# Display the resulting DataFrame
print(df)

                ID Locatoin                                   Address   
0   Precinct000021      NaN               2152 Central Avenue Alameda  \
1   Precinct000022      NaN                 1815 Union Street Alameda   
2   Precinct000023      NaN                  1912 Central Ave Alameda   
3   Precinct000024      NaN                      2130 Otis Dr Alameda   
4   Precinct000025      NaN  555 Ralph Appezzato Memorial Pwy Alameda   
..             ...      ...                                       ...   
95  Precinct000019      NaN              945 Paseo Grande San Lorenzo   
96  Precinct000020      NaN              33520 Western Ave Union City   
97  Precinct000098      NaN          31224 Union City Blvd Union City   
98  Precinct000099      NaN                       703 C St Union City   
99  Precinct000100      NaN              2801 Hop Ranch Rd Union City   

                                  Voting Days   
0    Availability\n\t\t\t\t\t\t\t\t\t\t4-Days  \
1    Availability\n\t\t\t

In [5]:
# Collapse every run of whitespace (newlines, tabs, repeated spaces) into a
# single space and trim both ends of each scraped text column.
for text_column in ("Voting Days", "Location", "Address"):
    collapsed = df[text_column].str.replace(r'\s+', ' ', regex=True)
    df[text_column] = collapsed.str.strip()

print(df)

                ID Locatoin                                   Address   
0   Precinct000021      NaN               2152 Central Avenue Alameda  \
1   Precinct000022      NaN                 1815 Union Street Alameda   
2   Precinct000023      NaN                  1912 Central Ave Alameda   
3   Precinct000024      NaN                      2130 Otis Dr Alameda   
4   Precinct000025      NaN  555 Ralph Appezzato Memorial Pwy Alameda   
..             ...      ...                                       ...   
95  Precinct000019      NaN              945 Paseo Grande San Lorenzo   
96  Precinct000020      NaN              33520 Western Ave Union City   
97  Precinct000098      NaN          31224 Union City Blvd Union City   
98  Precinct000099      NaN                       703 C St Union City   
99  Precinct000100      NaN              2801 Hop Ranch Rd Union City   

             Voting Days                                          Location  
0    Availability 4-Days      Location Califor

In [6]:
# The scraped values still start with their header label ("Location",
# "Availability"); strip that leading label and any whitespace after it.
leading_labels = {
    'Location': r'^Location\s*',
    'Voting Days': r'^Availability\s*',
}
for column_name, label_pattern in leading_labels.items():
    df[column_name] = df[column_name].str.replace(label_pattern, '', regex=True)

In [7]:
# Show the frame after the leading header labels were stripped.
print(df)

                ID Locatoin                                   Address   
0   Precinct000021      NaN               2152 Central Avenue Alameda  \
1   Precinct000022      NaN                 1815 Union Street Alameda   
2   Precinct000023      NaN                  1912 Central Ave Alameda   
3   Precinct000024      NaN                      2130 Otis Dr Alameda   
4   Precinct000025      NaN  555 Ralph Appezzato Memorial Pwy Alameda   
..             ...      ...                                       ...   
95  Precinct000019      NaN              945 Paseo Grande San Lorenzo   
96  Precinct000020      NaN              33520 Western Ave Union City   
97  Precinct000098      NaN          31224 Union City Blvd Union City   
98  Precinct000099      NaN                       703 C St Union City   
99  Precinct000100      NaN              2801 Hop Ranch Rd Union City   

   Voting Days                                 Location  
0       4-Days      California Historical Radio Society  
1      

In [8]:
# Drop the misspelled "Locatoin" column, which holds only NaN values.
# errors="ignore" together with reassignment (instead of inplace=True) keeps
# this cell safe to re-run: a second run no longer raises KeyError once the
# column is already gone.
df = df.drop(columns=["Locatoin"], errors="ignore")

# Verify the result
print(df.head())

               ID                                   Address Voting Days   
0  Precinct000021               2152 Central Avenue Alameda      4-Days  \
1  Precinct000022                 1815 Union Street Alameda      4-Days   
2  Precinct000023                  1912 Central Ave Alameda      4-Days   
3  Precinct000024                      2130 Otis Dr Alameda      4-Days   
4  Precinct000025  555 Ralph Appezzato Memorial Pwy Alameda      4-Days   

                              Location  
0  California Historical Radio Society  
1            Bohol Circle Incorporated  
2     1st Congregational Ch Of Alameda  
3                   South Shore Center  
4                   College Of Alameda  


In [10]:
# Re-wrap df in a DataFrame. df was already built as a DataFrame in the
# scraping cell, so this does not change its contents.
df = pd.DataFrame(df)

In [11]:
# Display the cleaned frame as the cell's output.
df

Unnamed: 0,ID,Address,Voting Days,Location
0,Precinct000021,2152 Central Avenue Alameda,4-Days,California Historical Radio Society
1,Precinct000022,1815 Union Street Alameda,4-Days,Bohol Circle Incorporated
2,Precinct000023,1912 Central Ave Alameda,4-Days,1st Congregational Ch Of Alameda
3,Precinct000024,2130 Otis Dr Alameda,4-Days,South Shore Center
4,Precinct000025,555 Ralph Appezzato Memorial Pwy Alameda,4-Days,College Of Alameda
...,...,...,...,...
95,Precinct000019,945 Paseo Grande San Lorenzo,11-Days,Df Wesleyan Church Of Tongan In America
96,Precinct000020,33520 Western Ave Union City,11-Days,Union City Apostolic Church
97,Precinct000098,31224 Union City Blvd Union City,4-Days,The Mark Green Sports Center
98,Precinct000099,703 C St Union City,4-Days,Our Lady Of The Rosary Church


In [12]:
# Export the DataFrame to a CSV file, leaving out the index column.
# The file name is defined once so the confirmation message cannot drift
# from the path that was actually written.
output_csv = 'precincts_data.csv'
df.to_csv(output_csv, index=False)

print(f"CSV file has been saved as '{output_csv}'")

CSV file has been saved as 'precincts_data.csv'
