In [24]:
import os
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import random
import csv

In [4]:
# Build the path to the CSV holding judge names and disclosure availability status.
# The file is looked up under data/ two directory levels above the current
# working directory. Note that `data_dir` is the full path of the CSV file itself.
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]
grandparent_dir = os.path.split(parent_dir)[0]
csv_relative_path = 'data/persons_positions/people_positions_disc_summary.csv'
data_dir = os.path.join(grandparent_dir, csv_relative_path)
print(data_dir)

/Users/eshan23/eshanprashar_git_profile/judges-conflicts/data/persons_positions/people_positions_disc_summary.csv


In [22]:
# Read the summary CSV into a DataFrame and preview its first five rows
df_jud_fin_status = pd.read_csv(filepath_or_buffer=data_dir)
df_jud_fin_status.head(n=5)

Unnamed: 0,person_id,judge_flag,position,status,disclosures_count,name_first,name_middle,name_last,political_affiliations,race
0,1,0,1,non-judge,,George,,Washington,f,w
1,2,0,1,non-judge,,John,,Adams,f,w
2,3,0,1,non-judge,,Thomas,,Jefferson,j,w
3,4,0,1,non-judge,,James,,Madison,j,w
4,5,0,1,non-judge,,James,,Monroe,j,w


In [23]:
# Create a "First Last" name column to use as the search term
df_jud_fin_status['name'] = df_jud_fin_status['name_first'] + ' ' + df_jud_fin_status['name_last']

# Keep people flagged as judges (judge_flag >= 1) that have no disclosures
# recorded (disclosures_count is NaN)
df_jud_fin_status_filtered = df_jud_fin_status[(df_jud_fin_status['judge_flag'] >= 1) & (df_jud_fin_status['disclosures_count'].isnull())]

# String concatenation yields NaN when either name part is missing; drop those
# entries so the search list only ever contains real strings (a float NaN would
# otherwise be passed to the browser as a search term).
judges = df_jud_fin_status_filtered['name'].dropna().to_list()
print(judges)
print(len(judges))

['William Harrison', 'William Taft', 'John Kennedy', 'Matthew Abruzzo', 'Marcus Acheson', 'James Ackerman', 'Jackson Adair', 'Arlin Adams', 'Elmer Adams', 'George Adams', 'George Adams', 'William Adamson', 'Jesse Adkins', 'Simon Adler', 'Robert Aguilar', 'Robert Ainsworth', 'Alexander Akerman', 'Bailey Aldrich', 'Edgar Aldrich', 'James Alesia', 'Charles Allen', 'Florence Allen', 'William Allen', 'Wayne Alley', 'Clarence Allgood', 'James Allred', 'James Almond', 'Samuel Alschuler', 'Frank Altimari', 'Richard Alvey', 'Charles Amidon', 'Albert Anderson', 'Aldon Anderson', 'George Anderson', 'Harry Anderson', 'J. Anderson', 'Robert Anderson', 'Thomas Anderson', 'Maurice Andrews', 'Alexis Angell', 'Herschel Arant', 'George Arceneaux', 'Robert Archbald', 'Thurman Arnold', 'Winston Arnow', 'Sidney Aronovitz', 'Alfred Arraj', 'Carl Atkins', 'George Atkinson', 'William Atwell', 'Anthony Augelli', 'Richard Austin', 'John Avis', 'Thomas Bailey', 'Francis Baker', 'John Baker', 'William Baker', 'Al

In [26]:
# Launch a Chrome session on the disclosure portal and pause for five seconds.
# NOTE(review): the original comment mentions registering; presumably that step
# is done by hand in the opened window -- confirm.
url = 'https://pub.jefs.uscourts.gov/#'
driver = webdriver.Chrome()
driver.get(url)
time.sleep(5)

In [27]:
# Search for each judge
def search_name(name):
    """Search the disclosure portal for ``name`` and collect the listed results.

    Uses the module-level ``driver`` (an already opened Selenium session).

    Parameters
    ----------
    name : str
        The text typed into the portal's search box.

    Returns
    -------
    list[list[str]] or None
        One ``[name, judge_name, position, court_name]`` row per result, in
        page order. Returns ``None`` when the page shows its "no-results"
        element or when any error occurs during the search (the error is
        printed, not raised).

    Notes
    -----
    * Every pass re-reads all result elements currently on the page, so rows
      already collected are skipped; otherwise each "Load More" click would
      append the earlier results again.
    * The loop ends when a pass yields no new rows, which also prevents an
      endless loop if the "Load More" button never disappears.
    """
    try:
        # Open the website
        driver.get("https://pub.jefs.uscourts.gov/#")

        # Locate the search input field and submit the name
        search_box = driver.find_element(By.ID, "srch-qry")
        search_box.clear()
        search_box.send_keys(name)
        search_box.send_keys(Keys.RETURN)

        # Wait for results to load
        time.sleep(5)

        # find_elements returns an empty list (rather than raising) when absent
        if driver.find_elements(By.ID, "no-results"):
            return None  # No results found

        results = []
        seen_rows = set()  # rows already collected on an earlier pass

        while True:
            # NOTE(review): the comma makes ".result-metadata" match anywhere on
            # the page, not only inside #main_area_div -- confirm this is intended.
            result_elements = driver.find_elements(By.CSS_SELECTOR, "#main_area_div .result-name, .result-metadata")
            texts = [element.text for element in result_elements]

            # Assumes every result contributes three consecutive elements
            # (name, position, court) -- TODO confirm against the page markup.
            new_rows = 0
            for start in range(0, len(texts), 3):
                fields = texts[start:start + 3]
                fields += [""] * (3 - len(fields))  # pad an incomplete trailing group
                row = (name, *fields)
                if row not in seen_rows:
                    seen_rows.add(row)
                    results.append(list(row))
                    new_rows += 1

            if new_rows == 0:
                break  # Nothing new appeared; stop instead of clicking forever

            # Click "Load More" if it is present; keep what was collected if
            # the click (or the lookup) fails.
            try:
                load_more_buttons = driver.find_elements(By.CLASS_NAME, "load-more-btn")
                if not load_more_buttons:
                    break  # No more results
                load_more_buttons[0].click()
            except Exception as load_error:
                print(f"Could not load more results for {name}: {load_error}")
                break
            time.sleep(3)  # Wait for more results to load

        return results

    except Exception as e:
        print(f"Error searching for {name}: {e}")
        return None

In [31]:
# Take a reproducible random sample of up to 1200 names from the list
random.seed(42)
# random.sample raises ValueError when asked for more items than the list holds,
# so cap the request at the number of names actually available
sample_size = min(1200, len(judges))
judges_sample = random.sample(judges, sample_size)
print(judges_sample)

['Chad Firetag', 'Samuel West', 'Charles Davis', 'Michael Ryan', 'Allison Humphreys', 'Howard Fought', "Patrick O'Sullivan", 'Hubert Teitelbaum', 'Nancy Koba', 'Eudon Ferrell', 'Albert Reeves', 'Charles Wachob', 'Gordon Baranco', 'Jesse Eschbach', 'Thomas DuVal', 'William Sessions', 'Robin Hudson', 'James Drew', 'Donald Meloche', 'John Monterosso', 'Mark Delahay', 'Dan Grimmer', 'Albert Angstman', 'Danilo Lacayo', 'Leonard Marquez', 'Thomas Gray', 'Jamoa Moberly', 'David Aisenson', 'John Webb', 'Laurence Kay', 'Charles Koosed', 'Rhonda Wood', 'Charles Bellinger', 'James Hanna', 'Teresa Pearson', 'J. Beecher', 'Payne Breazeale', 'James Pilkinton', 'Thomas Kluczynski', 'John Leventhal', 'Robert Davis', 'George Taylor', 'Charles Schwartz', 'Eugene Phillips', 'James Smith', 'David Ishee', 'Rose Ledet', 'Terry Bork', 'Edward Gehl', 'Alexander Harvey', 'Bonnie Sudderth', 'H. Moore', 'Eric DuTemple', 'Fred Struckmeyer', 'Richard Elmore', 'Gunnar Nordbye', 'Craig Manson', 'Lester Roth', 'Eliza

In [32]:
# Search every sampled name and store the results in a CSV file.
# The browser is closed in `finally` so it is released even if the loop raises
# or the run is interrupted.
try:
    with open("judge_search_results.csv", mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Search Name", "Judge Name", "Position", "Court Name"])

        # Iterate over the list of names and search each
        for name in judges_sample:
            results = search_name(name)
            if results:
                writer.writerows(results)
            else:
                # search_name returns None both when nothing was found and when
                # the search failed, so either case produces a blank row
                writer.writerow([name, "", "", ""])
            # Push rows to disk after every name so a crash part-way through
            # this long run keeps the results gathered so far
            file.flush()
finally:
    # Close the browser once done
    driver.quit()

Error searching for Russell Kussman: Message: disconnected: not connected to DevTools
  (failed to check if window was closed: disconnected: not connected to DevTools)
  (Session info: chrome=129.0.6668.103)
Stacktrace:
0   chromedriver                        0x0000000102bcc500 cxxbridge1$str$ptr + 1917112
1   chromedriver                        0x0000000102bc4890 cxxbridge1$str$ptr + 1885256
2   chromedriver                        0x00000001027d4538 cxxbridge1$string$len + 89424
3   chromedriver                        0x00000001027be414 core::str::slice_error_fail::hbaf5d05fe3921cd2 + 63828
4   chromedriver                        0x00000001027be354 core::str::slice_error_fail::hbaf5d05fe3921cd2 + 63636
5   chromedriver                        0x00000001028525e0 cxxbridge1$string$len + 605688
6   chromedriver                        0x000000010280d374 cxxbridge1$string$len + 322444
7   chromedriver                        0x000000010280dfc4 cxxbridge1$string$len + 325596
8   chromedriver 