In [1]:
import pdfplumber
import spacy
import os

# Load spaCy's pre-trained model for NLP (detecting people's names)
nlp = spacy.load("en_core_web_sm")

# Function to extract District Court Judge candidates using NLP
def extract_judge_candidates_intelligently(pdf_path):
    """Collect PERSON entities from pages that mention "District Court Judge".

    Every page of the PDF at ``pdf_path`` is scanned. When a page's extracted
    text contains the literal phrase "District Court Judge", the whole page
    text is run through the module-level spaCy pipeline ``nlp`` and every
    entity labelled ``PERSON`` is recorded.

    Note that the filter is per page, not per contest: all PERSON entities
    found on a matching page are returned, including people who appear in
    other sections of that page.

    Args:
        pdf_path: Path to the PDF file to scan.

    Returns:
        A list of names in page order, then entity order. Duplicates are
        kept. Each name has its whitespace normalized to single spaces.
    """
    candidates = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # A page without a text layer may yield None (or an empty string,
            # depending on the pdfplumber version); treat both as "no text"
            # instead of failing on the substring test below.
            text = page.extract_text() or ""
            if "District Court Judge" not in text:
                continue

            # Use Named Entity Recognition (NER) to extract people's names
            doc = nlp(text)
            for ent in doc.ents:
                if ent.label_ != "PERSON":
                    continue
                # An entity span can cross a line break in the page text;
                # collapse all internal whitespace so each name is a single
                # line rather than carrying an embedded newline.
                name = " ".join(ent.text.split())
                if name:
                    candidates.append(name)
    return candidates

# Directory containing the PDFs
pdf_directory = "dir"

# Maps each PDF filename to the list of names extracted from it; files that
# yield no names are left out.
all_candidates = {}

# os.listdir() returns entries in arbitrary order, so sort them to make the
# report reproducible from run to run.
for filename in sorted(os.listdir(pdf_directory)):
    # Compare the extension case-insensitively so "X.PDF" is not skipped.
    if filename.lower().endswith(".pdf"):
        pdf_path = os.path.join(pdf_directory, filename)
        candidates = extract_judge_candidates_intelligently(pdf_path)
        if candidates:
            all_candidates[filename] = candidates

# Display the extracted information, one section per PDF
for filename, candidates in all_candidates.items():
    print(f"Candidates in {filename}:")
    for candidate in candidates:
        print(f" - {candidate}")





Candidates in ALAMANCE-20161108-Style001.pdf:
 - Alamance-Burlington
 - Jackie S. Cole
 - Brian Feeley
 - Michael R.
 - Mike
 - Morgan
 - Patsy Simpson
 - Robert H. (
 - Bob
 - Pamela Tyler Thompson
 - Steve A. Van Pelt
 - Phil Berger
 - Linda Stephens
 - Hunter Murphy
 - Margaret Eagles Barrett Brown
 - Donald Ray Buie
 - David Michael Spruill
 - Kezia Graham Workman
 - Bob Hunter
 - Abe Jones
 - Richard Dietz
 - Vince Rozier
 - Valerie Zachary
 - Rickye McKoy-Mitchell
 - Tom Lambeth
 - Steve Messick
 - Kathryn
Candidates in CHEROKEE-20161108-Style007-HANGING_DOG.pdf:
 - Richard K. Walker
 - Michael R.
 - Robert H. (
 - Bob
 - Johnny Shields
 - Phil Berger
 - Linda Stephens
 - Hunter Murphy
 - Margaret Eagles
 - Donald Ray Buie
Unaffiliated
 - Bob Hunter
 - Abe Jones
 - Richard Dietz
 - Vince Rozier
 - Valerie Zachary
 - Rickye McKoy-Mitchell
 - Monica Hayes Leslie
 - Tessa Shelton Sellers
 - Kimberly N. Carpenter
Candidates in GUILFORD-20161108-Style050.pdf:
 - |C.
 - Steve Troxler
 