In [10]:
import requests
import os
import re
import glob


In [11]:
def find_lowest_year(root_dir):
    """Return the smallest year prefix among '*-Ohio-*.pdf' files under root_dir.

    The search is recursive. A file's year is the text before the first '-'
    in its base name; files whose prefix is not all digits are ignored.

    Args:
        root_dir: Directory to search (its subdirectories are included).

    Returns:
        The lowest year as an int, or None when no file has a numeric prefix.
    """
    search_glob = os.path.join(root_dir, '**/*-Ohio-*.pdf')
    prefixes = (
        os.path.basename(path).split('-')[0]
        for path in glob.glob(search_glob, recursive=True)
    )
    years = [int(prefix) for prefix in prefixes if prefix.isdigit()]
    return min(years) if years else None

# # Replace 'your_directory_path' with the path to the root directory containing the subdirectories
# root_directory = '/Users/deantaylor/ohio_case_scrape'
# year = find_lowest_year(root_directory)
# print(f"The lowest year is: {year}")


In [14]:
def find_next_case_number(directory, year):
    """Return the next unused case number for `year` under `directory`.

    Walks `directory` recursively, looks for files named exactly
    '<year>-Ohio-<number>.pdf', and returns the highest number found plus one.

    Args:
        directory: Root directory to scan (subdirectories included).
        year: Year whose files are inspected. If None, the lowest year found
            by find_lowest_year(directory) is used instead.

    Returns:
        int: highest existing case number for the year + 1, or 1 when no
        file for that year exists.
    """
    if year is None:
        # Previously the argument was always overwritten with this value;
        # keep that behaviour only as a fallback for callers without a year.
        year = find_lowest_year(directory)

    # Escape the year so it is matched literally, and require the whole file
    # name to match so e.g. '2020-Ohio-5.pdf.bak' is not counted.
    pattern = re.compile(rf"{re.escape(str(year))}-Ohio-(\d+)\.pdf")

    case_numbers = (
        int(match.group(1))
        for _root, _dirs, files in os.walk(directory)
        for match in map(pattern.fullmatch, files)
        if match
    )
    return max(case_numbers, default=0) + 1

# Usage example: locate the resume point for the scrape.
# `directory_path` and `year` are module-level names that the download cell
# below reads, so this cell must be run before it.
# NOTE(review): hardcoded absolute local path — adjust before running elsewhere.
directory_path = '/Users/deantaylor/ohio_case_scrape'
# Lowest year already present on disk (None if no matching PDF exists).
year = find_lowest_year(directory_path)
# Next case number to fetch for that year.
case_no = find_next_case_number(directory_path, year)
print(f"Next Case Number: {year}-Ohio-{case_no}")


Next Case Number: 2020-Ohio-160


In [15]:
def download_pdf(district, case_number, year, timeout=30):
    """Download one opinion PDF into the 'District_<district>' folder.

    Requests
    https://www.supremecourt.ohio.gov/rod/docs/pdf/<district>/<year>/<year>-Ohio-<case_number>.pdf
    and, on HTTP 200, writes the body to
    'District_<district>/<year>-Ohio-<case_number>.pdf' relative to the
    current working directory (the folder is created if missing).

    Args:
        district: District number used in the URL path and folder name.
        case_number: Case number within the year.
        year: Year used in the URL path and file name.
        timeout: Seconds to wait for the server before requests raises a
            timeout error. Without a timeout a stalled connection would
            block the scrape indefinitely.

    Returns:
        True if the PDF was downloaded and saved, False for any non-200
        status code.

    Raises:
        requests.exceptions.RequestException: on connection errors or
            timeouts; these are not treated as "PDF does not exist".
    """
    url = f"https://www.supremecourt.ohio.gov/rod/docs/pdf/{district}/{year}/{year}-Ohio-{case_number}.pdf"
    response = requests.get(url, timeout=timeout)

    # Anything other than 200 is treated as "no such PDF in this district".
    if response.status_code != 200:
        return False

    folder_name = f"District_{district}"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(folder_name, exist_ok=True)

    file_path = os.path.join(folder_name, f"{year}-Ohio-{case_number}.pdf")
    with open(file_path, 'wb') as file:
        file.write(response.content)
    return True

# Main loop: for each case number, probe every district until one serves the
# PDF; after a long run of misses, treat the year as finished and move to the
# previous year, restarting at case number 1.
MAX_CONSECUTIVE_MISSES = 20   # misses in a row tolerated before a year is considered done
DISTRICTS = range(1, 13)      # district numbers 1..12 are tried for every case number

if year is None:
    # find_lowest_year() returns None when no PDFs exist yet; fail here with
    # a clear message instead of a TypeError at `year -= 1` later on.
    raise ValueError("No starting year available: no '<year>-Ohio-<n>.pdf' files were found.")

error = 0
case_number = find_next_case_number(directory_path, year)
# case_number = 3099
downloads_this_year = 0
# Only a year scanned from case 1 can be judged "empty"; a resumed year may
# simply have nothing left to download.
year_scanned_from_start = case_number == 1

while True:
    for district in DISTRICTS:
        if download_pdf(district, case_number, year):
            print(f"Downloaded: District {district} - {year}-Ohio-{case_number}.pdf")
            case_number += 1
            error = 0
            downloads_this_year += 1
            break
    else:
        # No district had this case number: count the miss and keep going.
        print(f"Case number {case_number} not found in any district.")
        error += 1
        if error > MAX_CONSECUTIVE_MISSES:
            print(f'done with year {year}')
            if year_scanned_from_start and downloads_this_year == 0:
                # A full scan from case 1 found nothing, so there is nothing
                # further back to fetch. Without this the loop would keep
                # decrementing the year forever.
                print(f'no cases found for year {year}; stopping')
                break
            year -= 1
            # Record the year now being scraped in year.txt.
            with open('year.txt', 'w') as f:
                f.write(str(year))
            error = 0
            case_number = 1
            downloads_this_year = 0
            year_scanned_from_start = True
            continue
        case_number += 1
        print(f'error count: {error}')

Downloaded: District 11 - 2020-Ohio-160.pdf
Downloaded: District 11 - 2020-Ohio-161.pdf
Downloaded: District 11 - 2020-Ohio-162.pdf
Downloaded: District 11 - 2020-Ohio-163.pdf
Downloaded: District 1 - 2020-Ohio-164.pdf
Downloaded: District 10 - 2020-Ohio-165.pdf
Downloaded: District 10 - 2020-Ohio-166.pdf
Case number 167 not found in any district.
error count: 1
Case number 168 not found in any district.
error count: 2
Case number 169 not found in any district.
error count: 3
Case number 170 not found in any district.
error count: 4
Downloaded: District 9 - 2020-Ohio-171.pdf
Downloaded: District 5 - 2020-Ohio-172.pdf
Downloaded: District 5 - 2020-Ohio-173.pdf
Downloaded: District 5 - 2020-Ohio-174.pdf
Downloaded: District 5 - 2020-Ohio-175.pdf
Downloaded: District 5 - 2020-Ohio-176.pdf
Downloaded: District 5 - 2020-Ohio-177.pdf
Downloaded: District 6 - 2020-Ohio-178.pdf
Case number 179 not found in any district.
error count: 1
Case number 180 not found in any district.
error count: 2
D

KeyboardInterrupt: 