In [2]:
import aiohttp
import os
from selectolax.parser import HTMLParser
from dotenv import load_dotenv
from typing import Optional
from pydantic import BaseModel, ConfigDict
from selectolax.parser import HTMLParser

OCSN_BASE_URL = "https://www.oscn.net"

load_dotenv()

class WebPage(BaseModel):
    url: str
    html: str
    parser: HTMLParser
    model_config = ConfigDict(arbitrary_types_allowed=True)


decisions_url = "https://www.oscn.net/decisions/ok/30"
headers = {
    "User-Agent": os.getenv("OSCN_USER_AGENT")
}
async def GET(url: str):
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as response:
            if response.status == 200:
                html = await response.text(encoding="latin-1")
                parser = HTMLParser(html)
                return WebPage(url=url, html=html, parser=parser)
            else:
                print(f"Error: Status code {response.status}")
                return None

# Execute the async function
decisions_page = await GET(decisions_url)

In [3]:
from selectolax.parser import HTMLParser
from dataclasses import dataclass
from datetime import datetime
from pydantic import BaseModel

class CourtCase(BaseModel):
    style: str
    citation: str
    summary: str
    date_decided: datetime
    detail_url: str
    details: Optional[list[str]] = None



def parse_court_cases(page: WebPage) -> list[CourtCase]:
    cases = []

    # Find all case elements
    case_elements = page.parser.css("li.decision")

    for case in case_elements:
        # Get the case style (title)
        style_element = case.css_first("h4.style a")
        style = style_element.text() if style_element else ""

        # Get the detail URL
        detail_url = style_element.attributes.get("href", "") if style_element else ""

        # Get the citation
        citation_element = case.css_first("a.citation")
        citation = citation_element.text() if citation_element else ""

        # Get the summary (remove the paragraph number)
        summary_element = case.css_first("p.summaryParagraph")
        summary = (
            summary_element.text()
            .replace("¶0 ", "")
            .replace("¶ 0 ", "")
            .replace("¶ 1 ", "")
            if summary_element
            else ""
        )

        # Get and parse the date
        date_element = case.css_first("span.decidedDate")
        date_str = (
            date_element.text().strip().replace("Decided ", "") if date_element else ""
        )
        date_decided = datetime.strptime(date_str, "%m/%d/%Y") if date_str else None

        case_obj = CourtCase(
            style=style,
            citation=citation,
            summary=summary,
            date_decided=date_decided,
            detail_url=f"{OCSN_BASE_URL}{detail_url}",
        )
        cases.append(case_obj)

    return cases


cases = parse_court_cases(decisions_page)

In [4]:
for case in cases:
    print(f"\nCase: {case.json()}")
    print(case.detail_url)



Case: {"style":"CUMMINGS v. SASNETT","citation":"2025 OK 7","summary":"Years after his divorce was final, a firefighter retired and elected to participate in the Oklahoma Firefighters Pension and Retirement Plan B program. The firefighter's former spouse sought to enforce the parties' divorce decree, requiring the firefighter to pay her a portion of his Plan B benefits. The district court ruled in favor of the former spouse and ordered the firefighter to pay his former spouse a portion of the funds in the Plan B account. The Court of Civil Appeals reversed. This Court granted certiorari. We hold that when a vested firefighter selects the Plan B option post-divorce and the divorce decree does not specify the allocation of these funds, the Plan B account is divisible marital property to the extent that any funds in the account are attributable to the marital years.","date_decided":"2025-01-22T00:00:00","detail_url":"https://www.oscn.net/applications/oscn/deliverdocument.asp?citeid=54778

/var/folders/b8/7gv3b8555kz5nl38l_my03w80000gn/T/ipykernel_20059/1349751183.py:2: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  print(f"\nCase: {case.json()}")


In [5]:

async def parse_case_details(page: WebPage) -> Optional[list[str]]:
    paragraphs = [p.text().strip() for p in page.parser.css("p")]
    return paragraphs
    


for case in cases:
    case_page = await GET(case.detail_url)
    details = await parse_case_details(case_page)
    case.details = details
    print(details)


['CUMMINGS v. SASNETT2025 OK 7Case Number: 120418Decided: 01/22/2025THE SUPREME COURT OF THE STATE OF OKLAHOMA', 'NOTICE: THIS OPINION HAS NOT BEEN RELEASED FOR PUBLICATION. UNTIL RELEASED, IT IS SUBJECT TO REVISION OR WITHDRAWAL.', '', '', 'LUTHER GREGORY CUMMINGS, Respondent/Appellant,\nv.\nBRENDA SASNETT (FORMERLY CUMMINGS), Petitioner/Appellee.', 'ON CERTIORARI FROM THE COURT OF CIVIL APPEALS, DIVISION IV', "¶0 Years after his divorce was final, a firefighter retired and elected to participate in the Oklahoma Firefighters Pension and Retirement Plan B program. The firefighter's former spouse sought to enforce the parties' divorce decree, requiring the firefighter to pay her a portion of his Plan B benefits. The district court ruled in favor of the former spouse and ordered the firefighter to pay his former spouse a portion of the funds in the Plan B account. The Court of Civil Appeals reversed. This Court granted certiorari. We hold that when a vested firefighter selects the Plan B

In [6]:
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig  # Add this import
import torch

# Initialize model and tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# 4-bit quantization configuration
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
)
# Check if MPS is available, otherwise fallback to CPU
device = "mps" if torch.backends.mps.is_available() else "cpu"


model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # quantization_config=quantization_config,
    torch_dtype=torch.float16,
    do_sample=True,
    device_map=device,
)
async def make_case_prompt(case: CourtCase) -> str:
    case_text = f"""
    Case: {case.style}
    Citation: {case.citation}
    Summary: {case.summary}
    Details: {' '.join(case.details if case.details else [])}
    """
    prompt = f"""Please provide a concise news article of the following court case:
        {case_text}
    """

    return prompt


async def summarize_case(case: CourtCase) -> str:
    # Create prompt
    prompt = await make_case_prompt(case)
    
    # Tokenize and generate
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )
    
    # Decode and return the generated summary
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Remove the prompt from the response
    summary = summary[len(prompt):]
    
    return summary.strip()

# Generate summaries for each case
articles: list[str] = []
torch.mps.empty_cache()
for case in cases:
    try:
        summary = await summarize_case(case)
        # Clear MPS memory cache after each generation
        if device == "mps":
            torch.mps.empty_cache()
        print(f"\nAI Summary for {case.style}:")
        print(summary)
        articles.append(summary)
        print("-" * 80)
    except Exception as e:
        print(f"Error generating summary for {case.style}: {e}")
        print("-" * 80)

Loading checkpoint shards: 100%|██████████| 3/3 [00:19<00:00,  6.45s/it]



AI Summary for CUMMINGS v. SASNETT:
In this case, a firefighter named Luther Gregory Cummings retired and elected to participate in the Oklahoma Firefighters Pension and Retirement Plan B program. His former spouse, Brenda Sasnett, sought to enforce their divorce decree, which awarded her 50% of Cummings' benefits from his Oklahoma Firefighter Plan A retirement. However, the decree did not specify the allocation of Plan B benefits.

     The district court ruled in favor of Sasnett, ordering Cummings to pay her a portion of the funds in his Plan B account. The Court of Civil Appeals reversed this decision, holding that the Plan B account was not marital property.

     The Oklahoma Supreme Court granted certiorari and held that when a vested firefighter elects the Plan B option after a divorce is final and the divorce decree is silent as to the award of those Plan B funds, the Plan B account is divisible marital property to the extent that any funds in the account are attributable to 

In [7]:
import csv
from datetime import datetime

# Create a filename with current timestamp
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"court_summaries_{timestamp}.csv"

# Write articles to CSV
with open(filename, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['Article'])  # Header
    for article in articles:
        writer.writerow([article])

print(f"Saved {len(articles)} articles to {filename}")


Saved 5 articles to court_summaries_20250218_152917.csv


In [10]:
# Load Mistral model for podcast script generation
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from typing import List
import logging

def create_podcast_prompt(articles: list[str]) -> str:
    combined_articles = "\n\n".join(articles)
    return f"""Below are several news summaries about court cases. 
    Create a natural, engaging podcast script that covers these stories. Include an introduction and conclusion. 
    Make transitions between stories smooth and conversational.

News Summaries:
{combined_articles}

Create a podcast script that:
1. Has a brief introduction
2. Covers each story in an engaging way
3. Uses natural transitions between stories
4. Has a conclusion
5. Is written in a conversational tone

Podcast Script:"""

async def generate_podcast_script(articles: list[str]) -> str:
    # Initialize model and tokenizer
    model_id = "mistralai/Mistral-7B-Instruct-v0.2"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.float16, device_map="mps"
    )

    # Create prompt
    prompt = create_podcast_prompt(articles)

    # Tokenize and generate
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=2048,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode and return script
    script = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return script[len(prompt):].strip()

# Generate podcast script
print("\nGenerating podcast script...")
podcast_script = await generate_podcast_script(articles)

# Save script to file
script_filename = f"podcast_script_{timestamp}.txt"
with open(script_filename, 'w', encoding='utf-8') as f:
    f.write(podcast_script)

print(f"\nPodcast script saved to {script_filename}")
print("\nPodcast Script Preview:")
print("-" * 80)
print(podcast_script[:500] + "...")
print("-" * 80)


Generating podcast script...


Loading checkpoint shards: 100%|██████████| 3/3 [00:20<00:00,  6.78s/it]



Podcast script saved to podcast_script_20250218_152917.txt

Podcast Script Preview:
--------------------------------------------------------------------------------
[Intro music]

Host: (Cheerful voice) Welcome, listeners, to another episode of the Oklahoma Legal Roundup, the podcast where we bring you the latest news and developments in Oklahoma law. I'm your host, [Name], and today we've got some interesting cases for you. So let's dive right in.

[First Story: Firefighter Pension and Retirement Plan]

Host: (Excited tone) Our first story involves a dispute over firefighter pension benefits in Oklahoma. Luther Gregory Cummings, a retired firefighter, chos...
--------------------------------------------------------------------------------


In [13]:
from gtts import gTTS
from uuid import uuid4
import logging
# Generate audio using gTTS
output_path = f"podcasts/{uuid4()}.mp3"

logger = logging.getLogger(__name__)

try:
    logger.debug(f"Generating audio for script: {podcast_script[:100]}...")
    tts = gTTS(text=podcast_script, lang="en")
    tts.save(output_path)
    audio_status = f"Podcast audio generated and saved as {output_path}"
    logger.info(audio_status)
except Exception as e:
    audio_status = f"Error in audio generation: {str(e)}"
    logger.error(audio_status)

print(audio_status)

Podcast audio generated and saved as podcasts/7b42cf41-fce4-469e-8c6d-bfaba5898cba.mp3
