In [3]:
import os
from datetime import datetime
from pathlib import Path

from openai import OpenAI
import instructor
from pydantic import BaseModel, Field

In [5]:
from dotenv import load_dotenv

In [7]:
# Load settings from a .env file into the process environment before the
# client is built; the client reads its API key from os.environ.
load_dotenv()

True

In [8]:
# ---------------------------------------------------------------------
# 1.  Endpoint client
# ---------------------------------------------------------------------
# Build the OpenAI-compatible client for the hosted endpoint and hand it
# straight to instructor, so `client` is only ever bound to the wrapped
# client whose responses are parsed into the requested response model.
# The key is read from the BASE10_API_KEY environment variable; the literal
# "YOUR_API_KEY" is only a placeholder fallback.
client = instructor.from_openai(
    OpenAI(
        base_url="https://model-7qr7px53.api.baseten.co/environments/production/sync/v1",
        api_key=os.environ.get("BASE10_API_KEY", "YOUR_API_KEY"),
    ),
    mode=instructor.Mode.MD_JSON,
)

In [None]:
# ---------------------------------------------------------------------
# 2.  Design an appropriate system prompt for these tasks
# ---------------------------------------------------------------------

In [None]:
# TODO: Improve the system prompt for better performance on legal documents
# Sent as the "system" message of each request made in this notebook.
SYSTEM_PROMPT = "You are an expert legal transcript analyzer"

In [None]:
# ---------------------------------------------------------------------
# 3.  Load the transcript
# ---------------------------------------------------------------------

In [16]:
# Read the whole transcript into memory as one string. Path.read_text opens
# and closes the file itself, so no handle is left dangling, and the explicit
# encoding keeps the result independent of the platform's default locale.
RAW_TRANSCRIPT = Path("transcript.txt").read_text(encoding="utf-8")

In [17]:
# ---------------------------------------------------------------------
# 3.  Summarize the transcript (example)
# ---------------------------------------------------------------------

In [18]:
# Summarization prompt; `{raw_transcript}` is substituted via str.format()
# when the request is built. Leading/trailing newlines are part of the prompt.
USER_PROMPT_TEMPLATE = (
    "\n"
    "Summarize this transcript\n"
    "{raw_transcript}\n"
)

In [20]:
# Response schema for the summarization request: it is passed as
# `response_model`, and the parsed result exposes the text as `.summary`.
class TranscriptSummary(BaseModel):
    # Required string field (`...` means no default value is supplied).
    summary: str = Field(..., description="Transcript summary")

In [25]:
# Ask the model for a summary. create_with_completion hands back two objects,
# unpacked here as the parsed TranscriptSummary and the raw completion (kept
# so the model's reasoning output can be inspected afterwards).
summary_response, raw_summary_completion = client.chat.completions.create_with_completion(
    model="qwen-3",
    temperature=0.6,
    messages=[
        dict(role="system", content=SYSTEM_PROMPT),
        dict(role="user", content=USER_PROMPT_TEMPLATE.format(raw_transcript=RAW_TRANSCRIPT)),
    ],
    response_model=TranscriptSummary,
)

In [39]:
# Show the summary text carried by the parsed response object.
print(summary_response.summary)

The transcript records a deposition led by Terry Seligman, representing plaintiff Moonlight Plaza Associates, examining Chris Jacob, a senior environmental project manager at Cancun Farms. Jacob confirms her role involves overseeing environmental remediation of spill sites through hired consultants, though she does not directly manage day-to-day activities. She reviews consultants' documents, discusses remediation scope, and ensures compliance with state regulations for closure. Jacob clarifies that leases are not considered in remediation planning. The deposition includes technical setup discussions, document reviews (marked as Cancun Farms 1 and 2), and ongoing interrogatory responses when the transcript ends abruptly.


In [None]:
# This model also includes reasoning tokens. Do you agree with its reasoning?

In [37]:
# Print the reasoning text attached to the first choice of the raw completion.
# NOTE(review): `reasoning_content` looks endpoint-specific rather than a
# standard message field — confirm it is always present for this model.
print(raw_summary_completion.choices[0].message.reasoning_content)

Okay, so I need to summarize this transcript. Let me start by reading through it carefully.

The transcript starts with Terry Seligman from the law firm Richmond and Levine, P.C., representing the plaintiff Moonlight Plaza Associates. He's addressing Ms. Jacob, who's the deponent from Cancun Farms. There's some initial setup about technical issues and instructions for the deposition. Then, after confirming she's not under any influence, they start discussing the documents. 

The first document is Cancun Farms 1. Ms. Jacob is a senior environmental project manager who oversees spill remediation. She explains that they hire consultants for daily tasks but she reviews their documents and discusses the scope. There's some back and forth about whether leases affect remediation scope, and she says no because she doesn't review leases. The main goal is closing spills per regulations. 

Then they move to Cancun Farms 2, but the transcript cuts off while discussing interrogatory number four. 



In [None]:
# ---------------------------------------------------------------------
# 4.  Classify the type of legal proceeding
# ---------------------------------------------------------------------

In [None]:
# TODO: Design a prompt to classify the type of legal proceeding

In [41]:
# Classification prompt. `________` is a blank to be replaced with the
# instruction text; `{raw_transcript}` is filled in by .format() at call time.
# NOTE: this rebinds USER_PROMPT_TEMPLATE, replacing the summarization prompt
# assigned earlier in the notebook.
USER_PROMPT_TEMPLATE="""
________
{raw_transcript}
"""

In [None]:
# TODO: Add attributes to a Pydantic model for the classification response

In [40]:
# Response schema for the proceeding-classification task.
class LegalProceeding(BaseModel):
    # Placeholder: no fields are declared yet (see the TODO about adding attributes).
    pass

In [None]:
# Classify the type of legal proceeding. The reply is parsed into
# LegalProceeding, the schema defined for this task, not the summarization
# schema. The call yields the parsed model and the raw completion.
proceeding_response, raw_proceeding_completion = client.chat.completions.create_with_completion(
    model="qwen-3",
    response_model=LegalProceeding,
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT_TEMPLATE.format(raw_transcript=RAW_TRANSCRIPT)},
    ],
    temperature=0.6,
)

In [None]:
# ---------------------------------------------------------------------
# 5.  Identify each speaker listed
# ---------------------------------------------------------------------

In [None]:
# TODO: Design a prompt to identify each speaker listed (Speaker A, Speaker B, etc.)

In [None]:
# Speaker-identification prompt. `________` is a blank to be replaced with the
# instruction text; `{raw_transcript}` is filled in by .format() at call time.
# NOTE: this rebinds USER_PROMPT_TEMPLATE, replacing whichever prompt was
# assigned to that name before this cell ran.
USER_PROMPT_TEMPLATE="""
________
{raw_transcript}
"""

In [None]:
# Response schema for the speaker-identification task.
class Speakers(BaseModel):
    # Placeholder: no fields are declared yet (see the TODO about adding attributes).
    pass

In [None]:
# TODO: Add attributes to a Pydantic model for the speaker response

In [None]:
# Identify the speakers in the transcript. The reply is parsed into Speakers,
# the schema defined for this task, not the summarization schema. The call
# yields the parsed model and the raw completion.
speakers_response, raw_speakers_completion = client.chat.completions.create_with_completion(
    model="qwen-3",
    response_model=Speakers,
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT_TEMPLATE.format(raw_transcript=RAW_TRANSCRIPT)},
    ],
    temperature=0.6,
)

In [None]:
# ---------------------------------------------------------------------
# 6.  Identify sections of cross-talk
# ---------------------------------------------------------------------

In [None]:
# TODO: Design a prompt to identify sections of cross-talk

In [None]:
# Cross-talk prompt. `________` is a blank to be replaced with the
# instruction text; `{raw_transcript}` is filled in by .format() at call time.
# NOTE: this rebinds USER_PROMPT_TEMPLATE, replacing whichever prompt was
# assigned to that name before this cell ran.
USER_PROMPT_TEMPLATE="""
________
{raw_transcript}
"""

In [None]:
# TODO: Design the Pydantic model for the cross-talk response

In [None]:
# Response schema for the cross-talk identification task.
class CrossTalkAnalysis(BaseModel):
    # Placeholder: no fields are declared yet (see the TODO about designing this model).
    pass

In [None]:
# Identify sections of cross-talk. The reply is parsed into CrossTalkAnalysis,
# the schema defined for this task, not the summarization schema. The call
# yields the parsed model and the raw completion.
crosstalk_response, raw_crosstalk_completion = client.chat.completions.create_with_completion(
    model="qwen-3",
    response_model=CrossTalkAnalysis,
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT_TEMPLATE.format(raw_transcript=RAW_TRANSCRIPT)},
    ],
    temperature=0.6,
)