# Opinion Analysis

## Initialize and import

In [None]:
%load_ext autoreload
%autoreload

import json
from pathlib import Path

import dotenv
import jinja2
import pandas as pd

from src.apis.anthropic import claude_infer
from src.supreme_court import get_sc_data
from src.token_count import analyze_corpus_tokens

# Load variables from a .env file before any API client is created.
dotenv.load_dotenv()

# Supreme Court opinions are loaded through the project helper.
SC_PATH = Path("data", "sc_opinions.json")
sc_df = get_sc_data(SC_PATH)

# Constitutional Court data (NALUS.json) is read directly as JSON by pandas.
CC_PATH = Path("data", "NALUS.json")
cc_df = pd.read_json(CC_PATH)

## Compute tokens

In [None]:
# For each corpus, print a header with the number of cases and hand the
# `text` column (as a plain list) to analyze_corpus_tokens.
# NOTE(review): presumably analyze_corpus_tokens reports token statistics —
# confirm against src.token_count.
print(f"Supreme court ({len(sc_df)} cases):")
analyze_corpus_tokens(sc_df.text.tolist())

print(f"\nConstitutional court ({len(cc_df)} cases):")
analyze_corpus_tokens(cc_df.text.tolist())

## Templates

In [None]:
# Load the prompt template from the templates/ directory and compile it.
template_name = "extract_acts_02.jinja2"
template_path = Path("templates") / template_name
# Decode explicitly as UTF-8: without an encoding argument read_text() falls
# back to the platform's locale encoding, which can garble non-ASCII text.
# NOTE(review): assumes the template file is stored as UTF-8 — confirm.
template_str = template_path.read_text(encoding="utf-8")
template = jinja2.Template(template_str)

In [None]:
# Fill the template's `court_opinion` variable with the text of the first
# Supreme Court row.
rendered = template.render(court_opinion=sc_df.iloc[0].text)

# Send the rendered prompt to Claude and show the raw reply.
# NOTE(review): top_k=1 presumably makes sampling (near-)greedy — confirm
# against claude_infer.
result = claude_infer(rendered, top_k=1)
print(result)

In [None]:
# Parse the model reply as JSON; json.loads raises json.JSONDecodeError when
# the reply is not a valid JSON document.
# NOTE(review): assumes the template makes the model answer with JSON only
# (no surrounding prose or code fences) — confirm.
result_parsed = json.loads(result)

In [None]:
import itertools
import re


def is_civil_code_reference(item):
    """Return True when ``item`` looks like a civil-code citation.

    A string qualifies when its lower-cased form contains one of the literal
    markers ``89/2012``, ``o. z.`` or ``o.z.``, or when it matches the
    case-insensitive "občansk… zákon…" pattern below (a word starting with
    "občansk", whitespace, then a word starting with "zákon").

    Args:
        item: Citation text to inspect.

    Returns:
        bool: always a real boolean, never a ``re.Match`` object or ``None``,
        so the result is safe to compare, serialize or store.
    """
    needles = ("89/2012", "o. z.", "o.z.")
    # Lower-case once instead of once per needle.
    lowered = item.lower()
    if any(needle in lowered for needle in needles):
        return True
    # `is not None` turns the Match/None result into a plain bool.
    return re.search(r"občansk\w*\s+zákon\w*", item, re.IGNORECASE) is not None


def filter_civil_code_references(items):
    """Return the entries of ``items`` accepted by is_civil_code_reference.

    The input is traversed exactly once, so one-shot iterables (generators,
    iterators) are filtered correctly as well as lists; input order is kept.

    Args:
        items: Iterable of citation strings.

    Returns:
        list: the accepted entries, in their original order.
    """
    return [item for item in items if is_civil_code_reference(item)]


# Keep only the parsed items that look like civil-code citations.
# NOTE(review): assumes result_parsed is a list of strings — confirm against
# the template's expected output.
filtered = filter_civil_code_references(result_parsed)

In [None]:
from typing import Iterable


def extract_section_number(text: str):
    """Return the number following the first "§" in ``text``.

    Optional whitespace between the sign and the digits is allowed. Only the
    first "§" that is followed by digits is considered.

    Args:
        text: Citation text to scan.

    Returns:
        The digits as an ``int``, or ``None`` when no "§<digits>" occurs.
    """
    found = re.search(r'§\s*(\d+)', text)
    return None if found is None else int(found.group(1))
  

In [None]:
# Section number for each retained citation (None where no "§<digits>" is
# present); left as a bare expression so the notebook displays the list.
[extract_section_number(item) for item in filtered]

## Gemini

In [None]:
import base64
import os
from google import genai
from google.genai import types


def generate(text, model="gemini-2.0-flash"):
    """Stream a Gemini completion for ``text`` to standard output.

    The prompt is sent as a single user message and the reply is printed
    chunk by chunk as it arrives, without a trailing newline.

    Args:
        text: Prompt text sent as the user message.
        model: Name of the Gemini model to query; defaults to the model that
            was previously hard-coded.

    Returns:
        None. The reply is only printed, not returned.
    """
    # The API key is read from the GEMINI_API_KEY environment variable.
    client = genai.Client(
        api_key=os.environ.get("GEMINI_API_KEY"),
    )

    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part.from_text(text=text),
            ],
        ),
    ]
    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        top_k=40,
        max_output_tokens=8192,
        response_mime_type="text/plain",
    )

    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        # chunk.text is None for chunks that carry no text parts; skip those
        # so the literal string "None" is never printed into the reply.
        if chunk.text:
            print(chunk.text, end="")


# Smoke test: the Czech prompt asks for a Czech poem that must rhyme.
generate("napiš mi českou básničku, musí se rýmovat")

## OpenAI

In [None]:
from openai import OpenAI


def openai_infer(
    text: str,
    max_tokens: int = 4000,
    temperature: float = 1,
    mini: bool = False
) -> str:
    """Send ``text`` as a single user message to OpenAI chat completions.

    Args:
        text: Prompt text for the user message.
        max_tokens: Value passed as ``max_tokens`` to the API call.
        temperature: Value passed as ``temperature`` to the API call.
        mini: Use "gpt-4o-mini" when true, otherwise "gpt-4".

    Returns:
        The message content of the first returned choice.
    """
    if mini:
        model_name = "gpt-4o-mini"
    else:
        model_name = "gpt-4"

    user_message = {"role": "user", "content": text}

    # The client is built without explicit arguments, so its configuration
    # comes from the library defaults (presumably the environment — confirm).
    response = OpenAI().chat.completions.create(
        model=model_name,
        max_tokens=max_tokens,
        temperature=temperature,
        messages=[user_message],
    )

    first_choice = response.choices[0]
    # NOTE(review): content may be None per the SDK's typing — confirm callers
    # are fine with that despite the `-> str` annotation.
    return first_choice.message.content


# Smoke test with the non-mini model: the Czech prompt asks for a poem that
# must rhyme.
print(openai_infer("Napiš mi básničku. Musí se rýmovat.", mini=False))