In [28]:
! pip install -q ollama openai python-dotenv instructor jsonschema requests dotmap

# Import ollama and verify it works


In [1]:
# load OPENAI_API_KEY from environment variables
import os
from dotenv import load_dotenv
import requests

load_dotenv()

# load OPENAI_API_KEY from environment variables
openai_api_key = os.getenv("OPENAI_API_KEY")

print(openai_api_key[-5:])


fJjIA


In [230]:
from openai import OpenAI

import instructor

clients = {
  'openai': OpenAI(api_key=openai_api_key),
  'ollama': OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')
}
# models = ['llama3.1', 'gpt-4o-mini']
models = ['llama3.1', 'gpt-4o-mini', 'gpt-4o-2024-08-06']
# models = ['gpt-4o-mini']


model2client = {
  'aya': 'ollama',
  'llama3.1': 'ollama',
  'gpt-4o-mini': 'openai',
  'gpt-4o-2024-08-06': 'openai'
}

def get_client(model_name):
  return clients[model2client[model_name]]

# Load the data

In [306]:
import re
from typing import List, Literal
from pydantic import BaseModel, Field


class Case(BaseModel):
    mask: str = Field(...)
    grammatical_cases: List[Literal[
        "nominative",
        "genitive",
        "dative",
        "accusative",
        "instrumental",
        "locative",
        "vocative"
    ]]


class Gender(BaseModel):
    grammatical_gender: Literal["masculine", "feminine", "any"]
    mask: str = Field(...)


class GendersWithCasesSchema(BaseModel):
    cases: List[Case]
    genders: List[Gender]


def parse_text_to_genders_with_cases(text: str) -> GendersWithCasesSchema:
    """
    Scans for patterns of the form:
      [<mask>|<grammatical_gender>|<grammatical_case>]
    in the given text, and returns a structure with:
      - a 'cases' list, each containing a 'mask' and a list of 'grammatical_cases'
      - a 'genders' list, each containing a 'mask' and a 'grammatical_gender'
    """

    # Regex that captures three groups:
    #   1) The mask (any text without '|' or ']')
    #   2) The gender (masculine|feminine)
    #   3) The case (nominative|genitive|...|vocative)
    pattern = r"\[([^\|\]]+)\|(masculine|feminine|any)\|(nominative|genitive|dative|accusative|instrumental|locative|vocative)\]"
    matches = re.findall(pattern, text)

    # We'll accumulate grammatical cases by mask
    mask_to_cases: dict[str, List[str]] = {}
    # We'll store a single gender per mask
    mask_to_gender: dict[str, str] = {}

    for mask, gender, grammatical_case in matches:
        # Append the grammatical case to the list for this mask
        if mask not in mask_to_cases:
            mask_to_cases[mask] = []
        mask_to_cases[mask].append(grammatical_case)

        # Record the gender if we haven't seen this mask yet.
        # (Assuming repeated masks always have the same gender.)
        if mask not in mask_to_gender:
            mask_to_gender[mask] = gender

    # Build Case objects from aggregated data
    all_cases: List[Case] = []
    for mask, cases in mask_to_cases.items():
        all_cases.append(
            Case(
                mask=mask,
                grammatical_cases=cases
            )
        )

    # Build Gender objects
    all_genders: List[Gender] = [
        Gender(
            mask=mask,
            grammatical_gender=gender,
        )
        for mask, gender in mask_to_gender.items()
    ]

    return GendersWithCasesSchema(cases=all_cases, genders=all_genders)




In [310]:

test_data_folder = "./court_cases/"
true_data_folder = "./court_cases_labeled/"

# Read test data
import os

test_data = []
for file in os.listdir(test_data_folder):
    with open(test_data_folder + file, 'r') as f:
        test_data.append(f.read())

# Read true data
true_data = []
for file in os.listdir(true_data_folder):
    with open(true_data_folder + file, 'r') as f:
        true_data.append(f.read())

assert len(test_data) == len(true_data)
assert len(test_data) > 0
print(true_data[0][:100])


# Parse true data
true_labels = {}

for idx, data in enumerate(true_data):
    true_labels[idx] = parse_text_to_genders_with_cases(data)

assert len(true_labels) == len(true_data)

print(true_labels[0])


Справа № 685/5/24
Провадження № 2/685/77/24 

У Х В А Л А
(про залишення позовної заяви без руху)

 
cases=[Case(mask='ОСОБА_1', grammatical_cases=['genitive', 'genitive', 'genitive']), Case(mask='ОСОБА_2', grammatical_cases=['genitive', 'genitive', 'genitive']), Case(mask='ОСОБА_3', grammatical_cases=['genitive']), Case(mask='ОСОБА_4', grammatical_cases=['nominative']), Case(mask='ОСОБА_5', grammatical_cases=['nominative'])] genders=[Gender(grammatical_gender='feminine', mask='ОСОБА_1'), Gender(grammatical_gender='feminine', mask='ОСОБА_2'), Gender(grammatical_gender='masculine', mask='ОСОБА_3'), Gender(grammatical_gender='feminine', mask='ОСОБА_4'), Gender(grammatical_gender='feminine', mask='ОСОБА_5')]


In [294]:
from ollama import chat
import json
from dotmap import DotMap


def predict(model, messages, schema):
    if model2client[model] == 'openai': 
        client = get_client(model)
        response = client.beta.chat.completions.parse(
            model=model,
            messages=messages,
            response_format=schema,
        )
        response = schema.model_validate_json(response.choices[0].message.content)
    elif model2client[model] == 'ollama':
        response = chat(
            messages=messages,
            model=model,
            format=schema.model_json_schema()
        )
        response = schema.model_validate_json(response['message']['content'])


    return response

In [295]:
# extract to utils.py
def resolve_fstrings(messages, context):
    """
    Resolves f-strings in a list of messages using a context dictionary.

    :param messages: List of message dictionaries (e.g., [{"role": "system", "content": "..."}]).
    :param context: Dictionary containing the variables for f-string substitution.
    :return: List of resolved message dictionaries.
    """
    resolved_messages = []
    for message in messages:
        resolved_message = {
            key: eval(f"f'''{value}'''", {}, context) if isinstance(value, str) else value
            for key, value in message.items()
        }
        resolved_messages.append(resolved_message)
    return resolved_messages

assert resolve_fstrings([{"role": "system", "content": "Hello, {name}!"}], {"name": "world"}) == [{"role": "system", "content": "Hello, world!"}]


In [296]:
def run_prediction(template, schema, limit=None, verbose=False):
    y_pred = {}

    for model in models:
        y_pred[model] = {}

    for model in models:
        y_pred[model] = {}
        for idx, data in enumerate(test_data if limit is None else test_data[:limit]):
            # entities = [(entity + ' occurres ' + str(len(true_labels[idx][entity]['grammatical_cases'])) + ' time(s)')  for entity in true_labels[idx].keys()]
            entities = [entity.mask for entity in true_labels[idx].genders]
            
            data = {
                "entities": ', '.join(entities),
                "text": data
            }


            global messages
            messages = resolve_params(template, data)

            if verbose:
                print(messages)

            response = predict(model, resolve_params(template, data), schema)

            y_pred[model][idx] = response
    
    return y_pred

In [297]:
y_pred = run_prediction(oneshot_messages, GendersWithCasesSchema, limit=1, verbose=True)

[{'role': 'system', 'content': '\n        You are a professional linguist trained to determine the grammatical gender and grammatical case of entities in a Ukrainian text.\n\n        You will receive:\n        1. A list of entities and the number of their occurrences in the text.\n        2. A text in Ukrainian containing an entity labeled as \'ОСОБА_\'.\n\n        Your task is to identify the grammatical gender and case of this entity, considering:\n        1. The context of the text. E.g., a person can be a parent, verbs can have endings that indicate the gender.\n        2. Interactions with other entities.\n        3. Dependencies on other words and entities.\n        4. Words surrounding the entity.\n\n        Possible grammatical genders: \'masculine\', \'feminine\', \'any\'. Any means that you can\'t determine but it can be either masculine or feminine.\n        Possible cases:\n        1. \'nominative\' for the subject. Answers the question \'хто? що?\'\n        2. \'genitive\'

In [298]:
zeroshot_messages = [
    {
      "role": "system",
      "content": '''
        You are a professional linguist trained to determine the grammatical gender and grammatical case of entities in a Ukrainian text.

        You will receive:
        1. a list of entities and with number of their occurrences in the text.
        2. a text in Ukrainian containing an entity labeled as 'ОСОБА_'.

        Your task is to identify the grammatical gender and case of this entity, considering:
        1. The context of the text. E.g. a person can be a parent, verbs can have endings that indicates the gender.
        2. Interactions with other entities.
        3. Dependencies on other words and entities.
        4. Words surrounding the entity.

        Possible grammatical genders: 'masculine', 'feminine', 'any'. Any means that you can't determine but it can be either masculine or feminine.
        Possible cases:
        1. 'nominative' for the subject. Answers the question 'хто? що?'
        2. 'genitive' for possession. Answers the question 'кого? чого?'
        3. 'dative' for the indirect object. Answers the question 'кому? чому?'
        4. 'accusative' for the direct object. Answers the question 'кого? що?'
        5. 'instrumental' for the means. Answers the question 'ким? чим?'
        6. 'locative' for location. Answers the question 'на кому? на чому?'
        7. 'vocative' for addressing someone. 

        Rules:
        1. Make sure to cover all occurrences of the entity in the text.
        2. Group occurrences by their mask.
        
        Use these hints to determine the entity’s grammatical gender and case in all its occurrences.

        Ensure to cover all entities and their occurrences in the text.
      '''
    },
    {
      "role": "user",
      "content": '''
        Please, detect all entities from this list: {{entities}}.
        Court decision: 
        
        ---

        {{text}}

        ---
        
        Only output valid JSON with no additional commentary.

      '''
    }
  ]

In [299]:
oneshot_messages = [
    {
        "role": "system",
        "content": '''
        You are a professional linguist trained to determine the grammatical gender and grammatical case of entities in a Ukrainian text.

        You will receive:
        1. A list of entities and the number of their occurrences in the text.
        2. A text in Ukrainian containing an entity labeled as 'ОСОБА_'.

        Your task is to identify the grammatical gender and case of this entity, considering:
        1. The context of the text. E.g., a person can be a parent, verbs can have endings that indicate the gender.
        2. Interactions with other entities.
        3. Dependencies on other words and entities.
        4. Words surrounding the entity.

        Possible grammatical genders: 'masculine', 'feminine', 'any'. Any means that you can't determine but it can be either masculine or feminine.
        Possible cases:
        1. 'nominative' for the subject. Answers the question 'хто? що?'
        2. 'genitive' for possession. Answers the question 'кого? чого?'
        3. 'dative' for the indirect object. Answers the question 'кому? чому?'
        4. 'accusative' for the direct object. Answers the question 'кого? що?'
        5. 'instrumental' for the means. Answers the question 'ким? чим?'
        6. 'locative' for location. Answers the question 'на кому? на чому?'
        7. 'vocative' for addressing someone.

        Rules:
        1. Make sure to cover all occurrences of the entity in the text.
        2. Group occurrences by their mask.

        Here is an example response based on a similar case:
        For text:
        Справа № 303/12352/23
        2/303/1929/23

        УХВАЛА
        про повернення позовної заяви 

        Суддя Мукачівського міськрайонного суду Закарпатської області Заболотний А.М., розглянувши матеріали за позовом ОСОБА_1 до ОСОБА_2 про розірвання шлюбу,

        Response will be:
        {"cases":[{"mask":"ОСОБА_1","grammatical_cases":["nominative"]},{"mask":"ОСОБА_2","grammatical_cases":["genitive"]}],"genders":[{"grammatical_gender":"masculine","mask":"ОСОБА_1"},{"grammatical_gender":"feminine","mask":"ОСОБА_2"}]}

        Ensure your response follows this format and provides the gender and case for each occurrence of the entity.
        '''
    },
    {
        "role": "user",
        "content": '''
        Entities in this text: ОСОБА_1 occurres 5 time(s), ОСОБА_2 occurres 3 time(s).
        Court decision: 
        
        ---

        Справа № 303/12352/23
        2/303/1929/23

        УХВАЛА
        про повернення позовної заяви 

        02 січня 2024 року
        м. Мукачево

        Суддя Мукачівського міськрайонного суду Закарпатської області Заболотний А.М., розглянувши матеріали за позовом ОСОБА_1 до ОСОБА_2 про розірвання шлюбу,-

        в с т а н о в и в:

        20.12.2023 року ОСОБА_1 звернувся до суду з позовом до ОСОБА_2 про розірвання шлюбу.
        Ухвалою судді від 21.12.2023 року позовну заяву залишено без руху для усунення недоліків протягом п`яти днів з дня отримання цієї ухвали. 
        Копію ухвали від 21.12.2023 року представником позивача, адвокатом Баняс В.В., було отримано особисто 21.12.2023 року про, що свідчить наявна в матеріалах справи розписка.
        В подальшому, 26.12.2023 року від представника позивача  ОСОБА_1 , адвоката Баняс В.В. надійшла заява в якій він відкликає позовну заяву та просить повернути йому документи. 
        Враховуючи вищевикладене, а також те, що провадження у цій справі не відкрито, позовну заяву ОСОБА_1 до ОСОБА_2 про розірвання шлюбу слід повернути позивачу.
        
        Позовну заяву  ОСОБА_1 до  ОСОБА_2 про розірвання шлюбу повернути позивачу.

        ---

        Only output valid JSON with no additional commentary.
        '''
    },
    {
        "role": "system",
        "content": '{{"cases":[{"mask":"ОСОБА_1","grammatical_cases":["genitive","nominative","genitive","genitive","genitive"]},{"mask":"ОСОБА_2","grammatical_cases":["genitive","genitive","genitive"]}],"genders":[{"grammatical_gender":"masculine","mask":"ОСОБА_1"},{"grammatical_gender":"feminine","mask":"ОСОБА_2"}]}'
    },
    {
      "role": "user",
      "content": '''
        Please, detect all entities from this list: {{entities}}.
        Court decision: 
        
        ---

        {{text}}

        ---

        Only output valid JSON with no additional commentary.
      '''
    }
]

In [326]:
from typing import Dict, List
import math

def _lcs_length(seq1: List[str], seq2: List[str]) -> int:
    m, n = len(seq1), len(seq2)
    dp = [[0] * (n + 1) for _ in range(m + 1)]

    for i in range(m):
        for j in range(n):
            if seq1[i] == seq2[j]:
                dp[i + 1][j + 1] = dp[i][j] + 1
            else:
                dp[i + 1][j + 1] = max(dp[i + 1][j], dp[i][j + 1])
    return dp[m][n]

def _evaluate_f1_ordered_metrics(true_map: Dict[str, List[str]], ai_map: Dict[str, List[str]]):
    # We gather all masks, ensuring we consider missing ones from either side
    all_masks = set(true_map.keys()) | set(ai_map.keys())
    precisions, recalls = [], []

    for mask in all_masks:
        tlist = true_map.get(mask, [])
        alist = ai_map.get(mask, [])

        # LCS length measures how many items match in correct order
        lcs = _lcs_length(tlist, alist)

        total_true = len(tlist)
        total_ai = len(alist)

        precision = lcs / total_ai if total_ai else 0.0
        recall = lcs / total_true if total_true else 0.0

        precisions.append(precision)
        recalls.append(recall)

    avg_precision = sum(precisions) / len(precisions) if precisions else 0.0
    avg_recall = sum(recalls) / len(recalls) if recalls else 0.0
    f1_score = (2 * avg_precision * avg_recall / (avg_precision + avg_recall)
                if (avg_precision + avg_recall) else 0.0)

    return {
        "precision": avg_precision,
        "recall": avg_recall,
        "f1_score": f1_score
    }

def evaluate_f1(true_data: GendersWithCasesSchema, ai_data: GendersWithCasesSchema):
    # Store the sequences for each mask so we keep duplicates AND order
    true_cases = {c.mask: list(c.grammatical_cases) for c in true_data.cases}
    ai_cases   = {c.mask: list(c.grammatical_cases) for c in ai_data.cases}

    # If you truly want to handle multiple occurrences of gender in order, store them as a list
    # Typically there's just one gender, but we'll do it the same way for consistency
    true_genders = {g.mask: [g.grammatical_gender] for g in true_data.genders}
    ai_genders   = {g.mask: [g.grammatical_gender] for g in ai_data.genders}

    cases_metrics   = _evaluate_f1_ordered_metrics(true_cases, ai_cases)
    genders_metrics = _evaluate_f1_ordered_metrics(true_genders, ai_genders)

    return {
        "cases": cases_metrics,
        "genders": genders_metrics
    }


In [301]:
# calculate avg f1 scores for cases and gender separately for each model
def calculate_f1_scores(models, test_data, true_labels, y_pred):
    """
    Returns a dictionary containing average F1 metrics for cases and genders, 
    computed across the provided set of models, test_data, and predictions.
    """
    f1_scores = {}
    for model in models:
        avg_f1_scores = []
        for idx in range(len(test_data)):
            true_data = true_labels[idx]
            ai_data = y_pred[model][idx]
            avg_f1_scores.append(evaluate_f1(true_data, ai_data))

        f1_scores[model] = {
            "cases": {
                "precision": sum(score["cases"]["precision"] for score in avg_f1_scores) / len(avg_f1_scores),
                "recall": sum(score["cases"]["recall"] for score in avg_f1_scores) / len(avg_f1_scores),
                "f1_score": sum(score["cases"]["f1_score"] for score in avg_f1_scores) / len(avg_f1_scores)
            },
            "genders": {
                "precision": sum(score["genders"]["precision"] for score in avg_f1_scores) / len(avg_f1_scores),
                "recall": sum(score["genders"]["recall"] for score in avg_f1_scores) / len(avg_f1_scores),
                "f1_score": sum(score["genders"]["f1_score"] for score in avg_f1_scores) / len(avg_f1_scores)
            },
        }

    return f1_scores


In [329]:
y_pred = run_prediction(zeroshot_messages, GendersWithCasesSchema, limit=10, verbose=False)
scores = calculate_f1_scores(models, test_data, true_labels, y_pred)
print(json.dumps(scores, indent=2))

{
  "llama3.1": {
    "cases": {
      "precision": 0.12111111111111113,
      "recall": 0.055862193362193356,
      "f1_score": 0.0674072339318241
    },
    "genders": {
      "precision": 0.12000000000000002,
      "recall": 0.12000000000000002,
      "f1_score": 0.12000000000000002
    }
  },
  "gpt-4o-mini": {
    "cases": {
      "precision": 0.16999999999999998,
      "recall": 0.1653354978354978,
      "f1_score": 0.15772123138351163
    },
    "genders": {
      "precision": 0.13999999999999999,
      "recall": 0.13999999999999999,
      "f1_score": 0.14
    }
  },
  "gpt-4o-2024-08-06": {
    "cases": {
      "precision": 0.22666666666666666,
      "recall": 0.16185064935064936,
      "f1_score": 0.18037923305108744
    },
    "genders": {
      "precision": 0.18,
      "recall": 0.18,
      "f1_score": 0.18
    }
  }
}


In [330]:
y_pred = run_prediction(oneshot_messages, GendersWithCasesSchema, limit=10, verbose=False)
scores = calculate_f1_scores(models, test_data, true_labels, y_pred)
print(json.dumps(scores, indent=2))

{
  "llama3.1": {
    "cases": {
      "precision": 0.195,
      "recall": 0.15816017316017317,
      "f1_score": 0.1714489882268318
    },
    "genders": {
      "precision": 0.22000000000000003,
      "recall": 0.22000000000000003,
      "f1_score": 0.22000000000000003
    }
  },
  "gpt-4o-mini": {
    "cases": {
      "precision": 0.27666666666666667,
      "recall": 0.23006493506493508,
      "f1_score": 0.24384878412080288
    },
    "genders": {
      "precision": 0.12,
      "recall": 0.12,
      "f1_score": 0.12000000000000002
    }
  },
  "gpt-4o-2024-08-06": {
    "cases": {
      "precision": 0.275,
      "recall": 0.27878787878787875,
      "f1_score": 0.2724835378921147
    },
    "genders": {
      "precision": 0.15999999999999998,
      "recall": 0.15999999999999998,
      "f1_score": 0.16
    }
  }
}
