In [10]:
# %pip install google-generativeai

In [19]:
import os
import json
import shutil
import google.generativeai as genai

### Set up the mini-eval directory with the 'answers' (LLM-based ground truth) and 'documents' (perturbed documents without tags).


In [20]:
# Root folder holding one sub-folder per "<perturbation>_<category>_contradiction" group.
base_dir = 'perturbed_legal_documents'

# Perturbation families. Each entry is interpolated verbatim into a folder name,
# so the spelling has to match what is on disk.
# NOTE(review): 'misaligned_terminalogy' presumably mirrors the on-disk folder name;
# do not correct the spelling here without renaming the folder as well.
PERTURBATION_TYPES = [
    'ambiguity',
    'inconsistencies',
    'misaligned_terminalogy',
    'omission',
    'structural_flaws',
]

# Contradiction categories, also interpolated verbatim into the folder name.
CATEGORIES = [
    'inText',
    'legal',
]

In [None]:
# Build the mini-eval set. For every (perturbation type, category) group, copy up to
# MAX_PAIRS_PER_GROUP ground-truth JSON files into mini-eval/answers/<group>/ and the
# matching tag-free documents into mini-eval/documents/<group>/.
MAX_PAIRS_PER_GROUP = 5      # how many (answer, document) pairs to keep per group
JSON_PREFIX = "perturbed_"   # ground-truth files are named perturbed_<name>.pdf.json
JSON_SUFFIX = ".pdf.json"

for pt in PERTURBATION_TYPES:
    for ct in CATEGORIES:
        # Single source of truth for the group name used in every path below.
        group = f"{pt}_{ct}_contradiction"
        print(f"\nProcessing: {group}")

        input_dir = f'{base_dir}/{group}/'
        doc_dir = os.path.join(input_dir, 'modified_files_no_tags')

        # Both locations are listed/read as directories, so require a directory
        # rather than merely an existing path.
        if not os.path.isdir(input_dir):
            print(f"❌ Input dir not found: {input_dir}")
            continue
        if not os.path.isdir(doc_dir):
            print(f"❌ Document dir not found: {doc_dir}")
            continue

        output_answers = f'mini-eval/answers/{group}/'
        os.makedirs(output_answers, exist_ok=True)

        output_documents = f'mini-eval/documents/{group}/'
        os.makedirs(output_documents, exist_ok=True)

        # Collect all valid json->txt pairs (sorted so the selection is deterministic).
        json_files = sorted(f for f in os.listdir(input_dir) if f.endswith('.json'))
        print(f"🔎 Found {len(json_files)} JSON files")

        valid_pairs = []

        for json_file in json_files:
            if not (json_file.startswith(JSON_PREFIX) and json_file.endswith(JSON_SUFFIX)):
                print(f"  ⚠️ Skipping incorrectly named file: {json_file}")
                continue

            # perturbed_<name>.pdf.json  ->  modified_<name>.pdf.txt
            base_name = json_file[len(JSON_PREFIX):-len(JSON_SUFFIX)]
            txt_file = f"modified_{base_name}.pdf.txt"
            txt_path = os.path.join(doc_dir, txt_file)

            if os.path.exists(txt_path):
                valid_pairs.append((json_file, txt_file))
                print(f"  ✅ Matched: {json_file} <-> {txt_file}")
            else:
                print(f"  ❌ Missing TXT: {txt_file}")

            # Stop scanning once the per-group quota is filled.
            if len(valid_pairs) >= MAX_PAIRS_PER_GROUP:
                break

        if not valid_pairs:
            print("Can't find corresponding files????")
            continue

        # Copy matched pairs
        for json_file, txt_file in valid_pairs:
            src_json = os.path.join(input_dir, json_file)
            dst_json = os.path.join(output_answers, json_file)

            src_txt = os.path.join(doc_dir, txt_file)
            dst_txt = os.path.join(output_documents, txt_file)

            shutil.copy(src_json, dst_json)
            shutil.copy(src_txt, dst_txt)
            print(f"  📁 Copied: {json_file} and {txt_file}")

In [12]:
# SECURITY: the API key must never be written into the notebook. It is read from the
# GOOGLE_API_KEY environment variable of the process that started the kernel.
# The key that used to be hardcoded in this cell has been exposed and should be revoked.
API_KEY = os.getenv("GOOGLE_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export it in the environment that starts the "
        "Jupyter kernel before running this cell."
    )

genai.configure(api_key=API_KEY)
model = genai.GenerativeModel("gemini-2.0-flash")

**A small dataset wrapper that makes it easy to retrieve each document together with its ground-truth answers.**

In [13]:
class MiniEvalDataset:
    """Index-addressable pairs of ground-truth answers and perturbed documents.

    Answers are the ``*.json`` files found anywhere under ``<mini_eval_dir>/answers``;
    other files and hidden folders are ignored. Two on-disk layouts are supported:

    * flat:    ``answers/<stem>.json``  with  ``documents/<stem>.txt``
    * grouped: ``answers/<group>/perturbed_<name>.pdf.json``  with
      ``documents/<group>/modified_<name>.pdf.txt``

    Iterating with ``for sample in dataset`` works because ``__getitem__`` raises
    ``IndexError`` once the index runs past the last sample.

    Args:
        mini_eval_dir: Root folder containing ``answers/`` and ``documents/``.

    Raises:
        FileNotFoundError: If the answers folder does not exist.
    """

    _ANSWER_EXT = ".json"
    _ANSWER_PREFIX = "perturbed_"
    _DOCUMENT_PREFIX = "modified_"

    def __init__(self, mini_eval_dir="mini-eval"):
        self.mini_eval_dir = mini_eval_dir
        self.mini_eval_answers_dir = os.path.join(self.mini_eval_dir, "answers")
        self.mini_eval_documents_dir = os.path.join(self.mini_eval_dir, "documents")

        if not os.path.isdir(self.mini_eval_answers_dir):
            raise FileNotFoundError(f"Answers directory not found: {self.mini_eval_answers_dir}")

        # Each entry is the answer path relative to the answers folder, minus ".json".
        stems = []
        for root, dirs, names in os.walk(self.mini_eval_answers_dir):
            # Prune hidden folders (e.g. .ipynb_checkpoints) in place so os.walk skips them.
            dirs[:] = [d for d in dirs if not d.startswith(".")]
            rel_root = os.path.relpath(root, self.mini_eval_answers_dir)
            for name in names:
                if not name.endswith(self._ANSWER_EXT):
                    continue
                stem = name[:-len(self._ANSWER_EXT)]
                stems.append(stem if rel_root == os.curdir else os.path.join(rel_root, stem))
        self.files = sorted(stems)

    def __len__(self):
        """Return the number of answer files that were discovered."""
        return len(self.files)

    def _document_path(self, stem):
        """Return the path of the document paired with the answer ``stem``.

        Tries ``<stem>.txt`` first, then the ``perturbed_`` -> ``modified_`` renaming.
        If neither exists, the first candidate is returned so that opening it reports
        the expected path in the resulting ``FileNotFoundError``.
        """
        folder, name = os.path.split(stem)
        candidates = [name + ".txt"]
        if name.startswith(self._ANSWER_PREFIX):
            candidates.append(self._DOCUMENT_PREFIX + name[len(self._ANSWER_PREFIX):] + ".txt")

        for candidate in candidates:
            path = os.path.join(self.mini_eval_documents_dir, folder, candidate)
            if os.path.exists(path):
                return path
        return os.path.join(self.mini_eval_documents_dir, folder, candidates[0])

    def __getitem__(self, idx):
        """Return ``{"answers": <parsed JSON>, "documents": <document text>}`` for ``idx``.

        Raises:
            IndexError: If ``idx`` is out of range.
            FileNotFoundError: If the paired document is missing.
        """
        stem = self.files[idx]

        with open(os.path.join(self.mini_eval_answers_dir, stem + ".json"), "r", encoding="utf-8") as f:
            answers = json.load(f)

        # Read the text verbatim. Joining readlines() with "\n" would double every
        # line break, because each line already ends with its own newline.
        with open(self._document_path(stem), "r", encoding="utf-8") as f:
            documents = f.read()

        return {
            "answers": answers,
            "documents": documents,
        }

You retrieve elements in each dataset like this:

In [14]:
# Build the dataset (MiniEvalDataset reads from the "mini-eval" folder) and display
# the parsed ground-truth answers of the first sample.
dataset = MiniEvalDataset()
dataset[0]["answers"]

[{'file_name': 'AlliedEsportsEntertainmentInc_20190815_8-K_EX-10.34_11788308_EX-10.34_Sponsorship Agreement.pdf',
  'perturbation': [{'type': 'Inconsistencies - In Text Contradiction',
    'original_text': '4.2 Newegg shall provide to Allied the Sponsorship Benefits set out in Schedule 3, including paying the Sponsorship Fee as provided in that Schedule. Any value-added, goods and services, or similar tax or duty imposed by any government or tax authority on any Sponsorship Benefit shall be borne solely by Allied.',
    'changed_text': '4.2 Newegg shall provide to Allied the Sponsorship Benefits set out in Schedule 3, including paying the Sponsorship Fee as provided in that Schedule. Any value-added, goods and services, or similar tax or duty imposed by any government or tax authority on any Sponsorship Benefit shall be borne solely by Allied. Newegg has 60 days from the invoice date to remit payment.',
    'explanation': 'This change adds a specific payment deadline of 60 days in Sect

**You check the length like this:**

In [15]:
# Number of samples, i.e. how many answer files the dataset found.
len(dataset)

5

#### WIP: This needs to be adjusted to be few-shot.

In [16]:
# Zero-shot instructions sent ahead of every document.
# The format example is valid JSON (commas between fields, newline escaped as \n) and
# is an array, because the evaluation code iterates over a list of findings.
PROMPT_INSTRUCTIONS = """You are a legal contract expert and know how to check legal documents properly and find any discrepancies or contradictions within a file. You are also aware of all state and national laws when it comes to legal documents.
The file is a legal document and you are to check for any discrepancies or contradictions within the file.
There are 10 categories when it comes to discrepancies or contradictions:
1. Ambiguity in text - this means that the text is not clear and can be interpreted in multiple ways.
2. Ambiguity in legal terms - this means that the legal terms used in the text are not clear and can be interpreted in multiple ways.
3. Inconsistencies in text - this means that the text is not consistent and contradicts itself.
4. Inconsistencies in legal terms - this means that the legal terms used in the text are not consistent and contradict some law.
5. Misaligned in text - this means that the text is not aligned with the rest of the document and does not make sense.
6. Misaligned in legal terms - this means that the legal terms used in the text are not aligned with the law and do not make sense.
7. Omission in text - this means that there is something missing in the text that should be there.
8. Omission in legal terms - this means that there is something missing in the legal terms that should be there.
9. Structural Flaws in text - this means that the text is not structured properly and does not make sense.
10. Structural Flaws in legal terms - this means that the legal terms used in the text are not structured properly and do not make sense.

Instructions:
1. Read the file and check for any discrepancies or contradictions within the file.
2. Provide a detailed explanation of why this is a discrepancy or contradiction.
3. Provide the section where the discrepancy or contradiction exists.
4. Provide the section location. Like Section 5.4.
5. Categorize the discrepancy or contradiction into one of the 10 categories above (return the number of the category).

Return the results as a JSON array with one object per discrepancy or contradiction. Do not add any text besides the JSON. The JSON format should be like this:
[
    {
        "section": "Sponsor shall pay Club the Annual Fee for each Contract Year of this Agreement in six (6) equal installments, each\\ndue on or prior to the 1st of each month between June and November of the applicable Contract Year.",
        "explanation": "This change introduces a contradiction regarding the payment deadline. Section 3(a) states that all installments are due by November 1st, but the added sentence allows the final payment to be made as late as December 15th without penalty. This creates ambiguity as to the actual deadline for the final installment and whether late fees would apply between November 2nd and December 15th.",
        "location": "Section 5.2",
        "category": 3
    }
]

This is the document:
"""

for sample in dataset:
    response = model.generate_content(PROMPT_INSTRUCTIONS + sample["documents"])
    # Text of the first part of the first candidate returned by the model.
    model_response = response.to_dict()["candidates"][0]["content"]["parts"][0]["text"]
    print(model_response)
    break  # only the first sample is sent; `sample` and `model_response` stay set to it

```json
[
  {
    "section": "Allied shall comply with: (a) all applicable laws, rules, regulations, regulatory policies, guidelines or codes applicable to the Arena and Allied’s activities to be carried out in performing its obligations in accordance with this Agreement, including all such guidelines and codes issued by statutory, regulatory and industry bodies, and further, will not pay, deliver, or offer or promise to pay or deliver, any funds or other item of value excluding the Products, either directly or through any third party, to any state or federal governmental official for any reason whatsoever other than the payment of statutory and administrative fees, charges and taxes that are due from Allied as a result of its performance under this Agreement",
    "explanation": "This section, regarding compliance with laws and prohibition of improper payments, contains an ambiguity. While it generally prohibits payments to government officials, it creates an exception for 'statutory 

In [17]:
def clean_and_parse_model_response(raw_response):
    """Extract and parse the JSON payload from a raw model response.

    Handles a bare JSON string, a response wrapped in a Markdown code fence
    (with or without a ``json`` language tag in any letter case), and a fenced
    block that has explanatory text before or after it.

    Args:
        raw_response: Text returned by the model.

    Returns:
        The parsed JSON value, or ``None`` if the response is not a string or no
        candidate could be parsed (a message is printed in that case).
    """
    if not isinstance(raw_response, str):
        print("Failed to parse JSON: model response is not a string")
        return None

    stripped = raw_response.strip()

    # Candidate 1: peel backticks off both ends and drop a leading "json" tag.
    # Tried first so that every input this function used to accept parses the same way.
    unfenced = stripped.strip("`")
    if unfenced.startswith("json"):
        unfenced = unfenced[4:].strip()
    candidates = [unfenced]

    # Candidate 2: text between the first and the last code fence, which tolerates
    # prose around the fenced block and an upper-case language tag.
    opening = stripped.find("```")
    if opening != -1:
        closing = stripped.rfind("```")
        if closing > opening:
            inner = stripped[opening + 3:closing]
        else:
            inner = stripped[opening + 3:]  # opening fence without a closing one
        inner = inner.strip()
        if inner[:4].lower() == "json":
            inner = inner[4:].strip()
        candidates.append(inner)

    first_error = None
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError as e:
            if first_error is None:
                first_error = e

    print("Failed to parse JSON:", first_error)
    return None


def add_section_identified_flag(predictions, ground_truth_perturbations):
    """Annotate each prediction with whether it hit a ground-truth perturbation.

    Two boolean keys are added in place to every prediction dict:

    * ``location_match`` - the prediction's ``"location"`` equals the ``"location"``
      of some ground-truth perturbation (surrounding whitespace ignored).
    * ``text_match`` - the prediction's ``"section"`` contains, or is contained in,
      the ``"changed_text"`` of some ground-truth perturbation.

    Empty or missing values never match. A plain substring test would otherwise
    report a match for every empty ``"section"``, since ``"" in text`` is always true.

    Args:
        predictions: List of prediction dicts; a single dict is treated as a
            one-element list.
        ground_truth_perturbations: List of ground-truth perturbation dicts.

    Returns:
        The list of predictions, with both flags set on every item.
    """
    def _clean(value):
        # Treat missing / None as empty and tolerate non-string values.
        return "" if value is None else str(value).strip()

    if isinstance(predictions, dict):
        predictions = [predictions]

    gt_locations = set()
    gt_changed_texts = []
    for perturbation in ground_truth_perturbations:
        location = _clean(perturbation.get("location"))
        if location:
            gt_locations.add(location)
        changed_text = _clean(perturbation.get("changed_text"))
        if changed_text:
            gt_changed_texts.append(changed_text)

    for pred in predictions:
        # LOCATION MATCH
        pred_loc = _clean(pred.get("location"))
        pred["location_match"] = bool(pred_loc) and pred_loc in gt_locations

        # TEXT MATCH (does the model's 'section' overlap with what was perturbed?)
        pred_section = _clean(pred.get("section"))
        pred["text_match"] = bool(pred_section) and any(
            pred_section in gt_text or gt_text in pred_section
            for gt_text in gt_changed_texts
        )

    return predictions

**TODO:** Move this evaluation step into the generation loop above so that it runs for every sample, not only the last one processed.

- `location_match` shows whether the "location" in the model's response equals the location of a section that was modified in the perturbed document.
- `text_match` shows whether the "section" text in the model's response overlaps (contains or is contained in) the text that was modified in the perturbed document.

In [18]:
# Score the model output for the current sample against its ground truth.
parsed_response = clean_and_parse_model_response(model_response)

# Perturbations recorded for the first answer entry of this sample.
ground_truth = sample["answers"][0]["perturbation"]

# A single JSON object is treated as a one-element list of findings.
if isinstance(parsed_response, dict):
    parsed_response = [parsed_response]

# Test the type rather than truthiness: an empty list is a valid "no findings"
# answer and must not be reported as a parse failure.
if isinstance(parsed_response, list):
    updated_predictions = add_section_identified_flag(parsed_response, ground_truth)

    for item in updated_predictions:
        print(json.dumps(item, indent=2))   # TODO: set up a directory to store the .json
else:
    print("Could not parse model response.")

{
  "section": "Allied shall comply with: (a) all applicable laws, rules, regulations, regulatory policies, guidelines or codes applicable to the Arena and Allied\u2019s activities to be carried out in performing its obligations in accordance with this Agreement, including all such guidelines and codes issued by statutory, regulatory and industry bodies, and further, will not pay, deliver, or offer or promise to pay or deliver, any funds or other item of value excluding the Products, either directly or through any third party, to any state or federal governmental official for any reason whatsoever other than the payment of statutory and administrative fees, charges and taxes that are due from Allied as a result of its performance under this Agreement",
  "explanation": "This section, regarding compliance with laws and prohibition of improper payments, contains an ambiguity. While it generally prohibits payments to government officials, it creates an exception for 'statutory and adminis