In [46]:
from transformers import  AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, DataCollatorForLanguageModeling
from sklearn.model_selection import train_test_split
from datasets import DatasetDict, Dataset
import pandas as pd
import numpy as np
import warnings
import logging
import random
import torch
import json
import yaml
import os
import json
import torch
from transformers import pipeline
from sklearn.metrics import accuracy_score
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import ollama

# Suppress all warnings for the rest of the session.
# NOTE(review): this hides every warning category, deprecations included.
warnings.filterwarnings("ignore")

# Load the run configuration. The encoding is spelled out, as in the other
# file reads of this notebook, so decoding does not depend on the OS locale.
with open('config.yaml', 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file)
def seed_everything(seed: int):
    """Seed the Python, NumPy and PyTorch RNGs and ask cuDNN to be deterministic.

    Args:
        seed: Value applied to every random number generator.
    """
    random.seed(seed)
    # Exported for child processes; the hash seed of the already-running
    # interpreter is fixed at start-up and is not changed by this assignment.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current device (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes kernels per run and may pick different
    # algorithms each time, which works against the deterministic flag above.
    torch.backends.cudnn.benchmark = False

# Seed every RNG with a fixed value before any data is loaded.
seed_everything(42)





# Records from the JSONL file named by the 'actual_data' config entry.
data_file = config['actual_data']
with open(data_file, 'r', encoding='utf-8') as jsonl_file:
    # Blank lines (e.g. a trailing newline) are skipped; json.loads rejects them.
    data_list = [json.loads(line) for line in jsonl_file if line.strip()]

# Judge records, one JSON object per line.
judge_file = "judge1.jsonl"
with open(judge_file, 'r', encoding='utf-8') as jsonl_file:
    judge_data_list = [json.loads(line) for line in jsonl_file if line.strip()]



In [2]:
def get_dict_from_path(path, key):
    """Build a ``{Question: value}`` lookup from a JSONL file.

    Args:
        path: Path of a UTF-8 file holding one JSON object per line.
        key: Name of the field whose value is stored for each question.

    Returns:
        dict mapping each record's 'Question' to its ``key`` value. Records
        where either of the two is missing or falsy are left out; when a
        question repeats, the last record wins.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    context_dict = {}
    with open(path, 'r', encoding='utf-8') as cf:
        for line in cf:
            # Blank lines (such as a trailing newline) are not valid JSON.
            if not line.strip():
                continue
            entry = json.loads(line)
            question = entry.get('Question')
            retrieved_context = entry.get(key)
            if question and retrieved_context:
                context_dict[question] = retrieved_context
    return context_dict



# Spreadsheet holding the retrieved context for each question.
contexts_file = "rag.xlsx"
context_df = pd.read_excel(contexts_file)

# Fail fast when either of the two columns used below is absent.
required_columns = ('Question', 'retrieved_context')
if not all(column in context_df.columns for column in required_columns):
    raise ValueError("Excel file must contain 'Question' and 'retrieved_context' columns.")

# Question -> retrieved context; a repeated question keeps its last row.
rag_dict = {
    question_text: context_text
    for question_text, context_text in zip(context_df['Question'], context_df['retrieved_context'])
}


In [8]:
# Question -> field lookups, each read from its own JSONL file:
# the 'Defense' text, the 'Critique' text and the 'main_model_reasoning' text.
defense_dict=get_dict_from_path("Defense.jsonl","Defense")
critique_dict=get_dict_from_path("Critique.jsonl","Critique")
pretrained_model_reasoning=get_dict_from_path("result_qwen7b_main_architecture05032025.jsonl","main_model_reasoning")

In [9]:
# Sanity check: number of judge records that were loaded.
len(judge_data_list)

937

In [47]:
# Index the loaded records by their 'Question' text; a repeated question keeps the last record.
question_to_data = {item["Question"]: item for item in data_list}

In [18]:
def get_data_entry(question, judge_data):
    """Assemble the combined record for one question.

    The stored record for ``question`` is copied without its
    'Correct Answer', 'Reasoning' and 'Passage' fields, extended with the
    per-question lookups (``None`` when a lookup has no entry), and finally
    filled in with any ``judge_data`` keys it does not contain yet.

    Args:
        question: Question text; must be a key of ``question_to_data``.
        judge_data: Mapping with the judge record for the question.

    Returns:
        A new dict; neither the stored record nor ``judge_data`` is modified.

    Raises:
        KeyError: if ``question`` is not in ``question_to_data``.
    """
    hidden_keys = ("Correct Answer", "Reasoning", "Passage")
    entry = {
        name: value
        for name, value in question_to_data[question].items()
        if name not in hidden_keys
    }

    # Per-question material gathered from the auxiliary lookups.
    entry.update({
        'rag': rag_dict.get(question),
        'main_model_reasoning': pretrained_model_reasoning.get(question),
        'Critique': critique_dict.get(question),
        'Defense': defense_dict.get(question),
    })

    # Judge fields only fill gaps; values already present are kept.
    for name in judge_data:
        entry.setdefault(name, judge_data[name])

    return entry


In [43]:
# Smoke test: assemble the combined record for the first judge entry.
question=judge_data_list[0]["Question"]
temp=get_data_entry(question,judge_data_list[0])


In [44]:
# Display the assembled record for visual inspection.
temp

{'Question': "What legal actions should the authorities consider based on the specific allegations of assault highlighted in the passage, particularly concerning the victim's age and the nature of the crimes?",
 'A': 'Authorities should only provide counseling to the victim and not pursue legal actions.',
 'B': 'Authorities must take immediate legal action to investigate the allegations as the victim is a minor and the offenses fall under serious criminal categories.',
 'C': 'The authorities have a duty to dismiss the allegations immediately due to political involvement.',
 'D': 'Legal action can be postponed until the victim is older.',
 'rag': "when claim of\noffence; the court shall make an inquiry,take such evidence asmay be necessary (but\njuvenility is\nraised before not anaffidavit) so asto determine the age of such person, and shall record afinding\nany court. whether theperson isajuvenile orachild or not, stating his age asnearly as may be:\nProvided that aclaim ofjuvenility m

In [None]:
# System prompt for the first model call (the verification step) in model_output.
cov_system_prompt = """You are a legal verification expert tasked with validating and, if needed, correcting the answer provided by a legal QA model.

Your task involves:
1. Reviewing the question, model answer, reasoning, and retrieved legal context.
2. Judging the validity of the model's answer using the context.
3. Correcting the answer if it is wrong and explaining your correction.
4. Performing legal fallacy checks before verdict:
   - Statute applicability (age thresholds; POCSO consent irrelevance; Sec 19 mandatory reporting; Sec 33(7) privacy)
   - Element coverage (assault vs penetrative; aggravated conditions)
   - Temporal/forum errors (anticipatory bail post-arrest; wrong forum/procedure)
5. Following a strict verification format including a structured fallacy report.
"""


In [None]:
def generate_cov_content(data):
    """Render the user prompt for the verification model call.

    Args:
        data: Mapping with the question record. Keys read: 'Question', 'A',
            'B', 'C', 'D', 'Correct Answer', 'main_model_reasoning', 'rag',
            'Winner', 'Judgement', 'final_reasoning', 'Critique', 'Defense'.

    Returns:
        The prompt text. A field that is absent, ``None`` or a float NaN is
        rendered as an empty string ('N/A' for 'Winner') instead of the
        literal text "None"/"nan".
    """
    def field(key, default=''):
        # A key can be present with an explicit None, or with the NaN that
        # pandas yields for an empty spreadsheet cell; dict.get's default
        # does not cover those, so they are mapped to the default here.
        value = data.get(key, default)
        if value is None or (isinstance(value, float) and value != value):
            return default
        return value

    question = field('Question')
    options = {
        'A': field('A'),
        'B': field('B'),
        'C': field('C'),
        'D': field('D')
    }
    chosen_option = field('Correct Answer')
    main_model_reasoning = field('main_model_reasoning')

    rag_context = field('rag')
    winner = field('Winner', 'N/A')
    judgement = field('Judgement')
    final_reasoning = field('final_reasoning')
    critique = field('Critique')
    defense = field('Defense')
    critique_debate = f"{critique}\n\n{defense}\n\n{judgement}\n\n{final_reasoning}"

    content = f"""
1Ô∏è‚É£ Review the following:

- **Question:** {question}

- **Options:**
A: {options['A']}
B: {options['B']}
C: {options['C']}
D: {options['D']}

- **Chosen Option Content (from the model):**
{chosen_option}

-**Model Reasoning:**
{main_model_reasoning}

- **Retrieved Legal Context:**
{rag_context}

- **Winner of Debate:** {winner}

- **Model Reasoning (Judgement + Final Reasoning):**
{judgement}\n\n{final_reasoning}

- **Critique + Defense + Judgement + Reasoning:**
{critique_debate}

---

2Ô∏è‚É£ **Chain of Verification Steps:**

‚úÖ **Step 1:** Analyze the retrieved legal context and determine how each part relates to the question and each option.

‚úÖ **Step 2:** For **each option (A/B/C/D)**:
- Clearly state whether the option is **Supported** or **Not Supported** based on the retrieved context.
- Provide 1-2 sentences justifying your verdict for that option.

‚úÖ **Step 3:** Run legal fallacy checks and list:
- Misapplication of statutes / inapplicable provisions
- Missing required elements relative to the chosen statute
- Procedural/temporal/forum errors

‚úÖ **Step 4:** Verify the model‚Äôs chosen option:
- Does the retrieved context fully support it?
- Is the reasoning legally sound and complete?
- If **Defender won**, focus on validating and strengthening the Defender‚Äôs argument.
- If **Challenger won**, focus on exposing flaws and showing why the Challenger succeeded.

‚úÖ **Step 5:** **Final Correction:**
- If the model‚Äôs original answer is correct, reaffirm it confidently.
- If the model‚Äôs answer is incorrect, identify the **correct option** (from A/B/C/D), provide the full text of the correct option, and explain **why it is correct**.

---

**Your Output Format:**

- **Verification Verdict for Each Option:**
    - Option A: <Supported/Not Supported> - <short justification>
    - Option B: <Supported/Not Supported> - <short justification>
    - Option C: <Supported/Not Supported> - <short justification>
    - Option D: <Supported/Not Supported> - <short justification>

- **Fallacy Report:**
    - misapplication: [ ... ]
    - missing_elements: [ ... ]
    - procedural_errors: [ ... ]

- **Verification of Chosen Option:**
    - Verdict: <Fully Verified / Partially Verified / Not Verified>
    - Detailed Verification Reasoning: <step-by-step reasoning>

- **Final Correct Answer:**
    - Correct Option: <correct_option full text>
    - Final Reasoning: <detailed reasoning why this is the correct option>
"""
    return content


In [None]:
# System prompt for the second model call in model_output: it asks for a JSON
# object in the schema below, built from the text sent as the user message.
json_system_prompt = '''
You are a strict JSON datapoint creator.
Your response must be in strict JSON format as shown below:
```json
{
  "Verification": {
    "option_verdicts": {
      "A": "Supported | Not Supported",
      "B": "Supported | Not Supported",
      "C": "Supported | Not Supported",
      "D": "Supported | Not Supported"
    },
    "fallacy_report": {
      "misapplication": ["..."],
      "missing_elements": ["..."],
      "procedural_errors": ["..."]
    },
    "chosen_option_verdict": "Fully Verified | Partially Verified | Not Verified",
    "verification_reasoning": "..."
  },
  "Correct Answer": "<Write the full content of the correct option>",
  "final_reasoning": "<Detailed reasoning for the correct option>"
}
```
Output must be strictly valid JSON.
Note: Do not mention the option letter; include full text for Correct Answer only.
'''


In [56]:
def extract_nested_braces(s):
    """Return the first top-level ``{...}`` object found in ``s``.

    Braces that occur inside double-quoted string values of the object are
    ignored, so text such as ``{"a": "}"}`` is returned whole. If the text
    ends while the object is still open (a truncated reply), the missing
    closing braces are appended.

    Args:
        s: Text that may contain a JSON object surrounded by other text.

    Returns:
        The substring of the first balanced object, a repaired copy when the
        closing braces were missing, or ``None`` when no object starts in
        ``s`` or the text stops in the middle of a string value.
    """
    depth = 0
    start = -1
    in_string = False
    escaped = False
    for i, char in enumerate(s):
        if in_string:
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_string = False
            continue
        # Quotes are only tracked inside an object, so stray quotes in the
        # prose before the JSON cannot hide its opening brace.
        if char == '"' and depth:
            in_string = True
        elif char == '{':
            if depth == 0:
                start = i
            depth += 1
        elif char == '}' and depth:
            depth -= 1
            if depth == 0:
                return s[start:i + 1]
    if depth and not in_string:
        # Truncated output: close every object that is still open.
        return s[start:] + '}' * depth
    return None

def string_to_dict(json_string):
    """Parse JSON text and return the decoded value.

    Args:
        json_string: JSON document as ``str`` (or ``bytes``/``bytearray``).

    Returns:
        The decoded Python object.

    Raises:
        ValueError: if ``json_string`` is not text (for example ``None``) or
            is not valid JSON.
    """
    # json.loads raises TypeError for non-text input; report it the same way
    # as malformed JSON so callers deal with a single exception type.
    if not isinstance(json_string, (str, bytes, bytearray)):
        raise ValueError(
            f"Invalid JSON string: expected text, got {type(json_string).__name__}"
        )
    try:
        return json.loads(json_string)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON string: {e}") from e




def model_output(data_entry, model='llama3.1:8b'):
    """Run the two model calls for one record and return the parsed JSON.

    The first call sends the prompt built by ``generate_cov_content`` with
    ``cov_system_prompt``; its free-text reply is passed as the user message
    of a second call that uses ``json_system_prompt``. The first ``{...}``
    object in the second reply is extracted and decoded.

    Args:
        data_entry: Mapping with the combined question record.
        model: Name of the Ollama model used for both calls.

    Returns:
        The value decoded from the second reply.

    Raises:
        ValueError: if ``string_to_dict`` rejects the extracted text.
    """
    verification_prompt = generate_cov_content(data_entry)
    verification_reply = ollama.chat(
        model=model,
        messages=[
            {'role': 'system', 'content': cov_system_prompt},
            {'role': 'user', 'content': verification_prompt},
        ]
    )
    verification_text = verification_reply['message']['content']

    # Second pass: have the model restate its own verification as JSON.
    json_reply = ollama.chat(
        model=model,
        messages=[
            {'role': 'system', 'content': json_system_prompt},
            {'role': 'user', 'content': verification_text},
        ]
    )
    json_text = json_reply['message']['content']
    return string_to_dict(extract_nested_braces(json_text))

In [59]:
length = len(judge_data_list)
result_log_path = 'cov.jsonl'  # Results are appended here, one JSON object per line.
# Upper bound on passes over the judge records. The first pass does the work;
# later passes only retry questions that failed. Bounding the passes replaces
# a loop condition on `length`, which never changed and so never ended.
MAX_PASSES = 3

for pass_number in range(1, MAX_PASSES + 1):
    # Reload what is already on disk so an interrupted run resumes where it stopped.
    if os.path.isfile(result_log_path):
        with open(result_log_path, 'r', encoding='utf-8') as file:
            output_log = [json.loads(line) for line in file if line.strip()]
    else:
        output_log = []

    # Questions that already have a saved result are not sent to the model again.
    processed_questions = set(entry["Question"] for entry in output_log)
    pending_items = [
        item for item in judge_data_list
        if item["Question"] not in processed_questions
    ]
    if not pending_items:
        break
    print(f"Pass {pass_number}: {len(pending_items)} of {length} records pending.")

    # Running count of saved results, used only for the progress message.
    i = len(processed_questions)
    for judge_item in pending_items:
        question = judge_item["Question"]
        if question in processed_questions:
            # The same question appeared earlier in this pass.
            continue
        try:
            compact_data = get_data_entry(question, judge_item)
            result = model_output(compact_data)
            result['Question'] = question
            output_log.append(result)
            with open(result_log_path, 'a', encoding='utf-8') as file:
                file.write(json.dumps(result, ensure_ascii=False) + '\n')
            processed_questions.add(question)

            print(f"‚úÖ Data point {i} processed and saved.")
            i += 1
        except Exception as e:
            # Best effort: report the failure and move on; the next pass retries it.
            print(f"‚ö†Ô∏è Error processing question: {question}")
            print(f"Exception: {e}")


HII
‚úÖ Data point 0 processed and saved.
‚úÖ Data point 1 processed and saved.


KeyboardInterrupt: 