In [5]:
# %pip installs into the environment of the running kernel; the version is pinned for reproducibility.
%pip install evaluate==0.4.3

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
Installing collected packages: evaluate
Successfully installed evaluate-0.4.3


In [9]:
# %pip installs into the environment of the running kernel; the version is pinned for reproducibility.
%pip install bert_score==0.3.13

Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
Installing collected packages: bert_score
Successfully installed bert_score-0.3.13


In [16]:
# %pip installs into the environment of the running kernel; the version is pinned for reproducibility.
%pip install hf_xet==1.1.0

Collecting hf_xet
  Downloading hf_xet-1.1.0-cp37-abi3-macosx_11_0_arm64.whl.metadata (494 bytes)
Downloading hf_xet-1.1.0-cp37-abi3-macosx_11_0_arm64.whl (4.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0mm
[?25hInstalling collected packages: hf_xet
Successfully installed hf_xet-1.1.0


In [25]:
from openai import OpenAI
from typing import List, Dict
import itertools
import time
from tqdm import tqdm
import evaluate
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the BLEU and BERTScore metrics once at module level; the scoring
# helper below reads these two globals on every call.
bleu_metric, bertscore_metric = (
    evaluate.load(metric_id) for metric_id in ("bleu", "bertscore")
)

class LegalNVIDIAAnswerer:
    """Question answerer backed by a chat model on NVIDIA's OpenAI-compatible API.

    The class builds a single user prompt from retrieved context passages and
    a question, then samples one or more completions from the configured model.
    """

    def __init__(self, api_key, model="nvidia/llama-3.1-nemotron-70b-instruct"):
        """Create the API client.

        Args:
            api_key: Credential handed to the OpenAI client.
            model: Model identifier sent with every completion request.
        """
        self.client = OpenAI(
            api_key=api_key,
            base_url="https://integrate.api.nvidia.com/v1"
        )
        self.model = model

    def build_prompt(self, query, context_docs):
        """Assemble the prompt text sent to the model.

        Args:
            query: The question; surrounding whitespace is stripped.
            context_docs: Iterable of context passages; each is stripped and
                labelled "Context 1", "Context 2", ... in order.

        Returns:
            The numbered contexts, the question, and the answer-format
            instructions joined into one string.
        """
        context = "\n\n".join(
            f"Context {position}:\n{doc.strip()}"
            for position, doc in enumerate(context_docs, start=1)
        )
        return (
            f"{context}\n\n"
            f"Question: {query.strip()}\n\n"
            "Answer with a complete response followed by a short reasoning.\n"
            "Format:\nAnswer: <your answer>\nReasoning: <your reasoning>"
        )

    def generate(self, query, context_docs, temperature=0.3, top_p=1.0, best_of_n=1, debug=False, retries=3, wait_time=10):
        """Sample ``best_of_n`` completions for the question.

        Each completion is requested independently and retried on failure.

        Args:
            query: The question to answer.
            context_docs: Context passages included in the prompt.
            temperature: Sampling temperature forwarded to the API.
            top_p: Nucleus-sampling value forwarded to the API.
            best_of_n: Number of completions to collect.
            debug: When true, print every raw response.
            retries: Maximum attempts per completion; must be at least 1.
            wait_time: Seconds to sleep between failed attempts.

        Returns:
            A list of stripped response strings, one per requested completion.

        Raises:
            ValueError: If ``retries`` is smaller than 1.
            RuntimeError: If one completion fails ``retries`` times in a row;
                the last underlying error is attached as ``__cause__``.
        """
        # With retries < 1 the attempt loop would never run and the method
        # would silently return fewer candidates than requested.
        if retries < 1:
            raise ValueError("retries must be at least 1")

        prompt = self.build_prompt(query, context_docs)
        messages = [{"role": "user", "content": prompt}]
        candidates = []

        for _ in range(best_of_n):
            for attempt in range(retries):
                try:
                    completion = self.client.chat.completions.create(
                        model=self.model,
                        messages=messages,
                        temperature=temperature,
                        top_p=top_p,
                        max_tokens=1024,
                        stream=False
                    )
                    content = completion.choices[0].message.content
                    # A missing content field is reported as a failed attempt
                    # with a readable message instead of an AttributeError
                    # from calling .strip() on None.
                    if content is None:
                        raise ValueError("response contained no message content")
                    response_text = content.strip()

                    if debug:
                        print("🧠 Raw Response:\n", response_text)

                    candidates.append(response_text)
                    break  # Success
                except Exception as e:
                    print(f"⚠️ Attempt {attempt+1} failed: {e}")
                    if attempt < retries - 1:
                        print(f"⏳ Retrying in {wait_time}s...")
                        time.sleep(wait_time)
                    else:
                        # Chain explicitly so the root cause shows in the traceback.
                        raise RuntimeError("❌ All retries failed for NVIDIA LLM") from e

        return candidates

def simple_reranker(candidates: List[str]) -> str:
    """Pick the longest candidate.

    Character length is the only ranking signal. When several candidates
    share the maximum length, the earliest one in the list wins. An empty
    list propagates the ValueError raised by ``max``.
    """
    return max(candidates, key=len)

def evaluate_outputs(predictions: List[str], references: List[str]) -> Dict:
    """Score predictions against references with BLEU and BERTScore.

    Args:
        predictions: Generated texts.
        references: Reference texts, aligned one-to-one with ``predictions``.

    Returns:
        A dict with the BLEU score under "BLEU" and the mean of the
        per-example BERTScore F1 values under "BERTScore".
    """
    # The BLEU metric is given a list of references per prediction, so each
    # single reference is wrapped in its own one-element list.
    wrapped_references = [[ref] for ref in references]
    bleu_score = bleu_metric.compute(
        predictions=predictions,
        references=wrapped_references,
    )['bleu']

    f1_scores = bertscore_metric.compute(
        predictions=predictions,
        references=references,
        lang='en',
    )['f1']
    mean_f1 = sum(f1_scores) / len(f1_scores)

    return {"BLEU": bleu_score, "BERTScore": mean_f1}

def hyperparameter_grid_search(answerer: LegalNVIDIAAnswerer,
                                query: str,
                                context_docs: List[str],
                                reference: str,
                                temperatures: List[float],
                                top_ps: List[float],
                                best_of_n: int = 3):
    """Evaluate every (temperature, top_p) combination on one question.

    For each combination the answerer samples ``best_of_n`` candidates, the
    reranker keeps one, and that answer is scored against ``reference``.

    Args:
        answerer: Object whose ``generate`` method produces candidate answers.
        query: The question to answer.
        context_docs: Context passages passed through to the answerer.
        reference: Gold answer the selected candidate is scored against.
        temperatures: Temperature values to try.
        top_ps: top_p values to try.
        best_of_n: Number of candidates sampled per combination.

    Returns:
        A list with one dict per combination holding the sampling settings,
        both scores, and the selected answer.
    """
    # Materialised as a list (not a lazy iterator) so the progress bar has a total.
    combos = list(itertools.product(temperatures, top_ps))
    records = []

    for temperature, nucleus_p in tqdm(combos, desc="Grid Search"):
        samples = answerer.generate(
            query,
            context_docs,
            temperature=temperature,
            top_p=nucleus_p,
            best_of_n=best_of_n,
        )
        winner = simple_reranker(samples)
        metrics = evaluate_outputs([winner], [reference])

        record = {
            "temperature": temperature,
            "top_p": nucleus_p,
            "BLEU": metrics["BLEU"],
            "BERTScore": metrics["BERTScore"],
            "Best Answer": winner,
        }
        print(f"Config Tested: {record}")
        records.append(record)

    return records

def log_results_and_plot(results: List[Dict], csv_path: str, heatmap_path: str):
    """Write grid-search results to CSV and save side-by-side score heatmaps.

    Args:
        results: One dict per configuration with at least the keys
            "temperature", "top_p", "BLEU" and "BERTScore".
        csv_path: Destination of the CSV dump of ``results``.
        heatmap_path: Destination of the two-panel heatmap image.
    """
    df = pd.DataFrame(results)
    df.to_csv(csv_path, index=False)

    # (metric column, colormap, panel title) for each heatmap, left to right.
    panels = (
        ("BLEU", "YlGnBu", "BLEU Score Heatmap"),
        ("BERTScore", "YlOrRd", "BERTScore Heatmap"),
    )

    fig, axes = plt.subplots(1, len(panels), figsize=(12, 5))
    try:
        for ax, (metric, cmap, title) in zip(axes, panels):
            pivot = df.pivot(index="temperature", columns="top_p", values=metric)
            sns.heatmap(pivot, annot=True, fmt=".5f", cmap=cmap, ax=ax)
            ax.set_title(title)

        fig.tight_layout()
        fig.savefig(heatmap_path, dpi=300)
    finally:
        # Close the figure even when pivoting, drawing or saving fails, so a
        # failed run does not leave an open figure behind in the kernel.
        plt.close(fig)

    print(f"\nResults saved to {csv_path} and heatmap to {heatmap_path}")

# =========================
# Example Usage
# =========================
if __name__ == "__main__":
    import os

    # SECURITY: the API key is read from the environment so it never appears
    # in the notebook source or in saved outputs.
    # NOTE(review): a key was previously hard-coded at this spot; treat it as
    # compromised and revoke/rotate it.
    API_KEY = os.environ.get("NVIDIA_API_KEY")
    if not API_KEY:
        raise RuntimeError(
            "Set the NVIDIA_API_KEY environment variable before running this cell."
        )

    answerer = LegalNVIDIAAnswerer(api_key=API_KEY)

    query = "Can police search a vehicle without a warrant under the Fourth Amendment?"
    context_docs = [
        "In Carroll v. United States, the Court ruled that warrantless vehicle searches are permitted with probable cause.",
        "The 'automobile exception' is justified by the inherent mobility of vehicles and lower expectation of privacy.",
        "The scope of the search must be limited to areas where the evidence might reasonably be found.",
        "Probable cause remains a prerequisite — mere suspicion is not sufficient."
    ]
    reference = "Answer: Yes, under the automobile exception, police can search a vehicle without a warrant if they have probable cause. Reasoning: This is justified by the vehicle's mobility and reduced expectation of privacy as established in Carroll v. United States."

    # Sampling settings explored by the grid search (4 x 3 = 12 configurations).
    temperatures = [0.1, 0.3, 0.5, 0.7]
    top_ps = [0.5, 0.7, 0.9]

    results = hyperparameter_grid_search(
        answerer=answerer,
        query=query,
        context_docs=context_docs,
        reference=reference,
        temperatures=temperatures,
        top_ps=top_ps,
        best_of_n=3
    )

    log_results_and_plot(
        results,
        csv_path="nemotron_hyperparameter.csv",
        heatmap_path="nemotron_hyperparameter_heatmap.png"
    )

Grid Search:   0%|                                       | 0/12 [00:00<?, ?it/s]Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Grid Search:   8%|██▌                            | 1/12 [00:06<01:09,  6.32s/it]

Config Tested: {'temperature': 0.1, 'top_p': 0.5, 'BLEU': 0.08405462664062499, 'BERTScore': 0.8871520757675171, 'Best Answer': "**Answer:** Yes, under certain conditions.\n**Reasoning:** This answer is based on the integration of Contexts 1, 2, and 4. Specifically, according to Carroll v. United States (Context 1), warrantless vehicle searches are allowed if there is probable cause (Context 4), leveraging the 'automobile exception' (Context 2) which accommodates the unique aspects of vehicles."}


Grid Search:  17%|█████▏                         | 2/12 [00:12<01:01,  6.11s/it]

Config Tested: {'temperature': 0.1, 'top_p': 0.7, 'BLEU': 0.08405462664062499, 'BERTScore': 0.8871520757675171, 'Best Answer': "**Answer:** Yes, under certain conditions.\n**Reasoning:** This answer is based on the integration of Contexts 1, 2, and 4. Specifically, according to Carroll v. United States (Context 1), warrantless vehicle searches are allowed if there is probable cause (Context 4), leveraging the 'automobile exception' (Context 2) which accommodates the unique aspects of vehicles."}


Grid Search:  25%|███████▊                       | 3/12 [00:18<00:57,  6.34s/it]

Config Tested: {'temperature': 0.1, 'top_p': 0.9, 'BLEU': 0.0976659047691377, 'BERTScore': 0.8943610787391663, 'Best Answer': '**Answer:** Yes, under certain conditions.\n**Reasoning:** This answer is based on the integration of information from Contexts 1, 2, and 4. Specifically, according to Carroll v. United States (Context 1), warrantless vehicle searches are allowed with probable cause, not mere suspicion (Context 4). The "automobile exception" (Context 2) provides the legal justification for this, highlighting the vehicle\'s mobility and the lower privacy expectation as key factors.'}


Grid Search:  33%|██████████▎                    | 4/12 [00:24<00:49,  6.14s/it]

Config Tested: {'temperature': 0.3, 'top_p': 0.5, 'BLEU': 0.08405462664062499, 'BERTScore': 0.8871520757675171, 'Best Answer': "**Answer:** Yes, under certain conditions.\n**Reasoning:** This answer is based on the integration of Contexts 1, 2, and 4. Specifically, according to Carroll v. United States (Context 1), warrantless vehicle searches are allowed if there is probable cause (Context 4), leveraging the 'automobile exception' (Context 2) which accommodates the unique aspects of vehicles."}


Grid Search:  42%|████████████▉                  | 5/12 [00:30<00:42,  6.07s/it]

Config Tested: {'temperature': 0.3, 'top_p': 0.7, 'BLEU': 0.08193962108645049, 'BERTScore': 0.8865646123886108, 'Best Answer': "**Answer:** Yes, under certain conditions.\n**Reasoning:** This answer is based on the integration of information from Contexts 1, 2, and 4. Specifically, according to Carroll v. United States (Context 1), warrantless vehicle searches are allowed if there is probable cause (Context 4), leveraging the 'automobile exception' (Context 2) which accommodates the unique aspects of vehicles."}


Grid Search:  50%|███████████████▌               | 6/12 [00:37<00:37,  6.30s/it]

Config Tested: {'temperature': 0.3, 'top_p': 0.9, 'BLEU': 0.0976659047691377, 'BERTScore': 0.8943610787391663, 'Best Answer': '**Answer:** Yes, under certain conditions.\n**Reasoning:** This answer is based on the integration of information from Contexts 1, 2, and 4. Specifically, according to Carroll v. United States (Context 1), warrantless vehicle searches are allowed with probable cause, not mere suspicion (Context 4). The "automobile exception" (Context 2) provides the legal justification for this, highlighting the vehicle\'s mobility and the lower privacy expectation as key factors.'}


Grid Search:  58%|██████████████████             | 7/12 [00:43<00:30,  6.12s/it]

Config Tested: {'temperature': 0.5, 'top_p': 0.5, 'BLEU': 0.08405462664062499, 'BERTScore': 0.8871520757675171, 'Best Answer': "**Answer:** Yes, under certain conditions.\n**Reasoning:** This answer is based on the integration of Contexts 1, 2, and 4. Specifically, according to Carroll v. United States (Context 1), warrantless vehicle searches are allowed if there is probable cause (Context 4), leveraging the 'automobile exception' (Context 2) which accommodates the unique aspects of vehicles."}


Grid Search:  67%|████████████████████▋          | 8/12 [00:49<00:24,  6.08s/it]

Config Tested: {'temperature': 0.5, 'top_p': 0.7, 'BLEU': 0.08193962108645049, 'BERTScore': 0.8865646123886108, 'Best Answer': "**Answer:** Yes, under certain conditions.\n**Reasoning:** This answer is based on the integration of information from Contexts 1, 2, and 4. Specifically, according to Carroll v. United States (Context 1), warrantless vehicle searches are allowed if there is probable cause (Context 4), leveraging the 'automobile exception' (Context 2) which accommodates the unique aspects of vehicles."}


Grid Search:  75%|███████████████████████▎       | 9/12 [00:55<00:18,  6.21s/it]

Config Tested: {'temperature': 0.5, 'top_p': 0.9, 'BLEU': 0.08106771392535507, 'BERTScore': 0.8929747343063354, 'Best Answer': "**Answer:** Yes, under certain conditions.\n**Reasoning:** This answer is based on the combination of information from Contexts 1, 2, and 4. According to Carroll v. United States (Context 1), warrantless vehicle searches are allowed with probable cause, not mere suspicion (Context 4). The justification for this exception to the Fourth Amendment's warrant requirement is the inherent mobility of vehicles and the lower expectation of privacy in them (Context 2)."}


Grid Search:  83%|█████████████████████████     | 10/12 [01:01<00:12,  6.06s/it]

Config Tested: {'temperature': 0.7, 'top_p': 0.5, 'BLEU': 0.08405462664062499, 'BERTScore': 0.8871520757675171, 'Best Answer': "**Answer:** Yes, under certain conditions.\n**Reasoning:** This answer is based on the integration of Contexts 1, 2, and 4. Specifically, according to Carroll v. United States (Context 1), warrantless vehicle searches are allowed if there is probable cause (Context 4), leveraging the 'automobile exception' (Context 2) which accommodates the unique aspects of vehicles."}


Grid Search:  92%|███████████████████████████▌  | 11/12 [01:07<00:06,  6.04s/it]

Config Tested: {'temperature': 0.7, 'top_p': 0.7, 'BLEU': 0.08193962108645049, 'BERTScore': 0.8865646123886108, 'Best Answer': "**Answer:** Yes, under certain conditions.\n**Reasoning:** This answer is based on the integration of information from Contexts 1, 2, and 4. Specifically, according to Carroll v. United States (Context 1), warrantless vehicle searches are allowed if there is probable cause (Context 4), leveraging the 'automobile exception' (Context 2) which accommodates the unique aspects of vehicles."}


Grid Search: 100%|██████████████████████████████| 12/12 [01:15<00:00,  6.29s/it]

Config Tested: {'temperature': 0.7, 'top_p': 0.9, 'BLEU': 0.06868504084022532, 'BERTScore': 0.8927922248840332, 'Best Answer': '**Answer:** Yes, under certain conditions.\n**Reasoning:** This answer is based on the convergence of information from Contexts 1, 2, and 4. Specifically, Carroll v. United States (Context 1) established the legality of warrantless vehicle searches with probable cause. The "automobile exception" (Context 2) further justifies this practice due to the mobility of vehicles and the reduced privacy expectation. However, as emphasized in Context 4, probable cause is a necessity, indicating that not all situations permit a warrantless search, only those where there is more than mere suspicion.'}






Results saved to nemotron_hyperparameter.csv and heatmap to nemotron_hyperparameter_heatmap.png
