In [3]:
from openai import OpenAI
from typing import List, Dict
import itertools
import time
from tqdm import tqdm
import evaluate
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Initialize BLEU and BERTScore metrics.
# Both are module-level objects, created once when this module/cell executes,
# and are read by evaluate_outputs() below.
bleu_metric = evaluate.load("bleu")
bertscore_metric = evaluate.load("bertscore")

class LegalLLaMA3_70B_Answerer:
    """Question answerer that sends context-grounded prompts to a chat model.

    The client is an ``OpenAI`` instance pointed at the NVIDIA integrate
    endpoint; the model defaults to ``meta/llama3-70b-instruct``.
    """

    def __init__(self, api_key, model="meta/llama3-70b-instruct"):
        """Create the API client.

        Args:
            api_key: Credential passed to the ``OpenAI`` client.
            model: Model identifier sent with every completion request.
        """
        self.client = OpenAI(
            api_key=api_key,
            base_url="https://integrate.api.nvidia.com/v1"
        )
        self.model = model

    def build_prompt(self, query, context_docs):
        """Assemble the user prompt from the context documents and the query.

        Each document is stripped and labelled ``Context <n>:`` (1-based);
        the stripped question and the answer-format instructions follow.

        Args:
            query: The question text.
            context_docs: Iterable of context strings.

        Returns:
            The full prompt string.
        """
        context = "\n\n".join(
            f"Context {i}:\n{doc.strip()}"
            for i, doc in enumerate(context_docs, start=1)
        )
        prompt = (
            f"{context}\n\n"
            f"Question: {query.strip()}\n\n"
            f"Answer with a complete response followed by a short reasoning.\n"
            f"Format:\nAnswer: <your answer>\nReasoning: <your reasoning>"
        )
        return prompt

    def generate(self, query, context_docs, temperature=0.3, top_p=1.0, best_of_n=1, debug=False, retries=3, wait_time=10):
        """Request ``best_of_n`` completions for the same prompt.

        Each candidate is fetched with its own request; a failed request is
        retried up to ``retries`` times with ``wait_time`` seconds between
        attempts.

        Args:
            query: The question text.
            context_docs: Iterable of context strings placed before the question.
            temperature: Sampling temperature forwarded to the API.
            top_p: Nucleus-sampling value forwarded to the API.
            best_of_n: Number of candidates to collect.
            debug: When true, print each raw response.
            retries: Maximum attempts per candidate.
            wait_time: Seconds to sleep between failed attempts.

        Returns:
            List of stripped response strings, one per candidate.

        Raises:
            RuntimeError: If every attempt for a candidate fails. The last
                underlying exception is attached as ``__cause__``.
        """
        prompt = self.build_prompt(query, context_docs)
        messages = [{"role": "user", "content": prompt}]
        candidates = []

        for _ in range(best_of_n):
            for attempt in range(retries):
                try:
                    completion = self.client.chat.completions.create(
                        model=self.model,
                        messages=messages,
                        temperature=temperature,
                        top_p=top_p,
                        max_tokens=1024,
                        stream=False
                    )
                    response_text = completion.choices[0].message.content.strip()

                    if debug:
                        print("🧠 Raw Response:\n", response_text)

                    candidates.append(response_text)
                    break  # Success: move on to the next candidate
                except Exception as e:
                    print(f"⚠️ Attempt {attempt+1} failed: {e}")
                    if attempt < retries - 1:
                        print(f"⏳ Retrying in {wait_time}s...")
                        time.sleep(wait_time)
                    else:
                        # Chain the root cause so the traceback shows why the
                        # final attempt failed instead of hiding it.
                        raise RuntimeError("❌ All retries failed for LLaMA3-70B") from e

        return candidates

def simple_reranker(candidates: List[str]) -> str:
    """Pick the longest candidate string.

    When several candidates share the maximum length, the earliest one in
    the list is returned. An empty list raises ``ValueError`` (from ``max``).
    """
    longest = max(candidates, key=len)
    return longest

def evaluate_outputs(predictions: List[str], references: List[str]) -> Dict:
    """Score predictions against references with BLEU and BERTScore.

    Args:
        predictions: Generated answers.
        references: Gold answers, aligned index-for-index with ``predictions``.

    Returns:
        Dict with ``"BLEU"`` (the ``'bleu'`` value from the BLEU metric) and
        ``"BERTScore"`` (the arithmetic mean of the per-example F1 values).
    """
    # For BLEU, each reference is wrapped in its own single-element list.
    wrapped_refs = [[ref] for ref in references]
    bleu_score = bleu_metric.compute(
        predictions=predictions,
        references=wrapped_refs,
    )['bleu']

    f1_scores = bertscore_metric.compute(
        predictions=predictions,
        references=references,
        lang='en',
    )['f1']
    mean_f1 = sum(f1_scores) / len(f1_scores)

    return {"BLEU": bleu_score, "BERTScore": mean_f1}

def hyperparameter_grid_search(answerer: LegalLLaMA3_70B_Answerer,
                                query: str,
                                context_docs: List[str],
                                reference: str,
                                temperatures: List[float],
                                top_ps: List[float],
                                best_of_n: int = 3):
    """Evaluate every (temperature, top_p) combination for a single query.

    For each combination, ``best_of_n`` candidates are generated, one is
    chosen by ``simple_reranker``, and that answer is scored against
    ``reference`` with ``evaluate_outputs``. Each result is printed as it is
    produced.

    Returns:
        List of dicts, one per combination, with keys ``temperature``,
        ``top_p``, ``BLEU``, ``BERTScore`` and ``Best Answer``.
    """
    collected = []
    # Materialised as a list so the progress bar knows the total up front.
    combos = list(itertools.product(temperatures, top_ps))

    for temperature, nucleus_p in tqdm(combos, desc="Grid Search"):
        samples = answerer.generate(
            query,
            context_docs,
            temperature=temperature,
            top_p=nucleus_p,
            best_of_n=best_of_n,
        )
        chosen = simple_reranker(samples)
        metrics = evaluate_outputs([chosen], [reference])

        row = {
            "temperature": temperature,
            "top_p": nucleus_p,
            "BLEU": metrics["BLEU"],
            "BERTScore": metrics["BERTScore"],
            "Best Answer": chosen,
        }
        print(f"Config Tested: {row}")
        collected.append(row)

    return collected

def log_results_and_plot(results: List[Dict], csv_path: str, heatmap_path: str):
    """Write the grid-search results to CSV and save side-by-side heatmaps.

    Args:
        results: Row dicts containing at least ``temperature``, ``top_p``,
            ``BLEU`` and ``BERTScore``.
        csv_path: Destination of the CSV dump (written without the index).
        heatmap_path: Destination of the saved figure (300 dpi).
    """
    frame = pd.DataFrame(results)
    frame.to_csv(csv_path, index=False)

    # (metric column, colour map, panel title) for each subplot, left to right.
    panels = (
        ("BLEU", "YlGnBu", "BLEU Score Heatmap"),
        ("BERTScore", "YlOrRd", "BERTScore Heatmap"),
    )

    plt.figure(figsize=(12, 5))
    for position, (metric, palette, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        table = frame.pivot(index="temperature", columns="top_p", values=metric)
        sns.heatmap(table, annot=True, fmt=".5f", cmap=palette)
        plt.title(title)

    plt.tight_layout()
    plt.savefig(heatmap_path, dpi=300)
    plt.close()
    print(f"\nResults saved to {csv_path} and heatmap to {heatmap_path}")

# =========================
# Example Usage
# =========================
if __name__ == "__main__":
    import os  # local import: only the script entry point needs it

    # SECURITY: credentials must never be committed to source or notebooks.
    # The key is read from the environment instead; any key that was ever
    # stored in this file should be treated as compromised and revoked.
    API_KEY = os.environ.get("NVIDIA_API_KEY")
    if not API_KEY:
        raise SystemExit(
            "Set the NVIDIA_API_KEY environment variable before running this script."
        )

    answerer = LegalLLaMA3_70B_Answerer(api_key=API_KEY)

    # Single example question with its supporting context and gold answer.
    query = "Can police search a vehicle without a warrant under the Fourth Amendment?"
    context_docs = [
        "In Carroll v. United States, the Court ruled that warrantless vehicle searches are permitted with probable cause.",
        "The 'automobile exception' is justified by the inherent mobility of vehicles and lower expectation of privacy.",
        "The scope of the search must be limited to areas where the evidence might reasonably be found.",
        "Probable cause remains a prerequisite — mere suspicion is not sufficient."
    ]
    reference = "Answer: Yes, under the automobile exception, police can search a vehicle without a warrant if they have probable cause. Reasoning: This is justified by the vehicle's mobility and reduced expectation of privacy as established in Carroll v. United States."

    # Sampling grid: every temperature is paired with every top_p.
    temperatures = [0.1, 0.3, 0.5, 0.7]
    top_ps = [0.5, 0.7, 0.9]

    results = hyperparameter_grid_search(
        answerer=answerer,
        query=query,
        context_docs=context_docs,
        reference=reference,
        temperatures=temperatures,
        top_ps=top_ps,
        best_of_n=3
    )

    log_results_and_plot(
        results,
        csv_path="llama3_hyperparameter_tuning.csv",
        heatmap_path="llama3_hyperparameter_heatmap.png"
    )


Grid Search:   0%|                                       | 0/12 [00:00<?, ?it/s]Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Grid Search:   8%|██▌                            | 1/12 [00:05<00:56,  5.12s/it]

Config Tested: {'temperature': 0.1, 'top_p': 0.5, 'BLEU': 0.3251067144932862, 'BERTScore': 0.9371240139007568, 'Best Answer': 'Answer: Yes, police can search a vehicle without a warrant under the Fourth Amendment if they have probable cause.\nReasoning: According to Carroll v. United States, warrantless vehicle searches are permitted with probable cause, which is justified by the inherent mobility of vehicles and lower expectation of privacy, as long as the scope of the search is limited to areas where the evidence might reasonably be found.'}


Grid Search:  17%|█████▏                         | 2/12 [00:09<00:48,  4.81s/it]

Config Tested: {'temperature': 0.1, 'top_p': 0.7, 'BLEU': 0.3755310872178264, 'BERTScore': 0.950772762298584, 'Best Answer': "Answer: Yes, police can search a vehicle without a warrant under the Fourth Amendment if they have probable cause.\nReasoning: According to Carroll v. United States, warrantless vehicle searches are permitted with probable cause, and the 'automobile exception' justifies this due to the inherent mobility of vehicles and lower expectation of privacy."}


Grid Search:  25%|███████▊                       | 3/12 [00:14<00:42,  4.70s/it]

Config Tested: {'temperature': 0.1, 'top_p': 0.9, 'BLEU': 0.3251067144932862, 'BERTScore': 0.9371240139007568, 'Best Answer': 'Answer: Yes, police can search a vehicle without a warrant under the Fourth Amendment if they have probable cause.\nReasoning: According to Carroll v. United States, warrantless vehicle searches are permitted with probable cause, which is justified by the inherent mobility of vehicles and lower expectation of privacy, as long as the scope of the search is limited to areas where the evidence might reasonably be found.'}


Grid Search:  33%|██████████▎                    | 4/12 [00:18<00:36,  4.55s/it]

Config Tested: {'temperature': 0.3, 'top_p': 0.5, 'BLEU': 0.3755310872178264, 'BERTScore': 0.950772762298584, 'Best Answer': "Answer: Yes, police can search a vehicle without a warrant under the Fourth Amendment if they have probable cause.\nReasoning: According to Carroll v. United States, warrantless vehicle searches are permitted with probable cause, and the 'automobile exception' justifies this due to the inherent mobility of vehicles and lower expectation of privacy."}


Grid Search:  42%|████████████▉                  | 5/12 [00:22<00:31,  4.46s/it]

Config Tested: {'temperature': 0.3, 'top_p': 0.7, 'BLEU': 0.3755310872178264, 'BERTScore': 0.950772762298584, 'Best Answer': "Answer: Yes, police can search a vehicle without a warrant under the Fourth Amendment if they have probable cause.\nReasoning: According to Carroll v. United States, warrantless vehicle searches are permitted with probable cause, and the 'automobile exception' justifies this due to the inherent mobility of vehicles and lower expectation of privacy."}


Grid Search:  50%|███████████████▌               | 6/12 [00:27<00:26,  4.48s/it]

Config Tested: {'temperature': 0.3, 'top_p': 0.9, 'BLEU': 0.28374632201065675, 'BERTScore': 0.9379559755325317, 'Best Answer': "Answer: Yes, police can search a vehicle without a warrant under the Fourth Amendment if they have probable cause.\nReasoning: According to Carroll v. United States, warrantless vehicle searches are permitted with probable cause, and the 'automobile exception' justifies this due to the inherent mobility of vehicles and lower expectation of privacy, as long as the scope of the search is limited to areas where the evidence might reasonably be found."}


Grid Search:  58%|██████████████████             | 7/12 [00:32<00:22,  4.59s/it]

Config Tested: {'temperature': 0.5, 'top_p': 0.5, 'BLEU': 0.28374632201065675, 'BERTScore': 0.9379559755325317, 'Best Answer': "Answer: Yes, police can search a vehicle without a warrant under the Fourth Amendment if they have probable cause.\nReasoning: According to Carroll v. United States, warrantless vehicle searches are permitted with probable cause, and the 'automobile exception' justifies this due to the inherent mobility of vehicles and lower expectation of privacy, as long as the scope of the search is limited to areas where the evidence might reasonably be found."}


Grid Search:  67%|████████████████████▋          | 8/12 [00:37<00:18,  4.70s/it]

Config Tested: {'temperature': 0.5, 'top_p': 0.7, 'BLEU': 0.3251067144932862, 'BERTScore': 0.9371240139007568, 'Best Answer': 'Answer: Yes, police can search a vehicle without a warrant under the Fourth Amendment if they have probable cause.\nReasoning: According to Carroll v. United States, warrantless vehicle searches are permitted with probable cause, which is justified by the inherent mobility of vehicles and lower expectation of privacy, as long as the scope of the search is limited to areas where the evidence might reasonably be found.'}


Grid Search:  75%|███████████████████████▎       | 9/12 [00:42<00:14,  4.89s/it]

Config Tested: {'temperature': 0.5, 'top_p': 0.9, 'BLEU': 0.22162393278937678, 'BERTScore': 0.9181151986122131, 'Best Answer': "Answer: Yes, police can search a vehicle without a warrant under the Fourth Amendment if they have probable cause.\n\nReasoning: According to Carroll v. United States (Context 1), warrantless vehicle searches are permitted with probable cause, and the 'automobile exception' (Context 2) justifies this due to the inherent mobility of vehicles and lower expectation of privacy, as long as the scope of the search is limited to areas where the evidence might reasonably be found (Context 3) and probable cause remains a prerequisite (Context 4)."}


Grid Search:  83%|█████████████████████████     | 10/12 [00:47<00:09,  4.88s/it]

Config Tested: {'temperature': 0.7, 'top_p': 0.5, 'BLEU': 0.28374632201065675, 'BERTScore': 0.9379559755325317, 'Best Answer': "Answer: Yes, police can search a vehicle without a warrant under the Fourth Amendment if they have probable cause.\nReasoning: According to Carroll v. United States, warrantless vehicle searches are permitted with probable cause, and the 'automobile exception' justifies this due to the inherent mobility of vehicles and lower expectation of privacy, as long as the scope of the search is limited to areas where the evidence might reasonably be found."}


Grid Search:  92%|███████████████████████████▌  | 11/12 [00:52<00:04,  4.91s/it]

Config Tested: {'temperature': 0.7, 'top_p': 0.7, 'BLEU': 0.28374632201065675, 'BERTScore': 0.9379559755325317, 'Best Answer': "Answer: Yes, police can search a vehicle without a warrant under the Fourth Amendment if they have probable cause.\nReasoning: According to Carroll v. United States, warrantless vehicle searches are permitted with probable cause, and the 'automobile exception' justifies this due to the inherent mobility of vehicles and lower expectation of privacy, as long as the scope of the search is limited to areas where the evidence might reasonably be found."}


Grid Search: 100%|██████████████████████████████| 12/12 [00:57<00:00,  4.78s/it]

Config Tested: {'temperature': 0.7, 'top_p': 0.9, 'BLEU': 0.3251067144932862, 'BERTScore': 0.9371240139007568, 'Best Answer': 'Answer: Yes, police can search a vehicle without a warrant under the Fourth Amendment if they have probable cause.\nReasoning: According to Carroll v. United States, warrantless vehicle searches are permitted with probable cause, which is justified by the inherent mobility of vehicles and lower expectation of privacy, as long as the scope of the search is limited to areas where the evidence might reasonably be found.'}






Results saved to llama3_hyperparameter_tuning.csv and heatmap to llama3_hyperparameter_heatmap.png
