# Ablation Analysis

In [None]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Directory from which the JSON result files below are loaded.
# The path is relative, so it resolves against the current working directory.
RESULTS_DIR = Path("../experiments/results")
# NOTE(review): LOGS_DIR is not referenced by any code visible here — confirm
# it is used elsewhere before relying on it.
LOGS_DIR = Path("../experiments/logs")

1. System-Level Comparison (Baselines vs. RAG + LoRA)

In [None]:
# Read the per-system metrics file and parse it in one step.
metrics_path = RESULTS_DIR / "metrics.json"
all_results = json.loads(metrics_path.read_text())

all_results

# orient="index": the outer JSON keys become the row labels.
df_systems = pd.DataFrame.from_dict(all_results, orient="index")
df_systems

# Grouped bar chart of the three score columns, one group per row.
score_columns = ["rougeL", "job_relevance_mean", "resume_alignment_mean"]
ax = df_systems[score_columns].plot.bar(rot=45)
ax.set_title("System Comparison: Baselines vs RAG+LoRA")
ax.set_ylabel("Score")
plt.tight_layout()
plt.show()

2. Hyperparameter Tuning (LoRA rank, learning rate, etc.)

In [None]:
hparam_path = RESULTS_DIR / "hparam_results.json"
# JSON is UTF-8 by specification; be explicit so parsing does not depend on
# the platform's locale encoding.
with hparam_path.open(encoding="utf-8") as f:
    hparam_results = json.load(f)

# orient="index": the outer JSON keys become the row labels.
df_hparams = pd.DataFrame.from_dict(hparam_results, orient="index")
df_hparams


def _plot_mean_by(df, group_col, metric_col, title, ylabel):
    """Bar-plot the mean of ``metric_col`` for each distinct ``group_col`` value.

    The plot is skipped (with a printed notice) when either column is absent,
    so a results file that lacks a hyperparameter *or* a metric does not raise
    ``KeyError``.

    Args:
        df: DataFrame holding the columns to aggregate.
        group_col: Column whose distinct values form the bars.
        metric_col: Column averaged within each group.
        title: Plot title.
        ylabel: Y-axis label.
    """
    missing = [col for col in (group_col, metric_col) if col not in df.columns]
    if missing:
        print(f"Skipping {title!r}: missing column(s) {missing}")
        return

    df.groupby(group_col)[metric_col].mean().plot(kind="bar")
    plt.title(title)
    plt.ylabel(ylabel)
    plt.show()


# Example: effect of lora_r on val_loss
_plot_mean_by(
    df_hparams,
    "lora_r",
    "val_loss",
    title="Effect of LoRA Rank on Validation Loss",
    ylabel="Mean validation loss",
)

# Example: effect of learning_rate on rougeL
_plot_mean_by(
    df_hparams,
    "learning_rate",
    "rougeL",
    title="Effect of Learning Rate on ROUGE-L",
    ylabel="Mean ROUGE-L",
)

3. Ablation Study: Impact of RAG and LoRA

We compare four variants:
1. Prompt-only model (no RAG, no LoRA)
2. LoRA fine-tuned model without RAG
3. RAG with base model (no LoRA)
4. RAG with LoRA (our full system)

In [None]:
# Read the ablation results file and parse it in one step.
ablation_path = RESULTS_DIR / "ablation_results.json"
ablation = json.loads(ablation_path.read_text())

# orient="index": the outer JSON keys become the row labels.
df_ablation = pd.DataFrame.from_dict(ablation, orient="index")
df_ablation

# Grouped bar chart of the three score columns, one group per row.
ablation_metrics = ["rougeL", "job_relevance_mean", "resume_alignment_mean"]
ablation_ax = df_ablation[ablation_metrics].plot.bar(rot=45)
ablation_ax.set_title("Ablation Study: RAG & LoRA")
ablation_ax.set_ylabel("Score")
plt.tight_layout()
plt.show()

4. Error Analysis and Qualitative Examples

We examine examples where the RAG + LoRA system still underperforms or makes mistakes, such as:
- Mentioning irrelevant skills.
- Overly generic openings.
- Weak closing paragraphs.

In [None]:
qual_path = RESULTS_DIR / "qualitative_examples.json"

# How many examples to print, and how many leading characters of each field.
MAX_EXAMPLES = 3
PREVIEW_CHARS = 600

# (heading, key) pairs, in the order they are printed for each example.
PREVIEW_FIELDS = [
    ("JOB", "job_text"),
    ("RESUME", "resume_text"),
    ("REFERENCE COVER LETTER", "reference"),
    ("GENERATED COVER LETTER", "generated"),
]

if qual_path.exists():
    # The file holds free text; JSON is UTF-8 by specification, so decode it
    # explicitly instead of relying on the platform's locale encoding.
    with qual_path.open(encoding="utf-8") as f:
        qual_examples = json.load(f)

    # Show a few examples
    for ex in qual_examples[:MAX_EXAMPLES]:
        print("=" * 80)
        for heading, key in PREVIEW_FIELDS:
            print(f"{heading}:\n", ex[key][:PREVIEW_CHARS], "\n")
else:
    # The file is optional; say so rather than leaving the output empty.
    print(f"No qualitative examples found at {qual_path}; skipping.")
