In [None]:
# -*- coding: utf-8 -*-
"""
5_Final_Visualization.ipynb

This notebook serves as the comprehensive visualization and analysis hub for the final report.
It combines key figures and tables to present a holistic view of the fine-tuning experiments
and provides discussion points for conclusions.
"""

# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

from src.config import TABLES_DIR, FIGURES_DIR, REPORT_TITLE
from src.visualize import plot_rouge_scores, plot_trainable_parameters, plot_training_and_inference_time

# --- Load Evaluation Results ---
# Reads the evaluation table (expected to come from `src/evaluate.py`), echoes
# it rounded to 4 decimals, and writes a copy under the final-report file name.
# When the input file is missing, `results_df` is an empty DataFrame so that
# later sections can branch on `results_df.empty` instead of failing.
print("Loading final evaluation results...")
results_path = os.path.join(TABLES_DIR, "evaluation_results.csv")
# Built once and reused for both the write and the log message. Reusing the
# enclosing quote character inside an f-string replacement field is a
# SyntaxError before Python 3.12, so the path is not computed inline there.
summary_path = os.path.join(TABLES_DIR, "final_evaluation_summary.csv")

if os.path.exists(results_path):
    # The 'Model' column becomes the row index of the results table.
    results_df = pd.read_csv(results_path, index_col='Model')
    print("Evaluation results loaded successfully.")
    print("\n--- Summary of Evaluation Results ---")
    print(results_df.round(4))
    # Save to report/tables if not already there, or ensure it's up to date
    results_df.to_csv(summary_path)
    print(f"Summary table saved to {summary_path}")
else:
    print(f"Error: Evaluation results file not found at {results_path}. Please run `src/evaluate.py` first.")
    results_df = pd.DataFrame()  # Empty placeholder so downstream code can check `.empty`

# --- Re-generate Key Visualizations ---
# Runs each plotting helper imported from `src.visualize` against the loaded
# results table, passing the file name to use for the final-report figure.
# Nothing is plotted when the results table is empty.
if results_df.empty:
    print("Skipping visualization as evaluation results are empty.")
else:
    print("\nRe-generating key visualizations for the final report...")
    # (plotting helper, output file name) pairs, executed in this order.
    final_figures = (
        (plot_rouge_scores, "final_rouge_comparison.png"),
        (plot_trainable_parameters, "final_trainable_params.png"),
        (plot_training_and_inference_time, "final_time_comparison.png"),
    )
    for render_figure, figure_name in final_figures:
        render_figure(results_df, filename=figure_name)
    print("Key visualizations re-generated and saved to report/figures/.")


# --- Comprehensive Analysis and Conclusions for Report ---
# The suggested report outline is kept as data: one entry per printed line,
# emitted in order. Entries starting with "\n" produce a blank line before
# the heading they introduce.
report_outline_lines = (
    "\n--- Comprehensive Analysis and Conclusions for Final Report ---",
    "This section should be filled out manually in your final report, drawing insights from all generated figures and tables.",
    "\n**Suggested structure for your report's Results & Analysis and Conclusion sections:**",

    # I. Results & Analysis
    "\n**I. Results & Analysis**",
    "   A. **Quantitative Performance (ROUGE Scores)**",
    "      - Present the table of ROUGE scores (ROUGE-1, ROUGE-2, ROUGE-L) from `final_evaluation_summary.csv`.",
    "      - Discuss the performance of each fine-tuning method. Which one achieved the highest scores and why do you think so?",
    "      - Analyze the differences between ROUGE-1 (unigram overlap), ROUGE-2 (bigram overlap), and ROUGE-L (longest common subsequence). Does any method excel in a specific type of overlap?",
    "      - Compare the performance against the baseline (if you include it - e.g., the base model without fine-tuning, or a simple extractive summarizer).",

    "   B. **Resource Efficiency (Trainable Parameters, Training Time, GPU Memory)**",
    "      - Present the bar charts for trainable parameters (`final_trainable_params.png`) and training/inference times (`final_time_comparison.png`).",
    "      - Discuss the resource footprint of each method. How do PEFT methods (LoRA, QLoRA, Adapter, Prompt-tuning) compare to Full Fine-tuning in terms of trainable parameters and training time?",
    "      - Include observations on GPU memory usage during your training runs (manual notes). Explain why QLoRA is particularly memory-efficient.",

    "   C. **Qualitative Analysis (Sample Predictions)**",
    "      - Refer to `notebooks/4_Sample_Predictions.ipynb` for examples.",
    "      - Include 1-2 compelling examples in your report (original article, reference summary, and generated summaries from select models).",
    "      - Discuss the quality of generated summaries: coherence, fluency, factual correctness, conciseness. Highlight strengths and weaknesses of each method based on these samples.",
    "      - Note any common errors or interesting patterns observed.",

    "   D. **Trade-offs and Recommendations**",
    "      - Summarize the overall trade-offs between model performance, resource requirements, and implementation complexity.",
    "      - Provide recommendations for choosing a fine-tuning method based on different project constraints (e.g., limited GPU vs. high accuracy requirement)?",

    # II. Conclusion & Future Work
    "\n**II. Conclusion & Future Work**",
    "   A. **Summary of Findings**",
    "      - Briefly reiterate the main conclusions drawn from your experiments.",
    "   B. **Limitations**",
    "      - Acknowledge any limitations of your study (e.g., dataset size, number of epochs, specific hyperparameters chosen, hardware constraints).",
    "   C. **Future Work**",
    "      - Propose potential avenues for future research (e.g., experimenting with different datasets, larger models, more advanced PEFT techniques, ensemble methods, human evaluation, different metrics like BLEU/METEOR).",

    # Closing status message
    "Final visualization notebook setup complete. This notebook is designed to help you construct your final project report.",
)

for outline_line in report_outline_lines:
    print(outline_line)
