In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install transformers
!pip install datasets
!pip install peft
!pip install evaluate
!pip install tqdm
!pip install torch
!pip install bert_score
!pip install sacrebleu

In [None]:
import os
import json
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm

# For evaluation
import evaluate


# --- Constants & Mappings ---

# Perspective labels; a label's position in this list is its class id.
PERSPECTIVES = ["INFORMATION", "SUGGESTION", "EXPERIENCE", "QUESTION", "CAUSE"]

# BIO tag inventory: "O" first, then a B-/I- pair per perspective, in list order.
BIO_TAGS = [
    "O",
    *(f"{prefix}-{name}" for name in PERSPECTIVES for prefix in ("B", "I")),
]

# Forward (label -> id) and reverse (id -> label) lookup tables.
perspective2id = dict(zip(PERSPECTIVES, range(len(PERSPECTIVES))))
id2perspective = dict(enumerate(PERSPECTIVES))
bio2id = dict(zip(BIO_TAGS, range(len(BIO_TAGS))))
id2bio = dict(enumerate(BIO_TAGS))


def load_json(path):
    """Read the JSON document stored at ``path`` and return the parsed object.

    The file is decoded explicitly as UTF-8 instead of with the platform's
    locale default, so data containing non-ASCII text loads the same way on
    every system.

    Args:
        path: Filesystem path of the JSON file.

    Returns:
        The object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def join_answers(entry):
    """Flatten the ``"answers"`` field of ``entry`` into one stripped string.

    A string value is returned stripped. A list has its string items joined
    with single spaces (non-string items are skipped) and the result is
    stripped. A missing key, or a value of any other type, yields ``""``.
    """
    raw = entry.get("answers", [])
    if isinstance(raw, str):
        return raw.strip()
    if not isinstance(raw, list):
        return ""
    text_parts = [piece for piece in raw if isinstance(piece, str)]
    return " ".join(text_parts).strip()

def get_reference_summaries(example):
    """Collect the usable reference summaries of one dataset example.

    For every entry of ``PERSPECTIVES`` the key ``"<PERSPECTIVE>_SUMMARY"`` is
    looked up in ``example["labelled_summaries"]``. A value is kept only when
    it is a string that, once stripped, is non-empty and is not one of the
    placeholder tokens listed below (compared case-insensitively).

    A missing or non-dict ``labelled_summaries`` field, and values that are not
    strings (for example JSON ``null`` or a boolean), are skipped rather than
    raising.

    Args:
        example: Mapping for one example; only ``"labelled_summaries"`` is read.

    Returns:
        Dict mapping perspective name to its stripped reference summary.
        Perspectives without a usable reference are absent from the dict.
    """
    # Tokens that occupy a summary slot without being an actual summary.
    placeholder_values = {"false", "true", "not_duplicate", "n/a", "duplicate"}

    refs = {}
    labelled_summaries = example.get("labelled_summaries")
    if not isinstance(labelled_summaries, dict):
        return refs

    for perspective in PERSPECTIVES:
        ref = labelled_summaries.get(f"{perspective}_SUMMARY")
        if not isinstance(ref, str):
            # None, booleans, numbers, ...: nothing to use as reference text.
            continue
        ref = ref.strip()
        if ref and ref.lower() not in placeholder_values:
            refs[perspective] = ref
    return refs



# --- Classifier Model ---

from transformers import AutoTokenizer, AutoModel

class DualHeadClassifier(nn.Module):
    """Pretrained encoder shared by two linear heads.

    ``classifier`` scores the hidden state at position 0 of each sequence
    (one score per perspective); ``tagger`` scores every position (one score
    per span tag). Dropout with p=0.3 is applied to the encoder output before
    each head.
    """

    def __init__(self, model_name="roberta-base", num_perspectives=5, num_span_tags=len(BIO_TAGS)):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(0.3)
        # Both heads read the encoder's hidden vectors, so they share its width.
        width = self.encoder.config.hidden_size
        self.classifier = nn.Linear(width, num_perspectives)
        self.tagger = nn.Linear(width, num_span_tags)

    def forward(self, input_ids, attention_mask):
        """Return ``(cls_logits, tag_logits)`` for the encoded inputs.

        ``cls_logits`` comes from the position-0 hidden state; ``tag_logits``
        has one score vector per position of the encoder output.
        """
        token_states = self.encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
        ).last_hidden_state
        first_token_state = token_states[:, 0, :]
        cls_logits = self.classifier(self.dropout(first_token_state))
        tag_logits = self.tagger(self.dropout(token_states))
        return cls_logits, tag_logits



# --- Generator Function ---

from transformers import BartTokenizer, BartForConditionalGeneration

def generate_summary_for_perspective(input_text, perspective, generator_model, generator_tokenizer, device):
    """Generate a summary of ``input_text`` for a single perspective.

    The prompt is three lines: an instruction naming the perspective, the
    input text, and a closing instruction naming the lower-cased perspective.
    It is tokenized with truncation at 512 tokens and decoded with 5-beam
    search capped at ``max_length=150``.

    Args:
        input_text: Text to summarise.
        perspective: Perspective name inserted into the prompt.
        generator_model: Object exposing ``generate(input_ids=..., ...)``.
        generator_tokenizer: Callable tokenizer that also exposes ``decode``.
        device: Device the input ids are moved to before generation.

    Returns:
        The first generated sequence, decoded without special tokens and
        stripped of surrounding whitespace.
    """
    prompt = "".join([
        f"Generate a {perspective} summary:\n",
        f"{input_text}\n",
        f"Provide a clear and structured {perspective.lower()} summary.",
    ])
    encoded = generator_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    generated = generator_model.generate(
        input_ids=encoded.input_ids.to(device),
        max_length=150,
        num_beams=5,
    )
    # Only the first returned sequence is used.
    best_sequence = generated[0]
    return generator_tokenizer.decode(best_sequence, skip_special_tokens=True).strip()



# --- Evaluation Function ---

def evaluate_predictions(pipeline_results):
    """Print sacreBLEU and mean BERTScore F1 for each perspective.

    Each item of ``pipeline_results`` may carry ``"reference_summaries"`` and
    ``"generated_summaries"`` dicts keyed by perspective. A perspective
    contributes a (reference, prediction) pair for an item only when it is
    present in both dicts. Perspectives that end up with no pairs get a
    "Not enough data" line instead of scores.

    Args:
        pipeline_results: Iterable of per-example result dicts.

    Returns:
        None. Results are reported with ``print``.
    """
    bleu_metric = evaluate.load("sacrebleu")
    bertscore_metric = evaluate.load("bertscore")

    # perspective -> (references, predictions), filled in lockstep.
    paired = {name: ([], []) for name in PERSPECTIVES}
    for result in pipeline_results:
        gold = result.get("reference_summaries", {})
        generated = result.get("generated_summaries", {})
        for perspective in PERSPECTIVES:
            if perspective not in gold or perspective not in generated:
                continue
            gold_texts, generated_texts = paired[perspective]
            gold_texts.append(gold[perspective])
            generated_texts.append(generated[perspective])

    for perspective in PERSPECTIVES:
        refs, preds = paired[perspective]
        if not (refs and preds):
            print("{} - Not enough data for evaluation.".format(perspective))
            continue

        # Each prediction is wrapped with its single reference in a list.
        bleu_score = bleu_metric.compute(
            predictions=preds,
            references=[[ref] for ref in refs],
        )["score"]
        bert_f1 = bertscore_metric.compute(
            predictions=preds,
            references=refs,
            lang="en",
        )["f1"]
        bert_avg = np.mean(bert_f1)
        print("{} - BLEU: {:.8f}, BERTScore: {:.4f}".format(perspective, bleu_score, bert_avg))


    
# ---------------------------
# --- Main Pipeline ---
# ---------------------------

# Use the GPU when PyTorch can see one; otherwise everything runs on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---------- classifier model and tokenizer loading ----------

# Fine-tuned weights for DualHeadClassifier, stored as a state dict.
classifier_model_path = "/kaggle/input/dual-classifier-model/pytorch/default/1/dual_classifier_final.pt"
classifier_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
# Head sizes are taken from the label lists so the layer shapes match them.
classifier_model = DualHeadClassifier(model_name="roberta-base",
                                      num_perspectives=len(PERSPECTIVES),
                                      num_span_tags=len(BIO_TAGS))
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
classifier_state = torch.load(classifier_model_path, map_location=device)
classifier_model.load_state_dict(classifier_state)
classifier_model.to(device)
# Inference only: eval() disables the dropout layers.
classifier_model.eval()


# ---------- generator model and tokenizer loading ----------

# Directory holding both the BART tokenizer files and the model weights.
generator_model_path = "/kaggle/input/bard-lora-hard-2/pytorch/default/1"
generator_tokenizer = BartTokenizer.from_pretrained(generator_model_path)
generator_model = BartForConditionalGeneration.from_pretrained(generator_model_path)
generator_model.to(device)
generator_model.eval()

# ---------- test data loading ----------

test_data_path = "/kaggle/input/nlp-project/test_project.json"
test_data = load_json(test_data_path)
# Debug aid: uncomment the next line to run on only the first 1% of the test set.
# test_data = test_data[:int(0.01 * len(test_data))]

# One record per processed test example: inputs, predictions and references.
pipeline_results = []

for item in tqdm(test_data, desc="Processing Test Examples"):
    question = item.get("question", "").strip()
    answer = join_answers(item)

    # References are collected here so they can be stored with the record below.
    reference_summaries = get_reference_summaries(item)

    # Skip examples that lack either the question or the answer text.
    if not (question and answer):
        continue

    # Single text input shared by the classifier and the generator prompt.
    text = f"Question: {question} Answer: {answer}"

    # ---------- classifier prediction ----------

    encoding = classifier_tokenizer(
        text,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
        return_offsets_mapping=True,
    )
    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    with torch.no_grad():
        cls_logits, _ = classifier_model(input_ids, attention_mask)
        # Multi-label setup: each perspective gets its own sigmoid probability.
        pred_probs = torch.sigmoid(cls_logits).squeeze(0).cpu().numpy()

    # Keep every perspective whose probability is above 0.5.
    predicted_perspectives = [
        id2perspective[idx] for idx, score in enumerate(pred_probs) if score > 0.5
    ]

    # ---------- generator summary for each selected perspective ----------

    # When nothing clears the threshold, fall back to summarising every perspective.
    perspectives_to_summarise = predicted_perspectives or PERSPECTIVES
    generated_summaries = {
        name: generate_summary_for_perspective(
            text, name, generator_model, generator_tokenizer, device
        )
        for name in perspectives_to_summarise
    }

    pipeline_results.append({
        "question": question,
        "answer": answer,
        "predicted_perspectives": predicted_perspectives,
        "generated_summaries": generated_summaries,
        "reference_summaries": reference_summaries,
    })


# Write all per-example results to a JSON file in the working directory.
output_file = "pipeline_2_test_predictions.json"
with open(output_file, mode="w") as results_handle:
    json.dump(pipeline_results, results_handle, indent=2)
print(f"Pipeline complete. Results saved to {output_file}")


# ---------- Evaluation ----------

# Prints BLEU and BERTScore per perspective for the results collected above.
evaluate_predictions(pipeline_results)

