In [7]:
!pip install transformers datasets torch pandas numpy scikit-learn rouge-score nltk bert-score



In [8]:
!pip install POT



In [9]:
import pandas as pd
import numpy as np
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments, BertTokenizer, BertModel
from datasets import Dataset
from sklearn.metrics import f1_score
from rouge_score import rouge_scorer
import nltk
from bert_score import score as bert_score
from collections import Counter
import ot  # For WMD optimal transport

nltk.download('wordnet')
nltk.download('omw-1.4')
from nltk.translate.meteor_score import meteor_score

# Try importing moverscore; if not available, we'll approximate
try:
    from moverscore import get_moverscore
    MOVESCORE_AVAILABLE = True
except ImportError:
    MOVESCORE_AVAILABLE = False
    print("MoverScore library not available. Approximating with BERT embeddings and WMD.")

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def load_data(file_path):
    df = pd.read_csv(file_path)
    return df[['tweet', 'GT Target']]

def preprocess_data(df):
    df['tweet'] = df['tweet'].str.strip().str.lower()
    return df

def to_dataset(df):
    return Dataset.from_pandas(df)

def calculate_kqwr(prediction, reference):
    pred_words = set(prediction.lower().split())
    ref_words = set(reference.lower().split())
    if not pred_words or not ref_words:
        return 0.0
    overlap = len(pred_words & ref_words)
    total_unique = len(pred_words | ref_words)
    return overlap / total_unique if total_unique > 0 else 0.0

def calculate_td_tc(predictions, references):
    td = sum(1 for pred, ref in zip(predictions, references) if pred == ref)
    tc = len(predictions)
    return td / tc if tc > 0 else 0.0

def get_bert_embeddings(texts, tokenizer, model):
    if not texts:  # Handle empty input
        return np.zeros((1, 768))  # Return a dummy embedding for consistency
    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.cpu().numpy()  # Shape: (batch_size, seq_len, hidden_size)

def calculate_moverscore(predictions, references, bert_tokenizer, bert_model):
    if MOVESCORE_AVAILABLE:
        scores = [get_moverscore(pred, ref) for pred, ref in zip(predictions, references)]
        return np.mean(scores)
    else:
        scores = []
        for pred, ref in zip(predictions, references):
            pred_emb = get_bert_embeddings([pred], bert_tokenizer, bert_model)[0].mean(axis=0)
            ref_emb = get_bert_embeddings([ref], bert_tokenizer, bert_model)[0].mean(axis=0)
            dist_matrix = np.linalg.norm(pred_emb - ref_emb)
            scores.append(1 / (1 + dist_matrix))
        return np.mean(scores)

def calculate_yisi(predictions, references, bert_tokenizer, bert_model):
    scores = []
    for pred, ref in zip(predictions, references):
        pred_emb = get_bert_embeddings([pred], bert_tokenizer, bert_model)[0].mean(axis=0)
        ref_emb = get_bert_embeddings([ref], bert_tokenizer, bert_model)[0].mean(axis=0)
        cosine_sim = np.dot(pred_emb, ref_emb) / (np.linalg.norm(pred_emb) * np.linalg.norm(ref_emb) + 1e-8)
        precision = recall = cosine_sim
        f_score = 2 * (precision * recall) / (precision + recall + 1e-8)
        scores.append(f_score)
    return np.mean(scores)

def calculate_wmd(predictions, references, bert_tokenizer, bert_model):
    scores = []
    for pred, ref in zip(predictions, references):
        pred_words = pred.lower().split()
        ref_words = ref.lower().split()

        if not pred_words or not ref_words:  # Handle empty cases
            scores.append(0.0)  # Default to 0 if empty
            continue

        pred_embs = get_bert_embeddings(pred_words, bert_tokenizer, bert_model)[0]  # Shape: (n, hidden_size)
        ref_embs = get_bert_embeddings(ref_words, bert_tokenizer, bert_model)[0]  # Shape: (m, hidden_size)

        if pred_embs.shape[0] == 0 or ref_embs.shape[0] == 0:
            scores.append(0.0)  # Default distance for empty cases
            continue

        # Normalized bag-of-words vectors
        pred_counts = np.ones(len(pred_words)) / len(pred_words) if pred_words else np.array([1.0])
        ref_counts = np.ones(len(ref_words)) / len(ref_words) if ref_words else np.array([1.0])

        # Compute distance matrix
        dist_matrix = np.linalg.norm(pred_embs[:, np.newaxis] - ref_embs[np.newaxis, :], axis=2)

        # Ensure dist_matrix shape matches pred_counts and ref_counts
        if dist_matrix.shape != (len(pred_counts), len(ref_counts)):
            scores.append(0.0)  # Fallback for mismatch
            continue

        # Compute WMD using optimal transport
        try:
            wmd_dist = ot.emd2(pred_counts, ref_counts, dist_matrix)
            scores.append(wmd_dist)
        except ValueError as e:
            print(f"WMD calculation failed for pred: '{pred}', ref: '{ref}' - {e}")
            scores.append(0.0)  # Fallback for any OT errors

    return np.mean(scores) if scores else 0.0

def calculate_greedy_matching(predictions, references, bert_tokenizer, bert_model):
    scores = []
    for pred, ref in zip(predictions, references):
        pred_tokens = pred.lower().split()
        ref_tokens = ref.lower().split()

        if not pred_tokens or not ref_tokens:
            scores.append(0.0)
            continue

        pred_embs = get_bert_embeddings(pred_tokens, bert_tokenizer, bert_model)[0]
        ref_embs = get_bert_embeddings(ref_tokens, bert_tokenizer, bert_model)[0]

        g_pr = 0
        for ref_emb in ref_embs:
            cos_sims = [np.dot(ref_emb, pred_emb) / (np.linalg.norm(ref_emb) * np.linalg.norm(pred_emb) + 1e-8)
                        for pred_emb in pred_embs]
            g_pr += max(cos_sims)
        g_pr /= len(ref_tokens)

        g_rp = 0
        for pred_emb in pred_embs:
            cos_sims = [np.dot(pred_emb, ref_emb) / (np.linalg.norm(pred_emb) * np.linalg.norm(ref_emb) + 1e-8)
                        for ref_emb in ref_embs]
            g_rp += max(cos_sims)
        g_rp /= len(pred_tokens)

        gm_score = (g_pr + g_rp) / 2
        scores.append(gm_score)

    return np.mean(scores)

def calculate_metrics(model, tokenizer, dataset, sample_size=100):
    model.eval()
    sample = dataset.select(range(min(sample_size, len(dataset))))
    predictions, references = [], []

    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert_model = BertModel.from_pretrained('bert-base-uncased').to(device)

    with torch.no_grad():
        for example in sample:
            input_text = example['tweet']
            inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)
            outputs = model.generate(**inputs)
            pred = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
            predictions.append(pred)
            references.append(example['GT Target'])

    f1 = f1_score(references, predictions, average='weighted')
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
    rouge_scores = [scorer.score(ref, pred) for ref, pred in zip(references, predictions)]
    rouge1 = np.mean([s['rouge1'].fmeasure for s in rouge_scores])
    rougeL = np.mean([s['rougeL'].fmeasure for s in rouge_scores])
    meteor_scores = [meteor_score([ref.split()], pred.split()) for ref, pred in zip(references, predictions)]
    meteor = np.mean(meteor_scores)
    P, R, F1 = bert_score(predictions, references, lang="en", verbose=False)
    bertscore_f1 = F1.mean().item()

    kqwr_scores = [calculate_kqwr(pred, ref) for pred, ref in zip(predictions, references)]
    kqwr = np.mean(kqwr_scores)
    td_tc = calculate_td_tc(predictions, references)

    moverscore = calculate_moverscore(predictions, references, bert_tokenizer, bert_model)
    yisi = calculate_yisi(predictions, references, bert_tokenizer, bert_model)
    wmd = calculate_wmd(predictions, references, bert_tokenizer, bert_model)
    greedy_matching = calculate_greedy_matching(predictions, references, bert_tokenizer, bert_model)

    return {
        "F1": f1,
        "ROUGE-1": rouge1,
        "ROUGE-L": rougeL,
        "METEOR": meteor,
        "BERTScore": bertscore_f1,
        "KqWR": kqwr,
        "td/tc": td_tc,
        "MoverScore": moverscore,
        "YiSi": yisi,
        "WMD": wmd,
        "GreedyMatching": greedy_matching
    }

def tokenize_data(example, tokenizer):
    input_text = example['tweet']
    target_text = example['GT Target']
    inputs = tokenizer(input_text, max_length=512, truncation=True, padding="max_length")
    targets = tokenizer(target_text, max_length=128, truncation=True, padding="max_length")
    inputs["labels"] = targets["input_ids"]
    return inputs

def train_model(model, tokenizer, train_dataset):
    training_args = TrainingArguments(
        output_dir="./t5_trained",
        num_train_epochs=3,
        per_device_train_batch_size=8,
        save_steps=500,
        save_total_limit=2,
        logging_steps=100,
    )
    trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset)
    trainer.train()
    return model

def main(file_path):
    df = load_data(file_path)
    df = preprocess_data(df)
    dataset = to_dataset(df)

    tokenizer = T5Tokenizer.from_pretrained("t5-base")
    model = T5ForConditionalGeneration.from_pretrained("t5-base").to(device)

    print("Calculating Pre-training Metrics...")
    pre_metrics = calculate_metrics(model, tokenizer, dataset)
    print("Pre-training Metrics:", pre_metrics)

    train_size = min(1000, len(dataset))
    train_dataset = dataset.select(range(train_size))
    tokenized_train_dataset = train_dataset.map(lambda x: tokenize_data(x, tokenizer), batched=False)

    print("Training T5...")
    trained_model = train_model(model, tokenizer, tokenized_train_dataset)

    print("Calculating Post-training Metrics...")
    post_metrics = calculate_metrics(trained_model, tokenizer, dataset)
    print("Post-training Metrics:", post_metrics)

if __name__ == "__main__":
    main("/content/randomized_output.csv")

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


MoverScore library not available. Approximating with BERT embeddings and WMD.
Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Calculating Pre-training Metrics...


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Pre-training Metrics: {'F1': 0.0, 'ROUGE-1': np.float64(0.0045294117647058825), 'ROUGE-L': np.float64(0.0045294117647058825), 'METEOR': np.float64(0.009420626660813995), 'BERTScore': 0.8134480118751526, 'KqWR': np.float64(0.0025198412698412696), 'td/tc': 0.0, 'MoverScore': np.float32(0.091721185), 'YiSi': np.float32(0.4566427), 'WMD': np.float64(0.0), 'GreedyMatching': np.float32(0.94591653)}


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Training T5...


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss
100,2.4127
200,0.043
300,0.0159


Calculating Post-training Metrics...


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Post-training Metrics: {'F1': 0.9284848484848485, 'ROUGE-1': np.float64(0.92), 'ROUGE-L': np.float64(0.92), 'METEOR': np.float64(0.7585014285714285), 'BERTScore': 0.9887728095054626, 'KqWR': np.float64(0.92), 'td/tc': 0.92, 'MoverScore': np.float32(0.92890364), 'YiSi': np.float32(0.97162366), 'WMD': np.float64(0.0), 'GreedyMatching': np.float32(1.9917239)}


**BART-Base (Bidirectional and Auto-Regressive Transformer)**

In [2]:
import pandas as pd
import numpy as np
import torch
from transformers import BartTokenizer, BartForConditionalGeneration, Trainer, TrainingArguments
from datasets import Dataset
from sklearn.metrics import f1_score
from rouge_score import rouge_scorer
import nltk
from bert_score import score as bert_score
from collections import Counter
import ot  # For WMD optimal transport

nltk.download('wordnet')
nltk.download('omw-1.4')
from nltk.translate.meteor_score import meteor_score

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def load_data(file_path):
    df = pd.read_csv(file_path)
    return df[['tweet', 'GT Target']]

def preprocess_data(df):
    df['tweet'] = df['tweet'].str.strip().str.lower()
    return df

def to_dataset(df):
    return Dataset.from_pandas(df)

def calculate_kqwr(prediction, reference):
    pred_words = set(prediction.lower().split())
    ref_words = set(reference.lower().split())
    if not pred_words or not ref_words:
        return 0.0
    overlap = len(pred_words & ref_words)
    total_unique = len(pred_words | ref_words)
    return overlap / total_unique if total_unique > 0 else 0.0

def calculate_td_tc(predictions, references):
    td = sum(1 for pred, ref in zip(predictions, references) if pred == ref)
    tc = len(predictions)
    return td / tc if tc > 0 else 0.0

def get_bert_embeddings(texts, tokenizer, model):
    if not texts:
        return np.zeros((1, 768))
    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.cpu().numpy()

def calculate_metrics(model, tokenizer, dataset, sample_size=100):
    model.eval()
    sample = dataset.select(range(min(sample_size, len(dataset))))
    predictions, references = [], []

    bert_tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')  # Use BART tokenizer for consistency
    bert_model = BartForConditionalGeneration.from_pretrained('facebook/bart-base').to(device)

    with torch.no_grad():
        for example in sample:
            input_text = example['tweet']
            inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)
            outputs = model.generate(**inputs, max_length=128, num_beams=4, early_stopping=True)
            pred = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
            predictions.append(pred)
            references.append(example['GT Target'])

    f1 = f1_score(references, predictions, average='weighted')
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
    rouge_scores = [scorer.score(ref, pred) for ref, pred in zip(references, predictions)]
    rouge1 = np.mean([s['rouge1'].fmeasure for s in rouge_scores])
    rougeL = np.mean([s['rougeL'].fmeasure for s in rouge_scores])
    meteor_scores = [meteor_score([ref.split()], pred.split()) for ref, pred in zip(references, predictions)]
    meteor = np.mean(meteor_scores)
    P, R, F1 = bert_score(predictions, references, lang="en", verbose=False)
    bertscore_f1 = F1.mean().item()

    kqwr_scores = [calculate_kqwr(pred, ref) for pred, ref in zip(predictions, references)]
    kqwr = np.mean(kqwr_scores)
    td_tc = calculate_td_tc(predictions, references)

    return {
        "F1": f1,
        "ROUGE-1": rouge1,
        "ROUGE-L": rougeL,
        "METEOR": meteor,
        "BERTScore": bertscore_f1,
        "KqWR": kqwr,
        "td/tc": td_tc,
    }

def tokenize_data(example, tokenizer):
    input_text = example['tweet']
    target_text = example['GT Target']
    inputs = tokenizer(input_text, max_length=512, truncation=True, padding="max_length", return_tensors="pt")
    targets = tokenizer(target_text, max_length=128, truncation=True, padding="max_length", return_tensors="pt")
    inputs["labels"] = targets["input_ids"]
    return inputs

def train_model(model, tokenizer, train_dataset):
    training_args = TrainingArguments(
        output_dir="./bart_trained",
        num_train_epochs=3,
        per_device_train_batch_size=8,
        save_steps=500,
        save_total_limit=2,
        logging_steps=100,
    )
    trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset)
    trainer.train()
    return model

def main(file_path):
    df = load_data(file_path)
    df = preprocess_data(df)
    dataset = to_dataset(df)

    tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
    model = BartForConditionalGeneration.from_pretrained("facebook/bart-base").to(device)

    print("Calculating Pre-training Metrics...")
    pre_metrics = calculate_metrics(model, tokenizer, dataset)
    print("Pre-training Metrics:", pre_metrics)

    train_size = min(1000, len(dataset))
    train_dataset = dataset.select(range(train_size))
    tokenized_train_dataset = train_dataset.map(lambda x: tokenize_data(x, tokenizer), batched=False)

    print("Training BART...")
    trained_model = train_model(model, tokenizer, tokenized_train_dataset)

    print("Calculating Post-training Metrics...")
    post_metrics = calculate_metrics(trained_model, tokenizer, dataset)
    print("Post-training Metrics:", post_metrics)

if __name__ == "__main__":
    main("tse_explicit.csv")

  from .autonotebook import tqdm as notebook_tqdm





KeyboardInterrupt: 

**KeyBart**

In [None]:
import pandas as pd
import numpy as np
import torch
from transformers import BartTokenizer, BartForConditionalGeneration, Trainer, TrainingArguments
from datasets import Dataset
from sklearn.metrics import f1_score
from rouge_score import rouge_scorer
import nltk
from bert_score import score as bert_score
from collections import Counter
import ot  # For WMD optimal transport

nltk.download('wordnet')
nltk.download('omw-1.4')
from nltk.translate.meteor_score import meteor_score

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def load_data(file_path):
    df = pd.read_csv(file_path)
    return df[['tweet', 'GT Target']]

def preprocess_data(df):
    df['tweet'] = df['tweet'].str.strip().str.lower()
    return df

def to_dataset(df):
    return Dataset.from_pandas(df)

def calculate_kqwr(prediction, reference):
    pred_words = set(prediction.lower().split())
    ref_words = set(reference.lower().split())
    if not pred_words or not ref_words:
        return 0.0
    overlap = len(pred_words & ref_words)
    total_unique = len(pred_words | ref_words)
    return overlap / total_unique if total_unique > 0 else 0.0

def calculate_td_tc(predictions, references):
    td = sum(1 for pred, ref in zip(predictions, references) if pred == ref)
    tc = len(predictions)
    return td / tc if tc > 0 else 0.0

def get_bert_embeddings(texts, tokenizer, model):
    if not texts:
        return np.zeros((1, 768))
    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.cpu().numpy()

def calculate_metrics(model, tokenizer, dataset, sample_size=100):
    model.eval()
    sample = dataset.select(range(min(sample_size, len(dataset))))
    predictions, references = [], []

    bert_tokenizer = BartTokenizer.from_pretrained('bloomz/keybart')  # Adjust checkpoint as needed
    bert_model = BartForConditionalGeneration.from_pretrained('bloomz/keybart').to(device)

    with torch.no_grad():
        for example in sample:
            input_text = example['tweet']
            inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)
            outputs = model.generate(**inputs, max_length=128, num_beams=4, early_stopping=True)
            pred = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
            predictions.append(pred)
            references.append(example['GT Target'])

    f1 = f1_score(references, predictions, average='weighted')
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
    rouge_scores = [scorer.score(ref, pred) for ref, pred in zip(references, predictions)]
    rouge1 = np.mean([s['rouge1'].fmeasure for s in rouge_scores])
    rougeL = np.mean([s['rougeL'].fmeasure for s in rouge_scores])
    meteor_scores = [meteor_score([ref.split()], pred.split()) for ref, pred in zip(references, predictions)]
    meteor = np.mean(meteor_scores)
    P, R, F1 = bert_score(predictions, references, lang="en", verbose=False)
    bertscore_f1 = F1.mean().item()

    kqwr_scores = [calculate_kqwr(pred, ref) for pred, ref in zip(predictions, references)]
    kqwr = np.mean(kqwr_scores)
    td_tc = calculate_td_tc(predictions, references)

    return {
        "F1": f1,
        "ROUGE-1": rouge1,
        "ROUGE-L": rougeL,
        "METEOR": meteor,
        "BERTScore": bertscore_f1,
        "KqWR": kqwr,
        "td/tc": td_tc,
    }

def tokenize_data(example, tokenizer):
    input_text = example['tweet']
    target_text = example['GT Target']
    inputs = tokenizer(input_text, max_length=512, truncation=True, padding="max_length", return_tensors="pt")
    targets = tokenizer(target_text, max_length=128, truncation=True, padding="max_length", return_tensors="pt")
    inputs["labels"] = targets["input_ids"]
    return inputs

def train_model(model, tokenizer, train_dataset):
    training_args = TrainingArguments(
        output_dir="./keybart_trained",
        num_train_epochs=3,
        per_device_train_batch_size=8,
        save_steps=500,
        save_total_limit=2,
        logging_steps=100,
    )
    trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset)
    trainer.train()
    return model

def main(file_path):
    df = load_data(file_path)
    df = preprocess_data(df)
    dataset = to_dataset(df)

    tokenizer = BartTokenizer.from_pretrained("bloomz/keybart")  # Replace with actual KeyBART checkpoint
    model = BartForConditionalGeneration.from_pretrained("bloomz/keybart").to(device)

    print("Calculating Pre-training Metrics...")
    pre_metrics = calculate_metrics(model, tokenizer, dataset)
    print("Pre-training Metrics:", pre_metrics)

    train_size = min(1000, len(dataset))
    train_dataset = dataset.select(range(train_size))
    tokenized_train_dataset = train_dataset.map(lambda x: tokenize_data(x, tokenizer), batched=False)

    print("Training KeyBART...")
    trained_model = train_model(model, tokenizer, tokenized_train_dataset)

    print("Calculating Post-training Metrics...")
    post_metrics = calculate_metrics(trained_model, tokenizer, dataset)
    print("Post-training Metrics:", post_metrics)

if __name__ == "__main__":
    main("/content/randomized_output.csv")