<a href="https://colab.research.google.com/github/WangKun00293/bart-bert-project/blob/main/BART_generated_text_evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from transformers import BartTokenizer, BartForConditionalGeneration
tokenizer = BartTokenizer.from_pretrained("/content/drive/MyDrive/model_41_bart_epoch_6")
model = BartForConditionalGeneration.from_pretrained("/content/drive/MyDrive/model_41_bart_epoch_6")

In [None]:
import torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.eval()
model.to(device)
import pandas as pd

In [None]:
def generate_text_with_beams(prompt, num_texts=5, temperature=1.0, top_k=50, top_p=0.95, min_length=50, max_length=200, num_beams=5, no_repeat_ngram_size=2):
    model.eval()  # Ensure the model is in evaluation mode
    generated_texts = []
    for _ in range(num_texts):
        # Encode the prompt and ensure it's on the correct device
        input_ids = tokenizer.encode(prompt, return_tensors='pt').to(model.device)

        # Generate text using the model with specified hyperparameters
        outputs = model.generate(
            input_ids,
            min_length=min_length,
            max_length=max_length,
            num_beams=num_beams,
            num_return_sequences=1,  # Generate one sequence at a time to ensure diversity
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            no_repeat_ngram_size=no_repeat_ngram_size,
            do_sample=True,  # Enable sampling even with num_beams for diversity
            pad_token_id=tokenizer.eos_token_id
        )

        # Decode and add the generated text
        text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        generated_texts.append(text)

    return generated_texts

In [None]:
def generate_text(top_p, top_k, temperature, num_beams=5, num_texts=20):
    prompts = ["Write a World News", "Write a Sport News", "Write a Business News", "Write a Science/Technology News"]
    all_prompts = []
    all_texts = []

    for prompt in prompts:
        generated_texts_for_prompt = []
        num_iterations = num_texts // num_beams

        for _ in range(num_iterations):
            generated_texts = generate_text_with_beams(
                prompt=prompt,
                num_texts=num_beams,
                temperature=temperature,
                top_k=top_k,
                top_p=top_p,
                min_length=50,
                max_length=200,
                num_beams=num_beams,
                no_repeat_ngram_size=2
            )
            generated_texts_for_prompt.extend(generated_texts)

        # Ensure uniqueness and limit to the desired number of texts
        unique_texts = list(set(generated_texts_for_prompt))[:num_texts]
        all_prompts.extend([prompt] * len(unique_texts))
        all_texts.extend(unique_texts)

    return pd.DataFrame({'prompt': all_prompts, 'text': all_texts})

In [None]:
!pip install sacrebleu
import pandas as pd
from sklearn.metrics import jaccard_score
from itertools import combinations
import numpy as np
from nltk.util import ngrams
import sacrebleu

# Assuming df_generate is your DataFrame and 'text' is the column with your text data

# Function to calculate Jaccard Similarity between two texts
def jaccard_similarity(text1, text2):
    set1 = set(text1.split())
    set2 = set(text2.split())
    intersection = set1.intersection(set2)
    union = set1.union(set2)
    return len(intersection) / len(union) if len(union) > 0 else 0
def jac_sim(df):
  jaccard_similarities = [jaccard_similarity(text1, text2) for text1, text2 in combinations(df['text'], 2)]
  return np.mean(jaccard_similarities)

Collecting sacrebleu
  Downloading sacrebleu-2.4.1-py3-none-any.whl (106 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/106.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m102.4/106.6 kB[0m [31m2.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.6/106.6 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: portalocker, colorama, sacrebleu
Successfully installed colorama-0.4.6 portalocker-2.8.2 sacrebleu-2.4.1


In [None]:
def calculate_self_bleu(texts):
    scores = []
    for i, target in enumerate(texts):
        references = texts[:i] + texts[i+1:]
        bleu_score = sacrebleu.corpus_bleu([target], [references]).score
        scores.append(bleu_score)
    return np.mean(scores)

In [None]:
from transformers import BertTokenizer
from transformers import BertForSequenceClassification
bert_tokenizer = BertTokenizer.from_pretrained('/content/drive/MyDrive/planB/bert/BERT402_epoch_8')
bert_model = BertForSequenceClassification.from_pretrained('/content/drive/MyDrive/planB/bert/BERT402_epoch_8', num_labels=4)
bert_model.to(device)

In [None]:
from sklearn.metrics import accuracy_score
import numpy as np
def evaluate(model, dataloader):
    model.eval()

    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)

            logits = outputs.logits.detach().cpu().numpy()
            label_ids = labels.to('cpu').numpy()

            predictions.extend(np.argmax(logits, axis=1).flatten())
            true_labels.extend(label_ids.flatten())

    avg_accuracy = accuracy_score(true_labels, predictions)
    return avg_accuracy

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
class TextDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe.label
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text[index])
        text = " ".join(text.split())

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors="pt",
        )

        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'labels': torch.tensor(self.targets[index], dtype=torch.long)
        }


In [None]:
label_map2 = {"Write a World News":0, "Write a Sport News":1, "Write a Business News":2, "Write a Science/Technology News":3}

In [17]:
for k in [30,35,40,45,50]:
  df_text = generate_text(top_p=0.95, top_k=k, temperature=1.2, num_beams=5, num_texts=20)
  df_text['label'] = df_text['prompt'].map(label_map2)
  dataset = TextDataset(dataframe=df_text, tokenizer=bert_tokenizer, max_len=200)
  loader = DataLoader(dataset, batch_size=16, shuffle=False)
  bert_acc = evaluate(bert_model, loader)
  js = jac_sim(df_text)
  sb = calculate_self_bleu(df_text['text'].tolist())
  print(f"top_k: {k}")
  print(f"bert accuracy: {bert_acc}")
  print(f"Jaccard similarities: {js}")
  print(f"Self-BLEU: {sb}")

top_k: 30
bert accuracy: 0.975
Jaccard similarities: 0.11556402156160839
Self-BLEU: 4.670611437286693
top_k: 35
bert accuracy: 0.9875
Jaccard similarities: 0.11300647411542006
Self-BLEU: 1.5987585476199417
top_k: 40
bert accuracy: 0.95
Jaccard similarities: 0.1089032057190632
Self-BLEU: 6.016486747472774
top_k: 45
bert accuracy: 0.95
Jaccard similarities: 0.11288165675814596
Self-BLEU: 1.573621752863128
top_k: 50
bert accuracy: 0.925
Jaccard similarities: 0.10938293006051303
Self-BLEU: 5.104405490910682


In [18]:
for k in [30,35,40,45,50]:
  df_text = generate_text(top_p=0.9, top_k=k, temperature=1.2, num_beams=5, num_texts=20)
  df_text['label'] = df_text['prompt'].map(label_map2)
  dataset = TextDataset(dataframe=df_text, tokenizer=bert_tokenizer, max_len=200)
  loader = DataLoader(dataset, batch_size=16, shuffle=False)
  bert_acc = evaluate(bert_model, loader)
  js = jac_sim(df_text)
  sb = calculate_self_bleu(df_text['text'].tolist())
  print(f"top_k: {k}")
  print(f"bert accuracy: {bert_acc}")
  print(f"Jaccard similarities: {js}")
  print(f"Self-BLEU: {sb}")

top_k: 30
bert accuracy: 0.95
Jaccard similarities: 0.11840843498377501
Self-BLEU: 3.6100798168248467
top_k: 35
bert accuracy: 0.9375
Jaccard similarities: 0.1081574646435496
Self-BLEU: 3.9766781795788084
top_k: 40
bert accuracy: 0.925
Jaccard similarities: 0.11306758988903559
Self-BLEU: 2.9220130559379016
top_k: 45
bert accuracy: 0.9
Jaccard similarities: 0.11283803651925471
Self-BLEU: 2.292040146031241
top_k: 50
bert accuracy: 0.95
Jaccard similarities: 0.10839751304980673
Self-BLEU: 4.363161682245089


In [None]:
# set temperature=1.2, top_k=35, top_p=0.95
df_generated = generate_text(top_p=0.95, top_k=35, temperature=1.2, num_beams=5, num_texts=2000)
df_generated.to_csv("/content/drive/MyDrive/planB/df_generated_all.csv")