In [51]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

def calculate_perplexity(generated_text):
    input_ids = tokenizer.encode(generated_text, return_tensors="pt")
    with torch.no_grad():
        outputs = model(input_ids=input_ids)
        logits = outputs.logits
    loss = torch.nn.functional.cross_entropy(logits.view(-1, logits.size(-1)), input_ids.view(-1), reduction="none")
    perplexity = torch.exp(torch.mean(loss))
    return perplexity.item()

def calculate_fluency_score(generated_text):
    perplexity = calculate_perplexity(generated_text)
    return 1 / perplexity 

In [52]:
def calculate_diversity_score(generated_text):
    words = generated_text.split()
    unique_words = len(set(words))
    total_words = len(words)
    diversity_score = unique_words / total_words
    return diversity_score

In [53]:
import nltk
from nltk.corpus import brown

brown_bigrams = set(nltk.bigrams(brown.words()))

def calculate_coherence_score(generated_text):
    words = generated_text.split()
    bigrams = list(zip(words[:-1], words[1:]))
    common_bigrams = sum(bigram in brown_bigrams for bigram in bigrams)
    coherence_score = common_bigrams / len(bigrams)
    return coherence_score

In [54]:
import numpy as np
from transformers import pipeline

models = [('distilgpt2', 'distilgpt2-small'),('gpt2', 'gpt2-small'),('gpt2-medium', 'gpt2-medium')]

criteria = {'fluency': 0.3, 'diversity': 0.3, 'coherence': 0.2}

def generate_text_and_calculate_scores(model_name, model_type):
    generator = pipeline('text-generation', model=model_name)
    generated_text = generator("Once upon a time", max_length=30, do_sample=False, truncation=True)
    print("Generated text for model", model_name, ":", generated_text[0]['generated_text'])
    generated_text = generated_text[0]['generated_text']
    fluency_score = calculate_fluency_score(generated_text)
    diversity_score = calculate_diversity_score(generated_text)
    coherence_score = calculate_coherence_score(generated_text)
    return (model_name, model_type, fluency_score, diversity_score, coherence_score)

data = []
for model_name, model_type in models:
    scores = generate_text_and_calculate_scores(model_name, model_type)
    data.append(scores)

data = np.array(data)[:, 2:].astype(float)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated text for model distilgpt2 : Once upon a time of war, the United States was the only country in the world to have a military presence. The United States was the only country


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated text for model gpt2 : Once upon a time, the world was a place of great beauty and great danger. The world was a place of great danger, and the world was


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated text for model gpt2-medium : Once upon a time, there was a man who lived in a village called Krakow. He was a very good man, and he was very


In [55]:
normalized_data = data / np.sqrt((data ** 2).sum(axis=0))
weighted_data = normalized_data * np.array(list(criteria.values()))
ideal_solution = np.max(weighted_data, axis=0)
anti_ideal_solution = np.min(weighted_data, axis=0)
distance_from_ideal = np.sqrt(((weighted_data - ideal_solution) ** 2).sum(axis=1))
distance_from_anti_ideal = np.sqrt(((weighted_data - anti_ideal_solution) ** 2).sum(axis=1))
topsis_score = distance_from_anti_ideal / (distance_from_ideal + distance_from_anti_ideal)
ranked_models = np.argsort(topsis_score)
print("Ranked Models (from best to worst):")
for rank, model_idx in enumerate(ranked_models):
    model_name, model_type = models[model_idx]
    print(f"Rank {rank + 1}: Model {model_name} ({model_type}) with TOPSIS score {topsis_score[model_idx]:.2f}")

Ranked Models (from best to worst):
Rank 1: Model gpt2 (gpt2-small) with TOPSIS score 0.13
Rank 2: Model distilgpt2 (distilgpt2-small) with TOPSIS score 0.41
Rank 3: Model gpt2-medium (gpt2-medium) with TOPSIS score 0.86
