In [None]:
from lib.BBMetrics import BBMetric
from transformers import TFAutoModelForCausalLM, AutoTokenizer
import os
from Data.data_dicts import character_dict, source_dict, random_state

import os
# Detect whether the notebook is running on Google Colab by probing for the
# `google.colab` package. Only a failed import means "not on Colab"; catching
# ImportError (instead of a bare `except:`) keeps KeyboardInterrupt, SystemExit
# and unrelated errors from being silently swallowed.
try:
    import google.colab  # imported only to probe the environment
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    # On Colab the project lives on Google Drive, which has to be mounted first.
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
    base_folder = '/content/drive/My Drive/unibo/NLP_project/BarneyBot'
    # Install the packages the notebook needs on a Colab runtime.
    # NOTE(review): these installs run after the imports at the top of this
    # cell (e.g. `transformers`), so on a fresh runtime those imports may fail
    # before this point is reached -- confirm and consider a separate first cell.
    for pip_command in (
        "pip install datasets",
        "pip install transformers",
        "pip install rouge_score",
        "pip install -U sentence-transformers",
    ):
        os.system(pip_command)
else:
    # Locally, the project root is the current working directory.
    base_folder = os.getcwd()

# Two small sentence lists used as inputs by the metric cells below.
sentences1 = ["I love this! I think life is good without issues. The red fox jumps.", "Hi"]
sentences2 = ["I hate you.", "Hi"]

In [None]:
# Load the pretrained DialoGPT-small causal language model and its tokenizer,
# caching the downloaded files in a "cache" folder under the working directory.
checkpoint_name = 'microsoft/DialoGPT-small'
hf_cache_dir = os.path.join(os.getcwd(), "cache")

model = TFAutoModelForCausalLM.from_pretrained(checkpoint_name, cache_dir=hf_cache_dir)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_name, cache_dir=hf_cache_dir)

# The padding token is set explicitly to '#'
# (presumably because the checkpoint's tokenizer defines none -- TODO confirm).
tokenizer.pad_token = '#'

In [6]:
# Display the metric names exposed by BBMetric; these are the names passed to
# BBMetric.load_metric in the cells below.
BBMetric.metrics_list

['bleu',
 'semantic similarity',
 'rouge l',
 'emotion',
 'semantic answer similarity',
 'distinct',
 'semantic classifier',
 'perplexity',
 'human - coherence',
 'human - consistency',
 'human - style']

In [7]:
# Load the "semantic classifier" metric and train it for the character 'Barney',
# passing the character dictionary and the project base folder.
# NOTE(review): the output recorded for this cell is
# "TypeError: train() takes 5 positional arguments but 7 were given".
# Only keyword arguments are passed here, so the mismatch presumably originates
# inside the library's train() implementation -- confirm against lib.BBMetrics
# before relying on this cell in a Restart & Run All.
metric = BBMetric.load_metric("semantic classifier")

metric.train(character='Barney', character_dict=character_dict, base_folder=base_folder)

TypeError: train() takes 5 positional arguments but 7 were given

In [7]:
# BLEU: sentences1 are the predictions, sentences2 the references.
metric = BBMetric.load_metric("bleu")
bleu_result = metric.compute(
    predictions=sentences1,
    references=sentences2,
)
print(bleu_result)

{'score': 0.0}


In [8]:
# ROUGE-L: sentences1 are the predictions, sentences2 the references.
metric = BBMetric.load_metric("rouge l")
rouge_result = metric.compute(
    predictions=sentences1,
    references=sentences2,
)
print(rouge_result)

{'score': 0.6666666666666666}


In [4]:
# Semantic similarity between the two sentence lists
# (the recorded output holds one score per sentence pair).
metric = BBMetric.load_metric("semantic similarity")
similarity_result = metric.compute(
    sentences_a=sentences1,
    sentences_b=sentences2,
)
print(similarity_result)

{'scores': array([0.03812046, 0.9504694 ], dtype=float32)}


In [2]:
# Emotion metric on sentences1 (the recorded output holds scores and labels).
metric = BBMetric.load_metric("emotion")
emotion_result = metric.compute(sentences=sentences1)
print(emotion_result)

{'scores': [0.6401270031929016, 0.36308011412620544], 'labels': ['joy', 'anger']}


In [3]:
# Semantic answer similarity: sentences1 are the predictions, sentences2 the references.
metric = BBMetric.load_metric("semantic answer similarity")
answer_similarity_result = metric.compute(
    predictions=sentences1,
    references=sentences2,
)
print(answer_similarity_result)

{'scores': array([0.0881714, 1.       ], dtype=float32)}


In [2]:
# Perplexity of the loaded model on sentences1.
# Stride is optional, defaults to 64
metric = BBMetric.load_metric("perplexity")
perplexity_result = metric.compute(
    model=model,
    tokenizer=tokenizer,
    sentences=sentences1,
    stride=2,
)
print(perplexity_result)

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:03<00:00,  3.03it/s]

{'score': 118088.06238195905}





In [3]:
# Human-rated coherence. Judging from the recorded output, train() runs a short
# interactive chat with the model and asks the user for a 0-5 rating; compute()
# is then called on the same CSV path.
metric = BBMetric.load_metric("human - coherence")

coherence_csv = os.path.join(os.getcwd(), "Datasets", "Characters", "Default", "humancoherence.csv")
metric.train(
    model=model,
    tokenizer=tokenizer,
    filepath=coherence_csv,
    length=2,
)
metric.compute(filepath=coherence_csv)

>> User:Hello!
DialoGPT: Hi!
>> User:I hate you.
DialoGPT: I hate you
How do you rate this conversation (0 to 5)? 2


{'score': 0.4}

In [2]:
# Human-rated consistency. Judging from the recorded output, train() has the
# model answer a set of questions and asks the user for a 0-5 rating; compute()
# is then called on the same CSV path.
metric = BBMetric.load_metric("human - consistency")

consistency_csv = os.path.join(os.getcwd(), "Datasets", "Characters", "Default", "humanconsistency.csv")
metric.train(
    model=model,
    tokenizer=tokenizer,
    filepath=consistency_csv,
)
metric.compute(filepath=consistency_csv)

Question: Who are you?
DialoGPT: I'm here
Question: What is your name?
DialoGPT: I'm in.
Question: What is your job?
DialoGPT: I'm a software engineer.
Question: Where do you live?
DialoGPT: I'm in the US and I've never heard of this.
How do you rate these answers (0 to 5)? 4


{'score': 0.8}

In [4]:
# Human-rated style. The model is prompted with the custom questions below and,
# judging from the recorded output, the user is asked for a 0-5 rating;
# compute() is then called on the same CSV path.
metric = BBMetric.load_metric("human - style")

style_csv = os.path.join(os.getcwd(), "Datasets", "Characters", "Default", "humanstyle.csv")
style_questions = ["What are you wearing?", "Legendary"]
metric.train(
    model=model,
    tokenizer=tokenizer,
    filepath=style_csv,
    questions=style_questions,
)
metric.compute(filepath=style_csv)

Question: What are you wearing?
DialoGPT: I'm wearing a shirt.
Question: Legendary
DialoGPT: What is this? A crossover episode?
How do you rate these answers (0 to 5)? 1.5
Invalid score! Must be a single integer between 0 and 5!
How do you rate these answers (0 to 5)? 2


{'score': 0.4}

In [8]:
# Distinct metric on sentences1, computed with an n-gram size of 5.
metric = BBMetric.load_metric("distinct")
distinct_ngram_size = 5
metric.compute(
    sentences=sentences1,
    ngram_size=distinct_ngram_size,
)

{'score': 0.07352941176470588}