In [1]:
from Lib.BBMetrics import BBMetric
from transformers import TFAutoModelForCausalLM, AutoTokenizer
import os
from Data.data_dicts import character_dict, source_dict, random_state

import os
# Detect whether the notebook runs inside Google Colab. Only a failed import
# means "not Colab"; any other exception (e.g. KeyboardInterrupt) propagates
# instead of being silently swallowed.
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    from google.colab import drive
    # Mount Google Drive: the project folder used as base_folder lives under it.
    drive.mount('/content/drive',force_remount=True)
    base_folder = '/content/drive/My Drive/unibo/NLP_project/BarneyBot'
    # Install the third-party packages on the Colab runtime, and report any
    # install command that fails instead of ignoring its exit status.
    for pip_command in ("pip install datasets",
                        "pip install transformers",
                        "pip install rouge_score",
                        "pip install -U sentence-transformers"):
        exit_status = os.system(pip_command)
        if exit_status != 0:
            print("Warning: '" + pip_command + "' exited with status " + str(exit_status))
else:
    # Outside Colab the current working directory is used as the project root.
    base_folder = os.getcwd()

# Small hand-written sentence sets used as inputs to the metric cells below.

# Generic utterances, used as predictions / plain sentences.
sentences_basic = [
    "Hi!",
    "How are you?",
    "I hate you.",
]

# Second generic set, used as the references / comparison sentences.
sentences_basic_2 = [
    "Hello!",
    "How are you doing?",
    "I think this is good.",
]

# Darth Vader-flavoured lines.
sentences_vader = [
    "Come to the dark side!",
    "I will kill you!",
    "Luke, I am your father.",
]

# Barney-flavoured lines.
sentences_barney = [
    "Did you get the suit?",
    "Legendary!",
    "I like girls.",
]

In [2]:
# Load the DialoGPT-small causal language model and its tokenizer. Both calls
# receive the same cache_dir: a "cache" folder under the current working directory.
dialogpt_checkpoint = 'microsoft/DialoGPT-small'
hf_cache_dir = os.path.join(os.getcwd(), "cache")

model = TFAutoModelForCausalLM.from_pretrained(dialogpt_checkpoint, cache_dir=hf_cache_dir)
tokenizer = AutoTokenizer.from_pretrained(dialogpt_checkpoint, cache_dir=hf_cache_dir)

# Use '#' as the tokenizer's padding token.
tokenizer.pad_token = '#'

All model checkpoint layers were used when initializing TFGPT2LMHeadModel.

All the layers of TFGPT2LMHeadModel were initialized from the model checkpoint at microsoft/DialoGPT-small.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


In [2]:
# Display BBMetric.metrics_list: the metric names that the cells below pass to
# BBMetric.load_metric(...).
BBMetric.metrics_list

['bleu',
 'semantic similarity',
 'rouge l',
 'emotion',
 'semantic answer similarity',
 'distinct',
 'semantic classifier',
 'perplexity',
 'human - coherence',
 'human - consistency',
 'human - style']

In [3]:
# "bleu" metric: compare the basic sentences (predictions) with the second
# basic set (references) and print the returned result.
metric = BBMetric.load_metric("bleu")
bleu_result = metric.compute(predictions=sentences_basic, references=sentences_basic_2)

print(bleu_result)

{'score': 0.0, 'std': 0.0}


In [10]:
# "rouge l" metric: same predictions/references pairing as the BLEU cell.
metric = BBMetric.load_metric("rouge l")
rouge_result = metric.compute(predictions=sentences_basic, references=sentences_basic_2)

print(rouge_result)

{'score': 0.36904761904761907, 'std': 0.3599099156626422}


In [14]:
# "distinct" metric computed over a single set of sentences.
metric = BBMetric.load_metric("distinct")

# ngram_size is optional, defaults to 3
distinct_result = metric.compute(sentences=sentences_basic, ngram_size=2)

# Bare last expression so the notebook displays the result.
distinct_result

{'score': 0.11616161616161617, 'std': 0.0823712445974752}

In [5]:
# "emotion" metric computed over the basic sentences; the result is printed.
metric = BBMetric.load_metric("emotion")
emotion_result = metric.compute(sentences=sentences_basic)

print(emotion_result)

{'score': [0.04045128605018059, 0.3372649804999431, 0.029534351934368413, 0.33119267721970874, 0.24915542546659708, 0.012401290975200633], 'std': [0.0246632631447857, 0.3520860938584187, 0.03524459240387796, 0.360727083374811, 0.31209396368681236, 0.008284051185361898], 'label': ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']}


In [16]:
# "semantic similarity" metric between the two basic sentence sets.
metric = BBMetric.load_metric("semantic similarity")
similarity_result = metric.compute(sentences_a=sentences_basic, sentences_b=sentences_basic_2)

print(similarity_result)

{'score': 0.60903007, 'std': 0.40267226}


In [17]:
# "semantic answer similarity" metric: predictions vs. references.
metric = BBMetric.load_metric("semantic answer similarity")
answer_similarity_result = metric.compute(predictions=sentences_basic, references=sentences_basic_2)

print(answer_similarity_result)

{'score': 0.59648186, 'std': 0.4743335}


In [4]:
# "semantic classifier" metric: train it for the character 'Barney', then
# score each of the three sample sentence sets in turn.
metric = BBMetric.load_metric("semantic classifier")

# n_shuffles is optional, defaults to 10
# from_saved_embeddings is optional, defaults to True
# shutdown_at_end is optional, defaults to False
train_options = dict(character='Barney', character_dict=character_dict, source_dict=source_dict,
                     random_state=random_state, base_folder=base_folder, n_shuffles=10,
                     from_saved_embeddings=True, shutdown_at_end=False)
metric.train(**train_options)

# n_draws is optional, defaults to len(sentences)-2
for candidate_sentences in (sentences_basic, sentences_vader, sentences_barney):
    print(metric.compute(character='Barney', character_dict=character_dict, base_folder=base_folder,
                         sentences=candidate_sentences))

Using classifier at D:\University\Esami da Superare\Natural Language Processing\BarneyBot\BarneyBot\Data\Characters\Barney\barney_classifier
{'score': 0.9999995, 'std': 0.0}


In [19]:
# "perplexity" metric of the loaded model/tokenizer on the basic sentences.
metric = BBMetric.load_metric("perplexity")

# stride is optional, defaults to 64
perplexity_result = metric.compute(model=model, tokenizer=tokenizer,
                                   sentences=sentences_basic, stride=3)
print(perplexity_result)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00,  1.75s/it]

{'score_concat': 40623438.20342068}





In [None]:
# "human - coherence" metric: train(...) and compute(...) are pointed at the
# same CSV file, so its path is built once.
metric = BBMetric.load_metric("human - coherence")
coherence_csv = os.path.join(os.getcwd(), "Datasets", "Characters", "Default", "humancoherence.csv")

# length is optional, defaults to 5
metric.train(model=model, tokenizer=tokenizer, filepath=coherence_csv, length=2)

# Bare last expression so the notebook displays the result.
metric.compute(filepath=coherence_csv)

In [None]:
# "human - consistency" metric: train(...) and compute(...) are pointed at the
# same CSV file, so its path is built once.
metric = BBMetric.load_metric("human - consistency")
consistency_csv = os.path.join(os.getcwd(), "Datasets", "Characters", "Default", "humanconsistency.csv")

metric.train(model=model, tokenizer=tokenizer, filepath=consistency_csv)

# Bare last expression so the notebook displays the result.
metric.compute(filepath=consistency_csv)

In [None]:
# "human - style" metric: train(...) and compute(...) are pointed at the same
# CSV file, so its path is built once.
metric = BBMetric.load_metric("human - style")
style_csv = os.path.join(os.getcwd(), "Datasets", "Characters", "Default", "humanstyle.csv")

# The questions are the Barney sample sentences defined in the setup cell as
# `sentences_barney`. The name `barney_sentences` is not defined anywhere in
# this notebook, so using it raises NameError on a fresh kernel.
metric.train(model=model, tokenizer=tokenizer, filepath=style_csv, questions=sentences_barney)

# Bare last expression so the notebook displays the result.
metric.compute(filepath=style_csv)