# Human Metrics

In [1]:
from Lib.BBMetrics import BBMetric
from transformers import TFAutoModelForCausalLM, AutoTokenizer
import os
from Data.data_dicts import character_dict, source_dict, random_state

import os
# Detect whether the notebook runs on Google Colab by probing for its runtime package.
# Only ImportError is caught: a bare `except:` would also hide unrelated failures
# (and KeyboardInterrupt) and silently fall back to the local configuration.
try:
    import google.colab  # imported only to test for the Colab runtime
    IN_COLAB = True
except ImportError:
    IN_COLAB = False
if IN_COLAB:
    from google.colab import drive
    # Project files are read from (and written to) the mounted Google Drive.
    drive.mount('/content/drive', force_remount=True)
    base_folder = '/content/drive/My Drive/unibo/NLP_project/BarneyBot'
    # NOTE(review): these installs run after `transformers` has already been imported
    # at the top of this cell; on a fresh runtime they may need to move to an earlier cell.
    os.system("pip install datasets")
    os.system("pip install transformers")
else:
    # Outside Colab the project root is the current working directory.
    base_folder = os.getcwd()

In [2]:
# Keep the tokenizer download cache under base_folder, the same root the dataset cache
# uses, so it lands in the project directory (the mounted Drive when running on Colab)
# instead of whatever the current working directory happens to be.
tokenizer = AutoTokenizer.from_pretrained('microsoft/DialoGPT-small', cache_dir=os.path.join(base_folder, "cache"))
# Explicitly set the padding token to '#'.
tokenizer.pad_token = '#'

In [3]:
from datasets import load_dataset

# Load the shared CSV dataset from the project's Data folder; the processed
# dataset is cached under <base_folder>/cache.
df_common = load_dataset(
    path='csv',
    data_files=os.path.join(base_folder, 'Data', 'common_dataset.csv'),
    cache_dir=os.path.join(base_folder, "cache"),
)

Using custom data configuration default-77d7688dea622ede
Reusing dataset csv (D:\University\Esami da Superare\Natural Language Processing\BarneyBot\BarneyBot\cache\csv\default-77d7688dea622ede\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519)


  0%|          | 0/1 [00:00<?, ?it/s]

In [6]:
def eval_character(character='Default'):
    """Run the three human-evaluation metrics for one character's model.

    Loads the character's checkpoint from
    ``<base_folder>/Data/Characters/<character>/<checkpoint_folder>`` and then
    runs, in order, the "human - coherence", "human - consistency" and
    "human - style" metrics through ``BBMetric``. Each metric is given a CSV
    path inside the character's folder (presumably where the ratings are
    stored -- confirm against ``BBMetric``).

    Args:
        character: Key into ``character_dict`` selecting the character; its
            entry must provide ``'source'`` and ``'checkpoint_folder'``.

    Raises:
        KeyError: Presumably, if ``character`` is not present in
            ``character_dict`` (assuming it is a plain dict).
    """
    character_info = character_dict[character]
    source = character_info['source']

    # Single per-character folder, rooted at base_folder, used for both the model
    # checkpoint and the metric CSVs. Previously the CSV paths were built from
    # os.getcwd(), which differs from base_folder when running on Colab.
    character_folder = os.path.join(base_folder, 'Data', 'Characters', character)
    os.makedirs(character_folder, exist_ok=True)

    checkpoint_folder = os.path.join(character_folder, character_info['checkpoint_folder'])
    model = TFAutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=checkpoint_folder)

    # compute human - coherence
    print("Step 1) Chat with", character, "\n\tPlease evaluate your chat with this character:", flush=True)
    metric = BBMetric.load_metric("human - coherence")
    metric.train(model=model, tokenizer=tokenizer,
                 filepath=os.path.join(character_folder, "humancoherence.csv"),
                 length=5) # length is optional, defaults to 5

    # compute human - consistency
    print("Step 2) Answers from", character, "\n\tPlease evaluate how true these responses are for the character:", flush=True)
    metric = BBMetric.load_metric("human - consistency")
    metric.train(model=model, tokenizer=tokenizer,
                 filepath=os.path.join(character_folder, "humanconsistency.csv"))

    # compute human - style
    print("Step 3) Answers from", character, "\n\tPlease evaluate the style of the responses.", flush=True)
    print("\tDo you think they are responses that", character, "would say?", flush=True)
    metric = BBMetric.load_metric("human - style")
    # Only contexts coming from this character's source are used as questions.
    metric.train(model=model, tokenizer=tokenizer,
                 filepath=os.path.join(character_folder, "humanstyle.csv"),
                 questions=df_common['train'].filter(lambda x: x['source'] == source)['context'])

# Barney

In [7]:
# Human evaluation of the Barney model.
eval_character('Barney')

All model checkpoint layers were used when initializing TFGPT2LMHeadModel.

All the layers of TFGPT2LMHeadModel were initialized from the model checkpoint at D:\University\Esami da Superare\Natural Language Processing\BarneyBot\BarneyBot\Data\Characters\Barney\barney_model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


Step 3) Answers from Barney 
	Please evaluate the style of the responses.
	Do you think they are responses that Barney would say?


Loading cached processed dataset at D:\University\Esami da Superare\Natural Language Processing\BarneyBot\BarneyBot\cache\csv\default-77d7688dea622ede\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-af6da9d0f5aaba29.arrow


Question: Barney, this is about the building.
DialoGPT: That's all part of the plan.
Question: All right. I'll be right there. Stay where you are.
DialoGPT: Okay, okay. What do you want?
Question: I think there's a pretty girl smiling at me there.
DialoGPT: I see... a little girl is smiling there.
Question: I love you, man.
DialoGPT: Well, I love you, too.
Question: Not even if she's hot?
DialoGPT: And she's not even the hottest girl she's ever slept with.
How do you rate these answers (0 to 5)? 4


# Sheldon

In [None]:
# Human evaluation of the Sheldon model.
eval_character('Sheldon')

# Harry

In [None]:
# Human evaluation of the Harry model.
eval_character('Harry')

# Fry

In [None]:
# Human evaluation of the Fry model.
eval_character('Fry')

# Vader

In [None]:
# Human evaluation of the Vader model.
eval_character('Vader')

# Joey

In [None]:
# Human evaluation of the Joey model.
eval_character('Joey')

# Phoebe

In [None]:
# Human evaluation of the Phoebe model.
eval_character('Phoebe')

# Bender

In [None]:
# Human evaluation of the Bender model.
eval_character('Bender')