# Human Metrics

In [None]:
from Lib.BBMetrics import BBMetric
from transformers import TFAutoModelForCausalLM, AutoTokenizer
import os
from Data.data_dicts import character_dict, source_dict, random_state

import os
# Detect whether the notebook is running on Google Colab by probing for the
# google.colab package. Only ImportError is treated as "not on Colab", so
# KeyboardInterrupt, SystemExit and unrelated failures are no longer
# silently swallowed.
try:
    import google.colab  # noqa: F401 -- imported only to probe availability
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

if IN_COLAB:
    # On Colab the project lives on the mounted Google Drive.
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
    base_folder = '/content/drive/My Drive/unibo/NLP_project/BarneyBot'
    os.system("pip install datasets")
    os.system("pip install transformers")
else:
    # Elsewhere the project root is the current working directory.
    base_folder = os.getcwd()

In [None]:
# Load the 'microsoft/DialoGPT-small' tokenizer, using a "cache" folder under
# the current working directory as its cache_dir.
tokenizer = AutoTokenizer.from_pretrained(
    'microsoft/DialoGPT-small',
    cache_dir=os.path.join(os.getcwd(), "cache"),
)
# Explicitly set '#' as the padding token.
tokenizer.pad_token = '#'

In [None]:
def eval_character(character='Default', questions=None):
    """Run the three human-evaluation metrics for one character.

    Loads the character's model from its checkpoint folder, then for each of
    "human - coherence", "human - consistency" and "human - style" calls the
    metric's ``train`` with the model and tokenizer, followed by ``compute``,
    both pointed at the same CSV file.

    All CSV files live in ``<base_folder>/Data/Characters/<character>``, the
    same folder the checkpoint is resolved from. Previously the consistency
    and style files were addressed under a ``Datasets`` directory relative to
    the current working directory, which this function never created.

    Args:
        character: Key into ``character_dict`` and name of the character's
            sub-folder under ``<base_folder>/Data/Characters``.
        questions: Value forwarded as ``questions`` to the style metric's
            ``train``. When ``None`` (the default) the notebook-level
            ``barney_sentences`` is used, matching the previous behavior.
    """
    # Single per-character folder; it holds both the checkpoint and the CSVs.
    character_folder = os.path.join(base_folder, 'Data', 'Characters', character)
    os.makedirs(character_folder, exist_ok=True)

    def csv_path(filename):
        # Keep every metric file next to the character's other data.
        return os.path.join(character_folder, filename)

    checkpoint_folder = os.path.join(
        character_folder, character_dict[character]['checkpoint_folder'])
    model = TFAutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=checkpoint_folder)

    # compute human - coherence
    print("Step 1) Chat with", character, "\n\tPlease evaluate your chat with this character:", flush=True)
    coherence_file = csv_path("humancoherence.csv")
    metric = BBMetric.load_metric("human - coherence")
    metric.train(model=model, tokenizer=tokenizer,
                 filepath=coherence_file,
                 length=5)  # length is optional, defaults to 5
    metric.compute(filepath=coherence_file)

    # compute human - consistency
    print("Step 2) chat with", character, "\n\tPlease evaluate how true these responses are for the character:", flush=True)
    consistency_file = csv_path("humanconsistency.csv")
    metric = BBMetric.load_metric("human - consistency")
    metric.train(model=model, tokenizer=tokenizer,
                 filepath=consistency_file)
    metric.compute(filepath=consistency_file)

    # compute human - style
    print("Step 3) chat with", character, "\n\tPlease evaluate the style of the responses, thinking of the character.", flush=True)
    print("\tDo you think they are responses that", character, "would say?", flush=True)
    if questions is None:
        # NOTE(review): barney_sentences is not defined in the visible cells;
        # confirm it is provided elsewhere, or pass `questions` explicitly
        # (especially for characters other than Barney).
        questions = barney_sentences
    style_file = csv_path("humanstyle.csv")
    metric = BBMetric.load_metric("human - style")
    metric.train(model=model, tokenizer=tokenizer,
                 filepath=style_file,
                 questions=questions)
    metric.compute(filepath=style_file)

# Barney

In [None]:
# Run the coherence, consistency and style human evaluations for Barney.
eval_character('Barney')

# Sheldon

In [None]:
# Run the coherence, consistency and style human evaluations for Sheldon.
eval_character('Sheldon')

# Harry

In [None]:
# Run the coherence, consistency and style human evaluations for Harry.
eval_character('Harry')

# Fry

In [None]:
# Run the coherence, consistency and style human evaluations for Fry.
eval_character('Fry')

# Vader

In [None]:
# Run the coherence, consistency and style human evaluations for Vader.
eval_character('Vader')

# Joey

In [None]:
# Run the coherence, consistency and style human evaluations for Joey.
eval_character('Joey')

# Phoebe

In [None]:
# Run the coherence, consistency and style human evaluations for Phoebe.
eval_character('Phoebe')

# Bender

In [None]:
# Run the coherence, consistency and style human evaluations for Bender.
eval_character('Bender')