In [1]:
from Lib.BBMetrics import BBMetric
from transformers import TFAutoModelForCausalLM, AutoTokenizer
import os
from Data.data_dicts import character_dict, source_dict, random_state
from transformers import AdamWeightDecay

import os
# Detect whether the notebook runs inside Google Colab: the import is expected
# to fail with ImportError everywhere else. Only ImportError is caught so that
# unrelated failures (e.g. KeyboardInterrupt) are not silently swallowed.
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    from google.colab import drive
    # Mount Google Drive and point base_folder at the project folder stored there.
    drive.mount('/content/drive',force_remount=True)
    base_folder = '/content/drive/My Drive/unibo/NLP_project/BarneyBot'
    # Install the extra packages on the Colab runtime.
    os.system("pip install datasets")
    os.system("pip install transformers")
    os.system("pip install rouge_score")
    os.system("pip install -U sentence-transformers")
else:
    # Local run: the project root is the current working directory.
    base_folder = os.getcwd()

# Folder holding Barney's character data; created on first use.
# exist_ok=True replaces the separate existence check and is safe on re-runs.
barney_folder = os.path.join(base_folder, 'Data', 'Characters', 'Barney')
os.makedirs(barney_folder, exist_ok=True)
    
# Small hand-written sentence sets passed to the metric demos in the cells below.
# sentences_basic and sentences_basic_2 are compared pairwise, element by element.
sentences_basic = ["Hi!", "How are you?", "I hate you."]
sentences_basic_2 = ["Hello!", "How are you doing?", "I think this is good."]
# Sentence sets scored by the "semantic classifier" cell for the Barney character.
sentences_vader = ["Come to the dark side!", "I will kill you!", "Luke, I am your father."]
sentences_barney = ["Did you get the suit?", "Legendary!", "I like girls."]

In [2]:
# Tokenizer of the base DialoGPT-small checkpoint; '#' is used as the padding token.
tokenizer = AutoTokenizer.from_pretrained(
    'microsoft/DialoGPT-small',
    cache_dir=os.path.join(os.getcwd(), "cache"),
)
tokenizer.pad_token = '#'

# Base DialoGPT-small model, compiled with only an optimizer (no loss argument).
model = TFAutoModelForCausalLM.from_pretrained(
    'microsoft/DialoGPT-small',
    cache_dir=os.path.join(os.getcwd(), "cache"),
)
model.compile(optimizer=AdamWeightDecay(learning_rate=2e-5))

All model checkpoint layers were used when initializing TFGPT2LMHeadModel.

All the layers of TFGPT2LMHeadModel were initialized from the model checkpoint at microsoft/DialoGPT-small.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.
No loss specified in compile() - the model's internal loss computation will be used as the loss. Don't panic - this is a common way to train TensorFlow models in Transformers! To disable this behaviour, please pass a loss argument, or explicitly pass `loss=None` if you do not want your model to compute a loss.


In [3]:
from datasets import load_dataset, DatasetDict
from transformers import DataCollatorForLanguageModeling

# Batch size used when the tokenized test split is turned into a tf.data set.
batch_size = 8

# Collator configured for causal language modelling (mlm=False), emitting TF tensors.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False, return_tensors='tf')

# Barney model, loaded from the checkpoint folder recorded in character_dict.
model_barney = TFAutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=os.path.join(
        barney_folder, character_dict['Barney']['checkpoint_folder']
    )
)
model_barney.compile(optimizer=AdamWeightDecay(learning_rate=2e-5))

def load_df(character):
    """Load a character's CSV file and split it into train/test/val sets.

    The file ``<base_folder>/Data/Characters/<character>/<character>.csv`` is
    read with ``load_dataset('csv', ...)`` using ``<base_folder>/cache`` as the
    cache directory. Its 'train' split is then split twice, both times seeded
    with ``random_state``.

    Args:
        character: Name of the character; used as both folder and file name.

    Returns:
        A DatasetDict with the keys 'train', 'test' and 'val'.
    """
    cache_folder = os.path.join(base_folder, "cache")
    os.environ["HF_DATASETS_CACHE"] = cache_folder

    csv_path = os.path.join(base_folder, "Data", "Characters", character, character+'.csv')
    raw_hg = load_dataset('csv', data_files=csv_path, cache_dir=cache_folder)

    # 85% train / 10% test / 5% validation:
    # first hold out 15%, then give a third of that hold-out to validation.
    outer_split = raw_hg['train'].train_test_split(test_size=0.15, seed=random_state)
    inner_split = outer_split['test'].train_test_split(test_size=0.33, seed=random_state)

    return DatasetDict({
        'train': outer_split['train'],
        'test': inner_split['train'],
        'val': inner_split['test']
    })

def construct_conv(row, tokenizer, max_length=512):
    """Turn one dataset row into a fixed-length causal-LM training example.

    The row's values are taken in reverse column order (presumably so that the
    oldest context comes first and the response last -- TODO confirm against
    the CSV layout), tokenized, each terminated with the EOS token, and
    concatenated into one sequence. The sequence is then padded with the id of
    '#' or truncated so that it is always exactly ``max_length`` tokens long.

    Args:
        row: Mapping whose values are the utterances of one conversation.
        tokenizer: Callable tokenizer returning 'input_ids' and
            'attention_mask' lists; must also provide ``encode`` and
            ``eos_token_id``.
        max_length: Length of the produced sequences. Defaults to 512.

    Returns:
        The tokenizer output with 'input_ids', 'attention_mask' and 'labels'
        replaced by flat lists of length ``max_length``. 'labels' is an
        independent copy of 'input_ids'.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    utterances = list(reversed(list(row.values())))
    model_inputs = tokenizer(utterances)
    pad_token_id = tokenizer.encode('#')[0]
    eos_token_id = tokenizer.eos_token_id

    # Close every utterance with EOS and flatten everything into one sequence.
    input_ids = []
    attention_mask = []
    for ids, mask in zip(model_inputs['input_ids'], model_inputs['attention_mask']):
        input_ids.extend(ids)
        input_ids.append(eos_token_id)
        attention_mask.extend(mask)
        attention_mask.append(1)

    overflow = len(input_ids) - max_length
    if overflow > 0:
        # Keep exactly max_length tokens (the previous max_length-1 cut made
        # truncated examples one token shorter than padded ones) and make sure
        # the sequence still ends with EOS.
        input_ids = input_ids[:max_length]
        input_ids[-1] = eos_token_id
        attention_mask = attention_mask[:max_length]
        attention_mask[-1] = 1
    elif overflow < 0:
        # Right-pad; padding positions are masked out of attention.
        input_ids += [pad_token_id] * (-overflow)
        attention_mask += [0] * (-overflow)

    model_inputs['input_ids'] = input_ids
    model_inputs['attention_mask'] = attention_mask
    # Copy so that later edits to the labels cannot silently change input_ids.
    model_inputs["labels"] = list(input_ids)
    return model_inputs

def preprocess_function(examples):
    """Convert one dataset row into model inputs via construct_conv.

    The pad token of the module-level tokenizer is set to '#' on every call
    before the row is handed to construct_conv.
    """
    tokenizer.pad_token = '#'
    return construct_conv(examples, tokenizer)

# Load and split Barney's dataset, then tokenize it row by row (batched=False,
# so preprocess_function receives a single row at a time).
barney_hg = load_df('Barney')
tokenized_barney_hg = barney_hg.map(preprocess_function, batched=False)

# Build the batched, unshuffled tf.data set of the 'test' split; this is the
# input given to the perplexity metric further down.
barney_test_set = tokenized_barney_hg["test"].to_tf_dataset(
    columns=["input_ids", "attention_mask", "labels"],
    shuffle=False,
    batch_size=batch_size,
    collate_fn=data_collator,
)

All model checkpoint layers were used when initializing TFGPT2LMHeadModel.

All the layers of TFGPT2LMHeadModel were initialized from the model checkpoint at D:\University\Esami da Superare\Natural Language Processing\BarneyBot\BarneyBot\Data\Characters\Barney\barney_model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.
No loss specified in compile() - the model's internal loss computation will be used as the loss. Don't panic - this is a common way to train TensorFlow models in Transformers! To disable this behaviour, please pass a loss argument, or explicitly pass `loss=None` if you do not want your model to compute a loss.
Using custom data configuration default-0c3b759885fd8d5f
Reusing dataset csv (D:\University\Esami da Superare\Natural Language Processing\BarneyBot\BarneyBot\cache\csv\default-0c3b759885fd8d5f\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb

  0%|          | 0/1 [00:00<?, ?it/s]

Loading cached split indices for dataset at D:\University\Esami da Superare\Natural Language Processing\BarneyBot\BarneyBot\cache\csv\default-0c3b759885fd8d5f\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-c79122a57f55334a.arrow and D:\University\Esami da Superare\Natural Language Processing\BarneyBot\BarneyBot\cache\csv\default-0c3b759885fd8d5f\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-4faecb1c641ab4b9.arrow
Loading cached split indices for dataset at D:\University\Esami da Superare\Natural Language Processing\BarneyBot\BarneyBot\cache\csv\default-0c3b759885fd8d5f\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-7b672756e3cf7bb2.arrow and D:\University\Esami da Superare\Natural Language Processing\BarneyBot\BarneyBot\cache\csv\default-0c3b759885fd8d5f\0.0.0\433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519\cache-bb39fb19125a9103.arrow
Loading cached processed dataset at D:\Unive

In [4]:
# Display the metric names; the cells below pass these names to BBMetric.load_metric.
BBMetric.metrics_list

['bleu',
 'semantic similarity',
 'rouge l',
 'emotion',
 'semantic answer similarity',
 'distinct',
 'semantic classifier',
 'perplexity',
 'human - coherence',
 'human - consistency',
 'human - style']

In [5]:
# Load the "bleu" metric and print its result for sentences_basic (predictions)
# against sentences_basic_2 (references).
metric = BBMetric.load_metric("bleu")

print(metric.compute(predictions=sentences_basic, references=sentences_basic_2))

{'score': 0.0}


In [10]:
# Load the "rouge l" metric and print its result for sentences_basic (predictions)
# against sentences_basic_2 (references).
metric = BBMetric.load_metric("rouge l")

print(metric.compute(predictions=sentences_basic, references=sentences_basic_2))

{'score': 0.36904761904761907, 'std': 0.3599099156626422}


In [14]:
# Load the "distinct" metric and show its result for sentences_basic using bigrams.
metric = BBMetric.load_metric("distinct")

# ngram_size is optional, defaults to 3
metric.compute(sentences=sentences_basic, ngram_size=2)

{'score': 0.11616161616161617, 'std': 0.0823712445974752}

In [5]:
# Load the "emotion" metric and print its result for sentences_basic.
metric = BBMetric.load_metric("emotion")

print(metric.compute(sentences=sentences_basic))

{'score': [0.04045128605018059, 0.3372649804999431, 0.029534351934368413, 0.33119267721970874, 0.24915542546659708, 0.012401290975200633], 'std': [0.0246632631447857, 0.3520860938584187, 0.03524459240387796, 0.360727083374811, 0.31209396368681236, 0.008284051185361898], 'label': ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']}


In [16]:
# Load the "semantic similarity" metric and print its result for the two basic
# sentence sets.
metric = BBMetric.load_metric("semantic similarity")

print(metric.compute(sentences_a=sentences_basic, sentences_b=sentences_basic_2))

{'score': 0.60903007, 'std': 0.40267226}


In [17]:
# Load the "semantic answer similarity" metric and print its result for
# sentences_basic (predictions) against sentences_basic_2 (references).
metric = BBMetric.load_metric("semantic answer similarity")

print(metric.compute(predictions=sentences_basic, references=sentences_basic_2))

{'score': 0.59648186, 'std': 0.4743335}


In [4]:
# NOTE: this metric is not autonomous from the project folder structure and datatypes!
metric = BBMetric.load_metric("semantic classifier")

# Optional train() arguments and their defaults:
#   n_shuffles            -> 10
#   from_saved_embeddings -> True
#   shutdown_at_end       -> False
metric.train(
    character='Barney',
    character_dict=character_dict,
    source_dict=source_dict,
    random_state=random_state,
    base_folder=base_folder,
    n_shuffles=10,
    from_saved_embeddings=True,
    shutdown_at_end=False,
)

# Score each sentence set in turn with the Barney classifier.
# n_draws is optional, defaults to len(sentences)-2
for sentence_set in (sentences_basic, sentences_vader, sentences_barney):
    print(metric.compute(character='Barney', character_dict=character_dict,
                         base_folder=base_folder, sentences=sentence_set))

Using classifier at D:\University\Esami da Superare\Natural Language Processing\BarneyBot\BarneyBot\Data\Characters\Barney\barney_classifier
{'score': 0.9999995, 'std': 0.0}


In [6]:
# Load the "perplexity" metric and print its result for model_barney on the
# tokenized Barney test set. The recorded run was interrupted after 4 of 66
# batches at roughly 17 s per batch, so expect this cell to be slow.
metric = BBMetric.load_metric("perplexity")

print(metric.compute(model=model_barney, encoded_test_set=barney_test_set))

  6%|█████                                                                              | 4/66 [01:09<17:51, 17.28s/it]


KeyboardInterrupt: 

In [None]:
# Load the "human - coherence" metric; train() and compute() share one CSV file.
metric = BBMetric.load_metric("human - coherence")

# Resolve the file from base_folder rather than os.getcwd(): both are the same
# on a local run, but on Colab only base_folder points inside the project.
humancoherence_path = os.path.join(base_folder, "Data", "Characters", "Default", "humancoherence.csv")

# length is optional, defaults to 5
metric.train(model=model, tokenizer=tokenizer,
             filepath=humancoherence_path,
             length=5)

metric.compute(filepath=humancoherence_path)

In [None]:
# Load the "human - consistency" metric; train() and compute() share one CSV file.
metric = BBMetric.load_metric("human - consistency")

# Resolve the file from base_folder rather than os.getcwd(): both are the same
# on a local run, but on Colab only base_folder points inside the project.
humanconsistency_path = os.path.join(base_folder, "Data", "Characters", "Default", "humanconsistency.csv")

metric.train(model=model, tokenizer=tokenizer,
             filepath=humanconsistency_path)

metric.compute(filepath=humanconsistency_path)

In [None]:
# Load the "human - style" metric; train() and compute() share one CSV file.
metric = BBMetric.load_metric("human - style")

# Resolve the file from base_folder rather than os.getcwd(): both are the same
# on a local run, but on Colab only base_folder points inside the project.
humanstyle_path = os.path.join(base_folder, "Data", "Characters", "Default", "humanstyle.csv")

# The question list is sentences_barney from the first cell; the previous name
# `barney_sentences` was never defined and raised a NameError.
metric.train(model=model, tokenizer=tokenizer,
             filepath=humanstyle_path,
             questions=sentences_barney)

metric.compute(filepath=humanstyle_path)