### Fine Tuning GPT-2 Model

In [231]:
import pandas as pd
import math
from gpt2_utils import Dset 
from gpt2_utils import get_model_tokenizer, train_model, generate_texts, compute_perplexity, load_model

Set notebook variables

In [232]:
# --- notebook configuration ---

# genre whose lyric data set is loaded below ('country' or 'metal')
GENRE = 'metal'

# Name of this trained model, will be used for filename when saving the model
MODEL_INSTANCE_NAME = 'test'

# padding / truncation length (in tokens) used when encoding the lyric lines
MAX_SEQ_LEN = 10

# device handed to the perplexity computation
DEVICE = 'cpu'

# NOTE(review): not referenced by any cell visible in this notebook
VERBOSE = True

Read in train, validation, and test data

In [233]:
# Load the cleaned train / validation / test lines for the configured genre.
# Only 'country' and 'metal' are supported; anything else is rejected up front.
if GENRE not in ('country', 'metal'):
    raise ValueError('Incorrect genre given.')


def _read_split(split):
    """Return the rows of data/<GENRE>_<split>.csv (read without a header) as a list of lists."""
    return pd.read_csv(f'data/{GENRE}_{split}.csv', header=None).values.tolist()


train_lines = _read_split('train')
val_lines = _read_split('val')
test_lines = _read_split('test')

In [234]:
# report how many rows each split contains
for label, split_rows in (
    ('train lines :', train_lines),
    ('val lines : ', val_lines),
    ('test lines : ', test_lines),
):
    print(label, len(split_rows))

train lines : 149771
val lines :  18610
test lines :  19108


In [235]:
# Keep only the first tenth (rounded up) of the training and validation lines.
# test_lines is not touched by this cell.
# NOTE: both lists are overwritten in place, so re-running this cell without
# reloading the data shrinks them by another factor of ten.
train_end = math.ceil(len(train_lines) / 10)
val_end = math.ceil(len(val_lines) / 10)

train_lines = train_lines[:train_end]
val_lines = val_lines[:val_end]

Fine Tuning GPT-2 Model

In [236]:
# Get the model and tokenizer that will be fine-tuned. MAX_SEQ_LEN is handed to
# the helper; how it is used there is not visible in this notebook.
# NOTE(review): the log output of this cell names the checkpoint
# ashiqabdulkhader/GPT2-Poet and reports lm_head.weight as newly initialized —
# confirm that is expected.
model, tokenizer = get_model_tokenizer(MAX_SEQ_LEN)

All model checkpoint layers were used when initializing TFGPT2LMHeadModel.

All the layers of TFGPT2LMHeadModel were initialized from the model checkpoint at ashiqabdulkhader/GPT2-Poet.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.
All TF 2.0 model weights were used when initializing GPT2LMHeadModel.

Some weights of GPT2LMHeadModel were not initialized from the TF 2.0 model and are newly initialized: ['lm_head.weight', 'lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [237]:
# encode data
def _encode_lines(lines):
    """Tokenize every entry of `lines` and return a list of token-id lists.

    Each entry is padded / truncated to MAX_SEQ_LEN tokens, and only the
    ``input_ids`` of the first sequence of each encoding are kept.
    """
    token_ids = []
    for entry in lines:
        encoded = tokenizer(
            text=entry,
            return_tensors='pt',
            padding='max_length',
            max_length=MAX_SEQ_LEN,
            truncation=True,
        )
        token_ids.append(encoded['input_ids'].tolist()[0])
    return token_ids


train_encodings = _encode_lines(train_lines)
val_encodings = _encode_lines(val_lines)
test_encodings = _encode_lines(test_lines)

In [238]:
# wrap the encoded training, validation, and testing splits in Dset objects
dset_train, dset_val, dset_test = map(
    Dset, (train_encodings, val_encodings, test_encodings)
)

In [239]:
# Fine-tune the model on the training dataset; the validation dataset is passed
# alongside it (the cell's log reports an eval_loss after the epoch).
# GENRE and MODEL_INSTANCE_NAME are forwarded to train_model; per the config cell,
# the instance name is used for the filename when saving the model.
# NOTE(review): `batches` is presumably the batch size — the logged 75 training and
# 10 evaluation steps match ceil(14978 / 200) and ceil(1861 / 200) — confirm in gpt2_utils.
# NOTE: `model` is rebound to the returned value, so re-running this cell passes the
# previously returned model back in rather than the freshly loaded checkpoint.
model = train_model(model, dset_train, dset_val, GENRE, MODEL_INSTANCE_NAME, batches=200, epochs=1, lr=0.0001)

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

{'eval_loss': 4.284275531768799, 'eval_runtime': 20.2864, 'eval_samples_per_second': 91.736, 'eval_steps_per_second': 0.493, 'epoch': 1.0}
{'train_runtime': 521.1179, 'train_samples_per_second': 28.742, 'train_steps_per_second': 0.144, 'train_loss': 3.492087809244792, 'epoch': 1.0}


In [241]:
# save the tokenizer under gpt2_tokenizers/<genre>/<model instance name>/
tokenizer_file_path = f"gpt2_tokenizers/{GENRE.lower()}/{MODEL_INSTANCE_NAME}/"
tokenizer.save_pretrained(tokenizer_file_path)

('gpt2_tokenizers/metal/test/tokenizer_config.json',
 'gpt2_tokenizers/metal/test/special_tokens_map.json',
 'gpt2_tokenizers/metal/test/vocab.json',
 'gpt2_tokenizers/metal/test/merges.txt',
 'gpt2_tokenizers/metal/test/added_tokens.json')

In [264]:
# generate lyrics with the model fine-tuned above and print one result per line
gen_texts = generate_texts(model, tokenizer, 15)
lyric_lines = [''.join(pieces) for pieces in gen_texts]
for lyric in lyric_lines:
    print(lyric)

solo:
took me, he cried
s on and when the dead begins by

takes - we're real running cold
 now and then
takes and fro the twisted stones
 your hate
 now, is to be dead again
and
 now
in' yeah, yeah!
till to see that
 ooh...
till all the guardians fade away


In [260]:
import transformers  # kept from the original cell; not referenced by name in the visible cells

# Load the saved "gpt2_final_model" for the configured genre.
# The genre directory is derived from GENRE (the same way the tokenizer path is
# built) instead of being hard-coded to 'metal', so switching GENRE cannot
# silently load the other genre's model. With GENRE = 'metal' the path is
# unchanged: "gpt2_trained_models/metal/gpt2_final_model".
loaded_model = load_model(f"gpt2_trained_models/{GENRE.lower()}/gpt2_final_model")


All PyTorch model weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


In [270]:
# generate lyrics with the model loaded from disk and print one result per line
gen_texts = generate_texts(loaded_model, tokenizer, 15)
joined_texts = [''.join(pieces) for pieces in gen_texts]
for joined in joined_texts:
    print(joined)

.
 (while highway kings who seem to join)
 after was over in his way of light.
o' twinkle. wave your beak
.
.
pop-pearl chair,when company goes
perhaps i could never leave
 (ooh)
.com. harper has missed the doors
˜ king roll. bones of gold was ch
.''—i was dressed like a bird
.
.
™s where you donâ€™t


In [271]:
# Reload this notebook's model instance from disk and generate lyrics with it.
# The load was previously commented out, so `other_loaded_model` was undefined
# on a fresh kernel and this cell raised NameError.
# NOTE(review): assumes train_model saved the model under
# gpt2_trained_models/<genre>/<instance name>; with GENRE = 'metal' and
# MODEL_INSTANCE_NAME = 'test' this resolves to the originally hard-coded
# "gpt2_trained_models/metal/test" — confirm against gpt2_utils.
other_loaded_model = load_model(f"gpt2_trained_models/{GENRE.lower()}/{MODEL_INSTANCE_NAME}")
gen_texts = generate_texts(other_loaded_model, tokenizer, 15)
for text in gen_texts:
    print(''.join(text))

till this is a kingdom in the air
 they make me beautiful
 us for love - yeah - yeah
 us for more

 v Volume v Landtakes a rumb
 now and now
 of a world
s down


s and burns again
ing out to the battle of pain
takes me,



Compute Perplexity

In [254]:
# compute the perplexity of the fine-tuned model on the test lines
# (the input is test_lines, not the generated lyrics)
import numpy as np

# test_lines is a list of rows; collapse it into one flat list of values
test_rows = np.array(test_lines)
test_lines_flt = test_rows.ravel().tolist()

ppl = compute_perplexity(model, tokenizer, test_lines_flt, MAX_SEQ_LEN, DEVICE)
ppl

Token indices sequence length is longer than the specified maximum sequence length for this model (177610 > 1024). Running this sequence through the model will result in indexing errors
  0%|          | 0/1 [00:00<?, ?it/s]


94.27391815185547