In [32]:
# !pip install fastai
# !pip install transformers

In [33]:
from fastai.text.all import *
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

In [34]:
pretrained_weights = 'gpt2'
tokenizer = GPT2TokenizerFast.from_pretrained(pretrained_weights)
model = GPT2LMHeadModel.from_pretrained(pretrained_weights)

In [35]:
path = './poems'
# from google.colab import drive
# drive.mount('/content/drive')


In [36]:
# path="drive/MyDrive/poem/poems"
# !ls drive/MyDrive/poem/poems

In [37]:
blues = get_text_files(path, folders = ['blues-poem'])
print("There are",len(blues),"blues in the dataset")

There are 17 blues in the dataset


In [38]:
txt = blues[0].open().read(); #read the first file
print(txt)

Seems all my days of late
Are spent in doubt
Regrettinâ€™ what I done
Wonderinâ€™ how to get out
Yâ€™see a while ago
I made a big mistake
Now Iâ€™m wishing itâ€™s a dream
Please Lord, let me wake
This awful thing I done
Was while my beau was gone
I started to forget him
And had myself some fun
I met this hot distraction
Things started gettingâ€™ hot
Before I cleared my head
This man has got me caught
So I made my bed with him
He rubbed me right and how
Until my beau got home
What I gonna tell him now?
I told my beau the truth
He dumped me and fast
So I stayed with the hottie
But the love didnâ€™t last
I was missing my beau
The worse things got
I tried to get him back
But my beau he told me off
Seems all my days of late
Are spent in doubt
Regrettinâ€™ what I done
Wonderinâ€™ how to get out.


## Prepare the data

In [39]:
blues = L(o.open().read() for o in blues) # to make things easy we will gather all texts in one numpy array
blues


(#17) ['Seems all my days of late\nAre spent in doubt\nRegrettinâ€™ what I done\nWonderinâ€™ how to get out\nYâ€™see a while ago\nI made a big mistake\nNow Iâ€™m wishing itâ€™s a dream\nPlease Lord, let me wake\nThis awful thing I done\nWas while my beau was gone\nI started to forget him\nAnd had myself some fun\nI met this hot distraction\nThings started gettingâ€™ hot\nBefore I cleared my head\nThis man has got me caught\nSo I made my bed with him\nHe rubbed me right and how\nUntil my beau got home\nWhat I gonna tell him now?\nI told my beau the truth\nHe dumped me and fast\nSo I stayed with the hottie\nBut the love didnâ€™t last\nI was missing my beau\nThe worse things got\nI tried to get him back\nBut my beau he told me off\nSeems all my days of late\nAre spent in doubt\nRegrettinâ€™ what I done\nWonderinâ€™ how to get out.','Living hard,\nthe writing comes easy\nLiving easy,\nthe writing comes hard\n(Villanova Pennsylvania: June,2017)','The lad tread the path,\nThe lad stuck his f

In [40]:
def flatten(A):
    rt = []
    for i in A:
        if isinstance(i,list): rt.extend(flatten(i))
        else: rt.append(i)
    return rt
  
all_blues = flatten(blues)
len(all_blues)

17

In [41]:
class TransformersTokenizer(Transform):
    def __init__(self, tokenizer): self.tokenizer = tokenizer
    def encodes(self, x): 
        toks = self.tokenizer.tokenize(x)
        return tensor(self.tokenizer.convert_tokens_to_ids(toks))
    def decodes(self, x): return TitledStr(self.tokenizer.decode(x.cpu().numpy()))

In [42]:
splits = [range_of(5), range(17)] # use a 70/30 split

tls = TfmdLists(all_blues, TransformersTokenizer(tokenizer), splits=splits, dl_type=LMDataLoader)

In [43]:
show_at(tls.train, 0)

Seems all my days of late
Are spent in doubt
Regrettinâ€™ what I done
Wonderinâ€™ how to get out
Yâ€™see a while ago
I made a big mistake
Now Iâ€™m wishing itâ€™s a dream
Please Lord, let me wake
This awful thing I done
Was while my beau was gone
I started to forget him
And had myself some fun
I met this hot distraction
Things started gettingâ€™ hot
Before I cleared my head
This man has got me caught
So I made my bed with him
He rubbed me right and how
Until my beau got home
What I gonna tell him now?
I told my beau the truth
He dumped me and fast
So I stayed with the hottie
But the love didnâ€™t last
I was missing my beau
The worse things got
I tried to get him back
But my beau he told me off
Seems all my days of late
Are spent in doubt
Regrettinâ€™ what I done
Wonderinâ€™ how to get out.


In [44]:
bs,sl = 4,256
dls = tls.dataloaders(bs=bs, seq_len=sl)

In [45]:
dls.show_batch(max_n=2)

Unnamed: 0,text,text_
0,"He took the path of death,\nI took the path of my birth.\nThen, we met at the last exit:\nThough we struggled, yet none could exit.\n18: 03: 27: 16: 23Seems all my days of late\nAre spent in doubt\nRegrettinâ€™ what I done\nWonderinâ€™ how to get out\nYâ€™see a while ago\nI made a big mistake\nNow Iâ€™m wishing itâ€™s a dream\nPlease Lord, let me wake\nThis awful thing I done\nWas while my beau was gone\nI started to forget him\nAnd had myself some fun\nI met","took the path of death,\nI took the path of my birth.\nThen, we met at the last exit:\nThough we struggled, yet none could exit.\n18: 03: 27: 16: 23Seems all my days of late\nAre spent in doubt\nRegrettinâ€™ what I done\nWonderinâ€™ how to get out\nYâ€™see a while ago\nI made a big mistake\nNow Iâ€™m wishing itâ€™s a dream\nPlease Lord, let me wake\nThis awful thing I done\nWas while my beau was gone\nI started to forget him\nAnd had myself some fun\nI met this"
1,this hot distraction\nThings started gettingâ€™ hot\nBefore I cleared my head\nThis man has got me caught\nSo I made my bed with him\nHe rubbed me right and how\nUntil my beau got home\nWhat I gonna tell him now?\nI told my beau the truth\nHe dumped me and fast\nSo I stayed with the hottie\nBut the love didnâ€™t last\nI was missing my beau\nThe worse things got\nI tried to get him back\nBut my beau he told me off\nSeems all my days of late\nAre spent in doubt\nRegrettinâ€™ what I done\nWonderinâ€™,hot distraction\nThings started gettingâ€™ hot\nBefore I cleared my head\nThis man has got me caught\nSo I made my bed with him\nHe rubbed me right and how\nUntil my beau got home\nWhat I gonna tell him now?\nI told my beau the truth\nHe dumped me and fast\nSo I stayed with the hottie\nBut the love didnâ€™t last\nI was missing my beau\nThe worse things got\nI tried to get him back\nBut my beau he told me off\nSeems all my days of late\nAre spent in doubt\nRegrettinâ€™ what I done\nWonderinâ€™ how


## Fine-tuning the model

In [46]:
class DropOutput(Callback):
    def after_pred(self): self.learn.pred = self.pred[0]

In [47]:
learn = Learner(dls, model, loss_func=CrossEntropyLossFlat(), cbs=[DropOutput], metrics=Perplexity()).to_fp16()

In [48]:
learn.validate()



(#2) [3.9277729988098145,50.79373550415039]

In [49]:
learn.lr_find()

In [None]:
learn.fit_one_cycle(1, 1e-4)

## Poem Generation

In [None]:
prompt = 'love is ridiculous' # create an initial text prompt to start your generated text
prompt_ids = tokenizer.encode(prompt)
inp = tensor(prompt_ids)[None]

In [None]:
preds = learn.model.generate(inp, max_length=60, num_beams=5, no_repeat_ngram_size=2, early_stopping=True)
print("Output:\n" + 100 * '-')
print(tokenizer.decode(preds[0].cpu().numpy(), skip_special_tokens=True))

In [None]:
prompt = "heartbreak"
prompt_ids = tokenizer.encode(prompt)
inp = tensor(prompt_ids)[None]
preds = learn.model.generate(inp, max_length=60, num_beams=5, no_repeat_ngram_size=2, early_stopping=True)
print("Output:\n" + 100 * '-')
print(tokenizer.decode(preds[0].cpu().numpy(), skip_special_tokens=True))

In [None]:
import pickle

In [None]:
filename = "model.sav"
pickle.dump(learn.model, open(filename, "wb"))

In [None]:
filename = "model.sav"
model_saved = pickle.load(open(filename, "rb"))
# model_saved

# use the saved model

In [None]:
from fastai.text.all import *
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

In [None]:
pretrained_weights = 'gpt2'
tokenizer = GPT2TokenizerFast.from_pretrained(pretrained_weights)
model = GPT2LMHeadModel.from_pretrained(pretrained_weights)

In [None]:
path = './poems'

In [None]:
blues = get_text_files(path, folders = ['ballad'])
print("There are",len(blues),"blues in the dataset")

In [None]:
blues = L(o.open().read() for o in blues[:50])

In [None]:
def flatten(A):
    rt = []
    for i in A:
        if isinstance(i,list): rt.extend(flatten(i))
        else: rt.append(i)
    return rt
  
all_blues = flatten(blues)
len(all_blues)

In [None]:
class TransformersTokenizer(Transform):
    def __init__(self, tokenizer): self.tokenizer = tokenizer
    def encodes(self, x): 
        toks = self.tokenizer.tokenize(x)
        return tensor(self.tokenizer.convert_tokens_to_ids(toks))
    def decodes(self, x): return TitledStr(self.tokenizer.decode(x.cpu().numpy()))

In [None]:
prompt = ""
prompt_ids = tokenizer.encode(prompt)
inp = tensor(prompt_ids)[None]
preds = model_saved.generate(inp, max_length=60, num_beams=5, no_repeat_ngram_size=2, early_stopping=True)
print("Output:\n" + 100 * '-')
print(tokenizer.decode(preds[0].cpu().numpy(), skip_special_tokens=True))