In [None]:
from fastai.text.all import *
from nbdev.showdoc import show_doc

# ULMFiT

## Finetune a pretrained Language Model

First we get our data and tokenize it.

In [None]:
# Fetch the IMDB dataset referenced by `URLs.IMDB`; `path` is reused by every later cell
# (file discovery, tokenization, and as the learners' `path`).
path = untar_data(URLs.IMDB)

In [None]:
# Collect every `.txt` review from the unlabelled, train and test folders:
# the language model can use all of them because it needs no labels.
texts = get_files(
    path,
    extensions=['.txt'],
    folders=['unsup', 'train', 'test'],
)
len(texts)

100000

Then we put it in a `Datasets`. For a language model, we don't have targets, so there is only one pipeline of transforms, which tokenizes and then numericalizes the texts. Note that `tokenize_df` returns the count of the words in the corpus to make it easy to create a vocabulary.

In [None]:
def read_file(f):
    "Read the text of `f` and return its single-space-separated pieces as an `L`."
    raw_text = f.read_text()
    # Split on a literal space only, so consecutive spaces yield empty strings
    # and newlines/tabs stay inside the pieces.
    pieces = raw_text.split(' ')
    return L(pieces)

In [None]:
# Hold out 10% of the files for validation. The seed is fixed so the split -- and
# anything derived from the training subset -- is identical on every
# Restart & Run All instead of changing from run to run.
splits = RandomSplitter(valid_pct=0.1, seed=42)(texts)
# One transform pipeline only (a language model has no targets):
# tokenize the files, then numericalize the tokens.
tfms = [Tokenizer.from_folder(path), Numericalize()]
# `dl_type=LMDataLoader` makes `dsets.dataloaders` build language-model batches.
dsets = Datasets(texts, [tfms], splits=splits, dl_type=LMDataLoader)

Then we use that `Datasets` to create a `DataLoaders`. Here the class of `TfmdDL` we need to use is `LMDataLoader`, which will concatenate all the texts in a source (with a shuffle at each epoch for the training set), split the result into `bs` chunks, then read continuously through them.

In [None]:
# Batch size and sequence length used for the language-model batches.
bs, sl = 256, 80
# Use the same batch size for the validation loader as for training.
dbunch_lm = dsets.dataloaders(bs=bs, val_bs=bs, seq_len=sl)

In [None]:
# Sanity check: display a few input/target pairs from the language-model loader.
dbunch_lm.show_batch()

Unnamed: 0,text,text_
0,"▁xxbos ▁xxmaj ▁this ▁is ▁an ▁xxmaj ▁emperor ' s ▁xxmaj ▁new ▁xxmaj ▁clothes ▁situation . ▁xxmaj ▁someone ▁needs ▁to ▁say ▁"" that ' s ▁not ▁a ▁funny ▁and ▁original , ▁( etc . , ▁etc . ) ▁film ; ▁that ▁is ▁an ▁inferior ▁film . ▁xxmaj ▁don ' t ▁waste ▁your ▁money ▁on ▁it ."" ▁xxmaj ▁the ▁film ▁is ▁trashy , ▁and ▁the ▁people ▁in ▁it ▁are ▁embarrassingly ▁inferior ▁trailer ▁trash . ▁xxmaj ▁they ▁are ▁all - too - realistic","▁xxmaj ▁this ▁is ▁an ▁xxmaj ▁emperor ' s ▁xxmaj ▁new ▁xxmaj ▁clothes ▁situation . ▁xxmaj ▁someone ▁needs ▁to ▁say ▁"" that ' s ▁not ▁a ▁funny ▁and ▁original , ▁( etc . , ▁etc . ) ▁film ; ▁that ▁is ▁an ▁inferior ▁film . ▁xxmaj ▁don ' t ▁waste ▁your ▁money ▁on ▁it ."" ▁xxmaj ▁the ▁film ▁is ▁trashy , ▁and ▁the ▁people ▁in ▁it ▁are ▁embarrassingly ▁inferior ▁trailer ▁trash . ▁xxmaj ▁they ▁are ▁all - too - realistic ally"
1,"▁xxmaj ▁listener ▁is ▁without ▁doubt ▁one ▁of ▁the ▁dullest ▁films ▁i ▁have ▁ever ▁seen . ▁xxmaj ▁there ▁was ▁nothing ▁happening ▁in ▁this ▁film ▁what ▁so ▁ever ▁- ▁i ▁didn ' t ▁care ▁for ▁any ▁of ▁the ▁characters , ▁didn ' t ▁buy ▁in ▁to ▁the ▁whole ▁mystery ▁type ▁plot , ▁didn ' t ▁care ▁how ▁it ▁ended ▁.... nothing . ▁xxmaj ▁there ▁is ▁no ▁comedy , ▁no ▁action , ▁no ▁thrills , ▁no ▁suspense , ▁nothing . ▁xxmaj ▁the ▁highlights","▁listener ▁is ▁without ▁doubt ▁one ▁of ▁the ▁dullest ▁films ▁i ▁have ▁ever ▁seen . ▁xxmaj ▁there ▁was ▁nothing ▁happening ▁in ▁this ▁film ▁what ▁so ▁ever ▁- ▁i ▁didn ' t ▁care ▁for ▁any ▁of ▁the ▁characters , ▁didn ' t ▁buy ▁in ▁to ▁the ▁whole ▁mystery ▁type ▁plot , ▁didn ' t ▁care ▁how ▁it ▁ended ▁.... nothing . ▁xxmaj ▁there ▁is ▁no ▁comedy , ▁no ▁action , ▁no ▁thrills , ▁no ▁suspense , ▁nothing . ▁xxmaj ▁the ▁highlights ▁include"
2,"▁mutated ▁humans ▁who ▁have ▁been ▁outcast ▁by ▁society . ▁xxmaj ▁eventually , ▁they ▁receive ▁a ▁special ▁en vo y ▁from ▁xxmaj ▁earth ▁with ▁an ▁unexpected ▁message . ▁xxmaj ▁the ▁basic ▁problem ▁is ▁that ▁this ▁whole ▁movie ▁could ▁have ▁been ▁summarized ▁into ▁a ▁sentence ▁and ▁making ▁a ▁1 ▁hour ▁movie ▁out ▁of ▁it ▁added ▁nothing . ▁xxmaj ▁what ▁you ▁essentially ▁get ▁is ▁some ▁effectively ▁gross - looking ▁characters ▁with ▁dialog ▁that ▁is ▁so ▁boring ▁you ▁want ▁to ▁blow ▁their ▁ship ▁up","▁humans ▁who ▁have ▁been ▁outcast ▁by ▁society . ▁xxmaj ▁eventually , ▁they ▁receive ▁a ▁special ▁en vo y ▁from ▁xxmaj ▁earth ▁with ▁an ▁unexpected ▁message . ▁xxmaj ▁the ▁basic ▁problem ▁is ▁that ▁this ▁whole ▁movie ▁could ▁have ▁been ▁summarized ▁into ▁a ▁sentence ▁and ▁making ▁a ▁1 ▁hour ▁movie ▁out ▁of ▁it ▁added ▁nothing . ▁xxmaj ▁what ▁you ▁essentially ▁get ▁is ▁some ▁effectively ▁gross - looking ▁characters ▁with ▁dialog ▁that ▁is ▁so ▁boring ▁you ▁want ▁to ▁blow ▁their ▁ship ▁up ▁every"
3,"▁soul ▁of ▁man ▁can ▁be ▁distorted ▁in ▁such ▁a ▁way ▁that , ▁pain ▁and ▁suffering ▁being ▁brought ▁to ▁bear ▁on ▁a ▁fellow ▁human ▁being ▁is ▁in ▁some ▁way ▁satisfying . ▁xxmaj ▁be ▁it ▁mental ▁or ▁physical . ▁i ▁found ▁the ▁film ▁very ▁thought ▁provoking . ▁xxbos ▁i ▁have ▁been ▁hooked ▁on ▁"" gg "" ▁since ▁midway ▁through ▁2001 - 2002 ▁(2 nd ▁season ), ▁when ▁i ▁tuned ▁in ▁to ▁see ▁"" small ville "" ▁10 ▁minutes ▁early . ▁xxmaj ▁thanks","▁of ▁man ▁can ▁be ▁distorted ▁in ▁such ▁a ▁way ▁that , ▁pain ▁and ▁suffering ▁being ▁brought ▁to ▁bear ▁on ▁a ▁fellow ▁human ▁being ▁is ▁in ▁some ▁way ▁satisfying . ▁xxmaj ▁be ▁it ▁mental ▁or ▁physical . ▁i ▁found ▁the ▁film ▁very ▁thought ▁provoking . ▁xxbos ▁i ▁have ▁been ▁hooked ▁on ▁"" gg "" ▁since ▁midway ▁through ▁2001 - 2002 ▁(2 nd ▁season ), ▁when ▁i ▁tuned ▁in ▁to ▁see ▁"" small ville "" ▁10 ▁minutes ▁early . ▁xxmaj ▁thanks ▁to"
4,"▁came ▁to ▁the ▁cinemas , ▁and ▁brought ▁my ▁entire ▁family ▁along . ▁i ▁had ▁already ▁seen ▁xxmaj ▁jim ▁xxmaj ▁carrey ▁in ▁the ▁xxmaj ▁mask , ▁and ▁xxmaj ▁ace ▁xxmaj ▁ventura , ▁and ▁loved ▁him ▁in ▁those ▁films . ▁xxmaj ▁the ▁review ▁of ▁the ▁film ▁was ▁quite ▁good , ▁so ▁i ▁looked ▁forward ▁to ▁this . ▁xxmaj ▁my ▁father ▁wondered ▁if ▁it ▁really ▁was ▁a ▁movie ▁in ▁his ▁taste ▁... ▁and ▁then ▁the ▁movie ▁started . ▁i ▁have ▁never ▁in ▁my","▁to ▁the ▁cinemas , ▁and ▁brought ▁my ▁entire ▁family ▁along . ▁i ▁had ▁already ▁seen ▁xxmaj ▁jim ▁xxmaj ▁carrey ▁in ▁the ▁xxmaj ▁mask , ▁and ▁xxmaj ▁ace ▁xxmaj ▁ventura , ▁and ▁loved ▁him ▁in ▁those ▁films . ▁xxmaj ▁the ▁review ▁of ▁the ▁film ▁was ▁quite ▁good , ▁so ▁i ▁looked ▁forward ▁to ▁this . ▁xxmaj ▁my ▁father ▁wondered ▁if ▁it ▁really ▁was ▁a ▁movie ▁in ▁his ▁taste ▁... ▁and ▁then ▁the ▁movie ▁started . ▁i ▁have ▁never ▁in ▁my ▁movie"
5,"▁or ▁xxmaj ▁ladd ▁and ▁xxmaj ▁lake ) to ▁soften ▁the ▁violent ▁elements . ▁"" kiss ▁xxmaj ▁tomorrow ▁xxmaj ▁goodbye "" ▁( a ▁gem ▁of ▁a ▁noir ▁title ▁if ▁there ▁ever ▁was ▁one , ▁be speaking ▁a ▁bleak , ▁fatalistic ▁vision ) ▁contains ▁no ▁romantic ▁subplot , ▁unless ▁you ▁count ▁the ▁xxmaj ▁cagney ▁character ' s ▁involvement ▁with ▁the ▁rich ▁woman , ▁which ▁is ▁more ▁about ▁greed ▁and ▁rebellion ▁than ▁love . ▁xxmaj ▁this ▁film ▁is ▁a ▁great ▁of ▁example ▁of","▁xxmaj ▁ladd ▁and ▁xxmaj ▁lake ) to ▁soften ▁the ▁violent ▁elements . ▁"" kiss ▁xxmaj ▁tomorrow ▁xxmaj ▁goodbye "" ▁( a ▁gem ▁of ▁a ▁noir ▁title ▁if ▁there ▁ever ▁was ▁one , ▁be speaking ▁a ▁bleak , ▁fatalistic ▁vision ) ▁contains ▁no ▁romantic ▁subplot , ▁unless ▁you ▁count ▁the ▁xxmaj ▁cagney ▁character ' s ▁involvement ▁with ▁the ▁rich ▁woman , ▁which ▁is ▁more ▁about ▁greed ▁and ▁rebellion ▁than ▁love . ▁xxmaj ▁this ▁film ▁is ▁a ▁great ▁of ▁example ▁of ▁pure"
6,"▁xxmaj ▁swann ▁( o ' toole ) ▁out ▁of ▁trouble ▁ . ▁xxmaj ▁by ▁no ▁means ▁an ▁easy ▁task . ▁xxmaj ▁the ▁things ▁they ▁do ▁together ▁are ▁quite ▁out ▁of ▁the ▁ordinary . ▁xxmaj ▁though ▁he ▁is ▁in ▁charge ▁of ▁keeping ▁xxmaj ▁swann ▁out ▁of ▁trouble , ▁he ▁really ▁gets ▁to ▁enjoy ▁his ▁job . ▁xxmaj ▁this ▁film ▁also ▁show ▁the ▁side ▁of ▁the ▁entertainment ▁history ▁we ▁don ' t ▁see , ▁xxmaj ▁swann ▁( o ' toole ▁ )","▁swann ▁( o ' toole ) ▁out ▁of ▁trouble ▁ . ▁xxmaj ▁by ▁no ▁means ▁an ▁easy ▁task . ▁xxmaj ▁the ▁things ▁they ▁do ▁together ▁are ▁quite ▁out ▁of ▁the ▁ordinary . ▁xxmaj ▁though ▁he ▁is ▁in ▁charge ▁of ▁keeping ▁xxmaj ▁swann ▁out ▁of ▁trouble , ▁he ▁really ▁gets ▁to ▁enjoy ▁his ▁job . ▁xxmaj ▁this ▁film ▁also ▁show ▁the ▁side ▁of ▁the ▁entertainment ▁history ▁we ▁don ' t ▁see , ▁xxmaj ▁swann ▁( o ' toole ▁ ) ▁confesses"
7,", ▁it ▁just ▁keeps ▁plodding ▁on . ▁xxmaj ▁christopher ▁xxmaj ▁walken ▁has ▁a ▁part , ▁but ▁it ▁is ▁completely ▁senseless , ▁as ▁is ▁most ▁of ▁the ▁movie . ▁xxmaj ▁this ▁movie ▁had ▁potential , ▁but ▁it ▁looks ▁like ▁some ▁really ▁bad ▁made ▁for ▁xxup ▁tv ▁movie . ▁i ▁would ▁avoid ▁this ▁movie . ▁xxbos ▁"" men ace "" ▁is ▁not ▁funny . ▁xxmaj ▁it ▁tries ▁hard ▁- ▁too ▁hard . ▁but ▁rarely ▁brings ▁a ▁smile . ▁xxmaj ▁there ▁is ▁no","▁it ▁just ▁keeps ▁plodding ▁on . ▁xxmaj ▁christopher ▁xxmaj ▁walken ▁has ▁a ▁part , ▁but ▁it ▁is ▁completely ▁senseless , ▁as ▁is ▁most ▁of ▁the ▁movie . ▁xxmaj ▁this ▁movie ▁had ▁potential , ▁but ▁it ▁looks ▁like ▁some ▁really ▁bad ▁made ▁for ▁xxup ▁tv ▁movie . ▁i ▁would ▁avoid ▁this ▁movie . ▁xxbos ▁"" men ace "" ▁is ▁not ▁funny . ▁xxmaj ▁it ▁tries ▁hard ▁- ▁too ▁hard . ▁but ▁rarely ▁brings ▁a ▁smile . ▁xxmaj ▁there ▁is ▁no ▁acting"
8,"▁pink ▁xxmaj ▁panther . ▁xxmaj ▁this ▁was ▁an ▁xxup ▁rko ▁movie ▁but ▁it ▁did ▁not ▁have ▁the ▁nice ▁airplane ▁logo ▁that ▁xxup ▁rko ▁used ▁to ▁use . ▁i ▁liked ▁xxmaj ▁victor ▁xxmaj ▁mature ▁in ▁xxmaj ▁one ▁xxmaj ▁million , ▁xxup ▁b . c . , ▁and ▁xxmaj ▁sam p son ▁and ▁xxmaj ▁delilah ▁and ▁especially ▁in ▁xxmaj ▁violent ▁xxmaj ▁saturday . ▁xxmaj ▁see ▁if ▁you ▁can ▁find ▁that ▁one . ▁xxmaj ▁he ▁was ▁wonderful ▁in ▁the ▁comedy ▁with ▁xxmaj ▁peter","▁xxmaj ▁panther . ▁xxmaj ▁this ▁was ▁an ▁xxup ▁rko ▁movie ▁but ▁it ▁did ▁not ▁have ▁the ▁nice ▁airplane ▁logo ▁that ▁xxup ▁rko ▁used ▁to ▁use . ▁i ▁liked ▁xxmaj ▁victor ▁xxmaj ▁mature ▁in ▁xxmaj ▁one ▁xxmaj ▁million , ▁xxup ▁b . c . , ▁and ▁xxmaj ▁sam p son ▁and ▁xxmaj ▁delilah ▁and ▁especially ▁in ▁xxmaj ▁violent ▁xxmaj ▁saturday . ▁xxmaj ▁see ▁if ▁you ▁can ▁find ▁that ▁one . ▁xxmaj ▁he ▁was ▁wonderful ▁in ▁the ▁comedy ▁with ▁xxmaj ▁peter ▁xxmaj"


Then we have a convenience method to directly grab a `Learner` from it, using the `AWD_LSTM` architecture.

In [None]:
# Adam with a weight decay of 0.1 as the optimizer factory.
opt_func = partial(Adam, wd=0.1)
# Build the AWD_LSTM language-model learner, tracking accuracy and perplexity,
# and switch it to mixed-precision training in the same expression.
learn = language_model_learner(
    dbunch_lm,
    AWD_LSTM,
    opt_func=opt_func,
    metrics=[accuracy, Perplexity()],
    path=path,
).to_fp16()

In [None]:
# First stage: one epoch of one-cycle training with a maximum learning rate of 2e-2.
# NOTE(review): presumably only the new head is trainable here, since the model is
# explicitly unfrozen before the next training stage -- confirm.
learn.fit_one_cycle(1, 2e-2, moms=(0.8,0.7,0.8))

epoch,train_loss,valid_loss,accuracy,perplexity,time
0,4.426135,3.984901,0.292371,53.779987,07:00


In [None]:
# Checkpoint the learner after the first stage under the name 'stage1'.
learn.save('stage1')

In [None]:
# Reload the 'stage1' checkpoint; the trailing `;` suppresses the cell's output.
learn.load('stage1');

In [None]:
# Second stage: make the whole model trainable, then fine-tune for 10 epochs
# with a 10x smaller maximum learning rate (2e-3) than in the first stage.
learn.unfreeze()
learn.fit_one_cycle(10, 2e-3, moms=(0.8,0.7,0.8))

epoch,train_loss,valid_loss,accuracy,perplexity,time
0,4.163227,3.870354,0.30684,47.959347,07:24
1,4.055693,3.790802,0.316436,44.291908,07:41
2,3.979279,3.729021,0.323357,41.638317,07:22
3,3.919654,3.688891,0.32777,40.000469,07:22
4,3.889432,3.660633,0.330762,38.885933,07:22
5,3.842923,3.637397,0.333315,37.992798,07:26
6,3.813823,3.619074,0.335308,37.303013,07:25
7,3.793213,3.60801,0.336566,36.892574,07:20
8,3.766456,3.60214,0.337257,36.676647,07:22
9,3.759768,3.600955,0.33745,36.633202,07:23


Once we have fine-tuned the pretrained language model to this corpus, we save the encoder since we will use it for the classifier.

In [None]:
# Persist only the encoder as 'finetuned1'; the classifier loads it by this name later.
learn.save_encoder('finetuned1')

## Use it to train a classifier

In [None]:
# Classification needs labels, so only the labelled 'train' and 'test' folders are used
# (this rebinds `texts`, which previously also included 'unsup').
texts = get_files(
    path,
    extensions=['.txt'],
    folders=['train', 'test'],
)

In [None]:
# Split by grandparent folder name: files under 'test' form the validation set.
# NOTE(review): the training folder name is left at the splitter's default -- presumably 'train'.
splits = GrandparentSplitter(valid_name='test')(texts)

For classification, we need to use two sets of transforms: one to tokenize and numericalize the texts, and the other to encode the labels as categories.

In [None]:
# Text pipeline: tokenize, then numericalize with the language model's vocab so that
# token ids match the fine-tuned encoder.
x_tfms = [
    Tokenizer.from_folder(path),
    Numericalize(vocab=dbunch_lm.vocab),
]
# Label pipeline: take the parent folder name as the label and encode it as a category.
dsets = Datasets(
    texts,
    [x_tfms, [parent_label, Categorize()]],
    splits=splits,
    dl_type=SortedDL,
)

In [None]:
# Classifier batch size. This rebinds `bs` (256 for the language model above) and is
# also used below to scale the learning rate.
bs = 64

In [None]:
# Build the classifier loaders; `pad_input_chunk` runs before batching so that texts
# of different lengths can be collated together.
dls = dsets.dataloaders(bs=bs, before_batch=pad_input_chunk)

In [None]:
# Sanity check: display at most two (text, category) samples.
dls.show_batch(max_n=2)

Unnamed: 0,text,category
0,"xxbos * * attention xxmaj spoilers * * \n\n xxmaj first of all , let me say that xxmaj rob xxmaj roy is one of the best films of the 90 's . xxmaj it was an amazing achievement for all those involved , especially the acting of xxmaj liam xxmaj neeson , xxmaj jessica xxmaj lange , xxmaj john xxmaj hurt , xxmaj brian xxmaj cox , and xxmaj tim xxmaj roth . xxmaj michael xxmaj canton xxmaj jones painted a wonderful portrait of the honor and dishonor that men can represent in themselves . xxmaj but alas … \n\n it constantly , and unfairly gets compared to "" braveheart "" . xxmaj these are two entirely different films , probably only similar in the fact that they are both about xxmaj scots in historical xxmaj scotland . xxmaj yet , this comparison frequently bothers me because it seems",pos
1,"xxbos xxmaj by now you 've probably heard a bit about the new xxmaj disney dub of xxmaj miyazaki 's classic film , xxmaj laputa : xxmaj castle xxmaj in xxmaj the xxmaj sky . xxmaj during late summer of 1998 , xxmaj disney released "" kiki 's xxmaj delivery xxmaj service "" on video which included a preview of the xxmaj laputa dub saying it was due out in "" 1 xxrep 3 9 "" . xxmaj it 's obviously way past that year now , but the dub has been finally completed . xxmaj and it 's not "" laputa : xxmaj castle xxmaj in xxmaj the xxmaj sky "" , just "" castle xxmaj in xxmaj the xxmaj sky "" for the dub , since xxmaj laputa is not such a nice word in xxmaj spanish ( even though they use the word xxmaj laputa many times",pos


Then we once again have a convenience function to create a classifier from this `DataLoaders` with the `AWD_LSTM` architecture.

In [None]:
# Same optimizer factory as for the language model: Adam with weight decay 0.1.
opt_func = partial(Adam, wd=0.1)
# AWD_LSTM text classifier with dropout scaled by 0.5, reporting accuracy.
learn = text_classifier_learner(
    dls,
    AWD_LSTM,
    opt_func=opt_func,
    drop_mult=0.5,
    metrics=[accuracy],
    path=path,
)

We load our pretrained encoder.

In [None]:
# Load the encoder saved as 'finetuned1' during language-model fine-tuning, then switch
# to mixed precision with `clip=0.1` (both calls return the learner, so they chain).
learn = learn.load_encoder('finetuned1').to_fp16(clip=0.1)

Then we can train with gradual unfreezing and differential learning rates.

In [None]:
# Base learning rate of 0.1, scaled linearly with the batch size relative to 128
# (0.05 for bs=64). Later cells divide `lr` in place, so re-run this cell first
# when repeating the training stages.
lr = 1e-1 * bs/128

In [None]:
# Stage 1 of gradual unfreezing: one epoch at the base `lr`, before any
# `freeze_to`/`unfreeze` call is made on the classifier.
learn.fit_one_cycle(1, lr, moms=(0.8,0.7,0.8), wd=0.1)

epoch,train_loss,valid_loss,accuracy,time
0,0.328318,0.20065,0.92212,01:08


In [None]:
# Stage 2: `freeze_to(-2)` -- presumably leaves the last two parameter groups trainable.
learn.freeze_to(-2)
# Halve the learning rate in place. Not idempotent: re-running this cell halves it again.
lr /= 2
# Train one epoch with learning rates spread from lr/2.6**4 up to lr across the groups.
learn.fit_one_cycle(1, slice(lr/(2.6**4),lr), moms=(0.8,0.7,0.8), wd=0.1)

epoch,train_loss,valid_loss,accuracy,time
0,0.20812,0.166004,0.93744,01:15


In [None]:
# Stage 3: `freeze_to(-3)` -- presumably leaves the last three parameter groups trainable.
learn.freeze_to(-3)
# Halve the learning rate again, in place. Not idempotent across re-runs of this cell.
lr /= 2
# Train one epoch with learning rates spread from lr/2.6**4 up to lr across the groups.
learn.fit_one_cycle(1, slice(lr/(2.6**4),lr), moms=(0.8,0.7,0.8), wd=0.1)

epoch,train_loss,valid_loss,accuracy,time
0,0.162498,0.154959,0.9424,01:35


In [None]:
# Final stage: make the whole model trainable.
learn.unfreeze()
# Divide the learning rate by 5, in place. Not idempotent across re-runs of this cell.
lr /= 5
# Train two epochs with learning rates spread from lr/2.6**4 up to lr across the groups.
learn.fit_one_cycle(2, slice(lr/(2.6**4),lr), moms=(0.8,0.7,0.8), wd=0.1)

epoch,train_loss,valid_loss,accuracy,time
0,0.1338,0.163456,0.94056,01:34
1,0.095326,0.154301,0.94512,01:34
