In [None]:
import os

from fastai.text.all import *

# Classification - using Pre-Trained Language Model

## Load Data

In [None]:
# Root directory for dataset archives and extracted data.
# Set the DATASETS_DIR environment variable to relocate it on another machine;
# without it the original location is used, so existing runs are unaffected.
base = Path(os.environ.get('DATASETS_DIR', '/home/harpreet/Insync/google_drive_shaannoor/datasets'))

In [None]:
# Display the URL of the IMDB archive that is downloaded in the next cell.
URLs.IMDB

'https://s3.amazonaws.com/fast-ai-nlp/imdb.tgz'

In [None]:
# Fetch the IMDB dataset: the `archive`/`data` arguments point inside `base`,
# and the returned dataset folder (shown below) is kept in `path`.
path = untar_data(
    URLs.IMDB,
    archive=base/'archive',
    data=base/'data',
)
path

Path('/home/harpreet/Insync/google_drive_shaannoor/datasets/data/imdb')

# Transfer Learning - Pretrained language model for Classification task

In [None]:
# Baseline: classify reviews starting from the AWD_LSTM architecture, without
# any domain adaptation of the language model.
# The 'test' folder serves as the validation set.
dls = TextDataLoaders.from_folder(path, valid='test')
learn = text_classifier_learner(
    dls,
    AWD_LSTM,
    drop_mult=0.5,
    metrics=accuracy,
)
# The output shows one initial epoch followed by the 4 requested epochs.
learn.fine_tune(epochs=4, base_lr=1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.466856,0.390369,0.82604,02:18


epoch,train_loss,valid_loss,accuracy,time
0,0.310698,0.269283,0.8834,04:48
1,0.242144,0.204152,0.92148,04:49
2,0.180471,0.185548,0.93132,04:50
3,0.144198,0.190639,0.93116,04:49


In [None]:
# Sanity-check the classifier on a single sentence; the output shows the
# predicted label, its class index and the per-class probabilities.
learn.predict("I really liked that movie!")

('pos', TensorText(1), TensorText([3.5822e-04, 9.9964e-01]))

## Domain Adaptation and Transfer Learning 
- Step 1: Fine-tune a language model using data from your domain (domain adaptation)
- Step 2: Use the fine-tuned language model for the classification task (transfer learning)

### Fine-Tuning the Language Model

In [None]:
# File getter for the language-model data: text files from the 'train', 'test'
# and 'unsup' folders (presumably 'unsup' holds unlabelled reviews — confirm).
get_imdb = partial(
    get_text_files,
    folders=['train', 'test', 'unsup'],
)

In [None]:
# Language-model DataLoaders over every file returned by get_imdb, holding out
# a random 10% for validation.
# NOTE(review): the splitter is unseeded, so the split differs between runs.
dls_lm = DataBlock(
    blocks=TextBlock.from_folder(path, is_lm=True),
    get_items=get_imdb,
    splitter=RandomSplitter(valid_pct=0.1),
).dataloaders(path, path=path, bs=128, seq_len=80)

In [None]:
# Preview two samples; in the output the target column (text_) is the input
# text shifted by one token.
dls_lm.show_batch(max_n=2)

Unnamed: 0,text,text_
0,"xxbos xxmaj detective xxmaj barney xxmaj caine ( george xxup c. xxmaj scott ) gets involved in an increasingly twist - laden plot that begins when an old friend of his is found murdered . xxmaj as it progresses , he travels to xxmaj germany , speaks to a variety of characters , and hooks up with a grieving young woman ( marthe xxmaj keller ) . xxmaj the story actually began back at the end of xxmaj world xxmaj","xxmaj detective xxmaj barney xxmaj caine ( george xxup c. xxmaj scott ) gets involved in an increasingly twist - laden plot that begins when an old friend of his is found murdered . xxmaj as it progresses , he travels to xxmaj germany , speaks to a variety of characters , and hooks up with a grieving young woman ( marthe xxmaj keller ) . xxmaj the story actually began back at the end of xxmaj world xxmaj war"
1,"of the movie i had decided that this was a good thing , when seven i though the ' relationship ' guff was a bad thing . \n\n xxmaj for an action film there is also plenty of gratifying gory action , especially a couple of epic battle scenes between the platoon and hordes of xxmaj shogun era warriors . xxmaj the makers of the movie have ensured that as many deaths as possible are bloody and , lets face","the movie i had decided that this was a good thing , when seven i though the ' relationship ' guff was a bad thing . \n\n xxmaj for an action film there is also plenty of gratifying gory action , especially a couple of epic battle scenes between the platoon and hordes of xxmaj shogun era warriors . xxmaj the makers of the movie have ensured that as many deaths as possible are bloody and , lets face it"


In [None]:
# Language-model learner on the IMDB text, tracking next-token accuracy and
# perplexity; to_fp16() switches training to mixed precision.
learn = language_model_learner(
    dls_lm,
    AWD_LSTM,
    drop_mult=0.3,
    metrics=[accuracy, Perplexity()],
).to_fp16()

In [None]:
# First epoch with a one-cycle schedule peaking at 2e-2.
# The model is only unfrozen in a later cell, so presumably just the
# non-frozen layers are trained here — confirm.
learn.fit_one_cycle(n_epoch=1, lr_max=2e-2)

epoch,train_loss,valid_loss,accuracy,perplexity,time
0,3.986422,3.899642,0.300608,49.384743,28:10


#### Saving and Loading Models

In [None]:
# Show the learner's base directory; the checkpoint saved in the next cell is
# written to its models/ subfolder (see that cell's output).
learn.path

Path('/home/harpreet/Insync/google_drive_shaannoor/datasets/data/imdb')

In [None]:
# Checkpoint the model after the first epoch; the output is the path of the
# written .pth file.
learn.save('1epoch')

Path('/home/harpreet/Insync/google_drive_shaannoor/datasets/data/imdb/models/1epoch.pth')

In [None]:
# Restore the '1epoch' checkpoint saved above, so the longer training run
# below starts from it.
learn = learn.load('1epoch')

In [None]:
# Unfreeze the model and keep fine-tuning the language model for 10 epochs
# with a lower peak learning rate (2e-3).
learn.unfreeze()
learn.fit_one_cycle(n_epoch=10, lr_max=2e-3)

epoch,train_loss,valid_loss,accuracy,perplexity,time
0,3.775334,3.679386,0.32167,39.622074,31:17
1,3.70265,3.64358,0.326642,38.228447,32:24
2,3.638296,3.606509,0.331719,36.837238,31:53
3,3.570001,3.580097,0.334851,35.87701,32:04
4,3.505574,3.558936,0.337656,35.125797,31:18
5,3.435345,3.545428,0.339654,34.654507,30:46
6,3.368109,3.534763,0.341558,34.286892,31:03
7,3.294571,3.53224,0.342481,34.200504,31:09
8,3.23767,3.534461,0.342661,34.27655,31:06
9,3.229012,3.538148,0.342501,34.403133,31:18


In [None]:
# Save the encoder of the fine-tuned language model under the name
# 'finetuned'; the classifier below restores it with load_encoder('finetuned').
learn.save_encoder('finetuned')

### Transfer Learning using Fine-Tuned Language Model

In [None]:
# Classification DataLoaders: labelled reviews from 'train' and 'test', with
# 'test' as the validation set and the parent folder name as the label.
# The text block reuses the language model's vocab (dls_lm.vocab) so the
# classifier's token ids line up with the fine-tuned encoder.
dls_clas = DataBlock(
    blocks=(
        TextBlock.from_folder(path, vocab=dls_lm.vocab),
        CategoryBlock,
    ),
    get_y=parent_label,
    get_items=partial(get_text_files, folders=['train', 'test']),
    splitter=GrandparentSplitter(valid_name='test'),
).dataloaders(path, path=path, bs=128, seq_len=72)

In [None]:
# Preview three labelled reviews (tokenised text and its category).
dls_clas.show_batch(max_n=3)

Unnamed: 0,text,category
0,"xxbos xxmaj match 1 : xxmaj tag xxmaj team xxmaj table xxmaj match xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley vs xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley started things off with a xxmaj tag xxmaj team xxmaj table xxmaj match against xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit . xxmaj according to the rules of the match , both opponents have to go through tables in order to get the win . xxmaj benoit and xxmaj guerrero heated up early on by taking turns hammering first xxmaj spike and then xxmaj bubba xxmaj ray . a xxmaj german xxunk by xxmaj benoit to xxmaj bubba took the wind out of the xxmaj dudley brother . xxmaj spike tried to help his brother , but the referee restrained him while xxmaj benoit and xxmaj guerrero",pos
1,"xxbos * * attention xxmaj spoilers * * \n\n xxmaj first of all , let me say that xxmaj rob xxmaj roy is one of the best films of the 90 's . xxmaj it was an amazing achievement for all those involved , especially the acting of xxmaj liam xxmaj neeson , xxmaj jessica xxmaj lange , xxmaj john xxmaj hurt , xxmaj brian xxmaj cox , and xxmaj tim xxmaj roth . xxmaj michael xxmaj canton xxmaj jones painted a wonderful portrait of the honor and dishonor that men can represent in themselves . xxmaj but alas … \n\n it constantly , and unfairly gets compared to "" braveheart "" . xxmaj these are two entirely different films , probably only similar in the fact that they are both about xxmaj scots in historical xxmaj scotland . xxmaj yet , this comparison frequently bothers me because it seems",pos
2,"xxbos xxmaj by now you 've probably heard a bit about the new xxmaj disney dub of xxmaj miyazaki 's classic film , xxmaj laputa : xxmaj castle xxmaj in xxmaj the xxmaj sky . xxmaj during late summer of 1998 , xxmaj disney released "" kiki 's xxmaj delivery xxmaj service "" on video which included a preview of the xxmaj laputa dub saying it was due out in "" 1 xxrep 3 9 "" . xxmaj it 's obviously way past that year now , but the dub has been finally completed . xxmaj and it 's not "" laputa : xxmaj castle xxmaj in xxmaj the xxmaj sky "" , just "" castle xxmaj in xxmaj the xxmaj sky "" for the dub , since xxmaj laputa is not such a nice word in xxmaj spanish ( even though they use the word xxmaj laputa many times",pos


In [None]:
# Classifier learner on the labelled data, in mixed precision.
# NOTE: this rebinds `learn`, which until now held the language-model learner.
learn = text_classifier_learner(
    dls_clas,
    AWD_LSTM,
    drop_mult=0.5,
    metrics=accuracy,
).to_fp16()

In [None]:
# Load the encoder saved as 'finetuned' by the language-model learner above
# into the classifier.
learn = learn.load_encoder('finetuned')

### Fine-Tuning the Classifier

In [None]:
# Stage 1 of gradual unfreezing: one epoch at a peak LR of 2e-2, before any
# additional layer groups are unfrozen in the following cells.
learn.fit_one_cycle(n_epoch=1, lr_max=2e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.248818,0.180218,0.93148,00:34


In [None]:
# Stage 2: freeze_to(-2), then train one epoch.
# The LR is a slice from 1e-2/(2.6**4) up to 1e-2, i.e. a range of rates
# rather than a single value (presumably lower rates for earlier layers —
# confirm against the fastai docs).
learn.freeze_to(-2)
learn.fit_one_cycle(n_epoch=1, lr_max=slice(1e-2/(2.6**4), 1e-2))

epoch,train_loss,valid_loss,accuracy,time
0,0.223589,0.166465,0.9378,00:40


In [None]:
# Stage 3: freeze_to(-3), then one epoch with the peak LR halved to 5e-3.
# The slice keeps the same 2.6**4 ratio between its lower and upper bound.
learn.freeze_to(-3)
learn.fit_one_cycle(n_epoch=1, lr_max=slice(5e-3/(2.6**4), 5e-3))

epoch,train_loss,valid_loss,accuracy,time
0,0.193567,0.153479,0.94272,00:54


In [None]:
# Final stage: unfreeze the whole model and train two more epochs with the
# LR slice topping out at 1e-3 (same 2.6**4 ratio between its bounds).
learn.unfreeze()
learn.fit_one_cycle(n_epoch=2, lr_max=slice(1e-3/(2.6**4), 1e-3))

epoch,train_loss,valid_loss,accuracy,time
0,0.157099,0.149862,0.94484,01:08
1,0.147127,0.149495,0.94584,01:08
