In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# Any results you write to the current directory are saved as output.

In [None]:
from fastai.text import *

In [None]:
# Values passed as `bs=` and `bptt=` to the databunch / load_data calls in this notebook.
bs = 128
bptt = 80

Download IMDB data

In [None]:
# Fetch the IMDB dataset; `path` is what later cells pass to from_folder / load_data.
path = untar_data(URLs.IMDB)

## Language Model data collection

Load only text column data as a databunch for language model and save it

In [None]:
# Build the language-model databunch from the IMDB folders, preview it, then save it.
data_lm = (
    TextList.from_folder(path)
    # restrict to the 'train', 'test' and 'unsup' folders
    .filter_by_folder(include=['train', 'test', 'unsup'])
    # random 10% split
    .split_by_rand_pct(0.1)
    # label for language modelling (no class labels involved)
    .label_for_lm()
    .databunch(bs=bs, bptt=bptt)
)
data_lm.show_batch()
data_lm.save('data_lm.pkl')

Load LM data from pickle file (forward model)

In [None]:
# Reload the saved language-model databunch (default, forward direction) and preview a batch.
data_lm = load_data(
    path,
    'data_lm.pkl',
    bs=bs,
    bptt=bptt,
)
data_lm.show_batch()

LM Data for backward model

In [None]:
# Same pickle as the forward databunch, loaded with backwards=True for the backward model.
data_bwd = load_data(
    path,
    'data_lm.pkl',
    bs=bs,
    bptt=bptt,
    backwards=True,
)
data_bwd.show_batch()

## Fine-tuning the forward Language model

AWD-LSTM/QRNN inspired by https://arxiv.org/abs/1708.02182

In [None]:
# Forward language-model learner on the AWD_LSTM architecture, switched to fp16 with clip=0.1.
learn = language_model_learner(data_lm, AWD_LSTM).to_fp16(clip=0.1)

In [None]:
# Display the learner's underlying model as the cell output.
learn.model

Fitting the model for one epoch takes about 18 minutes.

In [None]:
%%time
learn.fit_one_cycle(1, 1e-2, moms=(0.8,0.7), wd=0.1)

In [None]:
%%time
learn.unfreeze()
learn.fit_one_cycle(10, 1e-3, moms=(0.8,0.7), wd=0.1)

Save the fine-tuned encoder so the classifier can load it later. (Further below, the labelled data is loaded into a new databunch that is passed this language model's vocabulary.)

In [None]:
# `learn` is still the forward language model here, so save its encoder as
# 'fwd_enc' -- the name the forward classifier passes to load_encoder later.
# (It was previously saved as 'bwd_enc', which the backward model overwrites.)
learn.save_encoder('fwd_enc')

## Fine-tuning the backward Language model

In [None]:
# Backward language-model learner: same architecture, built on the backwards databunch.
# Note: this rebinds `learn`, replacing the forward language model.
learn = language_model_learner(data_bwd, AWD_LSTM).to_fp16(clip=0.1)

In [None]:
%%time
learn.fit_one_cycle(1, 1e-2, moms=(0.8,0.7), wd=0.1)

In [None]:
%%time
learn.unfreeze()
learn.fit_one_cycle(10, 1e-3, moms=(0.8,0.7), wd=0.1)

In [None]:
# Save the backward language model's encoder under the name 'bwd_enc';
# the backward classifier loads it by that name via load_encoder.
learn.save_encoder('bwd_enc')

## Classification data collection

In [None]:
# Build the classification databunch one stage at a time, reusing the
# language model's vocabulary.
clas_items = TextList.from_folder(path, vocab=data_lm.vocab)
# 'test' serves as the validation folder; the split keeps only train/test,
# so no separate folder filter is needed.
clas_split = clas_items.split_by_folder(valid='test')
# Labels come from the enclosing folder name, restricted to 'neg' / 'pos'.
clas_labelled = clas_split.label_from_folder(classes=['neg', 'pos'])
data_clas = clas_labelled.databunch(bs=bs)

data_clas.save('data_clas.pkl')

Load classification data from pickle file (forward model)

In [None]:
# Reload the saved classification databunch (forward direction) and preview a batch.
data_clas = load_data(
    path,
    'data_clas.pkl',
    bs=bs,
)
data_clas.show_batch()

Load classification data from pickle file (backward model)

In [None]:
# Same pickle as the forward classification data, loaded with backwards=True.
data_clas_bwd = load_data(
    path,
    'data_clas.pkl',
    bs=bs,
    backwards=True,
)
data_clas_bwd.show_batch()

## Fine-tuning the forward classifier

In [None]:
# Forward classifier: created with pretrained=False because the encoder weights
# are loaded from the saved 'fwd_enc' encoder instead.
learn = text_classifier_learner(
    data_clas,
    AWD_LSTM,
    drop_mult=0.5,
    pretrained=False,
)
learn.load_encoder('fwd_enc')


Then we train the model using gradual unfreezing (starting with everything but the classification head frozen, then unfreezing one layer group at a time until the whole model is training) and differential learning rates (layer groups closer to the input get a lower learning rate).

In [None]:
# Base learning rate; the training cells below divide it in place at each stage.
lr = 5e-2

In [None]:
%%time
learn.fit_one_cycle(1, lr, moms=(0.8,0.7), wd=0.1)

In [None]:
%%time
learn.freeze_to(-2)
lr /= 2
learn.fit_one_cycle(1, slice(lr/(2.6**4),lr), moms=(0.8,0.7), wd=0.1)

In [None]:
%%time
learn.freeze_to(-3)
lr /= 2
learn.fit_one_cycle(1, slice(lr/(2.6**4),lr), moms=(0.8,0.7), wd=0.1)

In [None]:
%%time
learn.unfreeze()
lr /= 5
learn.fit_one_cycle(2, slice(lr/(2.6**4),lr), moms=(0.8,0.7), wd=0.1)

In [None]:
# Save the trained forward classifier under the name 'fwd_clas'.
learn.save('fwd_clas')

## Fine-tuning the backward classifier

In [None]:
# Backward classifier: created with pretrained=False because the encoder weights
# are loaded from the saved 'bwd_enc' encoder instead.
learn_bwd = text_classifier_learner(
    data_clas_bwd,
    AWD_LSTM,
    drop_mult=0.5,
    pretrained=False,
)
learn_bwd.load_encoder('bwd_enc')

In [None]:
%%time
learn_bwd.fit_one_cycle(1, lr, moms=(0.8,0.7), wd=0.1)

In [None]:
%%time
learn_bwd.freeze_to(-2)
lr /= 2
learn_bwd.fit_one_cycle(1, slice(lr/(2.6**4),lr), moms=(0.8,0.7), wd=0.1)

In [None]:
%%time
learn_bwd.freeze_to(-3)
lr /= 2
learn_bwd.fit_one_cycle(1, slice(lr/(2.6**4),lr), moms=(0.8,0.7), wd=0.1)

In [None]:
%%time
learn_bwd.unfreeze()
lr /= 5
learn_bwd.fit_one_cycle(2, slice(lr/(2.6**4),lr), moms=(0.8,0.7), wd=0.1)

In [None]:
# Save the trained backward classifier under the name 'bwd_clas'.
learn_bwd.save('bwd_clas')

## Ensembling the two models

In [None]:
# Predictions and labels from the forward classifier; ordered=True so they can
# be combined element-wise with the backward model's predictions.
pred_fwd, lbl_fwd = learn.get_preds(ordered=True)

In [None]:
# Predictions and labels from the backward classifier, requested with the same
# ordered=True setting as the forward ones.
pred_bwd, lbl_bwd = learn_bwd.get_preds(ordered=True)

In [None]:
# Ensemble by averaging the forward and backward predictions element-wise.
pred_sum = pred_fwd + pred_bwd
final_pred = pred_sum / 2

In [None]:
# Score the averaged (ensemble) predictions against the labels returned with
# the forward predictions. The previous argument `pred` was never defined.
accuracy(final_pred, lbl_fwd)

1. [fastai_text](https://docs.fast.ai/text.html)
1. [ULMFit example](https://github.com/fastai/fastai/blob/master/examples/ULMFit.ipynb)