In [1]:
!pip install -Uqq fastai

In [2]:
from fastai import *
from fastai.text.all import *
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Markdown

In [3]:
# Download the data
# `path` is reused by every later cell that builds DataLoaders.
path = untar_data(URLs.IMDB)
# List the top-level entries of the dataset directory.
list(path.ls())

In [4]:
# Render the dataset's README as Markdown.
# `read_text()` opens, reads and closes the file in one call, so no file
# handle is left open (the previous `Path.open(...).read()` never closed it).
Markdown((path/'README').read_text())

In [5]:
# Create DataLoaders for our language model.
# `valid_pct` must be given here: without it `from_folder` splits by the
# grandparent folder names `train`/`valid`, and this dataset is later read with
# valid='test' (see the classifier DataLoaders), i.e. it has no `valid` folder,
# which would leave the language model with an empty validation set.
# With `valid_pct=0.1` a random 10% of all text files found under `path` is
# held out for validation and the rest is used for training.
lm_dls = TextDataLoaders.from_folder(path, is_lm=True, valid_pct=0.1)

In [6]:
# Show one batch of the language-model data (at most 3 samples).
lm_dls.show_batch(max_n=3)

In [7]:
# Pull a single batch from the language-model DataLoaders;
# x and y are compared against each other a few cells below.
x, y = lm_dls.one_batch()

In [8]:
# First item of the x batch.
x[0]

In [9]:
# First item of the y batch.
y[0]

In [10]:
# Check, for the first item of the batch, that y is x shifted by one position:
# x[0][1:] and y[0][:-1] must be element-wise identical.
(x[0][1:] == y[0][:-1]).all()

In [11]:
# Build the language-model learner: AWD_LSTM architecture on the LM DataLoaders,
# starting from pretrained weights (pretrained=True).
lm_learn = language_model_learner(lm_dls, AWD_LSTM, pretrained=True)

In [12]:
# Display the underlying model of the language-model learner.
lm_learn.model

In [13]:
# Generate 10 words from a movie-review prompt BEFORE fine-tuning.
# The pretrained model was trained on wikitext, so it can produce
# semi-sensible sentences, but they will likely have nothing to do
# with movies.
lm_learn.predict('This movie was terrible, ', n_words=10)

In [15]:
# Learning rate passed to the language-model training call below.
LR = 1e-3

In [16]:
# Train the language model on the IMDB text for 1 epoch at learning rate LR.
lm_learn.fit_one_cycle(1, LR)

In [17]:
# Same prompt and length as before training, so the two generations can be compared.
lm_learn.predict('This movie was terrible, ', n_words=10)

In [18]:
# Save only the encoder of the fine-tuned language model (not the whole model)
# under the name 'lm_encoder'; the classifier loads it later via load_encoder.
lm_learn.save_encoder('lm_encoder')

In [19]:
# DataLoaders for the sentiment classifier: the `train` folder is used for
# training and the `test` folder for validation.
# The vocabulary of the language-model DataLoaders is reused so that token
# indices line up with the encoder we fine-tuned above.
dls = TextDataLoaders.from_folder(
    path,
    train='train',
    valid='test',
    text_vocab=lm_dls.vocab,
)

In [20]:
# Show up to 3 samples from one classification batch.
dls.show_batch(max_n=3)

In [21]:
# Training callbacks for the classifier, both with their library defaults:
# one for early stopping and one for saving the model during training.
cbs = [EarlyStoppingCallback(), SaveModelCallback()]

In [22]:
# Sentiment classifier: AWD_LSTM on the classification DataLoaders, reporting
# accuracy and error rate, with the callbacks defined in the previous cell.
learn = text_classifier_learner(
    dls,
    AWD_LSTM,
    metrics=[accuracy, error_rate],
    cbs=cbs,
)

In [23]:
# Load the encoder weights saved as 'lm_encoder' after fine-tuning the
# language model on IMDB, replacing the classifier's encoder weights.
learn = learn.load_encoder('lm_encoder')

In [24]:
# Fine-tune the classifier for up to 10 epochs (the early-stopping callback may
# end it sooner); the learning rate is passed by keyword, like the baseline runs.
learn.fine_tune(10, base_lr=1e-3)

In [25]:
# Sanity check on an obviously negative review.
learn.predict('This movie was horrible!')

In [26]:
# Sanity check on an obviously positive review.
learn.predict('This movie was great!')

In [27]:
# Persist the fine-tuned classifier under the name 'clf_fine_tuned'.
learn.save('clf_fine_tuned')

In [28]:
# Build an interpretation object from the trained classifier; it drives the
# confusion-matrix and top-losses views in the following cells.
interp = ClassificationInterpretation.from_learner(learn)

In [36]:
# Confusion matrix of the classifier's predictions.
interp.plot_confusion_matrix()

In [39]:
# Show the 10 top-loss examples.
interp.plot_top_losses(10)

In [30]:
# New callback instances for the next learner (rebinds `cbs`).
# NOTE(review): presumably recreated so the new learner does not share
# callback state with the previous one — confirm.
cbs = [
    EarlyStoppingCallback(),
    SaveModelCallback()
]

In [31]:
# Baseline 1: same architecture, data and metrics as `learn`, but created
# without pretrained weights (pretrained=False) and switched to fp16.
control = text_classifier_learner(
    dls,
    AWD_LSTM,
    pretrained=False,
    metrics=[accuracy, error_rate],
    cbs=cbs,
).to_fp16()

In [32]:
# Train the non-pretrained baseline: 2 epochs at base_lr=1e-3.
# Note that the main classifier above is given 10 epochs, so the two runs are
# not trained for the same budget.
# NOTE(review): `fine_tune` is normally aimed at pretrained models; for
# pretrained=False a plain `fit_one_cycle` may be the fairer baseline — confirm.
control.fine_tune(2, base_lr=1e-3)

In [33]:
# New callback instances for the next learner (rebinds `cbs` again).
# NOTE(review): presumably recreated so the new learner does not share
# callback state with the previous one — confirm.
cbs = [
    EarlyStoppingCallback(),
    SaveModelCallback()
]

In [34]:
# Baseline 2: pretrained weights (pretrained=True) but WITHOUT loading the
# IMDB-fine-tuned encoder. This rebinds `control`, so the previous baseline
# learner is no longer reachable under that name.
control = text_classifier_learner(
    dls,
    AWD_LSTM,
    pretrained=True,
    metrics=[accuracy, error_rate],
    cbs=cbs,
).to_fp16()

In [35]:
# Fine-tune the pretrained baseline: 2 epochs at base_lr=1e-3
# (same settings as the non-pretrained baseline; the main classifier got 10 epochs).
control.fine_tune(2, base_lr=1e-3)