In [1]:
# This notebook is based on
# https://www.kaggle.com/aidenkim/commonlit-fastai-baseline
# which in turn comes from
# https://www.kaggle.com/aninda/commonlit-fastai
# The goal is to quickly build a baseline model for a web service prototype.
# Without any fine-tuning, it goes straight to exporting the model as a pkl file.

In [2]:
import numpy as np
import pandas as pd
import os
# Print the path of every file found under the competition data directory.
for root, _subdirs, names in os.walk('commonlitreadabilityprize'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

commonlitreadabilityprize/test.csv
commonlitreadabilityprize/train.csv
commonlitreadabilityprize/sample_submission.csv


In [3]:
# The competition CSVs live under commonlitreadabilityprize/.
# Read the training split and peek at its first row.

train_csv = 'commonlitreadabilityprize/train.csv'
train = pd.read_csv(train_csv)
train.head(n=1)

Unnamed: 0,id,url_legal,license,excerpt,target,standard_error
0,c12129c31,,,When the young people returned to the ballroom...,-0.340259,0.464009


In [4]:
# Let's turn off some cumbersome warnings, for now

import warnings
warnings.filterwarnings('ignore')

In [5]:
# Fire up fastai NLP library

from fastai.data.all import *
from fastai.text.all import *

In [6]:
# Build language-model DataLoaders over the excerpts.
# No language model is trained in this notebook: dls_lm is only used further
# down as the source of the vocabulary for the regression DataLoaders.

# from_df tokenizes the 'excerpt' column; the tokenized result is read back
# through the 'text' column, hence ColReader('text').
lm_block = TextBlock.from_df('excerpt', is_lm=True)
text_lm = DataBlock(
    blocks=lm_block,
    get_x=ColReader('text'),
    splitter=RandomSplitter(seed=42),  # fixed seed -> reproducible split
)
dls_lm = text_lm.dataloaders(train, bs=64)
dls_lm.show_batch(max_n=3)

Unnamed: 0,text,text_
0,"xxbos a compass is an instrument used for navigation and xxunk that shows direction relative to the geographic "" cardinal directions "" , or "" points "" . xxmaj usually , a diagram called a compass rose , shows the directions north , south , east , and west as abbreviated xxunk marked on the compass . xxmaj when the compass is used , the rose can be xxunk with the corresponding","a compass is an instrument used for navigation and xxunk that shows direction relative to the geographic "" cardinal directions "" , or "" points "" . xxmaj usually , a diagram called a compass rose , shows the directions north , south , east , and west as abbreviated xxunk marked on the compass . xxmaj when the compass is used , the rose can be xxunk with the corresponding geographic"
1,", the changing light of the xxmaj moon takes about xxunk days to go from dark to bright to dark again . xxmaj that is where the idea of "" month "" came from . xxmaj however , now most months have 30 or 31 days so they fit into one year . xxbos xxmaj it was already candle - light when we reached the hamlet , and i shall never forget","the changing light of the xxmaj moon takes about xxunk days to go from dark to bright to dark again . xxmaj that is where the idea of "" month "" came from . xxmaj however , now most months have 30 or 31 days so they fit into one year . xxbos xxmaj it was already candle - light when we reached the hamlet , and i shall never forget how"
2,". \n xxmaj before long two queer - looking objects came toward me , xxunk along in a sort of hollow log . xxmaj seeing plainly that they were not bears , i felt much afraid of them . xxmaj my mother had often talked to me about some fierce creatures called "" men , "" and had told me always to keep out of their way . xxbos xxmaj the clever","\n xxmaj before long two queer - looking objects came toward me , xxunk along in a sort of hollow log . xxmaj seeing plainly that they were not bears , i felt much afraid of them . xxmaj my mother had often talked to me about some fierce creatures called "" men , "" and had told me always to keep out of their way . xxbos xxmaj the clever xxmaj"


In [7]:
# Recast the training data as a regression problem:
# tokenized excerpt in, numeric 'target' column out.

# Reuse the vocabulary built by the language-model DataLoaders above.
excerpt_block = TextBlock.from_df('excerpt', vocab=dls_lm.vocab)
text_class = DataBlock(
    blocks=(excerpt_block, RegressionBlock),
    get_x=ColReader('text'),
    get_y=ColReader('target'),
    splitter=RandomSplitter(seed=42),  # same seed as the language-model split
)
dls = text_class.dataloaders(train, bs=64)
dls.show_batch(max_n=3)

Unnamed: 0,text,text_
0,"xxbos xxmaj in those days all cloth had to be made at home . xxmaj aunt xxmaj xxunk , xxmaj aunt xxmaj xxunk , and xxmaj aunt xxmaj dilsey were kept busy spinning and weaving to make clothes for the whole plantation . \n xxmaj one day xxmaj mary 's mother said , "" aunt xxmaj dilsey , xxmaj mary needs a new dress , and i want you to weave some cloth at once . xxmaj can you weave some very fine cloth ? "" \n "" yes , ma'am , "" said xxmaj aunt xxmaj dilsey . "" i have some cotton xxmaj i 've been saving to make her a dress . "" \n xxmaj aunt xxmaj dilsey got out the cards and carded the cotton smooth and fine . xxmaj then she fastened a roll of this cotton to the spindle and sent the wheel xxunk",0.0887636393308639
1,"xxbos xxmaj mother xxmaj goat passes by . "" will you go to the fair with me ? "" she asks xxmaj mother xxmaj hen . \n "" i 'm busy with my eggs , "" xxmaj mother xxmaj hen answers . xxmaj one , two , three , four , five , six . \n xxmaj six round eggs ! xxmaj mother xxmaj pig passes by . "" will you go to the fair with me ? "" she asks . "" i 'm busy with my eggs , "" answers xxmaj mother xxmaj hen . xxmaj one , two , three , four , five , six . xxmaj six round eggs ! xxmaj mother xxmaj cow passes by . "" will you go to the fair with me ? "" she asks xxmaj mother xxmaj hen . "" i 'm busy with my eggs , "" answers",0.6820314526557922
2,"xxbos xxmaj the xxmaj battle of xxmaj xxunk is considered the turning point of the xxmaj american xxmaj revolution . xxmaj the battle was fought in late 1 xxrep 3 7 . xxup i. xxmaj it was actually two xxunk : the xxmaj battle of xxmaj xxunk 's xxmaj farm ( september 19 ) and the xxmaj battle of xxmaj xxunk xxmaj heights ( october 7 ) . xxmaj the xxmaj americans were led by xxmaj general xxmaj xxunk xxmaj gates . xxmaj the xxmaj british were led by xxmaj general xxmaj john xxmaj burgoyne . xxmaj on xxmaj october 17 xxmaj burgoyne xxunk his army of nearly 6 , xxrep 3 0 xxmaj british soldiers . xxmaj the xxmaj american victory helped convince xxmaj france to come to the aid the xxmaj continental xxmaj army . xxmaj it also helped them recognize the xxmaj united xxmaj states . \n",-0.8143802881240845


In [8]:
# Start with an AWD-LSTM text learner on the regression DataLoaders,
# tracking RMSE on the validation set.
# TODO: move to BERT

learn = text_classifier_learner(dls, arch=AWD_LSTM, metrics=rmse)

In [9]:
# Train for 2 epochs with the one-cycle policy.
# The peak learning rate (2e-2) is an arbitrary, untuned choice.
# TODO: train for more epochs with a properly tuned learning rate

learn.fit_one_cycle(n_epoch=2, lr_max=2e-2)

epoch,train_loss,valid_loss,_rmse,time
0,1.061295,0.594067,0.770757,04:54
1,0.680838,0.438433,0.662143,04:51


In [10]:
# Serialize the trained learner to model_lstm.pkl so it can be loaded elsewhere.

learn.export(fname='model_lstm.pkl')

In [11]:
# Rebind `learn` to the learner restored from the exported file, so the
# checks below run against the exported artifact rather than the in-memory one.
# NOTE: load_learner unpickles the file -- only load .pkl files you trust.

learn = load_learner(fname='model_lstm.pkl')

In [12]:
# Let's briefly test the learner with custom data

def how_readable(sentences, learner=None):
    """Print the predicted readability score next to each sentence.

    Args:
        sentences: An iterable of strings to score. A single bare string is
            treated as one sentence rather than being iterated per character.
        learner: Any object exposing ``predict(text)``. When omitted, the
            notebook-level ``learn`` is used, so existing calls keep working.

    Returns:
        None. One line per sentence is printed as ``<score> : <sentence>``.
    """
    # Look up the global only when no learner is passed, so the function can
    # be used (and tested) without relying on notebook state.
    model = learn if learner is None else learner

    # Guard against how_readable('some text'), which would otherwise score
    # every single character separately.
    if isinstance(sentences, str):
        sentences = [sentences]

    for sentence in sentences:
        # predict() returns a tuple; [0][0] picks the single predicted value.
        score = model.predict(sentence)[0][0]
        print(score, ':', sentence)

In [15]:
# Each sentence should receive a different score:
# the more readable a sentence is, the higher its predicted score should be.

sentences = [
    'I am very happy',
    'I am full of happiness',
    'The number of unemployment-benefit recipients is falling at a faster rate',
]

how_readable(sentences)

-0.22363343834877014 : I am very happy


-0.8798486590385437 : I am full of happiness


-1.3434250354766846 : The number of unemployment-benefit recipients is falling at a faster rate


In [14]:
# Luckily, this quick-and-dirty model runs without breaking.
# Now let's move the trained model into a web page.