In [None]:
from fastai import *        # Quick access to most common functionality
from fastai.text import *   # Quick access to NLP functionality

In [None]:
# Fetch the IMDB sample dataset through fastai's `untar_data`; the value it
# returns (shown as the cell output) is the local folder used by later cells.
path = untar_data(URLs.IMDB_SAMPLE)
path

PosixPath('/home/ubuntu/.fastai/data/imdb_sample')

In [None]:
def open_text(fn:PathOrStr, encoding:str='utf-8')->str:
    """Return the whole content of the text file `fn` as a single string.

    `encoding` is the codec used to decode the file. It is passed explicitly so the
    result does not depend on the locale default of the machine running the notebook.
    """
    with open(fn, 'r', encoding=encoding) as f: return f.read()

In [None]:
def _treat_html(o:str)->str:
    return o.replace('\n','\\n')

def _text2html_table(items:Collection[Collection[str]], widths:Collection[int])->str:
    """Render `items` (rows of cell texts) as the source of an HTML table.

    One `<col>` tag is written per entry of `widths`, each interpreted as a percentage.
    Every row becomes a `<tr>` whose cells are all written as `<th>`, after going
    through `_treat_html`. Cells of length zero are left out of their row.
    """
    parts = ["<table>"]
    parts.extend(f"  <col width='{pct}%'>" for pct in widths)
    for row in items:
        cells = [f"    <th>{_treat_html(cell)}</th>" for cell in row if len(cell) >= 1]
        parts.append("  <tr>\n" + "\n".join(cells) + "\n  </tr>\n")
    parts.append("</table>\n")
    return "".join(parts)

In [None]:
class Text(ItemBase):
    "A text item: `data` holds the ids it was built from, `text` the matching readable text."
    def __init__(self, ids, text): self.data,self.text = ids,text
    def __str__(self):  return str(self.text)

    def show_batch(self, idxs:Collection[int], rows:int, ds:Dataset, figsize:Tuple[int,int]=(9,10))->None:
        """Display samples of `ds` as a two-column HTML table (`text`, `label`).

        Only the first `rows` entries of `idxs` are shown; each index is looked up in
        `ds`, which must return an `(x, y)` pair. `figsize` is accepted but not used here.
        """
        from IPython.display import display, HTML
        header = [['text', 'label']]
        samples = [[str(x), str(y)] for x,y in (ds[i] for i in idxs[:rows])]
        # Column widths are percentages: 90% for the text, 10% for the label.
        display(HTML(_text2html_table(header + samples, [90,10])))

class NumericalizedTextList(ItemList):
    "An `ItemList` that carries a `vocab` and returns its items as `Text` objects."
    def __init__(self, items:Iterator, vocab:Vocab=None, create_func:Callable=None, path:PathOrStr='.', xtra=None):
        "Run the parent initialisation, then remember `vocab` on the instance."
        parent_kwargs = dict(create_func=create_func, path=path, xtra=xtra)
        super().__init__(items, **parent_kwargs)
        self.vocab = vocab

    def new(self, items:Iterator, xtra:Any=None)->'NumericalizedTextList':
        "Delegate to the parent's `new` for `items`, handing over this list's vocab."
        shared_kwargs = dict(items=items, vocab=self.vocab, xtra=xtra)
        return super().new(**shared_kwargs)

    def get(self, i):
        "Return item `i` as a `Text` pairing the stored value with `vocab.textify` of it."
        ids = super().get(i)
        readable = self.vocab.textify(ids)
        return Text(ids, readable)
    
class TokenizedTextList(NumericalizedTextList):
    "A text list whose items are token sequences; `preprocess` replaces them with ids."
    def preprocess(self, vocab:Vocab=None, max_vocab:int=60000, min_freq:int=2):
        """Numericalize the items in place with `vocab`.

        When `vocab` is None a new one is created from the current items with
        `Vocab.create(self.items, max_vocab, min_freq)`. The vocab actually used is
        stored on the instance and recorded in `preprocess_kwargs`.
        """
        # Only build a vocab when none is supplied. Wrapping the call in `ifnone(...)`
        # evaluates `Vocab.create` eagerly, i.e. even when its result is thrown away.
        if vocab is None: vocab = Vocab.create(self.items, max_vocab, min_freq)
        self.vocab = vocab
        self.preprocess_kwargs = {'vocab': self.vocab}
        # NOTE(review): the id lists normally differ in length, so this relies on NumPy
        # building an object array from ragged input - confirm against the NumPy version in use.
        self.items = np.array([self.vocab.numericalize(t) for t in self.items])
    
class TextList(TokenizedTextList):
    "A list of raw texts; `preprocess` tokenizes them before the parent's `preprocess` runs."
    def preprocess(self, tokenizer:Tokenizer=None, chunksize:int=10000, vocab:Vocab=None,
                   max_vocab:int=60000, min_freq:int=2):
        """Tokenize the items chunk by chunk, then call the parent's `preprocess`.

        The tokenizer is resolved with `ifnone(tokenizer, Tokenizer())`. Items are passed
        to `process_all` in slices of `chunksize`, with a progress bar over the slices.
        `vocab`, `max_vocab` and `min_freq` are forwarded unchanged to the parent.
        """
        tokenizer = ifnone(tokenizer, Tokenizer())
        tokenized = []
        chunk_starts = range(0, len(self.items), chunksize)
        for start in progress_bar(chunk_starts, leave=False):
            stop = start + chunksize
            tokenized.extend(tokenizer.process_all(self.items[start:stop]))
        self.items = tokenized
        super().preprocess(vocab, max_vocab, min_freq)
    
class TextFilesList(TextList):
    "A `TextList` built from files: each entry of `items` is opened and its content becomes one text."
    def __init__(self, items:Iterator, create_func:Callable=None, path:PathOrStr='.'):
        "Read every file listed in `items` with `open_text` and initialise the list with the texts."
        texts = [open_text(fn) for fn in items]
        # Keyword arguments are required: the inherited `__init__` takes `vocab` as its
        # second positional parameter, so passing `create_func, path` positionally would
        # bind them to `vocab` and `create_func` respectively.
        super().__init__(texts, create_func=create_func, path=path)

In [None]:
# Build the labelled lists from `texts.csv`: take the texts from the `text` column,
# split them at random into training and validation parts, then label from column 0.
# NOTE(review): no seed is passed to the random split, so the split presumably
# differs between runs - confirm whether it should be fixed for reproducibility.
il = (TextList.from_csv(path, 'texts.csv', create_func=None, col='text')
        .random_split_by_pct()
        .label_from_df(cols=0)
     )

In [None]:
# Run preprocessing with default arguments and keep the result as `sd`.
# Presumably this ends up in `TextList.preprocess` (tokenize, then numericalize) - verify.
sd = il.preprocess()

In [None]:
# Sanity check: the validation and training lists should report the same vocab size.
len(sd.valid.vocab.itos), len(sd.train.vocab.itos)

(5961, 5961)

In [None]:
# Load the raw CSV with pandas and peek at its first rows (label, text, is_valid).
df = pd.read_csv(path/'texts.csv')
df.head()

Unnamed: 0,label,text,is_valid
0,negative,Un-bleeping-believable! Meg Ryan doesn't even ...,False
1,positive,This is a extremely well-made film. The acting...,False
2,negative,Every once in a long while a movie will come a...,False
3,positive,Name just says it all. I watched this movie wi...,False
4,negative,This movie succeeds at being one of the most u...,False


In [None]:
# Attach a test set made of the CSV's `text` column, i.e. the same texts the lists
# above were built from (presumably just to exercise `add_test` - confirm).
sd.add_test(df['text'].values)

LabelLists;
Train: LabelList
y: CategoryList (800 items)
['positive' 'negative' 'positive' 'negative' ... 'positive' 'negative' 'negative' 'positive']
Path: .
x: TextList (800 items)
[list([14, 9, 6, 734, 86, 19, 107, 25, 3, 2, 114, 4, 242, 5, 395, 19, 183, 33, 41, 103, 19, 1145, 3, 2, 270, 9, 61, 4, 112, 4, 163, 10, 9, 682, 449, 11, 2, 25, 4, 67, 199, 33, 145, 2708, 0, 3, 48, 33, 66, 72, 0, 11, 2, 205, 4, 163, 513, 1659, 92, 37, 1550, 3, 2, 409, 205, 87, 44, 352, 324, 23, 2, 242, 3, 18, 24, 10, 9, 246, 8, 117, 4, 90, 48, 9, 66, 61, 142, 8, 6, 829, 43, 2, 38, 1551, 3, 10, 9, 190, 0, 8, 2406, 2, 628, 21, 1015, 4557, 5, 3141, 450, 236, 96, 4, 5, 110, 0, 0, 115, 88, 113, 3142, 3, 48, 9, 57, 3143, 11, 14, 554, 4, 24, 10, 16, 93, 222, 13, 66, 38, 761, 4557, 5, 3141, 11, 2, 4558, 8, 0, 236, 96, 22, 39, 83, 185, 2, 71, 7, 0, 3, 10, 180, 63, 1454, 13, 2, 628, 278, 231, 2, 0, 161, 2, 0, 5, 69, 1146, 201, 8, 4559, 115, 8, 73, 202, 629, 3, 18, 2, 1370, 9, 13, 48, 9, 94, 3717, 5, 0, 11, 2, 829, 5,

In [None]:
# Same sanity check as before, now including the test list: all three sizes should match.
len(sd.valid.vocab.itos), len(sd.train.vocab.itos), len(sd.test.vocab.itos)

(5961, 5961, 5961)

In [None]:
# Turn the preprocessed lists into the `data` object that `show_batch` is called on next.
data = sd.databunch()

In [None]:
# Display a few samples; the output is a table of tokenized texts with their labels.
data.show_batch()

text,label
"this film has a rotting core of xxunk morality , and yet a xxunk sense of justice . so many of the regular xxunk among us would love to "" stick it to the xxup man "" . the "" xxup man "" in this case is represented by several different characters . mr . keller , who xxunk reports to at her office . later , paul xxunk 70 large to mr . xxunk the club owner . and then there is paul 's xxunk officer . there seems to be so much question about this last character 's side story . reviewers point it out as a weakness in an otherwise well crafted xxunk game of xxunk - xxunk between our two protagonists , escalating xxunk - for - xxunk until their lives change dramatically . they are xxunk to each agent of the "" xxup man "" . one or both could be fired , killed , or xxunk if they do n't do as they are told . \n\n the film has a sense of relief at the end . xxunk finally gets laid . her boss is forced out for being a jerk . mr . club owner is a xxunk mess in his own bathroom . they get the $ xxunk and ... they need not worry about xxunk in to the xxunk officer , because xxup his moral weakness leads him to xxunk his xxunk wife in the basement ( or whatever the police found to xxunk him ) . it is a critical xxunk xxunk to the lock xxunk that wound us up so tight . never mind that someone else may get paul 's file later to xxunk his release ; for the moment they are free ! they might even get away with it ! \n\n xxunk ... \n\n they xxup stuck it to xxup the xxup man !",positive
all i have to say is if you do n't like it then there is something wrong with you . plus jessica is just all kinds of hot xxrep 5 ! the only reason you may not like it is because it is set in the future where xxunk has gone to hell . that and you my not like it cause the future they show could very well happen .,positive
"i saw the film and i got xxunk , because the film was foolish and boring . i thought xxunk xxunk xxunk will justify his work but unfortunately he failed and the whole film got spoiled and they spoiled "" xxunk "" . the cast and crew was bad . the whole theater slept while watching the movie some people ran away in the middle . xxunk xxunk 's acting is poor , i thought this movie will be greatest hit of the year but this film will be the greatest flop of the year , sure . nobody did justice to their work , including xxunk xxunk . this film do n't deserve any audiences . i bet that this film will flop . \n\n "" xxup finally xxup this xxup movie xxup sucks """,negative
"i managed to grab a viewing of this with the aid of xxup mst3k , and oh boy , even with the xxunk this movie was excruciatingly bad . imagine someone whose xxunk with a camera could be out done by a monkey . \n\n the highlights ( what little there were ) came from the special effects , which were "" ok "" . the acting for the most part was also "" ok "" ; though nothing special , it was of a higher quality than other b - movies i have seen in the past . \n\n the rest of this movie is xxunk bad , the camera work often looks like they 've just put the camera man on roller xxunk and pushed him along . the story ( if it can be called that ) is so full of holes it 's almost funny , it never really explains why the hell he survived in the first place , or needs human flesh in order to survive . the script is poorly written and the dialogue xxunk on just plane stupid . the climax to movie ( if there is one ) is absolutely laughable . \n\n if you ca n't find the xxup mst3k version , avoid this at all costs .",negative
""" atlantis : the lost xxunk "" was everything the previews xxunk it would be . it is not often you find that . most of the time , the previews show only the best parts and then the rest of the movie is terrible . not so with this one . i was pleased with the original plot , even though the sub - plots were not . the animation was not break through like "" shrek "" but it was good , none the less . the plot and the story line were well presented and there were only a few slow spots in them . this keeps you interested . i found myself enjoying this one . "" atlantis "" gets and keeps your attention . you also have to think a little bit , but not too much . once you think about it a little , you can figure out what needs to happen but you really do n't know for sure how it is going to happen . \n\n the casting was also good . michael j. fox , as xxunk was an excellent choice . his personality fits nicely . the gruff xxunk commander rourke was also well chosen with james xxunk . his character reminded me of his performance in "" xxunk "" which i also liked . i really liked the casting of xxunk christian as xxunk xxunk . her ability to play a no nonsense personality makes the film more interesting . it 's just too bad she is a villain . \n\n over all , definitely worth you while ( 8 out of 10 ) .",positive
"this is one of my all - time favorite films , and while it may move too slowly for some , it 's well worth seeing . a corporate lawyer ( richard xxunk ) is dragged into a case involving "" city "" aborigines , and this is no ordinary case . ok , a man has died but it was n't exactly a normal killing . there has also been a greater than average amount of rain lately , and the atmosphere of most of the film is somewhat claustrophobic & xxunk . the aborigines are xxunk a secret and refuse to xxunk the xxunk . this has a lot to do with white men making xxunk about "" city "" vs. "" xxunk "" aborigines , and of course no xxunk in the big city would practice xxunk ways . xxunk huh . xxunk is having strange dreams and he is somehow the key to what 's happening , although no matter how many times i 've seen this i ca n't quite grasp the exact connection . this is a very eerie and creepy film , and is a fine example of peter xxunk 's ability to create tension out of nothing . the ending is a little xxunk but i take it literally , it 's the xxunk way out and the scariest . 10 out of 10 and highly recommended .",positive
"this movie is witty , watchable and utterly touching . and now often do you get to see jean harlow ( or any actress of this era , for that matter ) give another woman a xxunk punch in the xxunk ? ( twice ! ) \n\n after harlow 's ruby is sent to a xxunk after getting mixed up with gable 's edward hall ( he of that cheesy yet endearing xxunk smile ) , her xxunk becomes all the more complicated when she discovers that she is pregnant , and she 's convinced that this xxunk has abandoned her , but in fact , her love has xxunk him and he comes to see her , despite the fact that he will be arrested , and from the help of a xxunk , are married . \n\n the wonderful relationship that harlow shares with her fellow xxunk is second only to her electric chemistry with gable , who was her most frequent leading man . her cynical character is a perfect match for gable 's smooth - talking xxunk . what 's not to like ? \n\n "" you know , you would n't be a bad looking dame - if it was n't for your face ! "" ruby xxunk remarks to gypsy , her xxunk . "" if you 're going to get that close to me , i 'll have to open the other window ! "" \n\n priceless ! ! !",positive
"douglas sirk directs this over - acted drama about the unhappy xxunk . kyle xxunk xxunk mitch xxunk hudson ) are xxunk friends with different looks on life . kyle is the xxunk son of an oil xxunk ; mitch works for the hadley oil company . both fall in love with the same woman , lucy moore ; but it is kyle that has the means to wow her off her feet and marry her . sister xxunk xxunk to be the town 's xxunk and carrying a xxunk for mitch , who always seems to be the one to clean up the hadley 's xxunk . ambitious with pretension ; a little over the top , but the stars make it a movie to see . i was most impressed with malone . rounding out the cast : robert keith , edward xxunk , john xxunk and robert j. xxunk .",positive
