In [None]:
from fastai import *        # Quick access to most common functionality
from fastai.text import *   # Quick access to NLP functionality

In [None]:
# Resolve the local copy of the IMDB sample dataset.
# NOTE(review): `untar_data` presumably downloads and extracts the archive when it is missing - confirm.
path = untar_data(URLs.IMDB_SAMPLE)
# Bare expression so the notebook echoes the resulting path.
path

PosixPath('/home/jhoward/.fastai/data/imdb_sample')

In [None]:
def open_text(fn:PathOrStr, enc:str='utf-8'):
    """Return the full content of the text file `fn` as a single string.

    The file is decoded with `enc` (UTF-8 by default) instead of the
    platform's locale encoding, so the same file reads identically on
    every machine.
    """
    with open(fn, 'r', encoding=enc) as f: return f.read()

In [None]:
def _treat_html(o:str)->str:
    """Make `o` safe to embed in an HTML table cell.

    `&`, `<` and `>` are escaped so the text cannot be interpreted as markup,
    and real newlines are replaced by the two-character sequence `\\n` so they
    remain visible in the rendered cell.
    """
    from html import escape
    # Escape first: the newline substitution only introduces a backslash and an
    # `n`, neither of which needs HTML escaping.
    return escape(o, quote=False).replace('\n','\\n')

def _text2html_table(items:Collection[Collection[str]], widths:Collection[int])->str:
    """Render `items` (one inner collection per row) as an HTML table string.

    One `<col>` element is emitted per entry of `widths` (a percentage), and
    every non-empty cell is written as a `<th>` after going through `_treat_html`.
    """
    pieces = ["<table>"]
    pieces.extend(f"  <col width='{pct}%'>" for pct in widths)
    for row in items:
        # Empty cells are skipped rather than rendered as empty `<th>` elements.
        cells = [f"    <th>{_treat_html(cell)}</th>" for cell in row if len(cell) >= 1]
        pieces.append("  <tr>\n" + "\n".join(cells) + "\n  </tr>\n")
    pieces.append("</table>\n")
    return "".join(pieces)

In [None]:
class Text(ItemBase):
    """Item holding a text both as ids (`data`) and as a printable value (`text`)."""
    def __init__(self, ids, text): self.data,self.text = ids,text
    def __str__(self):  return str(self.text)

    def show_batch(self, idxs:Collection[int], rows:int, ds:Dataset, figsize:Tuple[int,int]=(9,10))->None:
        """Display up to `rows` samples of `ds`, selected by `idxs`, as an HTML table.

        Each `ds[i]` is unpacked into an `(x, y)` pair and both are shown via `str`.
        `figsize` is accepted but not used by this implementation.
        """
        # Function-scope import: IPython is only needed when something is displayed.
        from IPython.display import display, HTML
        header = [['text', 'label']]
        body = [[str(x), str(y)] for x,y in (ds[i] for i in idxs[:rows])]
        # Column widths: 90% for the text, 10% for the label.
        display(HTML(_text2html_table(header + body, [90,10])))

class NumericalizedTextList(ItemList):
    """`ItemList` whose items are sequences of ids, together with the `Vocab` used to decode them."""
    def __init__(self, items:Iterator, vocab:Vocab=None, create_func:Callable=None, path:PathOrStr='.', xtra=None):
        """Pass the generic arguments on to `ItemList` and store `vocab` on the instance."""
        super().__init__(items, create_func=create_func, path=path, xtra=xtra)
        self.vocab = vocab

    def new(self, items:Iterator, xtra:Any=None)->'NumericalizedTextList':
        """Delegate to the parent `new`, forwarding this list's `vocab` along with `items` and `xtra`."""
        forwarded = dict(items=items, vocab=self.vocab, xtra=xtra)
        return super().new(**forwarded)

    def get(self, i):
        """Return item `i` as a `Text` built from its ids and their `vocab.textify` rendering."""
        ids = super().get(i)
        readable = self.vocab.textify(ids)
        return Text(ids, readable)
    
class TokenizedTextList(NumericalizedTextList):
    """Text list whose items are token sequences, turned into ids by `preprocess`."""
    def preprocess(self, vocab:Vocab=None, max_vocab:int=60000, min_freq:int=2):
        """Numericalize the items in place.

        Uses `vocab` when given; otherwise creates one from the current items
        with `Vocab.create(self.items, max_vocab, min_freq)`. The vocabulary is
        stored in `self.vocab` and in `self.preprocess_kwargs`.
        """
        # Build a vocabulary only when none was supplied. Passing the
        # `Vocab.create(...)` call as a fallback argument would run it every
        # time, even when its result is thrown away.
        if vocab is None: vocab = Vocab.create(self.items, max_vocab, min_freq)
        self.vocab = vocab
        self.preprocess_kwargs = {'vocab': self.vocab}
        # NOTE(review): items of differing lengths make this a ragged array; recent
        # NumPy versions reject that without dtype=object - confirm the target version.
        self.items = np.array([self.vocab.numericalize(t) for t in self.items])
    
class TextList(TokenizedTextList):
    """Text list whose items are raw strings, tokenized and then numericalized by `preprocess`."""
    def preprocess(self, tokenizer:Tokenizer=None, chunksize:int=10000, vocab:Vocab=None, 
                   max_vocab:int=60000, min_freq:int=2):
        """Tokenize the items in slices of `chunksize`, then numericalize them.

        `tokenizer` defaults to a fresh `Tokenizer()`. `vocab`, `max_vocab` and
        `min_freq` are forwarded unchanged to `TokenizedTextList.preprocess`.
        """
        # Construct the default tokenizer only when the caller did not pass one.
        if tokenizer is None: tokenizer = Tokenizer()
        tokens = []
        for start in progress_bar(range(0,len(self.items),chunksize), leave=False):
            tokens.extend(tokenizer.process_all(self.items[start:start+chunksize]))
        self.items = tokens
        super().preprocess(vocab, max_vocab, min_freq)
    
class TextFilesList(TextList):
    """`TextList` built from file names: each file is read with `open_text` at construction."""
    def __init__(self, items:Iterator, create_func:Callable=None, path:PathOrStr='.'):
        """Read every file named in `items` and initialise the list with the resulting texts."""
        texts = [open_text(fn) for fn in items]
        # Pass by keyword: the inherited signature is
        # (items, vocab, create_func, path, xtra), so positional arguments would
        # bind `create_func` to `vocab` and `path` to `create_func`.
        # NOTE(review): the inherited `new` forwards `vocab=`/`xtra=`, which this
        # signature does not accept - confirm how `ItemList.new` rebuilds instances.
        super().__init__(texts, create_func=create_func, path=path)

In [None]:
# Assemble the labelled data in three chained steps: read the `text` column of
# `texts.csv` under `path`, split the items randomly by percentage (library
# default fraction), then take the labels from column 0.
# NOTE(review): the exact semantics of each step live in fastai's ItemList API - confirm there.
il = (TextList.from_csv(path, 'texts.csv', create_func=None, col='text')
        .random_split_by_pct()
        .label_from_df(cols=0)
     )

In [None]:
# Run preprocessing with default arguments and keep the result.
# NOTE(review): presumably this ends up calling `TextList.preprocess` (tokenize, then numericalize) on each split - confirm.
sd = il.preprocess()

In [None]:
# Compare the vocabulary sizes seen by the validation and training sets; the
# recorded output shows the same size for both.
len(sd.valid.vocab.itos), len(sd.train.vocab.itos)

(6052, 6052)

In [None]:
# Turn the preprocessed splits into the object used for batching.
# NOTE(review): presumably a fastai DataBunch, judging by the method name - confirm.
data = sd.databunch()

In [None]:
# Display a few samples; the recorded output lists text/label rows.
data.show_batch()

text,label
"i was surprised , that ' ' the secret fury '' was an enjoyable good film xxrep 6 . probably because , i did n't have any expectations for this movie xxrep 5 . though , the film does have it 's plot holes xxrep 5 . i would say , that you could n't guess who was behind the whole scheme , until the very end of the movie xxrep 5 . at first , i thought , it was robert ryan , using the same method , like ' ' xxunk '' where husband tries to drive his wife mad , but i was wrong xxrep 6 . the main problem , with the movie is , they drive at a whole other direction , which gave no clues at the beginning xxrep 6 . i thought , robert ryan & xxunk colbert carried their parts well xxrep 6 . plus , xxunk xxunk , a fine character actress , who steals scenes in this one xxrep 6 . those who like movies , that keeps you guessing , will like this one xxrep 6 .",positive
"i do n't think most of us would tend to apply the term "" must - see "" to action films , but i was very impressed at how good this film was and it deservedly gets the "" must - see "" xxunk from me . \n\n mandy played by xxunk lee ( daughter of the late and great bruce lee and sister of the late xxunk lee ) is xxunk by martin , a professional thief to help pull off a diamond heist at a museum for a criminal xxunk , and get rewarded xxunk for it . little do they know that another pair of thieves ( lucy and tommy , a pair of xxunk ) , who were xxunk earlier by mandy and martin to get in on the deal , are also planning to steal the diamond . \n\n how each pair of thieves plans out the heist is a xxunk to watch . things go awry , as martin and mandy xxunk find themselves a step behind lucy and tommy . \n\n you 'll find yourself rooting for these thieves as they find that they need each other to stay alive from the crime xxunk , who are not happy at all that the diamond is not in its hands . \n\n action fans will not be disappointed , as there 's a healthy dose of gun battles , martial arts , and hand - to - hand combat sequences . \n\n what is xxunk is that , it 's not just the action that carries this film , but the romance and laughs ( and i do n't mean your typical one - liners prevalent in action films ) that xxunk in . \n\n it 's not easy to root for bad guys , but we get to see the human side of these thieves and the chemistry they develop . \n\n a great film and one xxup not to miss ! \n\n 9 out of 10",positive
"a retired diplomat , played nicely by michael york , goes to russia to get revenge on the russian gangster that murdered the diplomat 's xxunk son . there the diplomat meets an exceptionally strong and decent russian cop who helps him bring the russian gangster to justice . \n\n i remembered the old action flicks of the 1980s that always portray the russians as evil bad guys out to xxunk the righteous u.s . government . it 's interesting to see this time the russian guy as a hero . \n\n not a great flick , it 's really typically a "" b "" action flick . michael york lends some class to this mediocre movie . xxunk xxunk , who plays the russian cop is kind of "" blah "" but surprisingly has some chemistry with michael york . face it , michael york is such a good actor that he 'd have chemistry with anyone he 's doing a scene with . xxunk , the handsome adrian paul gets killed within the first 15 minutes into the movie . now , if adrian paul was in this movie longer , it would 've been an above average "" b "" action flick . all i can say about adrian paul is that he is real nice to look at for the first 15 minutes of the movie . the villain , played by richard xxunk , is your typical bad guy . he 's very blonde and very xxunk in this movie . \n\n rent this flick if there is nothing else on tv to watch . it 's okay . it does n't suck too bad . the action scenes are decent . the acting could be better , the plot could 've moved much xxunk , but hey , you get to see what russia looks like today !",negative
"douglas xxunk directs this over - acted drama about the unhappy xxunk . kyle xxunk xxunk mitch xxunk hudson ) are xxunk friends with different looks on life . kyle is the xxunk son of an oil xxunk ; mitch works for the xxunk oil company . both fall in love with the same woman , lucy moore ; but it is kyle that has the means to wow her off her feet and marry her . sister xxunk xxunk to be the town 's nymphomaniac and carrying a xxunk for mitch , who always seems to be the one to clean up the xxunk 's xxunk . ambitious with xxunk ; a little over the top , but the stars make it a movie to see . i was most impressed with malone . rounding out the cast : robert keith , edward xxunk , john xxunk and robert j. xxunk .",positive
"dear dear dear dear dear ... me ! i had the strength to see it through ... but why ? ! \n\n the first two films where fun and actually somewhat good . but this is so bad we had problems seeing the whole thing . this was some kind of tremors for kids . i ca n't believe this movie was made at all .. seems like the xxunk where taken from some bad western series of some kind ( for kids ) and they just did whatever they could with it . \n\n what audience is this movie for ? i can only think of 12 - 14 year olds . if you 're older than 14 you 'll have serious problems with this movie . it 's not only slow , but it 's so utterly boring . the characters are xxunk ( not just a little either ) and so xxunk it 's fun for a while .. but not long enough to not make you want to fling xxunk at the screen . you know everything that is going to happen too , cause yes ... xxunk seen it a xxup xxunk times before in any hero series on tv for kids . i picked all the survivors and all the xxunk fodder the second the characters got introduced . it 's so bad .. so wrong .. so ... crap . \n\n but ok , we did get a laugh now and then . not just at the silly plot holes , but some scenes where worth a replay or two ... or one scene that is , where two baby tremors fling themselves at one of the obvious xxunk fodder guys .. it 's really a great scene which made us replay it over and over and laugh xxunk . still makes me grin when i think about it . but that only happened one more time sadly .. and that 's when the "" xxunk "" xxunk shows up and xxunk his part wonderfully ... that and one comment "" they xxunk from the ground like some xxup demonic xxup xxunk ! "" at this point we where almost crying with laughter . but after that .. nothing could ever top that .. xxunk .. so it 's pretty much downhill from there . \n\n so tops here are demonic xxunk and xxunk . if anybody ever tells you this is a good movie ... he 's either a "" plant "" , xxunk or someone very evil . 
this movie has got to be the worst of the tremors by far . looking forward to seeing tremors 3 , it 's bound to be box office hit compared to this ... this ... xxunk * no .. xxunk leave it at that .",negative
it 's terrific when a funny movie does n't make smile you . what a pity ! ! this film is very boring and so long . it 's simply xxunk . the story is xxunk without goal and no fun . \n\n you feel better when it 's finished .,negative
"skip mccoy ( richard widmark ) pick - xxunk candy 's ( jean peters ) wallet which contains an important microfiche that is intended for the communist cause . she is being followed by 2 xxunk agents that are waiting to xxunk once she hands the microfiche over to her contact . however , skip steals the xxunk on the xxunk under everyone 's xxunk and so starts a hunt for him by both the police and joey ( richard xxunk ) and candy who want the microfiche back . skip can only be xxunk through xxunk ( xxunk xxunk ) who xxunk information on xxunk . it is made clear to skip that what he has stolen is important and both sides want the film , but he xxunk to hold out for a high price . this leads to joey hunting after him and a conflict between joey and jean , who has fallen in love with skip . joey has a xxunk to deliver the microfiche to his boss . \n\n its a well - acted film and it has a good beginning that gets you involved straight away . its a bit unrealistic how jean peters immediately falls in love with widmark , but this point is necessary as otherwise why would she later hold out from joey . its a good film .",positive
"the story is seen before , but that xxunk matter if you can figure out to make a proper storyboard . it is clear that the director have n't spent his work on the storyboard . alongside this , the xxunk spent far too much time xxunk angles that do not match the message of the movie . the funniest is , however , if you take a look at the movie 's website , you can read that it was on purpose that the director has chosen to make the film with bad camera angles . because it remind us about hunting . but i have never heard of hunting with poor camera angles xxunk it will have 1 stars because the story is ok . it is a pity that xxunk west , has not spent more time to review his story . it is as if the movie was more important than the planning . because you have a camera does not mean you should make a movie right away ... come . everyone can make a movie , but not all will be just as good . so a word of advice to xxunk west are : stop and xxunk what you want . use your time to start planning and not filming until everything has come down on a storyboard . you certainly have the ability and desire - so do n't abuse your talent .",negative
