In [None]:
from fastai import *        # Quick access to most common functionality
from fastai.text import *   # Quick access to NLP functionality

In [None]:
# Locate the IMDB sample dataset on disk (presumably downloaded and extracted by
# `untar_data` on first use — confirm) and echo the resulting path.
path = untar_data(URLs.IMDB_SAMPLE)
path

PosixPath('/home/ubuntu/.fastai/data/imdb_sample')

In [None]:
def open_text(fn:PathOrStr, enc='utf-8'):
    "Return the whole content of the text file `fn`, decoded with `enc`."
    # An explicit encoding keeps the result independent of the machine's locale default.
    with open(fn, 'r', encoding=enc) as f: return f.read()

In [None]:
def _treat_html(o:str)->str:
    "Make `o` safe to place inside an HTML table cell."
    # `&` has to be replaced first, otherwise the entities produced for `<` and `>`
    # would themselves be escaped a second time.
    o = o.replace('&','&amp;').replace('<','&lt;').replace('>','&gt;')
    # Newlines are rendered as a literal backslash-n instead of breaking the cell.
    return o.replace('\n','\\n')

def _text2html_table(items:Collection[Collection[str]], widths:Collection[int])->str:
    "Render the rows in `items` as an HTML table, with one `<col>` per percentage in `widths`."
    parts = ["<table>"]
    parts.extend(f"  <col width='{pct}%'>" for pct in widths)
    for row in items:
        # Empty cells are left out of the row entirely.
        cells = [f"    <th>{_treat_html(cell)}</th>" for cell in row if len(cell) > 0]
        parts.append("  <tr>\n" + "\n".join(cells) + "\n  </tr>\n")
    parts.append("</table>\n")
    return "".join(parts)

In [None]:
class Text(ItemBase):
    "Item that stores `ids` as `data` next to the matching `text`."
    def __init__(self, ids, text):
        self.data = ids
        self.text = text

    def __str__(self):
        return str(self.text)

    def show_batch(self, idxs:Collection[int], rows:int, ds:Dataset, figsize:Tuple[int,int]=(9,10))->None:
        "Display the first `rows` entries of `idxs` from `ds` as an HTML table; `figsize` is not used here."
        from IPython.display import clear_output, display, HTML
        # Header row first, then one [text, label] row per selected dataset entry.
        table = [['text', 'label']]
        table.extend([str(x), str(y)] for x,y in (ds[i] for i in idxs[:rows]))
        display(HTML(_text2html_table(table, [90,10])))

class NumericalizedTextList(ItemList):
    "Item list that carries the `vocab` used to turn its items back into text."
    def __init__(self, items:Iterator, vocab:Vocab=None, create_func:Callable=None, path:PathOrStr='.', xtra=None):
        parent_kwargs = dict(create_func=create_func, path=path, xtra=xtra)
        super().__init__(items, **parent_kwargs)
        self.vocab = vocab

    def new(self, items:Iterator, xtra:Any=None)->'NumericalizedTextList':
        "Build a list of the same class around `items`, reusing this list's vocab, create_func and path."
        cls = self.__class__
        return cls(items=items, vocab=self.vocab, create_func=self.create_func, path=self.path, xtra=xtra)

    def get(self, o):
        "Wrap `o` in a `Text` together with its `vocab.textify` rendering."
        text = self.vocab.textify(o)
        return Text(o, text)
    
class TokenizedTextList(NumericalizedTextList):
    "Text list whose `preprocess` step numericalizes the items with a vocab."

    def preprocess(self, vocab:Vocab=None, max_vocab:int=60000, min_freq:int=2):
        """Replace every item by `vocab.numericalize(item)`.

        `vocab`: vocab to use; when None, one is created from the current items
        with `Vocab.create(self.items, max_vocab, min_freq)`.
        The vocab actually used is stored in `self.vocab` and in
        `self.preprocess_kwargs['vocab']`.
        """
        # Only build a vocab when none was supplied: with `ifnone(vocab, Vocab.create(...))`
        # the `Vocab.create` argument is evaluated before the call, so a throwaway vocab
        # was built even when the caller passed one.
        self.vocab = Vocab.create(self.items, max_vocab, min_freq) if vocab is None else vocab
        self.preprocess_kwargs = {'vocab': self.vocab}
        # NOTE(review): the numericalized items presumably differ in length; recent NumPy
        # releases refuse to build such ragged arrays unless dtype=object is given —
        # confirm against the NumPy version in use.
        self.items = np.array([self.vocab.numericalize(t) for t in self.items])
    
class TextList(TokenizedTextList):
    "Text list whose `preprocess` step tokenizes the items before the parent's preprocessing."
    def preprocess(self, tokenizer:Tokenizer=None, chunksize:int=10000, vocab:Vocab=None, 
                 max_vocab:int=60000, min_freq:int=2):
        "Tokenize the items `chunksize` at a time, then call the parent `preprocess` with the vocab settings."
        tok = ifnone(tokenizer, Tokenizer())
        n_items = len(self.items)
        tokenized = []
        for start in progress_bar(range(0, n_items, chunksize), leave=False):
            chunk = self.items[start:start+chunksize]
            tokenized.extend(tok.process_all(chunk))
        self.items = tokenized
        super().preprocess(vocab, max_vocab, min_freq)
    
class TextFilesList(TextList):
    "`TextList` whose texts are read with `open_text` from the files named in `items`."
    # NOTE(review): the inherited `new` re-instantiates `self.__class__` with `vocab=` and
    # `xtra=` keywords, which this `__init__` does not accept — confirm whether `new` is
    # expected to work on this class.
    def __init__(self, items:Iterator, create_func:Callable=None, path:PathOrStr='.'):
        texts = [open_text(fn) for fn in items]
        # Keywords are required: the inherited `__init__` takes `vocab` as its second
        # positional parameter, so passing `create_func, path` positionally would bind
        # them to `vocab` and `create_func` instead.
        super().__init__(texts, create_func=create_func, path=path)

In [None]:
# Item-list pipeline: read the 'text' column of texts.csv under `path`, split the rows
# (random_split_by_pct with its default settings), then label from column 0 as categories.
il = (TextList.from_csv(path, 'texts.csv', create_func=None, col='text')
      .random_split_by_pct()
      .label_from_df(CategoryList, cols=0))

In [None]:
# Preprocess with default arguments; the result exposes `.train` and `.valid` (used below).
sd = il.preprocess()

In [None]:
# Compare the vocab sizes of the two splits; the output below shows them equal,
# which is what a vocab shared between train and valid would give.
len(sd.valid.vocab.itos), len(sd.train.vocab.itos)

(6176, 6176)

In [None]:
# Turn the preprocessed splits into the `data` object displayed in the next cell.
data = sd.databunch()

In [None]:
# Render a few text/label pairs (see the table output below).
data.show_batch()

text,label
i watched lion king more times that all my friends put xxunk . having a baby sister .. you know how it is . by now i xxunk both the plot and the lines . after lion king 2 came out i was like ok well let me see ... the second one was significantly weaker ... then i saw an ad for lion king 1 and 1 / 2 ... i was like ok there we go again . after watching the 1 1 / 2 i was like wow . all my expectations ( for xxunk ) were broken . a truly lovely and original plot keeps you glued to your seat for the entire time . i have noticed that the cartoon was filled with so many comical moments that xxunk will apply here 100 % . \n\n i xxunk recommend seeing the cartoon .,positive
"oh , this is so bad , it is funny . the only way one could explain something like this is a porn party with drugs that resulted in the resolution to make a movie just for fun . i mean : you get to see porn actresses topless , having sex , then killed by human xxunk . there is plenty of gore , including the classic "" something is wrong with her , oh no , it 's half the person she used to be "" and the xxunk murder caused by panic . but you can also find funny stuff like intestines pulled through someone 's ass and a guy running in the woods then finding himself decapitated by a wire tied between two trees ( that makes a xxunk xxunk sound afterward , like in cartoons ) . somehow there is a market for people going beyond porn , they really need to know what 's inside an actress , mere xxunk are not enough . therefore you get to see plenty of summary xxunk on xxunk xxunk . there is no dialog . jenna and xxunk have really small parts and i really wonder what richard xxunk wanted when xxunk a role here . i think this is a film one must watch with the button on fast forward and watch only the xxunk stuff , just to be reminded of the old school c class horror movies .",negative
"well , what are the odds ! at the exact right moment that a few xxunk amateur - scientists discover xxunk paintings xxunk that some type of dinosaur monster might have inhabited the area thousands of years ago , a burning xxunk xxunk into the lake and xxunk xxunk a monster 's xxunk that has been lying there for over a thousand years , i suppose ! "" the crater lake monster "" is a movie that literally must be seen to be believed , but you better do so in the company of many friends and a pile of xxunk in order to make the xxunk a little bit easier to xxunk . yes , this is a terrible film with the xxunk xxunk screenplay xxunk and numerous irrelevant padding xxunk that are downright embarrassing , but it 's also xxunk charming and so clumsily put together that you simply have to cherish some kind of xxunk for it . half of the film  at least  revolves on the wacky adventures of xxunk and mitch . these two local xxunk own and run a boat renting shop near the lake , but spend most of their days picking their xxunk and xxunk over fascinating stuff like to spell the word "" bait "" . it is mostly during their xxunk laurel & hardy situations that new xxunk of blood or decapitated heads are discovered in the lake . steve xxunk , the heroic but not exactly sharp sheriff is on the case , but only if he 's not too busy chasing big city thugs traveling through the area . halfway through the film , there suddenly is an abrupt scene about a thug xxunk a xxunk store and killing two people in the process . this xxunk "" xxup xxunk "" moment appears to take on the complete other side of the country , like in new york city or something , and has absolutely nothing to do with the events going on at crater lake . only like twenty minutes later the xxunk pops up again in xxunk and there 's an "" xxunk "" chase through the woods , ending in the xxunk 's hungry xxunk . the absurd little details in "" the crater lake monster "" are too numerous to mention ! 
for example , this is probably the only creature - feature in which the players discover the obligatory xxunk xxunk xxup after they already xxunk the actual monster . the xxunk in continuity should be legendary as far as i 'm concerned . it 's like everybody forgot to pay attention to it . night turns into twilight into day and back into twilight all during one and the same xxunk course ! the monster is xxunk the best aspect about the film , especially since it 's accomplished through good old fashioned and adorable xxunk motion effects . the cute xxunk is a xxunk ; meaning an xxunk dinosaur looking like a xxunk between xxunk the last dinosaur and an xxunk . "" crater lake monster "" is a unique and unforgettable xxunk experience that i can only encourage to track down ! the miserable 1 out of 10 rating is just out of principle ( and because basically , this is a very bad film ) should be put into perspective , because i might as well could have given it 10 out of 10 for sheer entertainment value .",negative
"the first time i ever saw this movie was back in the 1980s as a xxunk xxunk . my dad actually recorded it off the tv . i must have watched is over 20 times , before the relatively recent release on xxup dvd . \n\n i of course bought and watched the xxup dvd and was taken xxunk by how much the dialogue had changed . in the first version , which i still have on xxup vhs , the mood of the film , thanks to the dialogue , was actually very dark . however the new version , featuring van der beek et al , is more comic . \n\n to put it another way , it 's like watching the original us release of xxunk with that dub , before watching the xxunk version with the literal translation of the japanese . \n\n i 'm in no way xxunk , the story and quality of animation are not xxunk from , but it does raise the question of whether miyazaki intended for a xxunk or darker narrative and theme in his film xxrep 5 .",positive
""" xxunk "" is quite possibly the most incompetent , embarrassing , xxunk "" film "" i have ever seen . the acting is criminal , the direction practically non - existent , and the special effects presumably put together by xxunk a xxunk with learning difficulties on a xxunk xxunk computer . \n\n far be it from me to xxunk creativity , but i actually believe things like this should n't be made . i am sure the "" film xxunk makers will say that , yes , the "" film "" was xxunk by a low ( as in nothing ) budget - but in that case they just really should n't have bothered . as it is , they have xxunk the world something so dire , so xxunk , that only xxunk could get the xxunk xxunk of enjoyment from it . \n\n starting the "" movie "" it was n't as though i was expecting "" citizen kane "" or anything . i was expecting a low budget little horror with perhaps a xxunk of xxunk , a hint of fun , and even some energy . what i got was the cinematic equivalent of a used xxunk . \n\n the plot ? well , our xxunk xxunk scientist , who works in his parents ' basement , is seen xxunk stirring some xxunk in a cup . apparently , such high - level science is the end - result of years of research . his parents then send him off to a strange hotel - type place in the countryside to relax . there are some other people there , who are simply too awful to write about . anyway , the scientist drops his test - xxunk onto some mushrooms - and soon the mushrooms grow and kill some people . ( wow , i 'm getting xxunk just writing the plot summary ) . our heroes save the day by xxunk a barrel of xxunk xxunk ( by xxunk a "" xxunk "" - really a piece of string - to it ) . the barrel xxunk xxunk with the power of a small nuclear weapon , destroying all the mushrooms . the end . ( thank xxunk ) . \n\n that summary is as good as the "" film "" gets ( and actually makes it sound a lot more interesting than it actually is ) . 
it really should never have got past this stage of development ( by which i mean a plot xxunk xxunk on the back of an xxunk with xxunk ) . somebody should have really xxunk in and given someone a xxunk shake and said "" no . "" \n\n and those "" special "" effects . well , they 're "" special "" all right . this is xxup cgi gone crazy . and done by a person who i can only assume believes the xxunk xxunk to be the xxunk of modern technology . and when the xxunk monsters are not in the style of a xxunk home computer graphics xxunk , they are represented by actors xxunk along covered in a xxunk ( i kid you not ) . \n\n one of the most xxunk things about the movie is its xxunk . this can be summed up by the scene in which the hero xxunk his guns ( a la clint eastwood ) and then fails miserably to get them in his xxunk . i mean come on , a couple of xxunk and he could have pulled it off , but just to leave it as it is - really weak . \n\n i can not believe money was spent on this camcorder - shot rubbish . the "" film xxunk makers should hang their heads in shame and be xxunk from going within fifty xxunk of any movie - making xxunk . \n\n i also think it 's wrong that friends and family of the makers come onto imdb and post xxunk reviews and give stupidly high user ratings which give a totally inaccurate picture of the "" movie . "" "" xxunk "" is an absolute xxunk of film - making . mr xxunk is either very , very stupid or very , very cynical .",negative
this is n't another xxunk look at the xxunk but rather an intimate story about the events that took place on a small street in berlin and some of the people that were involved . this film starts in the present time in new york city where ruth xxunk ( xxunk xxunk ) is in xxunk over the death of her husband and family members have all xxunk to her side . ruth 's daughter xxunk ( maria xxunk ) slowly learns that her mother was raised by an xxunk woman named lena xxunk ( xxunk xxunk ) and so she xxunk to germany and xxunk the 90 year old who tells her about the events on xxunk . \n\n xxrep 5 * xxup spoiler xxup alert xxrep 5 * \n\n lena talks about berlin in xxunk where the xxunk would hold all the xxunk xxunk in a building on xxunk street even though they are supposed to have xxunk for being married to xxunk and for nine days a group of women would wait outside and xxunk for their release . xxunk year old ruth ( xxunk xxunk ) xxunk for her mother to come out and has nowhere to go but she meets xxunk year old lena ( xxunk xxunk ) who takes her in . lena 's husband xxunk ( martin xxunk ) is also inside and eventually she tries to xxunk with nazi officers to get them to do something . \n\n this film is directed by xxunk von xxunk who is making her first feature film in almost 10 years after working in television and while this is clearly not one of her more xxunk efforts she remains one of the most xxunk directors in europe . this is not one of those nazi films where we view horrible acts of xxunk to jews although we do see some xxunk treatment being xxunk out but instead this is more of a xxunk of a small event that meant life and death to the people involved . this film is n't trying to shock anyone or open the door to xxunk on the circumstances but what it simply wants to do is just shed a light on a small but true life event that occurred during an historical period . 
part of the films strength comes from its actors and there are some good performances that shine through especially by xxunk and young xxunk and it 's always good to see xxunk ( xxunk & xxunk ) in a pivotal role . this is n't a great film or something that 's going to change your perspective on xxup wwii but considering that innocent lives were put to death because of the events that took place i think that reason alone is important enough to xxunk this true story .,positive
"xxunk delon visits xxunk , sure vengeance on the ruthless crime family that employed him as a hit - man in the xxunk tessari thriller "" big guns "" after they accidentally murder his wife and child . tessari and xxunk xxunk xxunk , xxunk xxunk of "" a minute to xxunk , a second to die , "" and franco xxunk of "" ring of death "" take this xxunk about a career xxunk for the mob right down to the wire . indeed , "" big guns "" is rather predictable , but it still qualifies as solid entertainment with lots of savage and often sudden killings . xxunk delon of "" the xxunk "" is appropriately xxunk as he xxunk deals out death to the heads of the mob families who xxunk to let him xxunk so that he could enjoy life with his young son and daughter . richard xxunk of "" the godfather "" plays a xxunk crime boss who wants to xxunk the xxunk with the delon character , but the rest of his hard - xxunk xxunk want the hit - man dead . like most crime thrillers in the 1960s and 1970s , "" big guns "" xxunk to the cinematic morality that crime does not pay . interestingly , the one man who has nothing to do with the murder of the wife and son of the hero survives while another xxunk the hero with extreme xxunk . tessari does not waste a second in this xxunk xxunk up . apart from the mother and son dying in a car bomb meant for the father , the worst thing that takes place occurs in an xxunk salvage yard when an xxunk of the hero is crushed in a xxunk car . xxunk , "" big guns "" is a rather xxunk outing , but it does have a high body count for a 1973 xxunk melodrama . only at the last minute does our protagonist let his guard down and so the contrived morality of an eye for an eye remains intact . tessari stages a couple of decent car chases and the death of a don in a train traveling through a train xxunk is as bloody as this violent xxunk gets . the photography and the xxunk are excellent .",positive
"i loved that this film xxunk the intelligence of the viewer , allowing the layers to xxunk from the characters through their xxunk with each other about the xxunk loss that has so affected each of them . \n\n the cinematography is a beautiful , and is an inspired reflection of the vision of someone i believe is an extremely talented new filmmaker with the xxunk and artistic insight to tell a story that others with much more experience have failed to accomplish . i see a bright future for this writer / producer / director who had the ability to focus on a goal and accomplish it with integrity . \n\n kudos for this xxunk .",positive
