In [None]:
#export
from local.imports import *
from local.test import *
from local.core import *
from local.data.transform import *
from local.data.core import *
from local.data.source import *
from local.data.external import *
from local.data.pipeline import *
from local.text.core import *
from local.notebook.showdoc import show_doc

In [None]:
#default_exp text.data
#default_cls_lvl 3

# Text data

> Functions and transforms to help gather text data in a `DataSource`

## Numericalizing

In [None]:
#export
def make_vocab(count, min_freq=3, max_vocab=60000):
    "Create a vocab from `Counter` `count` with the items seen at least `min_freq` times, cut to `max_vocab` then padded to a multiple of 8"
    # Only the `max_vocab` most common items are considered, and among them those with a count >= `min_freq`
    vocab = [o for o,c in count.most_common(max_vocab) if c >= min_freq]
    for o in reversed(defaults.text_spec_tok): #Make sure all special tokens are in the vocab
        # A special token already present is moved to the front instead of being duplicated
        if o in vocab: vocab.remove(o)
        vocab.insert(0, o)
    # Inserting the special tokens may have pushed the list over `max_vocab`
    vocab = vocab[:max_vocab]
    # Pad with 'xxfake' up to the next multiple of 8; `-len % 8` is 0 when the length is already aligned,
    # so no useless entries are appended in that case
    return vocab + ['xxfake'] * (-len(vocab) % 8)

In [None]:
# Token frequencies: a=4, b=2, c=2, d=1
count = Counter(['a', 'a', 'a', 'a', 'b', 'b', 'c', 'c', 'd'])
# With the default `min_freq=3` only 'a' is frequent enough to be kept
test_eq(set(make_vocab(count)), set(defaults.text_spec_tok + 'a xxfake'.split()))
# The returned vocab always has a length that is a multiple of 8
test_eq(len(make_vocab(count))%8, 0)
# `min_freq=1` keeps every token
test_eq(set(make_vocab(count, min_freq=1)), set(defaults.text_spec_tok + 'a b c d xxfake'.split()))
# `max_vocab=12` cuts the list before the padding is added: 'd' is dropped here
test_eq(set(make_vocab(count,max_vocab=12, min_freq=1)), set(defaults.text_spec_tok + 'a b c xxfake'.split()))

In [None]:
# export
class Numericalize(ItemTransform):
    "Reversible transform of tokenized texts to numericalized ids"
    def __init__(self, vocab=None, min_freq=3, max_vocab=60000, sep=' '):
        self.vocab,self.min_freq,self.max_vocab,self.sep = vocab,min_freq,max_vocab,sep
        # `o2i` maps a token to its id; the `defaultdict(int)` sends unknown tokens to id 0.
        # The 'xxfake' padding entries are left out of the lookup, as in `setup`, so that a vocab
        # passed here and a vocab built in `setup` numericalize texts the same way.
        self.o2i = None if vocab is None else defaultdict(int, {v:k for k,v in enumerate(vocab) if v != 'xxfake'})

    def setup(self, dsrc):
        "Build `vocab` and `o2i` from the texts in `dsrc` (its `train` attribute when it has one) unless a vocab was given"
        if dsrc is None: return
        if self.vocab is None:
            dsrc = getattr(dsrc,'train',dsrc)
            # Count every `sep`-separated token over all the texts
            count = Counter(p for o in dsrc for p in o.split(self.sep))
            self.vocab = make_vocab(count, min_freq=self.min_freq, max_vocab=self.max_vocab)
            self.o2i = defaultdict(int, {v:k for k,v in enumerate(self.vocab) if v != 'xxfake'})

    # `encodes` splits the text on `sep` and looks each token up in `o2i`;
    # `decodes` joins the tokens of the ids back with `sep`, skipping the ones equal to `PAD`
    def encodes(self, o):      return [self.o2i[o_] for o_ in o.split(self.sep)]
    def decodes(self, o)->Str: return self.sep.join([self.vocab[o_] for o_ in o if self.vocab[o_] != PAD])

In [None]:
# Build the vocab from two short texts, keeping every token (`min_freq=1`)
num = Numericalize(min_freq=1, sep=' ')
num.setup(L('This is an example of text', 'this is another text'))
test_eq(set(num.vocab), set(defaults.text_spec_tok + 'This is an example of text this another xxfake'.split()))
test_eq(len(num.vocab)%8, 0)
start = 'This is an example of text'
t = num(start)
# 'is' and 'text' are the only tokens seen twice, so they come first among the regular tokens (ids 9 and 10)
test_eq(t, [11, 9, 12, 13, 14, 10])
# Decoding the ids gives back the original text
test_eq(num.decode(t), start)

In [None]:
# With `min_freq=2` only 'is' and 'text' (both seen twice) make it into the vocab
num = Numericalize(min_freq=2, sep=' ')
num.setup(L('This is an example of text', 'this is another text'))
test_eq(set(num.vocab), set(defaults.text_spec_tok + 'is text xxfake'.split()))
test_eq(len(num.vocab)%8, 0)
# `start` is defined in the previous cell; tokens missing from the vocab are numericalized to 0
t = num(start)
test_eq(t, [0, 9, 0, 0, 0, 10])
# Id 0 is decoded as `UNK`
test_eq(num.decode(t), f'{UNK} is {UNK} {UNK} {UNK} text')

## LM_Dataset -

In [None]:
#export
from IPython.display import display, HTML

class TensorText(TensorBase):
    "`TensorBase` subclass whose samples are shown as rows of an HTML table"
    def get_ctxs(self, max_samples=10, **kwargs):
        "Create one row holding its `index` per sample to show, for at most `max_samples` samples"
        n_rows = min(self.shape[0], max_samples)
        frame = pd.DataFrame({'index': range(n_rows)})
        return [frame.iloc[row] for row in range(n_rows)]

    def display(self, ctxs):
        "Render the rows in `ctxs` as an HTML table, leaving out the DataFrame index"
        table = pd.DataFrame(ctxs)
        display(HTML(table.to_html(index=False)))

In [None]:
#export
def apply_coords(f, *dims):
    "Call `f` on the coordinates of every cell of a grid of shape `dims` and gather the results in an array"
    axes = [range(d) for d in dims]
    # With 'ij' indexing, `grid[:, i, j, ...]` holds the coordinates `(i, j, ...)` of that cell
    grid = np.stack(np.meshgrid(*axes, indexing='ij'))
    # Mapping over axis 0 hands `f` one coordinate vector per cell
    return np.apply_along_axis(f, 0, grid)

In [None]:
# Each cell of the (2,3,4) result holds the `str` of its own coordinates
apply_coords(str,2,3,4)

array([[['[0 0 0]', '[0 0 1]', '[0 0 2]', '[0 0 3]'],
        ['[0 1 0]', '[0 1 1]', '[0 1 2]', '[0 1 3]'],
        ['[0 2 0]', '[0 2 1]', '[0 2 2]', '[0 2 3]']],

       [['[1 0 0]', '[1 0 1]', '[1 0 2]', '[1 0 3]'],
        ['[1 1 0]', '[1 1 1]', '[1 1 2]', '[1 1 3]'],
        ['[1 2 0]', '[1 2 1]', '[1 2 2]', '[1 2 3]']]], dtype='<U7')

In [None]:
class LM_Sampler(Sampler):
    "Sampler yielding item `(i%bs)*spb + i//bs` at position `i`, where `spb = len(ds)//bs`"
    def __init__(self, ds):
        self.ds,self.bs = ds,ds.bs
        # Integer number of items per batch row
        self.spb = len(ds)//ds.bs

    def __iter__(self):
        # When batches are consecutive groups of `bs` samples, row `r` of batch `k` is item `r*spb + k`
        positions = L.range(self.ds)
        return (self.spb*(i%self.bs) + i//self.bs for i in positions)

In [None]:
#export
class LM_Dataset(BaseDS):
    "Dataset reading the documents of `ds` as one token stream and serving pairs of `seq_len` tokens, the second one shifted by one token"
    def __init__(self, ds, lens=None, bs=64, seq_len=72, shuffle=False, cache=2):
        super().__init__(ReindexCollection(ds, cache=cache))
        self.bs,self.seq_len,self.shuffle = bs,seq_len,shuffle
        # When not given, the document lengths are read from the first element of each item
        if lens is None: lens = [len(o[0]) for o in ds]
        # Built on the same `idxs` as `self.ds` -- presumably so the lengths follow the documents when shuffling; verify in `ReindexCollection`
        self.lens = ReindexCollection(lens, idxs=self.ds.idxs)
        # The "-1" is to allow for final label
        self.n = round_multiple(sum(lens)-1, bs*seq_len, round_down=True)
        self.reset()
        
    # Number of sequences of `seq_len` tokens
    def __len__(self): return self.n//(self.seq_len)
    def reset(self):
        "Shuffle the documents if `shuffle` is set, then recompute the cumulative document lengths"
        if self.shuffle: self.ds.shuffle()
        self.cum_lens = np.cumsum(self.lens)
    
    def __getitem__(self, seq):
        "Return sequence number `seq` as a tuple of two `TensorText` of `seq_len` tokens"
        def _f(o):
            # `o` is a coordinate pair coming from `apply_coords(_f, 2, seq_len)`:
            # `o[0]` (0 or 1) is the shift of one token, `o[1]` the position inside the sequence
            tokidx = seq*self.seq_len + o[0] + o[1]
            # First document whose cumulative length is greater than `tokidx`
            docidx = np.searchsorted(self.cum_lens, tokidx+1)
            # `tokidx-self.cum_lens[docidx]` is negative: the token is indexed from the end of that document
            return self.ds[docidx][0][tokidx-self.cum_lens[docidx]]
        res = apply_coords(_f, 2, self.seq_len)
        return tuple(TensorText(tensor(o)) for o in res)

In [None]:
# Map each lowercase letter to its position in the alphabet
rev = {v:k for k,v in enumerate(string.ascii_lowercase)}
# Six 1-tuples, each holding one "document" as a list of letter ids (25 tokens in total, numbered 0 to 24)
ints = [([rev[o_] for o_ in o],) for o in 'abcde fghijk lmnopqrs tu vwx y'.split()]

In [None]:
# Batch size 4 and sequences of 3 tokens over the 25-token toy corpus
bs,sl=(4,3)
t = LM_Dataset(ints, bs=bs, seq_len=sl)
# The sampler reads `bs` and the length from the dataset itself
s = LM_Sampler(t)

In [None]:
# With `LM_Sampler`, each row of the second batch continues the same row of the first batch
dl = TfmdDL(t, bs=bs, sampler=s)
list(dl)

[(tensor([[ 0,  1,  2],
          [ 6,  7,  8],
          [12, 13, 14],
          [18, 19, 20]]), tensor([[ 1,  2,  3],
          [ 7,  8,  9],
          [13, 14, 15],
          [19, 20, 21]])), (tensor([[ 3,  4,  5],
          [ 9, 10, 11],
          [15, 16, 17],
          [21, 22, 23]]), tensor([[ 4,  5,  6],
          [10, 11, 12],
          [16, 17, 18],
          [22, 23, 24]]))]

In [None]:
# Keep a single batch: `x` and `y` are the two elements returned by `one_batch`
x,y = dl.one_batch()

In [None]:
bs,sl = 4,3
# Same six documents as `ints`, kept as raw strings (only used by the commented-out tests below)
txts = [(o,) for o in 'abcde fghijk lmnopqrs tu vwx y'.split()]

In [None]:
# Show the raw string documents
txts

[('abcde',), ('fghijk',), ('lmnopqrs',), ('tu',), ('vwx',), ('y',)]

In [None]:
#t = LM_Dataset(txts, bs=bs, seq_len=sl)
#test_eq(list(t),
#        [[[['a','b','c'], ['g','h','i'], ['m','n','o'], ['s', 't', 'u']],
#          [['b','c','d'], ['h','i','j'], ['n','o','p'], ['t','u','v']]],
#         [[['d','e','f'], ['j','k','l'], ['p','q','r'], ['v','w','x']],
#          [['e','f','g'], ['k','l','m'], ['q','r','s'], ['w','x','y']]]])

In [None]:
#t = LM_Dataset(txts[:-1], bs=bs, seq_len=sl)
#test_eq(list(t),
#        [[[['a','b','c'], ['d','e','f'], ['g','h','i'], ['j','k','l']],
#          [['b','c','d'], ['e','f','g'], ['h','i','j'], ['k','l','m']]]])

## Integration example

In [None]:
# Get the path of the IMDB sample through `untar_data` and read its `texts.csv` into a DataFrame
path = untar_data(URLs.IMDB_SAMPLE)
df = pd.read_csv(path/'texts.csv')
# Columns shown below: label, text, is_valid
df.head(2)

Unnamed: 0,label,text,is_valid
0,negative,"Un-bleeping-believable! Meg Ryan doesn't even look her usual pert lovable self in this, which normally makes me forgive her shallow ticky acting schtick. Hard to believe she was the producer on this dog. Plus Kevin Kline: what kind of suicide trip has his career been on? Whoosh... Banzai!!! Finally this was directed by the guy who did Big Chill? Must be a replay of Jonestown - hollywood style. Wooofff!",False
1,positive,"This is a extremely well-made film. The acting, script and camera-work are all first-rate. The music is good, too, though it is mostly early in the film, when things are still relatively cheery. There are no really superstars in the cast, though several faces will be familiar. The entire cast does an excellent job with the script.<br /><br />But it is hard to watch, because there is no good end to a situation like the one presented. It is now fashionable to blame the British for setting Hindus and Muslims against each other, and then cruelly separating them into two countries. There is som...",False


In [None]:
# Tokenize the `text` column; `count` is later passed to `make_vocab` (presumably the token `Counter` -- confirm in `tokenize_df`)
# NOTE(review): the output saved with this cell is a UnicodeDecodeError raised in worker processes -- re-run before relying on the cells below
df_tok,count = tokenize_df(df, 'text')
df_tok.head(2)

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 7: invalid start byte

Process Process-17:
Process Process-10:
Process Process-11:
Process Process-12:
Process Process-7:
Process Process-13:
Process Process-14:
Process Process-18:
Process Process-15:
Process Process-9:
Process Process-16:
Process Process-6:
Process Process-4:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/sgugger/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/sgugger/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/sgugger/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Traceback (most r

  File "/home/sgugger/anaconda3/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/sgugger/anaconda3/lib/python3.7/multiprocessing/connection.py", line 206, in send
    self._send_bytes(_ForkingPickler.dumps(obj))
  File "/home/sgugger/anaconda3/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
  File "/home/sgugger/anaconda3/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/sgugger/anaconda3/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
  File "/home/sgugger/anaconda3/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/sgugger/anaconda3/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/sgugger/anaconda3/lib/python3.7/multiprocessing/conne

In [None]:
# Tokenized texts and their lengths as arrays. The builtin `int` is used as dtype:
# `np.int` was only an alias of it and has been removed from recent NumPy releases.
texts,lengths = df_tok['text'].values,df_tok['text_lengths'].values.astype(int)

In [None]:
# Random split of the texts, then a `DataSource` numericalizing them with a vocab built from `count`
splits = RandomSplitter()(L(t for t in texts))
dsrc = DataSource(L(t for t in texts), type_tfms=[Numericalize(make_vocab(count))], filts=splits)

In [None]:
# Check the first item in decoded form
# NOTE(review): the saved output is a NameError on `dsrc`, so this cell was not run after a successful setup -- re-run to confirm
dsrc.decode_at(0)

NameError: name 'dsrc' is not defined

In [None]:
bs = 16
# Only the lengths selected by `splits[0]` are passed, to go with `dsrc.train` (presumably the training split -- confirm)
ds = LM_Dataset(dsrc.train, lens=lengths[splits[0]], bs=bs)
samp = LM_Sampler(ds)
# NOTE(review): the saved output of this cell is a NameError on `dsrc` -- re-run to confirm it works
dl = TfmdDL(ds, bs=bs, sampler=samp, num_workers=0)

NameError: name 'dsrc' is not defined

In [None]:
# Take one batch from the language-model loader and display the shape of its first element
x,y = dl.one_batch()
x.shape

In [None]:
# Decode the first row of `x`, wrapped in a 1-tuple
ds.decode((x[0],))

In [None]:
# Display a batch from the language-model loader
dl.show_batch()

## Classification

In [None]:
def pad_collate(samples, pad_idx=1, pad_first=True, backwards=False):
    "Function that collects `samples` and adds padding with `pad_idx`. Flips token order if `backwards`"
    # `samples`: items whose first element is a sequence of token ids and second element the label.
    # Returns the ids as one padded row per sample (as wide as the longest sample) and the labels as a tensor.
    max_len = max([len(s[0]) for s in samples])
    res = torch.zeros(len(samples), max_len, dtype=torch.long) + pad_idx
    # The rows are flipped at the end, so the padding has to go on the opposite side beforehand
    if backwards: pad_first = not pad_first
    for i,s in enumerate(samples):
        n = len(s[0])
        # An empty sample leaves its row fully padded (`res[i,-0:]` would select the whole row and the assignment would fail)
        if n == 0: continue
        if pad_first: res[i,max_len-n:] = LongTensor(s[0])
        else:         res[i,:n] = LongTensor(s[0])
    if backwards: res = res.flip(1)
    # NOTE(review): `TensorTextBase` is not defined in the visible part of this notebook (which defines `TensorText`) -- confirm an import still provides it
    return TensorTextBase(res), tensor(np.array([s[1] for s in samples]))

In [None]:
# Split on the row numbers this time, the items of the `DataSource` being indices into `df_tok`
splits = RandomSplitter()(range(len(df)))
# Getters turning an index `i` into the tokenized text / the label stored in `df_tok`
_get_txt = lambda i: df_tok["text"][i]
_get_lbl = lambda i: df_tok["label"][i]
# Two lists of transforms: one producing the numericalized text, the other the category
dsrc = DataSource(range(len(df)), type_tfms=[[_get_txt, Numericalize(make_vocab(count))], [_get_lbl, Categorize()]], filts=splits)
# `pad_collate` is the function used to assemble the samples into a batch
dl = TfmdDL(dsrc, collate_fn=TfmdCollate(collate_fn=pad_collate))

In [None]:
# Display a batch of the classification data (columns below: index, text, category)
dl.show_batch()

index,text,category
0,"xxbos▁xxmaj▁un▁-▁xxunk▁-▁believable▁!▁xxmaj▁meg▁xxmaj▁ryan▁does▁n't▁even▁look▁her▁usual▁xxunk▁lovable▁self▁in▁this▁,▁which▁normally▁makes▁me▁forgive▁her▁shallow▁xxunk▁acting▁xxunk▁.▁xxmaj▁hard▁to▁believe▁she▁was▁the▁producer▁on▁this▁dog▁.▁xxmaj▁plus▁xxmaj▁kevin▁xxmaj▁kline▁:▁what▁kind▁of▁suicide▁trip▁has▁his▁career▁been▁on▁?▁xxmaj▁xxunk▁...▁xxmaj▁xxunk▁!▁!▁!▁xxmaj▁finally▁this▁was▁directed▁by▁the▁guy▁who▁did▁xxmaj▁big▁xxmaj▁xxunk▁?▁xxmaj▁must▁be▁a▁replay▁of▁xxmaj▁jonestown▁-▁hollywood▁style▁.▁xxmaj▁xxunk▁!",negative
1,"xxbos▁xxmaj▁this▁is▁a▁extremely▁well▁-▁made▁film▁.▁xxmaj▁the▁acting▁,▁script▁and▁camera▁-▁work▁are▁all▁first▁-▁rate▁.▁xxmaj▁the▁music▁is▁good▁,▁too▁,▁though▁it▁is▁mostly▁early▁in▁the▁film▁,▁when▁things▁are▁still▁relatively▁xxunk▁.▁xxmaj▁there▁are▁no▁really▁xxunk▁in▁the▁cast▁,▁though▁several▁faces▁will▁be▁familiar▁.▁xxmaj▁the▁entire▁cast▁does▁an▁excellent▁job▁with▁the▁script▁.▁\n\n▁xxmaj▁but▁it▁is▁hard▁to▁watch▁,▁because▁there▁is▁no▁good▁end▁to▁a▁situation▁like▁the▁one▁presented▁.▁xxmaj▁it▁is▁now▁xxunk▁to▁blame▁the▁xxmaj▁british▁for▁setting▁xxmaj▁hindus▁and▁xxmaj▁muslims▁against▁each▁other▁,▁and▁then▁xxunk▁xxunk▁them▁into▁two▁countries▁.▁xxmaj▁there▁is▁some▁merit▁in▁this▁view▁,▁but▁it▁'s▁also▁true▁that▁no▁one▁forced▁xxmaj▁hindus▁and▁xxmaj▁muslims▁in▁the▁region▁to▁xxunk▁each▁other▁as▁they▁did▁around▁the▁time▁of▁partition▁.▁xxmaj▁it▁seems▁more▁likely▁that▁the▁xxmaj▁british▁simply▁saw▁the▁xxunk▁between▁the▁xxunk▁and▁were▁clever▁enough▁to▁exploit▁them▁to▁their▁own▁ends▁.▁\n\n▁xxmaj▁the▁result▁is▁that▁there▁is▁much▁cruelty▁and▁inhumanity▁in▁the▁situation▁and▁this▁is▁very▁unpleasant▁to▁remember▁and▁to▁see▁on▁the▁screen▁.▁xxmaj▁but▁it▁is▁never▁painted▁as▁a▁black▁-▁and▁-▁white▁case▁.▁xxmaj▁there▁is▁xxunk▁and▁xxunk▁on▁both▁sides▁,▁and▁also▁the▁hope▁for▁change▁in▁the▁younger▁generation▁.▁\n\n▁xxmaj▁there▁is▁redemption▁of▁a▁sort▁,▁in▁the▁end▁,▁when▁xxmaj▁xxunk▁has▁to▁make▁a▁hard▁choice▁between▁a▁man▁who▁has▁ruined▁her▁life▁,▁but▁also▁truly▁loved▁her▁,▁and▁her▁family▁which▁has▁xxunk▁her▁,▁then▁later▁come▁looking▁for▁her▁.▁xxmaj▁but▁by▁that▁point▁,▁she▁has▁no▁xxunk▁that▁is▁without▁great▁pain▁for▁her▁.▁\n\n▁xxmaj▁this▁film▁carries▁the▁message▁that▁both▁xxmaj▁muslims▁and▁xxmaj▁hindus▁have▁their▁grave▁faults▁,▁and▁also▁that▁both▁can▁be▁xxunk▁and▁caring▁people▁.▁xxmaj▁the▁reality▁of▁partition▁makes▁that▁xxunk▁all▁the▁more▁wrenching▁,▁since▁there▁can▁never▁be▁real▁xxunk▁across▁the▁xxmaj▁india▁/▁xxmaj▁pakistan▁border▁.▁xxmaj▁in▁that▁sense▁,▁it▁is▁similar▁to▁""▁mr▁&▁xxmaj▁mrs▁xxmaj▁x
xunk▁""▁.▁\n\n▁xxmaj▁in▁the▁end▁,▁we▁were▁glad▁to▁have▁seen▁the▁film▁,▁even▁though▁the▁resolution▁was▁heartbreaking▁.▁xxmaj▁if▁the▁xxup▁uk▁and▁xxup▁us▁could▁deal▁with▁their▁own▁xxunk▁of▁racism▁with▁this▁kind▁of▁xxunk▁,▁they▁would▁certainly▁be▁better▁off▁.",positive
2,"xxbos▁xxmaj▁every▁once▁in▁a▁long▁while▁a▁movie▁will▁come▁along▁that▁will▁be▁so▁awful▁that▁i▁feel▁compelled▁to▁warn▁people▁.▁xxmaj▁if▁i▁labor▁all▁my▁days▁and▁i▁can▁save▁but▁one▁soul▁from▁watching▁this▁movie▁,▁how▁great▁will▁be▁my▁joy▁.▁\n\n▁xxmaj▁where▁to▁begin▁my▁discussion▁of▁pain▁.▁xxmaj▁for▁xxunk▁,▁there▁was▁a▁musical▁montage▁every▁five▁minutes▁.▁xxmaj▁there▁was▁no▁character▁development▁.▁xxmaj▁every▁character▁was▁a▁stereotype▁.▁xxmaj▁we▁had▁xxunk▁guy▁,▁fat▁guy▁who▁eats▁donuts▁,▁goofy▁foreign▁guy▁,▁etc▁.▁xxmaj▁the▁script▁felt▁as▁if▁it▁were▁being▁written▁as▁the▁movie▁was▁being▁shot▁.▁xxmaj▁the▁production▁value▁was▁so▁incredibly▁low▁that▁it▁felt▁like▁i▁was▁watching▁a▁junior▁high▁video▁presentation▁.▁xxmaj▁have▁the▁directors▁,▁producers▁,▁etc▁.▁ever▁even▁seen▁a▁movie▁before▁?▁xxmaj▁xxunk▁is▁getting▁worse▁and▁worse▁with▁every▁new▁entry▁.▁xxmaj▁the▁concept▁for▁this▁movie▁sounded▁so▁funny▁.▁xxmaj▁how▁could▁you▁go▁wrong▁with▁xxmaj▁gary▁xxmaj▁coleman▁and▁a▁handful▁of▁somewhat▁legitimate▁actors▁.▁xxmaj▁but▁trust▁me▁when▁i▁say▁this▁,▁things▁went▁wrong▁,▁xxup▁very▁xxup▁wrong▁.",negative
3,"xxbos▁xxmaj▁name▁just▁says▁it▁all▁.▁i▁watched▁this▁movie▁with▁my▁dad▁when▁it▁came▁out▁and▁having▁served▁in▁xxmaj▁xxunk▁he▁had▁great▁admiration▁for▁the▁man▁.▁xxmaj▁the▁disappointing▁thing▁about▁this▁film▁is▁that▁it▁only▁concentrate▁on▁a▁short▁period▁of▁the▁man▁'s▁life▁-▁interestingly▁enough▁the▁man▁'s▁entire▁life▁would▁have▁made▁such▁an▁epic▁bio▁-▁xxunk▁that▁it▁is▁staggering▁to▁imagine▁the▁cost▁for▁production▁.▁\n\n▁xxmaj▁some▁posters▁xxunk▁to▁the▁flawed▁xxunk▁about▁the▁man▁,▁which▁are▁cheap▁shots▁.▁xxmaj▁the▁theme▁of▁the▁movie▁""▁duty▁,▁xxmaj▁honor▁,▁xxmaj▁country▁""▁are▁not▁just▁mere▁words▁xxunk▁from▁the▁lips▁of▁a▁high▁-▁xxunk▁officer▁-▁it▁is▁the▁deep▁xxunk▁of▁one▁man▁'s▁total▁devotion▁to▁his▁country▁.▁\n\n▁xxmaj▁ironically▁xxmaj▁xxunk▁being▁the▁liberal▁that▁he▁was▁xxunk▁a▁better▁understanding▁of▁the▁man▁.▁xxmaj▁he▁does▁a▁great▁job▁showing▁the▁xxunk▁general▁xxunk▁with▁the▁xxunk▁side▁of▁the▁man▁.",positive
4,"xxbos▁xxmaj▁this▁movie▁succeeds▁at▁being▁one▁of▁the▁most▁unique▁movies▁you▁'ve▁seen▁.▁xxmaj▁however▁this▁comes▁from▁the▁fact▁that▁you▁ca▁n't▁make▁heads▁or▁xxunk▁of▁this▁mess▁.▁xxmaj▁it▁almost▁seems▁as▁a▁series▁of▁challenges▁set▁up▁to▁determine▁whether▁or▁not▁you▁are▁willing▁to▁walk▁out▁of▁the▁movie▁and▁give▁up▁the▁money▁you▁just▁paid▁.▁xxmaj▁if▁you▁do▁n't▁want▁to▁feel▁xxunk▁you▁'ll▁sit▁through▁this▁horrible▁film▁and▁develop▁a▁real▁sense▁of▁pity▁for▁the▁actors▁involved▁,▁they▁'ve▁all▁seen▁better▁days▁,▁but▁then▁you▁realize▁they▁actually▁got▁paid▁quite▁a▁bit▁of▁money▁to▁do▁this▁and▁you▁'ll▁lose▁pity▁for▁them▁just▁like▁you▁'ve▁already▁done▁for▁the▁film▁.▁i▁ca▁n't▁go▁on▁enough▁about▁this▁horrible▁movie▁,▁its▁almost▁something▁that▁xxmaj▁ed▁xxmaj▁wood▁would▁have▁made▁and▁in▁that▁case▁it▁surely▁would▁have▁been▁his▁masterpiece▁.▁\n\n▁xxmaj▁to▁start▁you▁are▁forced▁to▁sit▁through▁an▁opening▁dialogue▁the▁likes▁of▁which▁you▁'ve▁never▁seen▁/▁heard▁,▁this▁thing▁has▁got▁to▁be▁five▁minutes▁long▁.▁xxmaj▁on▁top▁of▁that▁it▁is▁narrated▁,▁as▁to▁suggest▁that▁you▁the▁viewer▁can▁not▁read▁.▁xxmaj▁then▁we▁meet▁xxmaj▁mr▁.▁xxmaj▁xxunk▁and▁the▁xxunk▁of▁terrible▁lines▁gets▁xxunk▁,▁it▁is▁as▁if▁he▁is▁operating▁solely▁to▁get▁lines▁on▁to▁the▁movie▁poster▁tag▁line▁.▁xxmaj▁soon▁we▁meet▁xxmaj▁stephen▁xxmaj▁xxunk▁,▁who▁i▁typically▁enjoy▁)▁and▁he▁does▁his▁best▁not▁to▁drown▁in▁this▁but▁ultimately▁he▁does▁.▁xxmaj▁then▁comes▁the▁ultimate▁insult▁,▁xxmaj▁tara▁xxmaj▁reid▁playing▁an▁intelligent▁role▁,▁oh▁help▁us▁!▁xxmaj▁tara▁xxmaj▁reid▁is▁not▁a▁very▁talented▁actress▁and▁somehow▁she▁xxunk▁gets▁roles▁in▁movies▁,▁in▁my▁opinion▁though▁she▁should▁stick▁to▁movies▁of▁the▁xxmaj▁american▁pie▁type▁.▁\n\n▁xxmaj▁all▁in▁all▁you▁just▁may▁want▁to▁see▁this▁for▁yourself▁when▁it▁comes▁out▁on▁video▁,▁i▁know▁that▁i▁got▁a▁kick▁out▁of▁it▁,▁i▁mean▁lets▁all▁be▁honest▁here▁,▁sometimes▁its▁comforting▁to▁xxunk▁in▁the▁shortcomings▁of▁others▁.",negative
5,"xxbos▁xxmaj▁from▁the▁start▁,▁you▁know▁how▁this▁movie▁will▁end▁.▁xxmaj▁it▁'s▁so▁full▁of▁clichés▁your▁typical▁xxup▁xxunk▁member▁will▁not▁even▁like▁this▁movie▁.▁i▁give▁it▁2▁out▁of▁10▁,▁only▁because▁of▁the▁acting▁of▁xxmaj▁william▁xxmaj▁xxunk▁.▁i▁ca▁n't▁believe▁people▁voted▁6▁+▁for▁this▁movie▁.▁xxmaj▁it▁'s▁so▁biased▁towards▁a▁'▁certain▁point▁of▁view▁'▁(▁once▁a▁thief▁...▁)▁.▁xxmaj▁people▁are▁n't▁born▁bad▁.▁xxmaj▁neither▁are▁they▁born▁good▁.▁xxmaj▁they▁are▁born▁with▁a▁clean▁xxunk▁.▁xxmaj▁it▁'s▁society▁,▁parents▁and▁education▁what▁makes▁them▁who▁they▁are▁.▁xxmaj▁and▁if▁they▁take▁the▁wrong▁turn▁,▁somewhere▁down▁the▁line▁,▁it▁certainly▁is▁n't▁going▁to▁be▁the▁xxmaj▁american▁xxunk▁system▁that▁gets▁them▁back▁on▁track▁!▁xxmaj▁anyway▁,▁avoid▁this▁movie▁like▁the▁plague▁.▁i▁bet▁you▁have▁better▁things▁to▁do▁with▁your▁time▁than▁waste▁it▁on▁this▁piece▁of▁crap▁.",negative
6,"xxbos▁xxmaj▁there▁were▁a▁lot▁of▁truly▁great▁horror▁movies▁produced▁in▁the▁seventies▁-▁but▁this▁film▁certainly▁is▁n't▁one▁of▁them▁!▁xxmaj▁it▁'s▁a▁shame▁xxmaj▁the▁xxmaj▁child▁is▁n't▁better▁as▁it▁works▁from▁a▁decent▁idea▁that▁takes▁in▁a▁couple▁of▁sometimes▁successful▁horror▁themes▁.▁xxmaj▁we▁have▁the▁idea▁of▁a▁xxunk▁child▁,▁which▁worked▁so▁well▁in▁classic▁films▁such▁as▁xxmaj▁the▁xxmaj▁bad▁xxmaj▁xxunk▁and▁then▁we▁have▁the▁central▁zombie▁theme▁,▁which▁of▁course▁has▁been▁the▁xxunk▁of▁many▁a▁successful▁horror▁movie▁.▁xxmaj▁the▁plot▁is▁basically▁this▁:▁young▁girl▁blames▁a▁load▁of▁people▁for▁the▁death▁of▁her▁mother▁,▁so▁she▁goes▁to▁the▁graveyard▁and▁raises▁the▁dead▁to▁get▁revenge▁(▁as▁you▁do▁)▁.▁xxmaj▁this▁is▁all▁well▁and▁good▁,▁except▁for▁the▁fact▁that▁it▁'s▁boring▁!▁xxmaj▁nothing▁happens▁for▁most▁of▁the▁film▁,▁and▁although▁it▁does▁pick▁up▁at▁the▁end▁with▁some▁nice▁gore▁;▁it▁'s▁not▁enough▁of▁a▁finale▁to▁justify▁sitting▁through▁the▁rest▁of▁it▁.▁xxmaj▁the▁film▁was▁obviously▁shot▁on▁a▁budget▁as▁the▁locations▁look▁cheap▁and▁all▁the▁actors▁are▁rubbish▁.▁xxmaj▁there▁'s▁really▁not▁much▁i▁can▁say▁about▁the▁film▁overall▁as▁there▁is▁n't▁much▁to▁it▁.▁xxmaj▁the▁xxmaj▁child▁is▁a▁xxunk▁seventies▁horror▁flick▁and▁i▁certainly▁do▁n't▁recommend▁it▁.",negative
7,"xxbos▁i▁was▁xxunk▁enough▁to▁meet▁xxmaj▁george▁xxmaj▁pal▁(▁and▁still▁have▁my▁xxup▁xxunk▁:▁xxunk▁poster▁xxunk▁by▁him▁)▁at▁a▁convention▁shortly▁after▁the▁release▁,▁and▁asked▁him▁why▁he▁chose▁to▁do▁the▁film▁""▁camp▁""▁.▁xxmaj▁before▁he▁could▁answer▁,▁two▁studio▁flacks▁xxunk▁and▁xxunk▁me▁on▁how▁the▁studio▁""▁knew▁best▁""▁and▁how▁""▁no▁one▁will▁take▁such▁a▁film▁seriously▁""▁.▁i▁had▁been▁reading▁the▁xxmaj▁bantam▁xxunk▁for▁a▁couple▁of▁years▁thanks▁to▁a▁friend▁(▁xxunk▁xxunk▁of▁the▁1970s▁will▁recall▁xxmaj▁xxunk▁and▁his▁band▁?▁i▁was▁in▁a▁couple▁of▁years▁of▁that▁with▁him▁)▁,▁and▁had▁higher▁hopes▁than▁what▁we▁got▁.▁\n\n▁xxmaj▁the▁flacks▁xxunk▁that▁no▁high▁adventure▁would▁ever▁be▁done▁seriously▁,▁and▁so▁doing▁'▁camp▁'▁was▁the▁only▁way▁.▁xxmaj▁several▁other▁fans▁jumped▁in▁on▁my▁side▁,▁with▁xxmaj▁pal▁listening▁as▁best▁he▁could▁.▁xxmaj▁at▁the▁end▁of▁the▁little▁event▁,▁xxmaj▁pal▁came▁up▁to▁us▁and▁xxunk▁,▁xxunk▁he▁could▁have▁done▁more▁and▁better▁.▁\n\n▁xxup▁star▁xxup▁wars▁put▁the▁lie▁to▁the▁flacks▁,▁and▁a▁year▁after▁xxmaj▁pal▁'s▁death▁,▁xxmaj▁spielberg▁and▁xxmaj▁lucas▁proved▁that▁xxmaj▁doc▁xxmaj▁savage▁could▁have▁easily▁been▁the▁next▁major▁movie▁franchise▁...▁if▁it▁had▁n't▁been▁for▁the▁flacks▁.▁\n\n▁xxmaj▁tear▁out▁the▁memory▁or▁history▁of▁xxmaj▁doc▁,▁and▁the▁film▁would▁have▁been▁worth▁a▁6▁/▁10▁rating▁as▁nothing▁more▁than▁a▁mindless▁popcorn▁xxunk▁.▁\n\n▁xxmaj▁but▁destroying▁the▁legacy▁like▁that▁was▁no▁less▁an▁xxunk▁than▁killing▁a▁baby▁in▁the▁xxunk▁.▁\n\n▁xxmaj▁doc▁xxmaj▁savage▁can▁still▁come▁to▁the▁screen▁,▁and▁survive▁the▁inevitable▁comparisons▁by▁the▁ill▁-▁informed▁to▁xxmaj▁indiana▁xxmaj▁jones▁,▁but▁it▁would▁have▁to▁be▁done▁in▁all▁xxunk▁and▁earnest▁to▁xxunk▁the▁glory▁that▁we▁should▁expect▁from▁the▁xxmaj▁first▁xxmaj▁american▁xxmaj▁superhero▁.▁\n\n▁xxup▁xxunk▁:▁xxmaj▁yes▁,▁there▁was▁a▁second▁script▁for▁xxup▁xxunk▁xxup▁of▁xxup▁evil▁,▁and▁it▁'s▁a▁lot▁more▁serious▁.▁xxmaj▁yes▁,▁there▁was▁xxunk▁footage▁shot▁,▁but▁mostly▁establishing▁shots▁and▁very▁little▁with▁actors▁.▁xxmaj▁and▁,▁yes▁,▁
there▁_▁is▁_▁a▁one▁-▁xxunk▁of▁xxmaj▁ron▁xxmaj▁xxunk▁xxunk▁over▁a▁xxunk▁wall▁and▁xxunk▁at▁something▁over▁his▁shoulder▁with▁a▁specially▁built▁bronze▁xxunk▁.▁xxmaj▁xxunk▁'s▁wearing▁a▁xxunk▁over▁a▁button▁down▁white▁shirt▁with▁a▁bronze▁tie▁,▁and▁the▁words▁""▁doc▁xxup▁savage▁:▁xxup▁xxunk▁xxup▁of▁evil▁...▁coming▁xxmaj▁next▁xxmaj▁summer▁!▁""▁xxup▁xxunk▁:▁xxmaj▁if▁anyone▁knows▁who▁the▁studio▁flacks▁were▁that▁accompanied▁xxmaj▁george▁xxmaj▁pal▁in▁1975▁to▁xxmaj▁san▁xxmaj▁diego▁for▁the▁convention▁,▁xxunk▁the▁idiots▁up▁the▁side▁of▁the▁head▁and▁call▁them▁the▁idiots▁that▁they▁are▁.▁xxmaj▁at▁the▁time▁,▁they▁were▁doing▁xxunk▁and▁xxmaj▁fu▁xxmaj▁xxunk▁in▁xxunk▁and▁xxunk▁xxunk▁pants▁,▁and▁carrying▁xxmaj▁xxunk▁xxunk▁.",negative
8,"xxbos▁xxmaj▁this▁film▁is▁the▁freshman▁effort▁of▁xxmaj▁xxunk▁xxmaj▁xxunk▁and▁her▁new▁production▁company▁.▁xxmaj▁while▁it▁suffers▁from▁a▁few▁problems▁,▁as▁every▁low▁budget▁production▁does▁,▁it▁is▁a▁good▁start▁for▁xxmaj▁ms▁.▁xxmaj▁xxunk▁and▁her▁company▁.▁\n\n▁xxmaj▁the▁story▁is▁not▁terribly▁new▁having▁been▁done▁in▁films▁like▁xxmaj▁the▁xxmaj▁burning▁and▁every▁xxmaj▁friday▁the▁13th▁since▁part▁2▁.▁xxmaj▁but▁,▁the▁performances▁are▁heartfelt▁.▁xxmaj▁so▁many▁big▁budget▁movies▁just▁have▁the▁actors▁going▁through▁the▁motions▁,▁its▁always▁nice▁to▁see▁actors▁really▁trying▁to▁xxunk▁their▁craft▁.▁\n\n▁xxmaj▁the▁story▁deals▁with▁the▁xxunk▁possible▁return▁)▁of▁a▁disfigured▁xxunk▁.▁xxmaj▁the▁others▁are▁xxunk▁to▁xxunk▁,▁but▁the▁xxunk▁of▁the▁event▁xxunk▁each▁person▁in▁different▁directions▁in▁their▁xxunk▁.▁xxmaj▁ten▁years▁later▁,▁the▁friends▁are▁murdered▁one▁by▁one▁by▁a▁gruesome▁xxunk▁known▁as▁""▁the▁xxmaj▁xxunk▁""▁.▁xxmaj▁who▁will▁survive▁?▁xxmaj▁you▁have▁to▁watch▁.▁\n\n▁xxmaj▁if▁you▁are▁xxmaj▁roger▁xxmaj▁ebert▁or▁any▁number▁of▁arrogant▁critics▁,▁you▁probably▁should▁n't▁bother▁.▁xxmaj▁but▁if▁your▁taste▁run▁more▁towards▁xxmaj▁joe▁xxmaj▁bob▁xxmaj▁briggs▁and▁you▁want▁to▁see▁a▁group▁of▁people▁xxunk▁their▁craft▁,▁then▁check▁out▁""▁the▁xxmaj▁xxunk▁""▁.",negative
9,"xxbos▁xxmaj▁greg▁xxmaj▁davis▁and▁xxmaj▁xxunk▁xxmaj▁xxunk▁take▁some▁xxunk▁statements▁by▁a▁xxunk▁,▁add▁some▁commentary▁by▁a▁bunch▁of▁xxunk▁-▁right▁xxunk▁,▁xxunk▁the▁most▁extreme▁positions▁of▁the▁most▁xxunk▁xxmaj▁xxunk▁on▁the▁planet▁to▁everyone▁who▁calls▁themselves▁a▁xxmaj▁xxunk▁,▁and▁presents▁this▁as▁the▁xxunk▁of▁xxmaj▁islam▁.▁xxmaj▁maybe▁their▁next▁film▁will▁involve▁xxunk▁xxmaj▁fred▁xxmaj▁phelps▁and▁the▁xxunk▁of▁the▁xxmaj▁xxunk▁xxmaj▁baptist▁xxmaj▁church▁,▁adding▁commentary▁by▁some▁xxunk▁atheist▁""▁xxunk▁,▁and▁call▁their▁film▁""▁what▁the▁xxmaj▁world▁xxmaj▁needs▁to▁xxmaj▁know▁xxmaj▁about▁xxmaj▁christianity▁.▁""▁xxmaj▁ultimately▁,▁this▁film▁suffers▁from▁both▁poor▁production▁values▁and▁lack▁of▁attention▁to▁the▁most▁basic▁standards▁of▁xxunk▁.▁xxmaj▁do▁n't▁waste▁your▁time▁and▁money▁;▁just▁turn▁on▁your▁xxup▁am▁radio▁and▁listen▁to▁xxmaj▁rush▁xxmaj▁xxunk▁for▁a▁couple▁of▁days▁for▁free▁and▁you▁'ll▁get▁the▁same▁message▁with▁the▁same▁level▁of▁intellectual▁analysis▁.",negative
