In [None]:
#export
from local.imports import *
from local.test import *
from local.core import *
from local.data.transform import *
from local.data.core import *
from local.data.source import *
from local.data.external import *
from local.data.pipeline import *
from local.text.core import *
from local.notebook.showdoc import show_doc

In [None]:
#default_exp text.data
#default_cls_lvl 3

# Text data

> Functions and transforms to help gather text data in a `DataSource`

## Numericalizing

In [None]:
def make_vocab(count, min_freq=3, max_vocab=60000):
    "Create a vocab from `Counter` `count`: special tokens first, then the `max_vocab` most common tokens seen at least `min_freq` times"
    vocab = [tok for tok,freq in count.most_common(max_vocab) if freq >= min_freq]
    #Special tokens are always present and come first, in their declared order
    for special in reversed(defaults.text_spec_tok):
        try: vocab.remove(special)
        except ValueError: pass
        vocab.insert(0, special)
    vocab = vocab[:max_vocab]
    leftover = len(vocab)%8
    if leftover != 0 and len(vocab) < max_vocab:
        #Pad with 'xxfake' so the vocab size is a multiple of 8 for fast mixed precision training
        vocab.extend(['xxfake'] * (8-leftover))
    return vocab

In [None]:
# Token frequencies: a=4, b=2, c=2, d=1
count = Counter(['a', 'a', 'a', 'a', 'b', 'b', 'c', 'c', 'd'])
# With the default min_freq=3 only 'a' is kept, next to the special tokens and the 'xxfake' padding
test_eq(set(make_vocab(count)), set(defaults.text_spec_tok + 'a xxfake'.split()))
# The padding brings the vocab size to a multiple of 8
test_eq(len(make_vocab(count))%8, 0)
# min_freq=1 keeps every token
test_eq(set(make_vocab(count, min_freq=1)), set(defaults.text_spec_tok + 'a b c d xxfake'.split()))
# max_vocab=12 truncates the vocab; no 'xxfake' is expected since padding is skipped once `max_vocab` is reached
test_eq(set(make_vocab(count,max_vocab=12, min_freq=1)), set(defaults.text_spec_tok + 'a b c'.split()))

In [None]:
# export
class Numericalize(ItemTransform):
    "Reversible transform of tokenized texts to numericalized ids"
    def __init__(self, vocab=None, min_freq=3, max_vocab=60000, sep=None):
        # `sep` is the string separating tokens in a text; falls back to the library default
        self.sep = sep or defaults.text_token_sep
        self.vocab,self.min_freq,self.max_vocab = vocab,min_freq,max_vocab
        # Token -> id lookup (unknown tokens map to 0). 'xxfake' entries only pad the vocab size,
        # so they are left out of the lookup, exactly as `setup` does when it builds the vocab itself.
        self.o2i = None if vocab is None else defaultdict(int, {v:k for k,v in enumerate(vocab) if v != 'xxfake'})

    def setup(self, dsrc):
        "Build `vocab` and `o2i` from the texts in `dsrc` (its `train` attribute if it has one) unless a vocab was given"
        if dsrc is None: return
        if self.vocab is None:
            dsrc = getattr(dsrc,'train',dsrc)
            # Count every token of every text
            count = Counter(p for o in dsrc for p in o.split(self.sep))
            self.vocab = make_vocab(count, min_freq=self.min_freq, max_vocab=self.max_vocab)
            self.o2i = defaultdict(int, {v:k for k,v in enumerate(self.vocab) if v != 'xxfake'})

    # Text -> list of ids, one per token
    def encodes(self, o):      return [self.o2i[o_] for o_ in o.split(self.sep)]
    # Ids -> text, dropping padding tokens
    def decodes(self, o)->Str: return self.sep.join([self.vocab[o_] for o_ in o if self.vocab[o_] != PAD])

In [None]:
# No vocab is passed: `setup` builds it from the two texts, keeping every token (min_freq=1)
num = Numericalize(min_freq=1, sep=' ')
num.setup(L('This is an example of text', 'this is another text'))
test_eq(set(num.vocab), set(defaults.text_spec_tok + 'This is an example of text this another xxfake'.split()))
test_eq(len(num.vocab)%8, 0)
# `start` is reused by the next cell
start = 'This is an example of text'
t = num(start)
test_eq(t, [11, 9, 12, 13, 14, 10])
# Decoding the ids gives the original text back
test_eq(num.decode(t), start)

In [None]:
# With min_freq=2 only 'is' and 'text' (present in both texts) make it into the vocab
num = Numericalize(min_freq=2, sep=' ')
num.setup(L('This is an example of text', 'this is another text'))
test_eq(set(num.vocab), set(defaults.text_spec_tok + 'is text xxfake'.split()))
test_eq(len(num.vocab)%8, 0)
# `start` comes from the previous cell; tokens missing from the vocab are encoded as 0
t = num(start)
test_eq(t, [0, 9, 0, 0, 0, 10])
# ... and id 0 decodes to the unknown token
test_eq(num.decode(t), f'{UNK} is {UNK} {UNK} {UNK} text')

## LMPreloader -

In [None]:
class LMCollate():
    "Collate function stitching sampled texts into rows of `seq_len+1` tokens; returns the inputs and the targets (inputs shifted by one)"
    def __init__(self, bs=64, seq_len=72):
        self.bs = bs
        self.seq_len = seq_len
        #Per-row reading position in the row's first text; must be set before the first call
        self.offset = None

    def __call__(self, samples):
        #Samples has more than bs elements if more than one text is needed to make one of the batch
        rows,pieces,gathered,row = [],[],0,0
        for sample in samples:
            toks = tensor(sample).long()[0]
            start,missing = self.offset[row],self.seq_len-gathered
            #Take one token more than missing: it is the target of the last input
            pieces.append(toks[start:start+missing+1])
            gathered += len(pieces[-1])
            #Resume after the tokens used as inputs, or at the start of the following text if this one is exhausted
            self.offset[row] = start+missing if start+missing < len(toks) else 0
            if gathered >= self.seq_len+1:
                rows.append(torch.cat(pieces))
                pieces,gathered,row = [],0,row+1
        batch = torch.stack(rows, dim=0)
        return batch[:,:-1],batch[:,1:]

In [None]:
# Five fake texts of lengths 21, 32, 10, 16 and 26, each wrapped in a 1-tuple
items = [(range(21),), (range(32),), (range(10),), (range(16),), (range(26),)]
# Cumulative lengths (not referenced again in this cell)
cumlen = tensor([len(t) for t in items]).cumsum(0)
tst = LMCollate(bs=5, seq_len=10)
# Reading position of each of the 5 rows inside its first text
tst.offset = [0,0,21,0,5]
# First batch: every row can be filled from a single text
res = tst([items[0], items[1], items[1], items[3], items[4]])

for i in [0,1,3]: 
    test_eq(res[0][i], tensor(range(10)))
    test_eq(res[1][i], tensor(range(1,11)))
test_eq(res[0][2], tensor(range(21,31)))
test_eq(res[1][2], tensor(range(22,32)))
test_eq(res[0][4], tensor(range(5,15)))
test_eq(res[1][4], tensor(range(6,16)))
# Each offset moved forward by seq_len
test_eq(tst.offset, [10, 10, 31, 10, 15])

# Second batch: rows 2 and 3 reach the end of their text and continue in the next one
res = tst([items[0], items[1], items[1], items[2], items[3], items[4], items[4]])
for i in [0,1]: 
    test_eq(res[0][i], tensor(range(10,20)))
    test_eq(res[1][i], tensor(range(11,21)))
test_eq(res[0][2], torch.cat([tensor([31]), tensor(range(9))]))
test_eq(res[1][2], tensor(range(10)))
test_eq(res[0][3], torch.cat([tensor(range(10,16)), tensor(range(4))]))
test_eq(res[1][3], torch.cat([tensor(range(11,16)), tensor(range(5))]))
test_eq(res[0][4], tensor(range(15,25)))
test_eq(res[1][4], tensor(range(16,26)))
# Rows 2 and 3 now point inside the text they continued in
test_eq(tst.offset, [20, 20, 9, 4, 25])

In [None]:
class LMSampler(BatchSampler):
    "Batch sampler for language modeling: yields, for each batch, the indices of the texts needed to fill `bs` rows of `seq_len` tokens (plus one target)"
    def __init__(self, ds, cf, lengths=None, bs=64, seq_len=72, shuffle=False):
        # `cf` is the collate object whose `offset` attribute is set by `batchify`
        self.ds,self.cf,self.bs,self.seq_len,self.shuffle = ds,cf,bs,seq_len,shuffle
        self.lengths = [len(o[0]) for o in ds] if lengths is None else lengths
        # Number of tokens in each of the `bs` contiguous streams the corpus is split into
        self.n_batch = sum(self.lengths) // bs

    def __iter__(self):
        self.batchify()
        # Yield exactly the `len(self)` complete batches. Iterating up to `n_batch-1` would add a
        # trailing partial batch whenever `n_batch-1` is not a multiple of `seq_len`; that batch
        # reads past the end of each stream and can index past the last text.
        for i in range(0, len(self)*self.seq_len, self.seq_len):
            # Position (in the concatenated corpus) of the last token each row needs: its final target
            idx = tensor(range(self.bs)) * self.n_batch + i + self.seq_len
            # Index of the text containing that position = number of texts ending at or before it
            end_idx = len(self.cumlen) - (self.cumlen[:,None] > idx[None]).sum(0)
            # Each row needs every text from the one it is currently reading to the one holding its last token
            s_idx = [list(range(i1,i2+1)) for (i1,i2) in zip(self.start_idx, end_idx)]
            yield [self.idxs[i] for s in s_idx for i in s]
            self.start_idx = end_idx

    def batchify(self):
        "Pick the order of the texts and compute, for each row, its first text and the offset inside it"
        self.idxs = torch.randperm(len(self.ds)) if self.shuffle else tensor(range(len(self.ds)))
        self.cumlen = (tensor(self.lengths)[self.idxs] if self.shuffle else tensor(self.lengths)).cumsum(0)
        # First token of each row in the concatenated corpus
        idx = tensor(range(self.bs)) * self.n_batch
        self.start_idx = len(self.cumlen) - (self.cumlen[:,None] > idx[None]).sum(0)
        # Offset of that token inside its text = position minus the cumulated length of the previous texts
        self.cf.offset = idx - torch.cat([tensor([0]), self.cumlen])[self.start_idx]

    # Number of complete batches: one token per stream is kept aside for the last target
    def __len__(self): return (self.n_batch-1) // self.seq_len

In [None]:
# Same five fake texts as for the collate tests (lengths 21, 32, 10, 16, 26)
items = [(range(21),), (range(32),), (range(10),), (range(16),), (range(26),)]
cf = LMCollate(bs=5, seq_len=10)
s = LMSampler(items, cf, bs=5, seq_len=10)
# `batchify` stores the starting offset of each row on the collate object
s.batchify()
test_eq(cf.offset, tensor([0,0,21,0,5]))
itr = iter(s)

# First batch: one text per row
b1 = next(itr)
test_eq(b1, [0, 1, 1, 3, 4])

# Second batch: some rows need two texts
b2 = next(itr)
test_eq(b2, [0, 1, 1, 2, 3, 4, 4])

# The iterator is exhausted after two batches
test_fail(lambda: next(itr))

In [None]:
#TODO: make better
# Shuffled version: only checks that two batches are produced and that a third `next` fails
# (`items` comes from the previous cell)
cf = LMCollate(bs=5, seq_len=10)
s = LMSampler(items, cf, bs=5, seq_len=10, shuffle=True)
itr = iter(s)
b1 = next(itr)
b2 = next(itr)
test_fail(lambda: next(itr))

In [None]:
#export
from IPython.display import display, HTML

class TensorText(TensorBase):
    "Tensor of token ids that is shown as a table with one row per sample"

    def get_ctxs(self, max_samples=10, **kwargs):
        "Create one display context (a row holding only its `index`) for each of the first `max_samples` samples"
        n_samples = min(self.shape[0], max_samples)
        df = pd.DataFrame({'index': range(n_samples)})
        return [df.iloc[i] for i in range(n_samples)]

    def display(self, ctxs):
        "Render the contexts `ctxs` as an HTML table"
        df = pd.DataFrame(ctxs)
        # `None` disables column truncation; negative values are deprecated since pandas 1.0
        with pd.option_context('display.max_colwidth', None):
            # Inside the method body `display` resolves to the function imported from IPython
            display(HTML(df.to_html(index=False)))

In [None]:
#Other approach
class LM_PreLoader(GetAttr):
    "An intermediate between a dataset with texts and a DataLoader"
    _xtra = ['show', 'decode', 'show_at', 'decode_at', 'decode_batch']
    def __init__(self, ds, lengths=None, bs=64, seq_len=70, shuffle=False):
        self.ds,self.bs,self.seq_len,self.shuffle = ds,bs,seq_len,shuffle
        self.lengths = [len(o[0]) for o in ds] if lengths is None else lengths
        # Number of tokens in each of the `bs` contiguous streams the corpus is split into
        self.n_batch = sum(self.lengths) // bs
        self.batchify()
        self.default = self.ds

    # Number of (input, target) samples: complete sequences per stream times the number of streams
    def __len__(self): return ((self.n_batch-1) // self.seq_len) * self.bs

    def __getitem__(self, i):
        "Return the `seq_len` input tokens and the `seq_len` targets (shifted by one) of sample `i`"
        # Sample `i` belongs to stream `i % bs` and is the `i // bs`-th sequence of that stream
        k = (i % self.bs) * self.n_batch + (i // self.bs) * self.seq_len
        # First text whose cumulated length exceeds `k`, i.e. the text containing token `k`
        item_idx = (self.cumlen > k).nonzero().min().item()
        offset = k if item_idx==0 else k-self.cumlen[item_idx-1]
        text = self.ds[self.idxs[item_idx]][0][offset:]
        # `seq_len+1` tokens are needed (the extra one is the last target): continue with the following texts
        while len(text) <= self.seq_len:
            item_idx += 1
            text += self.ds[self.idxs[item_idx]][0]
        return TensorText(tensor(text[:self.seq_len])),TensorText(tensor(text[1:self.seq_len+1]))

    def batchify(self):
        "Pick the order in which the texts are read and compute the matching cumulated lengths"
        # Use the wrapped dataset, not a global `ds`, to size the permutation
        self.idxs = torch.randperm(len(self.ds)) if self.shuffle else tensor(range(len(self.ds)))
        self.cumlen = (tensor(self.lengths)[self.idxs] if self.shuffle else tensor(self.lengths)).cumsum(0)

In [None]:
# Six fake texts; text number j is list(range(lengths[j])) paired with a dummy second element
lengths = [10,7,19,23,5,42]
ds = LM_PreLoader([(list(range(l)), 0) for l in lengths], lengths=lengths, bs=5, seq_len=4)
x,y = ds[0]
# The targets are the inputs shifted by one token
test_eq(x[1:], y[:-1])
test_eq(x+1, y)
#Going on the seq dimension reads the text in order
test_eq(torch.cat([ds[5*i][0] for i in range(5)]), 
        tensor(list(range(10))+list(range(7))+list(range(3))))
#3 is skipped for the next sample in the batch since it's the last target
test_eq(torch.cat([ds[5*i+1][0] for i in range(5)]),
        tensor(list(range(4,19))+list(range(5))))

In [None]:
class LM_Dataset(GetAttr):
    "Dataset whose item `sl` is a whole batch `[inputs, targets]`, each made of `bs` rows of `seq_len` tokens looked up one by one in the texts of `ds`"
    _xtra = ['show', 'decode', 'show_at', 'decode_at', 'decode_batch']
    def __init__(self, ds, lens=None, bs=64, seq_len=70, shuffle=False, cache=None):
        self.default = self.ds = ReindexCollection(ds, cache=cache)
        self.bs = bs
        self.seq_len = seq_len
        self.shuffle = shuffle
        if lens is None: lens = [len(doc[0]) for doc in ds]
        # The lengths share the index order of the texts
        self.lens = ReindexCollection(lens, idxs=self.ds.idxs)
        # One token is held back so that the final input still has a label
        n_usable = sum(lens)-1
        self.n = round_multiple(n_usable, bs*seq_len, round_down=True)
        # Chunk size: number of tokens read by each row of the batch
        self.cs = self.n//bs
        self.reset()

    def reset(self):
        "Reshuffle the texts if needed and recompute their cumulated lengths"
        if self.shuffle: self.ds.shuffle()
        self.cum_lens = np.cumsum(self.lens)

    def get_loc(self, sl, b, it, xy):
        "Token at position `it` of row `b` in batch `sl`; `xy` is 0 for the input and 1 for the target"
        tokidx = b*self.cs + sl*self.seq_len + it + xy
        # First text whose cumulated length is greater than `tokidx`
        docidx = np.searchsorted(self.cum_lens, tokidx+1)
        doc = self.ds[docidx][0]
        # The index is negative: it counts back from the end of that text
        return doc[tokidx-self.cum_lens[docidx]]

    def __len__(self): return self.n//(self.bs*self.seq_len)

    def __getitem__(self, sl):
        if sl>=len(self):
            raise IndexError
        def _row(b, xy): return [self.get_loc(sl, b, it, xy) for it in range(self.seq_len)]
        inputs  = [_row(b, 0) for b in range(self.bs)]
        targets = [_row(b, 1) for b in range(self.bs)]
        return [inputs, targets]

In [None]:
# Toy corpus: lower- then upper-case words ending in '/', each wrapped in a one-element list
bs,sl = 4,3
src = 'abcde/ fghijk/ lmnopqrs/ tuv/ wxy/ z/ '
src += src.upper()
txts = [[o] for o in src.split()]

In [None]:
# Without shuffling each row reads the corpus in order, from batch to batch
t = LM_Dataset(txts, bs=bs, seq_len=sl)
list(t)

[[[['a', 'b', 'c'], ['n', 'o', 'p'], ['z', '/', 'A'], ['L', 'M', 'N']],
  [['b', 'c', 'd'], ['o', 'p', 'q'], ['/', 'A', 'B'], ['M', 'N', 'O']]],
 [[['d', 'e', '/'], ['q', 'r', 's'], ['B', 'C', 'D'], ['O', 'P', 'Q']],
  [['e', '/', 'f'], ['r', 's', '/'], ['C', 'D', 'E'], ['P', 'Q', 'R']]],
 [[['f', 'g', 'h'], ['/', 't', 'u'], ['E', '/', 'F'], ['R', 'S', '/']],
  [['g', 'h', 'i'], ['t', 'u', 'v'], ['/', 'F', 'G'], ['S', '/', 'T']]],
 [[['i', 'j', 'k'], ['v', '/', 'w'], ['G', 'H', 'I'], ['T', 'U', 'V']],
  [['j', 'k', '/'], ['/', 'w', 'x'], ['H', 'I', 'J'], ['U', 'V', '/']]],
 [[['/', 'l', 'm'], ['x', 'y', '/'], ['J', 'K', '/'], ['/', 'W', 'X']],
  [['l', 'm', 'n'], ['y', '/', 'z'], ['K', '/', 'L'], ['W', 'X', 'Y']]]]

In [None]:
# With shuffle=True the texts are read in a random order (no seed is set here, so the output varies between runs)
t = LM_Dataset(txts, bs=bs, seq_len=sl, shuffle=True)
list(t)

[[[['F', 'G', 'H'], ['C', 'D', 'E'], ['s', '/', 'z'], ['O', 'P', 'Q']],
  [['G', 'H', 'I'], ['D', 'E', '/'], ['/', 'z', '/'], ['P', 'Q', 'R']]],
 [[['I', 'J', 'K'], ['/', 't', 'u'], ['/', 'T', 'U'], ['R', 'S', '/']],
  [['J', 'K', '/'], ['t', 'u', 'v'], ['T', 'U', 'V'], ['S', '/', 'Z']]],
 [[['/', 'a', 'b'], ['v', '/', 'l'], ['V', '/', 'w'], ['Z', '/', 'W']],
  [['a', 'b', 'c'], ['/', 'l', 'm'], ['/', 'w', 'x'], ['/', 'W', 'X']]],
 [[['c', 'd', 'e'], ['m', 'n', 'o'], ['x', 'y', '/'], ['X', 'Y', '/']],
  [['d', 'e', '/'], ['n', 'o', 'p'], ['y', '/', 'L'], ['Y', '/', 'f']]],
 [[['/', 'A', 'B'], ['p', 'q', 'r'], ['L', 'M', 'N'], ['f', 'g', 'h']],
  [['A', 'B', 'C'], ['q', 'r', 's'], ['M', 'N', 'O'], ['g', 'h', 'i']]]]

In [None]:
# 25 one-character texts: 24 usable tokens once one is held back for the last label, i.e. two batches of 4x3
t = LM_Dataset(string.ascii_lowercase[:25], bs=bs, seq_len=sl)
list(t),len(t)

([[[['a', 'b', 'c'], ['g', 'h', 'i'], ['m', 'n', 'o'], ['s', 't', 'u']],
   [['b', 'c', 'd'], ['h', 'i', 'j'], ['n', 'o', 'p'], ['t', 'u', 'v']]],
  [[['d', 'e', 'f'], ['j', 'k', 'l'], ['p', 'q', 'r'], ['v', 'w', 'x']],
   [['e', 'f', 'g'], ['k', 'l', 'm'], ['q', 'r', 's'], ['w', 'x', 'y']]]],
 2)

In [None]:
# 24 one-character texts: only 23 usable tokens, rounded down to a single batch of 4x3
t = LM_Dataset(string.ascii_lowercase[:24], bs=bs, seq_len=sl)
list(t),len(t)

([[[['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i'], ['j', 'k', 'l']],
   [['b', 'c', 'd'], ['e', 'f', 'g'], ['h', 'i', 'j'], ['k', 'l', 'm']]]],
 1)

In [None]:
#Other approach v2
class LMBatchDS(BatchDS):
    "An intermediate between a dataset with texts and a DataLoader"
    def __init__(self, ds, lengths=None, bs=64, seq_len=70, shuffle=False, sampler=None):
        super().__init__(ds, bs=bs, shuffle=shuffle, sampler=sampler)
        self.bs,self.seq_len = bs,seq_len
        self.lengths = [len(o[0]) for o in ds] if lengths is None else lengths
        # Number of tokens in each of the `bs` contiguous streams the corpus is split into
        self.n_batch = sum(self.lengths) // bs

    # Number of complete batches: one token per stream is kept aside for the last target
    def __len__(self): return ((self.n_batch-1) // self.seq_len)

    def get_batches(self, idxs):
        "Yield the batches, each a list of `bs` (input, target) pairs, reading the texts in the order given by `idxs`"
        # `idxs` is a list of lists of item indices; flatten it (presumably produced by `BatchDS` — confirm)
        self.idxs = sum(idxs, [])
        assert self.nw < 2, "LMBatchDS doesn't support multiprocessing"
        cumlen = tensor(self.lengths)[self.idxs].cumsum(0)
        # Yield exactly the `len(self)` complete batches. Iterating up to `n_batch-1` would add a
        # trailing partial batch whenever `n_batch-1` is not a multiple of `seq_len`; that batch
        # reads past the end of each stream and can run out of texts in `_get_text`.
        for i in range(0, len(self)*self.seq_len, self.seq_len):
            # Position of the first token of each row in the concatenated corpus
            src_idx = tensor(range(self.bs)) * self.n_batch + i
            # Text containing that position and offset of the position inside this text
            item_idx = len(cumlen) - (cumlen[:,None] > src_idx[None]).sum(0)
            offset = src_idx - torch.cat([tensor([0]), cumlen])[item_idx]
            yield [self._get_text(i, o) for (i,o) in zip(item_idx, offset)]

    def _get_text(self, i, offset):
        "Read `seq_len+1` tokens starting at `offset` in the `i`-th text and split them into input and target"
        text = self.ds[self.idxs[i]][0][offset:]
        # Continue with the following texts until the last target is available
        while len(text) <= self.seq_len:
            i += 1
            text += self.ds[self.idxs[i]][0]
        return TensorText(tensor(text[:self.seq_len])),TensorText(tensor(text[1:self.seq_len+1]))

In [None]:
# Same fake corpus as for `LM_PreLoader`: text number j is list(range(lengths[j]))
lengths = [10,7,19,23,5,42]
ds = LMBatchDS([(list(range(l)), 0) for l in lengths], lengths=lengths, bs=5, seq_len=4)

In [None]:
itr = iter(ds)
a = next(itr)
a

[(tensor([0, 1, 2, 3]), tensor([1, 2, 3, 4])),
 (tensor([4, 5, 6, 7]), tensor([5, 6, 7, 8])),
 (tensor([6, 7, 8, 9]), tensor([ 7,  8,  9, 10])),
 (tensor([4, 0, 1, 2]), tensor([0, 1, 2, 3])),
 (tensor([20, 21, 22, 23]), tensor([21, 22, 23, 24]))]

In [None]:
dl = dataloader(ds, num_workers=0)

In [None]:
itr = iter(dl)
a = next(itr)

In [None]:
a

[tensor([[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 6,  7,  8,  9],
         [ 4,  0,  1,  2],
         [20, 21, 22, 23]]), tensor([[ 1,  2,  3,  4],
         [ 5,  6,  7,  8],
         [ 7,  8,  9, 10],
         [ 0,  1,  2,  3],
         [21, 22, 23, 24]])]

In [None]:
b = next(itr)
b

[tensor([[ 4,  5,  6,  7],
         [ 8,  9, 10, 11],
         [10, 11, 12, 13],
         [ 3,  4,  5,  6],
         [24, 25, 26, 27]]), tensor([[ 5,  6,  7,  8],
         [ 9, 10, 11, 12],
         [11, 12, 13, 14],
         [ 4,  5,  6,  7],
         [25, 26, 27, 28]])]

## Integration example

In [None]:
path = untar_data(URLs.IMDB_SAMPLE)
df = pd.read_csv(path/'texts.csv')

In [None]:
df.head()

In [None]:
df_tok,count = tokenize_df(df, 'text')

In [None]:
df_tok.head()

Unnamed: 0,label,is_valid,text,text_lengths
0,negative,False,xxbos▁xxmaj▁un▁-▁bleeping▁-▁believable▁!▁xxmaj...,103.0
1,positive,False,xxbos▁xxmaj▁this▁is▁a▁extremely▁well▁-▁made▁fi...,462.0
2,negative,False,xxbos▁xxmaj▁every▁once▁in▁a▁long▁while▁a▁movie...,220.0
3,positive,False,xxbos▁xxmaj▁name▁just▁says▁it▁all▁.▁i▁watched▁...,184.0
4,negative,False,xxbos▁xxmaj▁this▁movie▁succeeds▁at▁being▁one▁o...,398.0


In [None]:
texts,lengths = df_tok['text'].values,df_tok['text_lengths'].map(int).values

In [None]:
# Random split of the tokenized texts; the vocab is built from the token counts returned by `tokenize_df`
splits = RandomSplitter()(L(t for t in texts))
dsrc = DataSource(L(t for t in texts), type_tfms=[Numericalize(make_vocab(count))], filts=splits)

In [None]:
dsrc.decode_at(0)

("xxbos▁xxmaj▁un▁-▁xxunk▁-▁believable▁!▁xxmaj▁meg▁xxmaj▁ryan▁does▁n't▁even▁look▁her▁usual▁xxunk▁lovable▁self▁in▁this▁,▁which▁normally▁makes▁me▁forgive▁her▁shallow▁xxunk▁acting▁xxunk▁.▁xxmaj▁hard▁to▁believe▁she▁was▁the▁producer▁on▁this▁dog▁.▁xxmaj▁plus▁xxmaj▁kevin▁xxmaj▁kline▁:▁what▁kind▁of▁suicide▁trip▁has▁his▁career▁been▁on▁?▁xxmaj▁xxunk▁...▁xxmaj▁xxunk▁!▁!▁!▁xxmaj▁finally▁this▁was▁directed▁by▁the▁guy▁who▁did▁xxmaj▁big▁xxmaj▁xxunk▁?▁xxmaj▁must▁be▁a▁replay▁of▁xxmaj▁jonestown▁-▁hollywood▁style▁.▁xxmaj▁xxunk▁!",)

In [None]:
# Language-model data with `LM_PreLoader`: one sample per (row, sequence), batched by the DataLoader
bs = 64
# `lengths[splits[0]]` selects the lengths of the texts in the first split
ds = LM_PreLoader(dsrc.train, lengths=lengths[splits[0]], bs=64, seq_len=512)
dl = TfmdDL(ds, bs=bs, num_workers=0)

In [None]:
%time x,y = dl.one_batch()

CPU times: user 43 ms, sys: 3.38 ms, total: 46.3 ms
Wall time: 45.5 ms


In [None]:
# Same data with `LM_Dataset`, which looks tokens up one at a time (compare the timing of the next cell)
ds = LM_Dataset(dsrc.train, lens=lengths[splits[0]], bs=64, seq_len=512)
dl = TfmdDL(ds, bs=bs, num_workers=0)

In [None]:
%time x,y = dl.one_batch()

CPU times: user 1min 39s, sys: 2.89 s, total: 1min 42s
Wall time: 1min 38s


In [None]:
dl.decode_batch((x,y))

(#10) [('xxbos▁xxmaj▁this▁is▁a▁really▁interesting▁movie▁.▁xxmaj▁it▁is▁an▁action▁movie▁with▁comedy▁mixed▁in▁.▁xxmaj▁foxx▁teams▁up▁with▁comedian▁xxmaj▁xxunk▁in▁this▁movie▁to▁give▁it▁a▁comedic▁spin▁.▁xxmaj▁it▁will▁keep▁you▁wondering▁what▁s▁going▁to▁happen▁to▁xxmaj▁foxx▁next▁.▁xxmaj▁it▁was▁a▁well▁shot▁movie▁,▁the▁director▁used▁the▁right▁colors▁in▁this',),('confusion▁and▁disappointment▁on▁their▁faces▁.▁\n\n▁xxmaj▁to▁the▁makers▁of▁this▁movie▁,▁you▁owe▁me▁two▁hours▁.▁xxbos▁a▁man▁xxunk▁to▁life▁alone▁after▁45▁years▁of▁marriage▁.▁xxmaj▁he▁also▁has▁to▁solve▁the▁problem▁of▁the▁family▁xxunk▁xxunk▁,▁xxmaj▁tulip▁,▁which▁refuses▁to▁allow▁itself▁to▁be▁xxunk▁.▁xxmaj▁until▁,▁that▁is▁,▁he▁xxunk▁his',),('xxunk▁,▁and▁xxunk▁knock▁it▁out▁of▁the▁number▁one▁spot▁.▁xxmaj▁every▁time▁i▁hear▁someone▁declare▁"▁titanic▁"▁is▁the▁greatest▁film▁they▁\'ve▁ever▁seen▁,▁i▁think▁to▁myself▁,▁"▁you▁do▁n\'t▁see▁a▁lot▁of▁movies▁,▁do▁you▁?▁"▁xxmaj▁what▁a▁travesty▁.▁xxmaj▁you▁could▁make▁50▁good▁films▁that▁are▁a▁lot▁better▁than▁"',

In [None]:
ds.decode((x[0],))

('xxbos▁xxmaj▁this▁is▁a▁really▁interesting▁movie▁.▁xxmaj▁it▁is▁an▁action▁movie▁with▁comedy▁mixed▁in▁.▁xxmaj▁foxx▁teams▁up▁with▁comedian▁xxmaj▁xxunk▁in▁this▁movie▁to▁give▁it▁a▁comedic▁spin▁.▁xxmaj▁it▁will▁keep▁you▁wondering▁what▁s▁going▁to▁happen▁to▁xxmaj▁foxx▁next▁.▁xxmaj▁it▁was▁a▁well▁shot▁movie▁,▁the▁director▁used▁the▁right▁colors▁in▁this',)

### Showing

In [None]:
# Rebuild the data with a smaller batch size and the default `seq_len` for display
dsrc = DataSource(L(t for t in texts), type_tfms=[Numericalize(make_vocab(count))], filts=splits)
bs = 16
ds = LM_PreLoader(dsrc.train, lengths=lengths[splits[0]], bs=bs)
dl = TfmdDL(ds, bs=bs, num_workers=0)

In [None]:
dl.show_batch()

index,text
0,"xxbos▁xxmaj▁this▁is▁a▁really▁interesting▁movie▁.▁xxmaj▁it▁is▁an▁action▁movie▁with▁comedy▁mixed▁in▁.▁xxmaj▁foxx▁teams▁up▁with▁comedian▁xxmaj▁xxunk▁in▁this▁movie▁to▁give▁it▁a▁comedic▁spin▁.▁xxmaj▁it▁will▁keep▁you▁wondering▁what▁s▁going▁to▁happen▁to▁xxmaj▁foxx▁next▁.▁xxmaj▁it▁was▁a▁well▁shot▁movie▁,▁the▁director▁used▁the▁right▁colors▁in▁this"
1,"confusion▁and▁disappointment▁on▁their▁faces▁.▁\n\n▁xxmaj▁to▁the▁makers▁of▁this▁movie▁,▁you▁owe▁me▁two▁hours▁.▁xxbos▁a▁man▁xxunk▁to▁life▁alone▁after▁45▁years▁of▁marriage▁.▁xxmaj▁he▁also▁has▁to▁solve▁the▁problem▁of▁the▁family▁xxunk▁xxunk▁,▁xxmaj▁tulip▁,▁which▁refuses▁to▁allow▁itself▁to▁be▁xxunk▁.▁xxmaj▁until▁,▁that▁is▁,▁he▁xxunk▁his"
2,"xxunk▁,▁and▁xxunk▁knock▁it▁out▁of▁the▁number▁one▁spot▁.▁xxmaj▁every▁time▁i▁hear▁someone▁declare▁""▁titanic▁""▁is▁the▁greatest▁film▁they▁'ve▁ever▁seen▁,▁i▁think▁to▁myself▁,▁""▁you▁do▁n't▁see▁a▁lot▁of▁movies▁,▁do▁you▁?▁""▁xxmaj▁what▁a▁travesty▁.▁xxmaj▁you▁could▁make▁50▁good▁films▁that▁are▁a▁lot▁better▁than▁"""
3,"bad▁-▁guy▁-▁like▁things▁,▁just▁in▁case▁you▁did▁n't▁quite▁catch▁on▁.▁xxmaj▁the▁good▁guy▁does▁brave▁and▁noble▁things▁just▁in▁case▁you▁did▁n't▁know▁he▁was▁the▁good▁guy▁.▁xxmaj▁and▁oh▁lord▁,▁the▁plane▁scene▁.▁""▁my▁xxup▁husband▁xxup▁is▁xxup▁missing▁!▁xxup▁oh▁xxup▁my▁xxup▁god▁!▁xxup▁it▁'s▁xxup▁impossible▁xxup"
4,",▁unrealistic▁dialogue▁.▁xxmaj▁that▁it▁got▁as▁many▁positive▁votes▁suggests▁some▁people▁may▁be▁joking▁.▁xxbos▁an▁very▁good▁storyline▁,▁good▁xxunk▁to▁it▁...▁but▁the▁10▁last▁seconds▁destroyed▁the▁whole▁movie▁...▁what▁happened▁?▁extremely▁well▁made▁and▁an▁good▁story▁destroyed▁in▁the▁last▁seconds▁...▁sorry▁to▁say▁but▁a▁1▁in▁vote▁...▁that▁s▁what▁it▁it"
5,models▁.▁xxmaj▁even▁the▁shooting▁was▁bad▁-▁in▁one▁scene▁the▁xxmaj▁xxunk▁xxunk▁his▁six▁-▁shooter▁about▁nine▁times▁and▁could▁n't▁hit▁any▁of▁three▁large▁men▁who▁were▁only▁about▁twenty▁feet▁away▁.▁i▁had▁to▁turn▁it▁off▁after▁about▁15▁minutes▁of▁this▁xxunk▁.▁xxmaj▁perhaps▁those▁who▁xxunk▁in▁this▁movie▁could▁have▁taken▁some▁lessons▁at▁the▁xxmaj
6,"and▁never▁develops▁(▁just▁because▁the▁subject▁matter▁is▁deep▁,▁does▁n't▁mean▁the▁story▁is▁good▁)▁.▁a▁good▁story▁has▁dynamic▁characters▁.▁a▁dynamic▁character▁is▁one▁that▁experiences▁a▁major▁character▁change▁,▁and▁is▁xxunk▁for▁that▁change▁over▁the▁course▁of▁the▁movie▁.▁xxmaj▁in▁xxmaj▁georgia▁xxmaj▁rule▁,▁the▁character▁changes▁were▁abrupt▁and▁undeveloped▁.▁xxmaj▁secondly"
7,"project▁with▁its▁shaky▁camera▁work▁and▁xxmaj▁the▁xxmaj▁texas▁xxmaj▁chainsaw▁xxmaj▁massacre▁in▁its▁bare▁-▁bones▁approach▁to▁the▁material▁but▁succeeds▁at▁being▁neither▁.▁xxmaj▁what▁we▁get▁instead▁are▁xxunk▁scenes▁of▁chase▁,▁torture▁,▁release▁;▁chase▁torture▁,▁release▁,▁in▁that▁order▁for▁xxunk▁minutes▁with▁long▁interludes▁of▁xxunk▁,▁xxunk▁,▁and▁incoherent▁rambling▁acting▁as▁plenty"
8,"now▁and▁rent▁it▁!▁(▁do▁n't▁forget▁popcorn▁!▁)▁xxbos▁i▁saw▁this▁movie▁years▁ago▁on▁late▁night▁television▁.▁xxmaj▁back▁then▁it▁went▁by▁the▁title▁of▁""▁xxunk▁to▁xxmaj▁heaven▁""▁.▁xxmaj▁even▁as▁a▁young▁boy▁,▁i▁remember▁being▁deeply▁moved▁by▁the▁story▁and▁xxunk▁by▁the▁visual▁effects▁of▁the▁court▁trial▁(▁those▁who▁have▁seen"
9,"are▁interesting▁and▁have▁some▁historical▁value▁as▁evidence▁of▁what▁this▁environment▁looked▁like▁in▁1930▁.▁xxmaj▁the▁shots▁of▁the▁xxmaj▁xxunk▁and▁other▁natives▁are▁also▁interesting▁,▁although▁these▁suffer▁from▁the▁xxmaj▁johnsons▁'▁xxunk▁to▁stage▁events▁in▁a▁manner▁that▁makes▁the▁natives▁look▁'▁wild▁'▁and▁alien▁.▁\n\n▁xxmaj▁the▁best▁(▁and▁funniest▁)▁scene▁in▁'▁congorilla"


## Classification

In [None]:
def pad_collate(samples, pad_idx=1, pad_first=True, backwards=False):
    "Function that collects `samples` and adds padding with `pad_idx`. Flips token order if `backwards`"
    # Each sample is indexed as `s[0]` (sequence of token ids) and `s[1]` (its target).
    # The result is `(TensorTextBase(batch), tensor(targets))`, with `batch` of shape `(len(samples), max_len)`.
    max_len = max([len(s[0]) for s in samples])
    res = torch.zeros(len(samples), max_len).long() + pad_idx
    # Rows are flipped at the end, so write the padding on the opposite side for it to end up where `pad_first` asks
    if backwards: pad_first = not pad_first
    for i,s in enumerate(samples):
        n = len(s[0])
        # Explicit start index: `-n:` would select the whole row when n == 0 and fail on an empty sample
        if pad_first: res[i,max_len-n:] = LongTensor(s[0])
        else:         res[i,:n] = LongTensor(s[0])
    if backwards: res = res.flip(1)
    return TensorTextBase(res), tensor(np.array([s[1] for s in samples]))

In [None]:
# Build a two-field (text, label) `DataSource` over the row indices of `df`, then wrap it in a
# dataloader that pads each batch with `pad_collate`.
_items = range(len(df))
splits = RandomSplitter()(_items)  # passed below as `filts`
_get_txt = lambda i: df_tok["text"][i]
_get_lbl = lambda i: df_tok["label"][i]
# One transform list per field: fetch the raw value by row index, then encode it
_txt_tfms = [_get_txt, Numericalize(make_vocab(count))]
_lbl_tfms = [_get_lbl, Categorize()]
dsrc = DataSource(_items, type_tfms=[_txt_tfms, _lbl_tfms], filts=splits)
dl = TfmdDL(dsrc, collate_fn=TfmdCollate(collate_fn=pad_collate))

In [None]:
# Display a batch from `dl`; the saved output below lists index, text and category columns
dl.show_batch()

index,text,category
0,"xxbos▁xxmaj▁un▁-▁xxunk▁-▁believable▁!▁xxmaj▁meg▁xxmaj▁ryan▁does▁n't▁even▁look▁her▁usual▁xxunk▁lovable▁self▁in▁this▁,▁which▁normally▁makes▁me▁forgive▁her▁shallow▁xxunk▁acting▁xxunk▁.▁xxmaj▁hard▁to▁believe▁she▁was▁the▁producer▁on▁this▁dog▁.▁xxmaj▁plus▁xxmaj▁kevin▁xxmaj▁kline▁:▁what▁kind▁of▁suicide▁trip▁has▁his▁career▁been▁on▁?▁xxmaj▁xxunk▁...▁xxmaj▁xxunk▁!▁!▁!▁xxmaj▁finally▁this▁was▁directed▁by▁the▁guy▁who▁did▁xxmaj▁big▁xxmaj▁xxunk▁?▁xxmaj▁must▁be▁a▁replay▁of▁xxmaj▁jonestown▁-▁hollywood▁style▁.▁xxmaj▁xxunk▁!",negative
1,"xxbos▁xxmaj▁this▁is▁a▁extremely▁well▁-▁made▁film▁.▁xxmaj▁the▁acting▁,▁script▁and▁camera▁-▁work▁are▁all▁first▁-▁rate▁.▁xxmaj▁the▁music▁is▁good▁,▁too▁,▁though▁it▁is▁mostly▁early▁in▁the▁film▁,▁when▁things▁are▁still▁relatively▁xxunk▁.▁xxmaj▁there▁are▁no▁really▁xxunk▁in▁the▁cast▁,▁though▁several▁faces▁will▁be▁familiar▁.▁xxmaj▁the▁entire▁cast▁does▁an▁excellent▁job▁with▁the▁script▁.▁\n\n▁xxmaj▁but▁it▁is▁hard▁to▁watch▁,▁because▁there▁is▁no▁good▁end▁to▁a▁situation▁like▁the▁one▁presented▁.▁xxmaj▁it▁is▁now▁xxunk▁to▁blame▁the▁xxmaj▁british▁for▁setting▁xxmaj▁hindus▁and▁xxmaj▁muslims▁against▁each▁other▁,▁and▁then▁xxunk▁xxunk▁them▁into▁two▁countries▁.▁xxmaj▁there▁is▁some▁merit▁in▁this▁view▁,▁but▁it▁'s▁also▁true▁that▁no▁one▁forced▁xxmaj▁hindus▁and▁xxmaj▁muslims▁in▁the▁region▁to▁xxunk▁each▁other▁as▁they▁did▁around▁the▁time▁of▁partition▁.▁xxmaj▁it▁seems▁more▁likely▁that▁the▁xxmaj▁british▁simply▁saw▁the▁xxunk▁between▁the▁xxunk▁and▁were▁clever▁enough▁to▁exploit▁them▁to▁their▁own▁ends▁.▁\n\n▁xxmaj▁the▁result▁is▁that▁there▁is▁much▁cruelty▁and▁inhumanity▁in▁the▁situation▁and▁this▁is▁very▁unpleasant▁to▁remember▁and▁to▁see▁on▁the▁screen▁.▁xxmaj▁but▁it▁is▁never▁painted▁as▁a▁black▁-▁and▁-▁white▁case▁.▁xxmaj▁there▁is▁xxunk▁and▁xxunk▁on▁both▁sides▁,▁and▁also▁the▁hope▁for▁change▁in▁the▁younger▁generation▁.▁\n\n▁xxmaj▁there▁is▁redemption▁of▁a▁sort▁,▁in▁the▁end▁,▁when▁xxmaj▁xxunk▁has▁to▁make▁a▁hard▁choice▁between▁a▁man▁who▁has▁ruined▁her▁life▁,▁but▁also▁truly▁loved▁her▁,▁and▁her▁family▁which▁has▁xxunk▁her▁,▁then▁later▁come▁looking▁for▁her▁.▁xxmaj▁but▁by▁that▁point▁,▁she▁has▁no▁xxunk▁that▁is▁without▁great▁pain▁for▁her▁.▁\n\n▁xxmaj▁this▁film▁carries▁the▁message▁that▁both▁xxmaj▁muslims▁and▁xxmaj▁hindus▁have▁their▁grave▁faults▁,▁and▁also▁that▁both▁can▁be▁xxunk▁and▁caring▁people▁.▁xxmaj▁the▁reality▁of▁partition▁makes▁that▁xxunk▁all▁the▁more▁wrenching▁,▁since▁there▁can▁never▁be▁real▁xxunk▁across▁the▁xxmaj▁india▁/▁xxmaj▁pakistan▁border▁.▁xxmaj▁in▁that▁sense▁,▁it▁is▁similar▁to▁""▁mr▁&▁xxmaj▁mrs▁xxmaj▁x
xunk▁""▁.▁\n\n▁xxmaj▁in▁the▁end▁,▁we▁were▁glad▁to▁have▁seen▁the▁film▁,▁even▁though▁the▁resolution▁was▁heartbreaking▁.▁xxmaj▁if▁the▁xxup▁uk▁and▁xxup▁us▁could▁deal▁with▁their▁own▁xxunk▁of▁racism▁with▁this▁kind▁of▁xxunk▁,▁they▁would▁certainly▁be▁better▁off▁.",positive
2,"xxbos▁xxmaj▁every▁once▁in▁a▁long▁while▁a▁movie▁will▁come▁along▁that▁will▁be▁so▁awful▁that▁i▁feel▁compelled▁to▁warn▁people▁.▁xxmaj▁if▁i▁labor▁all▁my▁days▁and▁i▁can▁save▁but▁one▁soul▁from▁watching▁this▁movie▁,▁how▁great▁will▁be▁my▁joy▁.▁\n\n▁xxmaj▁where▁to▁begin▁my▁discussion▁of▁pain▁.▁xxmaj▁for▁xxunk▁,▁there▁was▁a▁musical▁montage▁every▁five▁minutes▁.▁xxmaj▁there▁was▁no▁character▁development▁.▁xxmaj▁every▁character▁was▁a▁stereotype▁.▁xxmaj▁we▁had▁xxunk▁guy▁,▁fat▁guy▁who▁eats▁donuts▁,▁goofy▁foreign▁guy▁,▁etc▁.▁xxmaj▁the▁script▁felt▁as▁if▁it▁were▁being▁written▁as▁the▁movie▁was▁being▁shot▁.▁xxmaj▁the▁production▁value▁was▁so▁incredibly▁low▁that▁it▁felt▁like▁i▁was▁watching▁a▁junior▁high▁video▁presentation▁.▁xxmaj▁have▁the▁directors▁,▁producers▁,▁etc▁.▁ever▁even▁seen▁a▁movie▁before▁?▁xxmaj▁xxunk▁is▁getting▁worse▁and▁worse▁with▁every▁new▁entry▁.▁xxmaj▁the▁concept▁for▁this▁movie▁sounded▁so▁funny▁.▁xxmaj▁how▁could▁you▁go▁wrong▁with▁xxmaj▁gary▁xxmaj▁coleman▁and▁a▁handful▁of▁somewhat▁legitimate▁actors▁.▁xxmaj▁but▁trust▁me▁when▁i▁say▁this▁,▁things▁went▁wrong▁,▁xxup▁very▁xxup▁wrong▁.",negative
3,"xxbos▁xxmaj▁name▁just▁says▁it▁all▁.▁i▁watched▁this▁movie▁with▁my▁dad▁when▁it▁came▁out▁and▁having▁served▁in▁xxmaj▁xxunk▁he▁had▁great▁admiration▁for▁the▁man▁.▁xxmaj▁the▁disappointing▁thing▁about▁this▁film▁is▁that▁it▁only▁concentrate▁on▁a▁short▁period▁of▁the▁man▁'s▁life▁-▁interestingly▁enough▁the▁man▁'s▁entire▁life▁would▁have▁made▁such▁an▁epic▁bio▁-▁xxunk▁that▁it▁is▁staggering▁to▁imagine▁the▁cost▁for▁production▁.▁\n\n▁xxmaj▁some▁posters▁xxunk▁to▁the▁flawed▁xxunk▁about▁the▁man▁,▁which▁are▁cheap▁shots▁.▁xxmaj▁the▁theme▁of▁the▁movie▁""▁duty▁,▁xxmaj▁honor▁,▁xxmaj▁country▁""▁are▁not▁just▁mere▁words▁xxunk▁from▁the▁lips▁of▁a▁high▁-▁xxunk▁officer▁-▁it▁is▁the▁deep▁xxunk▁of▁one▁man▁'s▁total▁devotion▁to▁his▁country▁.▁\n\n▁xxmaj▁ironically▁xxmaj▁xxunk▁being▁the▁liberal▁that▁he▁was▁xxunk▁a▁better▁understanding▁of▁the▁man▁.▁xxmaj▁he▁does▁a▁great▁job▁showing▁the▁xxunk▁general▁xxunk▁with▁the▁xxunk▁side▁of▁the▁man▁.",positive
4,"xxbos▁xxmaj▁this▁movie▁succeeds▁at▁being▁one▁of▁the▁most▁unique▁movies▁you▁'ve▁seen▁.▁xxmaj▁however▁this▁comes▁from▁the▁fact▁that▁you▁ca▁n't▁make▁heads▁or▁xxunk▁of▁this▁mess▁.▁xxmaj▁it▁almost▁seems▁as▁a▁series▁of▁challenges▁set▁up▁to▁determine▁whether▁or▁not▁you▁are▁willing▁to▁walk▁out▁of▁the▁movie▁and▁give▁up▁the▁money▁you▁just▁paid▁.▁xxmaj▁if▁you▁do▁n't▁want▁to▁feel▁xxunk▁you▁'ll▁sit▁through▁this▁horrible▁film▁and▁develop▁a▁real▁sense▁of▁pity▁for▁the▁actors▁involved▁,▁they▁'ve▁all▁seen▁better▁days▁,▁but▁then▁you▁realize▁they▁actually▁got▁paid▁quite▁a▁bit▁of▁money▁to▁do▁this▁and▁you▁'ll▁lose▁pity▁for▁them▁just▁like▁you▁'ve▁already▁done▁for▁the▁film▁.▁i▁ca▁n't▁go▁on▁enough▁about▁this▁horrible▁movie▁,▁its▁almost▁something▁that▁xxmaj▁ed▁xxmaj▁wood▁would▁have▁made▁and▁in▁that▁case▁it▁surely▁would▁have▁been▁his▁masterpiece▁.▁\n\n▁xxmaj▁to▁start▁you▁are▁forced▁to▁sit▁through▁an▁opening▁dialogue▁the▁likes▁of▁which▁you▁'ve▁never▁seen▁/▁heard▁,▁this▁thing▁has▁got▁to▁be▁five▁minutes▁long▁.▁xxmaj▁on▁top▁of▁that▁it▁is▁narrated▁,▁as▁to▁suggest▁that▁you▁the▁viewer▁can▁not▁read▁.▁xxmaj▁then▁we▁meet▁xxmaj▁mr▁.▁xxmaj▁xxunk▁and▁the▁xxunk▁of▁terrible▁lines▁gets▁xxunk▁,▁it▁is▁as▁if▁he▁is▁operating▁solely▁to▁get▁lines▁on▁to▁the▁movie▁poster▁tag▁line▁.▁xxmaj▁soon▁we▁meet▁xxmaj▁stephen▁xxmaj▁xxunk▁,▁who▁i▁typically▁enjoy▁)▁and▁he▁does▁his▁best▁not▁to▁drown▁in▁this▁but▁ultimately▁he▁does▁.▁xxmaj▁then▁comes▁the▁ultimate▁insult▁,▁xxmaj▁tara▁xxmaj▁reid▁playing▁an▁intelligent▁role▁,▁oh▁help▁us▁!▁xxmaj▁tara▁xxmaj▁reid▁is▁not▁a▁very▁talented▁actress▁and▁somehow▁she▁xxunk▁gets▁roles▁in▁movies▁,▁in▁my▁opinion▁though▁she▁should▁stick▁to▁movies▁of▁the▁xxmaj▁american▁pie▁type▁.▁\n\n▁xxmaj▁all▁in▁all▁you▁just▁may▁want▁to▁see▁this▁for▁yourself▁when▁it▁comes▁out▁on▁video▁,▁i▁know▁that▁i▁got▁a▁kick▁out▁of▁it▁,▁i▁mean▁lets▁all▁be▁honest▁here▁,▁sometimes▁its▁comforting▁to▁xxunk▁in▁the▁shortcomings▁of▁others▁.",negative
5,"xxbos▁xxmaj▁from▁the▁start▁,▁you▁know▁how▁this▁movie▁will▁end▁.▁xxmaj▁it▁'s▁so▁full▁of▁clichés▁your▁typical▁xxup▁xxunk▁member▁will▁not▁even▁like▁this▁movie▁.▁i▁give▁it▁2▁out▁of▁10▁,▁only▁because▁of▁the▁acting▁of▁xxmaj▁william▁xxmaj▁xxunk▁.▁i▁ca▁n't▁believe▁people▁voted▁6▁+▁for▁this▁movie▁.▁xxmaj▁it▁'s▁so▁biased▁towards▁a▁'▁certain▁point▁of▁view▁'▁(▁once▁a▁thief▁...▁)▁.▁xxmaj▁people▁are▁n't▁born▁bad▁.▁xxmaj▁neither▁are▁they▁born▁good▁.▁xxmaj▁they▁are▁born▁with▁a▁clean▁xxunk▁.▁xxmaj▁it▁'s▁society▁,▁parents▁and▁education▁what▁makes▁them▁who▁they▁are▁.▁xxmaj▁and▁if▁they▁take▁the▁wrong▁turn▁,▁somewhere▁down▁the▁line▁,▁it▁certainly▁is▁n't▁going▁to▁be▁the▁xxmaj▁american▁xxunk▁system▁that▁gets▁them▁back▁on▁track▁!▁xxmaj▁anyway▁,▁avoid▁this▁movie▁like▁the▁plague▁.▁i▁bet▁you▁have▁better▁things▁to▁do▁with▁your▁time▁than▁waste▁it▁on▁this▁piece▁of▁crap▁.",negative
6,"xxbos▁xxmaj▁there▁were▁a▁lot▁of▁truly▁great▁horror▁movies▁produced▁in▁the▁seventies▁-▁but▁this▁film▁certainly▁is▁n't▁one▁of▁them▁!▁xxmaj▁it▁'s▁a▁shame▁xxmaj▁the▁xxmaj▁child▁is▁n't▁better▁as▁it▁works▁from▁a▁decent▁idea▁that▁takes▁in▁a▁couple▁of▁sometimes▁successful▁horror▁themes▁.▁xxmaj▁we▁have▁the▁idea▁of▁a▁xxunk▁child▁,▁which▁worked▁so▁well▁in▁classic▁films▁such▁as▁xxmaj▁the▁xxmaj▁bad▁xxmaj▁xxunk▁and▁then▁we▁have▁the▁central▁zombie▁theme▁,▁which▁of▁course▁has▁been▁the▁xxunk▁of▁many▁a▁successful▁horror▁movie▁.▁xxmaj▁the▁plot▁is▁basically▁this▁:▁young▁girl▁blames▁a▁load▁of▁people▁for▁the▁death▁of▁her▁mother▁,▁so▁she▁goes▁to▁the▁graveyard▁and▁raises▁the▁dead▁to▁get▁revenge▁(▁as▁you▁do▁)▁.▁xxmaj▁this▁is▁all▁well▁and▁good▁,▁except▁for▁the▁fact▁that▁it▁'s▁boring▁!▁xxmaj▁nothing▁happens▁for▁most▁of▁the▁film▁,▁and▁although▁it▁does▁pick▁up▁at▁the▁end▁with▁some▁nice▁gore▁;▁it▁'s▁not▁enough▁of▁a▁finale▁to▁justify▁sitting▁through▁the▁rest▁of▁it▁.▁xxmaj▁the▁film▁was▁obviously▁shot▁on▁a▁budget▁as▁the▁locations▁look▁cheap▁and▁all▁the▁actors▁are▁rubbish▁.▁xxmaj▁there▁'s▁really▁not▁much▁i▁can▁say▁about▁the▁film▁overall▁as▁there▁is▁n't▁much▁to▁it▁.▁xxmaj▁the▁xxmaj▁child▁is▁a▁xxunk▁seventies▁horror▁flick▁and▁i▁certainly▁do▁n't▁recommend▁it▁.",negative
7,"xxbos▁i▁was▁xxunk▁enough▁to▁meet▁xxmaj▁george▁xxmaj▁pal▁(▁and▁still▁have▁my▁xxup▁xxunk▁:▁xxunk▁poster▁xxunk▁by▁him▁)▁at▁a▁convention▁shortly▁after▁the▁release▁,▁and▁asked▁him▁why▁he▁chose▁to▁do▁the▁film▁""▁camp▁""▁.▁xxmaj▁before▁he▁could▁answer▁,▁two▁studio▁flacks▁xxunk▁and▁xxunk▁me▁on▁how▁the▁studio▁""▁knew▁best▁""▁and▁how▁""▁no▁one▁will▁take▁such▁a▁film▁seriously▁""▁.▁i▁had▁been▁reading▁the▁xxmaj▁bantam▁xxunk▁for▁a▁couple▁of▁years▁thanks▁to▁a▁friend▁(▁xxunk▁xxunk▁of▁the▁1970s▁will▁recall▁xxmaj▁xxunk▁and▁his▁band▁?▁i▁was▁in▁a▁couple▁of▁years▁of▁that▁with▁him▁)▁,▁and▁had▁higher▁hopes▁than▁what▁we▁got▁.▁\n\n▁xxmaj▁the▁flacks▁xxunk▁that▁no▁high▁adventure▁would▁ever▁be▁done▁seriously▁,▁and▁so▁doing▁'▁camp▁'▁was▁the▁only▁way▁.▁xxmaj▁several▁other▁fans▁jumped▁in▁on▁my▁side▁,▁with▁xxmaj▁pal▁listening▁as▁best▁he▁could▁.▁xxmaj▁at▁the▁end▁of▁the▁little▁event▁,▁xxmaj▁pal▁came▁up▁to▁us▁and▁xxunk▁,▁xxunk▁he▁could▁have▁done▁more▁and▁better▁.▁\n\n▁xxup▁star▁xxup▁wars▁put▁the▁lie▁to▁the▁flacks▁,▁and▁a▁year▁after▁xxmaj▁pal▁'s▁death▁,▁xxmaj▁spielberg▁and▁xxmaj▁lucas▁proved▁that▁xxmaj▁doc▁xxmaj▁savage▁could▁have▁easily▁been▁the▁next▁major▁movie▁franchise▁...▁if▁it▁had▁n't▁been▁for▁the▁flacks▁.▁\n\n▁xxmaj▁tear▁out▁the▁memory▁or▁history▁of▁xxmaj▁doc▁,▁and▁the▁film▁would▁have▁been▁worth▁a▁6▁/▁10▁rating▁as▁nothing▁more▁than▁a▁mindless▁popcorn▁xxunk▁.▁\n\n▁xxmaj▁but▁destroying▁the▁legacy▁like▁that▁was▁no▁less▁an▁xxunk▁than▁killing▁a▁baby▁in▁the▁xxunk▁.▁\n\n▁xxmaj▁doc▁xxmaj▁savage▁can▁still▁come▁to▁the▁screen▁,▁and▁survive▁the▁inevitable▁comparisons▁by▁the▁ill▁-▁informed▁to▁xxmaj▁indiana▁xxmaj▁jones▁,▁but▁it▁would▁have▁to▁be▁done▁in▁all▁xxunk▁and▁earnest▁to▁xxunk▁the▁glory▁that▁we▁should▁expect▁from▁the▁xxmaj▁first▁xxmaj▁american▁xxmaj▁superhero▁.▁\n\n▁xxup▁xxunk▁:▁xxmaj▁yes▁,▁there▁was▁a▁second▁script▁for▁xxup▁xxunk▁xxup▁of▁xxup▁evil▁,▁and▁it▁'s▁a▁lot▁more▁serious▁.▁xxmaj▁yes▁,▁there▁was▁xxunk▁footage▁shot▁,▁but▁mostly▁establishing▁shots▁and▁very▁little▁with▁actors▁.▁xxmaj▁and▁,▁yes▁,▁
there▁_▁is▁_▁a▁one▁-▁xxunk▁of▁xxmaj▁ron▁xxmaj▁xxunk▁xxunk▁over▁a▁xxunk▁wall▁and▁xxunk▁at▁something▁over▁his▁shoulder▁with▁a▁specially▁built▁bronze▁xxunk▁.▁xxmaj▁xxunk▁'s▁wearing▁a▁xxunk▁over▁a▁button▁down▁white▁shirt▁with▁a▁bronze▁tie▁,▁and▁the▁words▁""▁doc▁xxup▁savage▁:▁xxup▁xxunk▁xxup▁of▁evil▁...▁coming▁xxmaj▁next▁xxmaj▁summer▁!▁""▁xxup▁xxunk▁:▁xxmaj▁if▁anyone▁knows▁who▁the▁studio▁flacks▁were▁that▁accompanied▁xxmaj▁george▁xxmaj▁pal▁in▁1975▁to▁xxmaj▁san▁xxmaj▁diego▁for▁the▁convention▁,▁xxunk▁the▁idiots▁up▁the▁side▁of▁the▁head▁and▁call▁them▁the▁idiots▁that▁they▁are▁.▁xxmaj▁at▁the▁time▁,▁they▁were▁doing▁xxunk▁and▁xxmaj▁fu▁xxmaj▁xxunk▁in▁xxunk▁and▁xxunk▁xxunk▁pants▁,▁and▁carrying▁xxmaj▁xxunk▁xxunk▁.",negative
8,"xxbos▁xxmaj▁this▁film▁is▁the▁freshman▁effort▁of▁xxmaj▁xxunk▁xxmaj▁xxunk▁and▁her▁new▁production▁company▁.▁xxmaj▁while▁it▁suffers▁from▁a▁few▁problems▁,▁as▁every▁low▁budget▁production▁does▁,▁it▁is▁a▁good▁start▁for▁xxmaj▁ms▁.▁xxmaj▁xxunk▁and▁her▁company▁.▁\n\n▁xxmaj▁the▁story▁is▁not▁terribly▁new▁having▁been▁done▁in▁films▁like▁xxmaj▁the▁xxmaj▁burning▁and▁every▁xxmaj▁friday▁the▁13th▁since▁part▁2▁.▁xxmaj▁but▁,▁the▁performances▁are▁heartfelt▁.▁xxmaj▁so▁many▁big▁budget▁movies▁just▁have▁the▁actors▁going▁through▁the▁motions▁,▁its▁always▁nice▁to▁see▁actors▁really▁trying▁to▁xxunk▁their▁craft▁.▁\n\n▁xxmaj▁the▁story▁deals▁with▁the▁xxunk▁possible▁return▁)▁of▁a▁disfigured▁xxunk▁.▁xxmaj▁the▁others▁are▁xxunk▁to▁xxunk▁,▁but▁the▁xxunk▁of▁the▁event▁xxunk▁each▁person▁in▁different▁directions▁in▁their▁xxunk▁.▁xxmaj▁ten▁years▁later▁,▁the▁friends▁are▁murdered▁one▁by▁one▁by▁a▁gruesome▁xxunk▁known▁as▁""▁the▁xxmaj▁xxunk▁""▁.▁xxmaj▁who▁will▁survive▁?▁xxmaj▁you▁have▁to▁watch▁.▁\n\n▁xxmaj▁if▁you▁are▁xxmaj▁roger▁xxmaj▁ebert▁or▁any▁number▁of▁arrogant▁critics▁,▁you▁probably▁should▁n't▁bother▁.▁xxmaj▁but▁if▁your▁taste▁run▁more▁towards▁xxmaj▁joe▁xxmaj▁bob▁xxmaj▁briggs▁and▁you▁want▁to▁see▁a▁group▁of▁people▁xxunk▁their▁craft▁,▁then▁check▁out▁""▁the▁xxmaj▁xxunk▁""▁.",negative
9,"xxbos▁xxmaj▁greg▁xxmaj▁davis▁and▁xxmaj▁xxunk▁xxmaj▁xxunk▁take▁some▁xxunk▁statements▁by▁a▁xxunk▁,▁add▁some▁commentary▁by▁a▁bunch▁of▁xxunk▁-▁right▁xxunk▁,▁xxunk▁the▁most▁extreme▁positions▁of▁the▁most▁xxunk▁xxmaj▁xxunk▁on▁the▁planet▁to▁everyone▁who▁calls▁themselves▁a▁xxmaj▁xxunk▁,▁and▁presents▁this▁as▁the▁xxunk▁of▁xxmaj▁islam▁.▁xxmaj▁maybe▁their▁next▁film▁will▁involve▁xxunk▁xxmaj▁fred▁xxmaj▁phelps▁and▁the▁xxunk▁of▁the▁xxmaj▁xxunk▁xxmaj▁baptist▁xxmaj▁church▁,▁adding▁commentary▁by▁some▁xxunk▁atheist▁""▁xxunk▁,▁and▁call▁their▁film▁""▁what▁the▁xxmaj▁world▁xxmaj▁needs▁to▁xxmaj▁know▁xxmaj▁about▁xxmaj▁christianity▁.▁""▁xxmaj▁ultimately▁,▁this▁film▁suffers▁from▁both▁poor▁production▁values▁and▁lack▁of▁attention▁to▁the▁most▁basic▁standards▁of▁xxunk▁.▁xxmaj▁do▁n't▁waste▁your▁time▁and▁money▁;▁just▁turn▁on▁your▁xxup▁am▁radio▁and▁listen▁to▁xxmaj▁rush▁xxmaj▁xxunk▁for▁a▁couple▁of▁days▁for▁free▁and▁you▁'ll▁get▁the▁same▁message▁with▁the▁same▁level▁of▁intellectual▁analysis▁.",negative
