In [None]:
# author - Richard Liao 
# Dec 26 2016
import numpy as np
import pandas as pd
from collections import defaultdict
import re

from bs4 import BeautifulSoup

import sys
import os

os.environ['KERAS_BACKEND']='tensorflow'

from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical

from keras.layers import Embedding
from keras.layers import Dense, Input, Flatten
from keras.layers import Conv1D, MaxPooling1D, Embedding, Dropout, LSTM, GRU, Bidirectional, TimeDistributed
from keras.models import Model

from keras import backend as K
from keras.engine.topology import Layer, InputSpec


import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Build a TF session whose GPU options have allow_growth enabled and register
# it as the session Keras will use.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
set_session(tf.Session(config=config))

# Fixed sizes shared by the preprocessing code and the model definition below.
MAX_SENT_LENGTH = 100    # word slots per sentence (last axis of `data`, Embedding input_length)
MAX_SENTS = 15           # sentence slots per review (middle axis of `data`)
MAX_NB_WORDS = 20000     # passed to the Tokenizer; word ids >= this are not written into `data`
EMBEDDING_DIM = 100      # width of each embedding row; the GloVe file loaded below is the 100d one
VALIDATION_SPLIT = 0.2   # fraction of the shuffled samples held out as the validation set

def clean_str(string):
    """
    Normalise a raw review string.

    Deletes every backslash, single quote and double quote, then trims
    leading/trailing whitespace and lower-cases what is left.
    """
    for unwanted in ("\\", "'", '"'):
        string = string.replace(unwanted, "")
    return string.strip().lower()

# Load the labelled IMDB training set (tab-separated; the `review` and
# `sentiment` columns are the ones used below).
data_train = pd.read_csv('../data/imdb/labeledTrainData.tsv', sep='\t')
print(data_train.shape)

from nltk import tokenize

reviews = []   # reviews[i] -> list of sentence strings of review i
labels = []    # labels[i]  -> sentiment value of review i
texts = []     # texts[i]   -> cleaned full text of review i

for raw_review, sentiment in zip(data_train.review, data_train.sentiment):
    # Name the parser explicitly: without it bs4 picks "the best available"
    # one and warns on every call. "lxml" is the parser the original run's
    # warning reported as auto-selected, so extracted text is unchanged.
    text = clean_str(BeautifulSoup(raw_review, 'lxml').get_text())
    texts.append(text)
    reviews.append(tokenize.sent_tokenize(text))
    labels.append(sentiment)

# One summary line instead of echoing every review and sentence: the
# per-item prints overflowed the notebook's IOPub data rate limit.
print('Parsed %d reviews.' % len(texts))

# Learn the vocabulary from the full review texts.
tokenizer = Tokenizer(nb_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index

# data[i, j, k] is the id of the k-th kept word of sentence j of review i.
# Slots that are never filled keep their initial value 0.
data = np.zeros((len(texts), MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')

for i, sentences in enumerate(reviews):
    # Sentences beyond MAX_SENTS have no slot in `data`, so drop them up front.
    for j, sent in enumerate(sentences[:MAX_SENTS]):
        k = 0
        for word in text_to_word_sequence(sent):
            if k >= MAX_SENT_LENGTH:
                break  # sentence slot is full
            # .get(): a token missing from the vocabulary is skipped rather
            # than aborting the whole preprocessing run with a KeyError.
            word_id = word_index.get(word)
            if word_id is not None and word_id < MAX_NB_WORDS:
                data[i, j, k] = word_id
                k += 1

print('Total %s unique tokens.' % len(word_index))

# Convert the sentiment labels into a categorical (one column per class) matrix.
labels = to_categorical(np.asarray(labels))
print('Shape of data tensor:', data.shape)
print('Shape of label tensor:', labels.shape)

# Apply one shared random permutation so reviews and labels stay aligned.
indices = np.arange(data.shape[0])
np.random.shuffle(indices)
data, labels = data[indices], labels[indices]

# The trailing VALIDATION_SPLIT fraction of the shuffled samples is held out.
nb_validation_samples = int(VALIDATION_SPLIT * data.shape[0])
x_train, x_val = data[:-nb_validation_samples], data[-nb_validation_samples:]
y_train, y_val = labels[:-nb_validation_samples], labels[-nb_validation_samples:]

print('Number of positive and negative reviews in traing and validation set')
for split_labels in (y_train, y_val):
    # Column sums of the categorical matrix = number of samples per class.
    print(split_labels.sum(axis=0))

# Read the pre-trained GloVe vectors into a {word: float32 vector} dict.
# Each line of the file is "<word> <v1> <v2> ...", split on whitespace.
GLOVE_DIR = "../data/glove"
embeddings_index = {}
# `with` closes the file even if a line fails to parse; the encoding is
# stated explicitly so the read does not depend on the platform default.
with open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt'), encoding='utf-8') as f:
    for line in f:
        values = line.split()
        if not values:
            continue  # tolerate blank lines
        embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')

# A single summary line; printing once per vector flooded the notebook output.
print('Total %s word vectors.' % len(embeddings_index))


# Row i of the matrix is the initial embedding of the word with tokenizer id i.
# Every row starts out random (np.random.random); rows of words that have a
# pre-trained vector are then overwritten with it. Words missing from
# `embeddings_index` therefore keep their random row (they are NOT zeroed).
embedding_matrix = np.random.random((len(word_index) + 1, EMBEDDING_DIM))
nb_pretrained = 0
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector
        nb_pretrained += 1

# Summary instead of one print per vocabulary word.
print('Initialised %s of %s words from pre-trained vectors.'
      % (nb_pretrained, len(word_index)))

        
# Word embedding layer, initialised from `embedding_matrix` and left trainable.
embedding_layer = Embedding(len(word_index) + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SENT_LENGTH,
                            trainable=True)

# Sentence encoder: word ids of one sentence -> bidirectional LSTM output.
sentence_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sentence_input)
l_lstm = Bidirectional(LSTM(100))(embedded_sequences)
sentEncoder = Model(sentence_input, l_lstm)

# Review model: the sentence encoder is applied to each of the MAX_SENTS
# sentences via TimeDistributed, a second bidirectional LSTM runs over the
# resulting sequence, and a 2-way softmax produces the prediction.
review_input = Input(shape=(MAX_SENTS,MAX_SENT_LENGTH), dtype='int32')
review_encoder = TimeDistributed(sentEncoder)(review_input)
l_lstm_sent = Bidirectional(LSTM(100))(review_encoder)
preds = Dense(2, activation='softmax')(l_lstm_sent)
model = Model(review_input, preds)

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

print("model fitting - Hierachical LSTM")
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line.
model.summary()
model.fit(x_train, y_train, validation_data=(x_val, y_val),
          nb_epoch=10, batch_size=50)




(25000, 3)
Text:
with all this stuff going down at the moment with mj ive started listening to his music, watching the odd documentary here and there, watched the wiz and watched moonwalker again. maybe i just want to get a certain insight into this guy who i thought was really cool in the eighties just to maybe make up my mind whether he is guilty or innocent. moonwalker is part biography, part feature film which i remember going to see at the cinema when it was originally released. some of it has subtle messages about mjs feeling towards the press and also the obvious message of drugs are bad mkay.visually impressive but of course this is all about michael jackson so unless you remotely like mj in anyway then you are going to hate this and find it boring. some may call mj an egotist for consenting to the making of this movie but mj and most of his fans would say that he made it for the fans which if true is really nice of him.the actual feature film bit when it finally starts is only



 BeautifulSoup(YOUR_MARKUP})

to this:

 BeautifulSoup(YOUR_MARKUP, "lxml")

  markup_type=markup_type))


Text:
really...and incredible film that though isnt very popular...extremely touching and almost life altering...was for me at least.definitely worth seeing and buying .....added to my favorite movie list....its number one now....this is a very touching movie that all people should see..the man in the moon.....well its just incredible. its now my favorite movie and i only saw it today and id recommend it to anyone above 15 as long as youre somewhat mature......if you dont really try to feel the characters emotions then youll never get the true meaning and value of this movie....but it really is incredible....just watch it because itll alter the way some people look at life....worth seeing 5/5
Sentence:
really...and incredible film that though isnt very popular...extremely touching and almost life altering...was for me at least.definitely worth seeing and buying .....added to my favorite movie list....its number one now....this is a very touching movie that all people should see..the ma

Text:
oh, sam mraovich, we know you tried so hard. this is your magnum opus, a shining example to the rest of us that you are certainly worth nomination into the academy of motion picture arts and sciences (as you state on your 1998-era web site). alas, its better to remain silent and be thought a fool than to speak and remove all doubt. with ben & arthur, you do just that.seemingly assembled with a lack of instruction or education, the films screenplay guides us toward the truly bizarre with each new scene. its this insane excuse of a story that may also be the films best ally. beginning tepidly, the homosexually titular characters ben and arthur attempt to marry, going so far as to fly across country to do so, in the shade of vermonts finest palm trees. but, all of this posturing is merely a lead-in for blood. then more blood, and more and more blood. i mean, there must be at least $20 in fake blood make-up in the final third of this film.the film in its entirety is a technical gaffe

Text:
i read somewhere that when kay francis refused to take a cut in pay, warner bros. retaliated by casting her in inferior projects for the remainder of her contract.she decided to take the money. but her career suffered accordingly.that might explain what she was doing in comet over broadway. (though it doesnt explain why donald crisp and ian hunter are in it, too.) ludicrous is the word that others have used for the plot of this film, and thats right on target. the murder trial. her seedy vaudeville career. her success in london. her final scene with her daughter. no part logically leads to the next part.also, the sets and costumes looked like b-movie stuff. and her hair! turner is showing lots and lots of her movies this month. watch any other one and youll be doing yourself a favor.
Sentence:
i read somewhere that when kay francis refused to take a cut in pay, warner bros. retaliated by casting her in inferior projects for the remainder of her contract.she decided to take the mo

saw a screener of this before last years award season, didnt really know why they gave them out after the voting had ended, but whatever, maybe for exposure, at the least, but the movie was a convoluted mess. sure, some parts were funny in a black humor kind of way, but none of the characters felt very real to me at all. there was not one person that i could connect with, and i think that is where it failed for me. sure, the plot is somewhat interesting and very subversive towards scientology, wow! what a grand idea...lets see if that already hasnt been mined to the point of futility. the whole ordeal feels fake, from the lighting, the casting, the screenplay to the horrible visual effects(which is supposed to be intentional, i can tell, and so can everyone else, no one is laughing with you though). anyways, i hope it makes it out for sale on dvd at least, i wouldnt want a project that a lot of people obviously put a lot of effort into get completely unnoticed. but its tripe either way

Sentence:
that he can, at the movies close, find true happiness with one woman, while still offering his outlandish advice, is the stuff of dreams!
Text:
time for a rant, eh: i thought spirit was a great movie to watch. however, there were a few things that stop me from rating it higher than a 6 or 7 (im being a little bit generous with the 7).point #1: matt damon aggravates me. i was thinking, what a dicky voice they got for the main character, when i first heard him narrate - and then i realized it is matt damon. the man bugs me so very bad - his performance in the departed was terrible and ruined the movie for me (before the movie got a chance to ruin itself, but thats another story for some other time), as it almost did spirit. i was able to get past this fact because of how little narration there actually was... thankfully.point #2: brian adams sucks... the whole score was terrible... the songs were unoriginal, generic, and poorly executed; not once did i find the music to fit; an

this inept adaptation of arguably one of martin amiss weaker novels fails to even draw comparisons with other druggy oeuvres such as requiem for a dream or anything penned by irvine walsh as it struggles to decide whether it is a slap-stick cartoon or a hyper-realistic hallucination.boringly directed by william marsh in over-saturated hues, a group of public school drop-outs converge in a mansion awaiting the appearance of three american friends for a weekend of decadent drug-taking. and thats it. except for the ludicrous sub-plot soon-to-be-the-main-plot nonsense about an extremist cult group who express themselves with the violent killings of the worlds elite figures, be it political or pampered. within the first reel you know exactly where this is going.what is a talented actor like paul bettany doing in this tiresome, badly written bore? made prior to his rise to fame and jennifer connelly one can be assured that had he been offered this garbage now hed have immediately changed age

Text:
i went to see antone fisher not knowing what to expect and was most pleasantly surprised. the acting job by derek luke was outstanding and the story line was excellent. of course denzel washington did his usual fine job of acting as well as directing. it makes you realized that people with mental problems can be helped and this movie is a perfect example of this. dont miss this one.
Sentence:
i went to see antone fisher not knowing what to expect and was most pleasantly surprised.
Sentence:
the acting job by derek luke was outstanding and the story line was excellent.
Sentence:
of course denzel washington did his usual fine job of acting as well as directing.
Sentence:
it makes you realized that people with mental problems can be helped and this movie is a perfect example of this.
Sentence:
dont miss this one.
Text:
routine suspense yarn about a sociopath (dillon) who gives his sperm to a clinic of human reproduction and starts to harrass the lives of the woman (antony) and his h

Text:
the hip hop rendition of a mos def performance (according to the films musical credits)...it is an incredible piece of savage consciousness that slams the violence in your heart with each snap if anyone can tell me someplace this song, live wire snap by mos def from the ground truth, an undeniable duty to see as the americans who might not support the mission but embrace each soul caught inside this savage miscalculation of purpose...they take on the haunting as so many of us can sit back and be angry...live wire snap by mos def, where can it be founddesperate to find it :medically unable to serve
Sentence:
the hip hop rendition of a mos def performance (according to the films musical credits)...it is an incredible piece of savage consciousness that slams the violence in your heart with each snap if anyone can tell me someplace this song, live wire snap by mos def from the ground truth, an undeniable duty to see as the americans who might not support the mission but embrace each 

Sentence:
and i wanted to love it.
Sentence:
i love sci-fi, the old cliffhangers, and i can appreciate the attempt at nods to flash gordon, and metropolis, but my god, what a waste of money.
Sentence:
i used to work for paramount pictures, and i had written sherry lansing in 1993 about using blue screen for screen tests.
Sentence:
she told me theyd never have an interest or need to do it.
Sentence:
10 years later, paramount releases this piece of crap.
Sentence:
sherry was right in 1993, but must have forgotten her own advice when she greenlighted this dog.
Sentence:
blue screen an effect shot, but not an entire movie.
Sentence:
lets not forget, neither jude nor jolie are terrific actors (but easy on the eyes).
Sentence:
paltrows performance reminds me of a high school effort.
Sentence:
too bad - it couldve worked, but only under a skilled director.
Sentence:
the funny thing is, sky captains director will keep getting work, even after this dreck.
Sentence:
its commerce, not art!
Text:


hotel du nord  is the only carné movie from the 1936-1946 era which has dialogs not written by jacques prévert,but by henri jeanson.janson was much more interested in the jouvet/arletty couple than in the pair of lovers,annabella/aumont.the latter is rather bland ,and their story recalls oddly the edith piafs song les amants dun jour,except that the chanteuses tale is a tragic one.whats fascinating today is this popular little world ,the canal saint-martin settings.this movie is dear to the french movies buffs for another very special reason.the pimp jouvet tells his protégée raymonde he wants a change of air(atmosphère) because she does not understand the meaning of the world atmosphère,the whore raymonde (wonderful arletty)thinks its an insult and she delivers this line,that is ,undeniably,the most famous of the whole french cinéma:in french :atmosphère?atmosphère?est-ce que jai une gueule datmosphère? translation attempt:atmosphere?atmosphere?have i got an atmosphere face? this is o

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Sentence:
lets take the second point first.is anthony john colmans greatest role, or even his signature role?
Sentence:
i have my doubts on either level - but it is among his best known roles.
Sentence:
most of his career, ronald colman played decent gentlemen, frequently in dangerous or atypical situations.
Sentence:
he is bulldog drummond (cleaned up in the goldwyn production not to be an arrogant racist) fighting crime.
Sentence:
he is raffles, the great cricket player and even greater burglar, trying to pull off his best burglary to save a friends honor.
Sentence:
he is robert conway, the great imperial political figure, who is kidnapped and brought to that paradise on earth, shangri-la.
Sentence:
he is dick heldar, manfully going to his death after he learns his masterpiece has been destroyed and knowing he is now blind and useless as an artist.
Sentence:
i can add sidney carton and rudolf rassendyll to this list.
Sentence:
but here he is not heroic.
Sentence:
in fact he is uncon

Sentence:
its a shame that i actually sat through this movie, this very tiresome and predictable movie.
Sentence:
whats wrong with it?
Sentence:
acting: there is not one performance that is even remotely close to even being sub-par (atleast they are all very pretty).
Sentence:
soundtrack (songs): if we get orgy on the soundtrack then everyone will know that they are watching a horror film!
Sentence:
; soundtrack (score): okay, but anyone with a keyboard can make an okay soundtrack these days.
Sentence:
dont even get me started on the what the hell?
Sentence:
moments, here are a few: killer can move at the speed of light--door opens actress turns, no one is there, turns back, there is something sitting in front of her.
Sentence:
; out of now where the killer shows up with a power drill, a really big one!
Sentence:
the filmmakers get points for at least plugging it in, but can i really believe that the killer took the time to find the power outlet to plug it in.
Sentence:
i feel like one

Text:
ill say one thing for jeanette and nelson--even when stranded in a mirthless, witless, painfully inept musical like this, theres still that twinkle in their eyes. yes, the chemistry between the famous duo is there even when the material is paper thin. even when the score is practically a throwaway, non-existent one depending on just a couple of catchy tunes. and even when the circumstances are so unbelievable--yes, even for a fantasy.truth to tell, she has more chemistry with nelson than with her own real-life husband gene raymond in smilin through, which, nonetheless, was a considerably better film.sorry, i love jeanette and nelson as much as the next fan, but this is the bottom of the heap. jeanette is more than embarrassing in her one hep number with binnie barnes--and nelson can only come up with a blank stare when faced with the most ludicrous situations.one can only wonder what this was like on broadway in 1938. surely, it must have had more wit and style than is evident in

Text:
holes is a fable about the past and the way it affects the present lives of at least three people. one of them i will name, the other two are mysteries and will remain so. holes is a story about stanley yelnats iv. he is unlucky in life. unlucky in fact characterizes the fates of most of the yelnats men and has been since exploits of stanley ivs `no good-dirty-rotten-pig-stealing-great-great-grandfather. those particular exploits cursed the familys men to many an ill-fated turn. it is during just such a turn that we meet stanley iv. he has been accused, falsely, of stealing a pair of baseball shoes, freshly donated to a homeless shelter auction, by a famous baseball player. he is given the option of jail, or he can go to a character building camp. `ive never been to camp before, says stanley. with that the judge enthusiastically sends him off to camp green lake.camp green lake is an odd place, with an odd philosophy, `if you take a bad boy, make him dig a hole every day in the ho

Text:
fear of a black hat is a superbly crafted film. i was laughing almost continuously from start to finish. if you have the means, i highly recommend viewing this movie it is, by far, the funniest movie i have had the pleasure to experience. grab your stuff!
Sentence:
fear of a black hat is a superbly crafted film.
Sentence:
i was laughing almost continuously from start to finish.
Sentence:
if you have the means, i highly recommend viewing this movie it is, by far, the funniest movie i have had the pleasure to experience.
Sentence:
grab your stuff!
Text:
a may day 1938 when happen a huge rally celebrating hitlers visit to rome serves as the backdrop for a love story between antoniette(sophia loren)married to fascist(john vernon) and gabriel(marcello mastroianni). shes a boring housewife with several sons and hes a unhappy, solitary homosexual fired from radio and pursued by the fascists. shes left alone in her home when her spouse must to attend the historical celebration. then both

unless you understand wretched excess this movie wont really mean much to you. an attempt was made to interject a bit of humanity into a cold and bleak period consumed by alcohol and drugs -- it doesnt work.when salma hayak does her big disco number her voice is so obviously dubbed it is pathetic -- the producers could at least have gotten someone that sounded remotely like her.the documentary that has been playing on television lately is far superior and gives a much truer view of that period of our history.no one, with the exception of mikey myers, could be accused of acting; however, he does an incredible job.
Sentence:
unless you understand wretched excess this movie wont really mean much to you.
Sentence:
an attempt was made to interject a bit of humanity into a cold and bleak period consumed by alcohol and drugs -- it doesnt work.when salma hayak does her big disco number her voice is so obviously dubbed it is pathetic -- the producers could at least have gotten someone that soun

Text:
in all honesty, if someone told me the director of lemony snickets series of unfortunate events, city of angels, and caspers was going to do a neat little low budget indie film and thatd it be real good, id say that person must be joking. but thats what director brad siberling did. and it was really good.10 items or less has a similar conceit to films like before sunrise, lost in translation, or more recently once. it involves the chance meeting of two people who if serendipity didnt put them there, theyd probably never cross paths, or if they did, they wouldnt say word one to each other. like those films, 10 items or less focuses on the relationship that builds and how the characters come to understand each other and build on each others strengths and weaknesses.the story involves morgan freeman, playing an unnamed actor who goes to research his role as a grocery store employee for an upcoming independent movie and because of things beyond his control, ends up spending the day w

Text:
remember when harrison ford was the biggest star in hollywood because he made great movies? those days are feeling like a more and more distant memory.while hollywood homicide is by no means terrible, it is a routine and surprisingly boring buddy cop movie. its a comedy thats not particularly funny, and an action movie thats not especially exciting. an overabundance of subplots cannot mask the weakest of the central storyline.ford at least appears to be enjoying himself more than is his last few projects, and he is able to carry the film most of the time. hartnett is adequate, but he and ford arent exactly newman and redford as far as chemistry is concerned.all in all, hollywood homicide is a reasonably amusing diversion, but just barely. take out ford, and its not even that.
Sentence:
remember when harrison ford was the biggest star in hollywood because he made great movies?
Sentence:
those days are feeling like a more and more distant memory.while hollywood homicide is by no me

this is a strong movie from a historical and epic perspective. while the story is simple it is pure and straightforward. in truth, it is the standard story of a simple, honorable man whose honor comes into conflict with the more educated and wealthier men of the period.poor vs. rich, honorable vs. dishonorable, a classic but well-told tale without much of the glitz of hollywood stinking up the screen.extra points just because you can almost smell the people on the screen. :)
Sentence:
this is a strong movie from a historical and epic perspective.
Sentence:
while the story is simple it is pure and straightforward.
Sentence:
in truth, it is the standard story of a simple, honorable man whose honor comes into conflict with the more educated and wealthier men of the period.poor vs. rich, honorable vs. dishonorable, a classic but well-told tale without much of the glitz of hollywood stinking up the screen.extra points just because you can almost smell the people on the screen.
Sentence:
:)
