In [2]:
def pretty_print_review_and_label(i):
    """Print the i-th label followed by the first 80 characters of the i-th review.

    Reads the module-level `labels` and `reviews` lists; the output has the form
    ``<label>\\t:\\t<review prefix>...``.
    """
    label = labels[i]
    preview = reviews[i][:80]
    print(label + "\t:\t" + preview + "...")

# What we know: each line of reviews.txt becomes one entry in `reviews`.
with open('reviews.txt', 'r') as g:
    # Strip only the newline terminator. Slicing off the last character
    # (x[:-1]) would truncate a final line that has no trailing newline.
    reviews = [line.rstrip('\n') for line in g]

# What we WANT to know: each line of labels.txt, upper-cased, becomes one entry in `labels`.
with open('labels.txt', 'r') as g:
    labels = [line.rstrip('\n').upper() for line in g]

In [3]:
import time
import sys
import numpy as np

# Let's tweak our network from before to model these phenomena
class SentimentNetwork:
    """Small feed-forward network that labels a review POSITIVE or NEGATIVE.

    Architecture, as implemented below:
      * input layer  - one unit per vocabulary word, set to 1 when the word
        occurs in the review (presence, not a count);
      * hidden layer - linear, no activation function;
      * output layer - a single sigmoid unit.
    Weights are updated after every individual review in `train`.
    """

    def __init__(self, reviews, labels, hidden_nodes=10, learning_rate=0.1):
        """Build the vocabulary from the given data and initialise the weights.

        Args:
            reviews: iterable of review strings (words are split on single spaces).
            labels: iterable of label strings.
            hidden_nodes: number of units in the hidden layer.
            learning_rate: multiplier applied to every weight update.
        """
        # Seed NumPy's global random number generator so that the random
        # hidden-to-output weights are the same on every construction.
        np.random.seed(1)

        self.pre_process_data(reviews, labels)

        self.init_network(len(self.review_vocab), hidden_nodes, 1, learning_rate)

    def pre_process_data(self, reviews, labels):
        """Collect the word and label vocabularies and their index lookups.

        Sets `review_vocab`, `label_vocab`, their sizes, and the
        `word2index` / `label2index` dictionaries on the instance.
        """
        review_vocab = set()
        for review in reviews:
            for word in review.split(" "):
                review_vocab.add(word)
        self.review_vocab = list(review_vocab)

        label_vocab = set()
        for label in labels:
            label_vocab.add(label)
        self.label_vocab = list(label_vocab)

        self.review_vocab_size = len(self.review_vocab)
        self.label_vocab_size = len(self.label_vocab)

        # Position of each word / label inside its vocabulary list.
        self.word2index = {word: i for i, word in enumerate(self.review_vocab)}
        self.label2index = {label: i for i, label in enumerate(self.label_vocab)}

    def init_network(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        """Store the layer sizes and create the weight matrices and input buffer."""
        # Set number of nodes in input, hidden and output layers.
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # Input-to-hidden weights start at zero; hidden-to-output weights are
        # drawn from a normal distribution with std output_nodes ** -0.5.
        self.weights_0_1 = np.zeros((self.input_nodes, self.hidden_nodes))
        self.weights_1_2 = np.random.normal(0.0, self.output_nodes**-0.5,
                                            (self.hidden_nodes, self.output_nodes))

        self.learning_rate = learning_rate

        # Reusable (1, input_nodes) buffer holding the encoded current review.
        self.layer_0 = np.zeros((1, input_nodes))

    def update_input_layer(self, review):
        """Encode `review` into `self.layer_0` as a 0/1 word-presence vector.

        Words that are not in the vocabulary are ignored.
        """
        # clear out previous state, reset the layer to be all 0s
        self.layer_0 *= 0
        for word in review.split(" "):
            # Membership test directly on the dict (no intermediate keys view/list).
            if word in self.word2index:
                self.layer_0[0][self.word2index[word]] = 1

    def get_target_for_label(self, label):
        """Return 1 for the label 'POSITIVE' and 0 for any other label."""
        if label == 'POSITIVE':
            return 1
        return 0

    def sigmoid(self, x):
        """Logistic function 1 / (1 + exp(-x))."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_output_2_derivative(self, output):
        """Derivative of the sigmoid, expressed in terms of the sigmoid's output."""
        return output * (1 - output)

    def _forward(self):
        """Propagate the current `self.layer_0` through the network.

        Returns:
            (layer_1, layer_2): the linear hidden activations and the sigmoid output.
        """
        layer_1 = self.layer_0.dot(self.weights_0_1)
        layer_2 = self.sigmoid(layer_1.dot(self.weights_1_2))
        return layer_1, layer_2

    @staticmethod
    def _reviews_per_second(count, start):
        """Return `count` per second elapsed since `start`.

        Yields 0.0 instead of raising ZeroDivisionError when the clock reports
        no elapsed time (possible with a coarse timer on the first iterations).
        """
        elapsed = time.time() - start
        if elapsed > 0:
            return count / float(elapsed)
        return 0.0

    def train(self, training_reviews, training_labels):
        """Run one pass over the training data, updating weights after each review.

        Progress, speed and running training accuracy are written to stdout.

        Args:
            training_reviews: sequence of review strings.
            training_labels: sequence of labels, same length as `training_reviews`.

        Raises:
            AssertionError: if the two sequences differ in length.
        """
        assert(len(training_reviews) == len(training_labels))

        correct_so_far = 0

        start = time.time()

        for i in range(len(training_reviews)):

            review = training_reviews[i]
            label = training_labels[i]

            ### Forward pass ###
            self.update_input_layer(review)
            layer_1, layer_2 = self._forward()

            ### Backward pass ###

            # Output error: difference between the actual output and the target.
            layer_2_error = layer_2 - self.get_target_for_label(label)
            layer_2_delta = layer_2_error * self.sigmoid_output_2_derivative(layer_2)

            # Error propagated back to the hidden layer (uses the weights
            # *before* this step's update). The hidden layer has no
            # nonlinearity, so its delta equals its error.
            layer_1_error = layer_2_delta.dot(self.weights_1_2.T)
            layer_1_delta = layer_1_error

            # Gradient-descent step on both weight matrices.
            self.weights_1_2 -= layer_1.T.dot(layer_2_delta) * self.learning_rate
            self.weights_0_1 -= self.layer_0.T.dot(layer_1_delta) * self.learning_rate

            # The prediction counts as correct when the output is on the
            # target's side of 0.5.
            if(np.abs(layer_2_error) < 0.5):
                correct_so_far += 1

            reviews_per_second = self._reviews_per_second(i, start)

            sys.stdout.write("\rProgress:" + str(100 * i/float(len(training_reviews)))[:4] + "% Speed(reviews/sec):" + str(reviews_per_second)[0:5] + " #Correct:" + str(correct_so_far) + " #Trained:" + str(i+1) + " Training Accuracy:" + str(correct_so_far * 100 / float(i+1))[:4] + "%")
            # Start a fresh line every 2500 reviews so a history of progress is kept.
            if(i % 2500 == 0):
                print("")

    def test(self, testing_reviews, testing_labels):
        """Predict every review in `testing_reviews` and report running accuracy.

        A prediction is correct when `run` returns exactly the corresponding
        entry of `testing_labels`. Progress is written to stdout; nothing is
        returned.
        """
        correct = 0

        start = time.time()

        for i in range(len(testing_reviews)):
            pred = self.run(testing_reviews[i])
            if(pred == testing_labels[i]):
                correct += 1

            reviews_per_second = self._reviews_per_second(i, start)

            # The speed is a rate, not a percentage, so no "%" follows it
            # (this matches the format used by train()).
            sys.stdout.write("\rProgress:" + str(100 * i/float(len(testing_reviews)))[:4] \
                             + "% Speed(reviews/sec):" + str(reviews_per_second)[0:5] \
                             + " #Correct:" + str(correct) + " #Tested:" + str(i+1) + " Testing Accuracy:" + str(correct * 100 / float(i+1))[:4] + "%")

    def run(self, review):
        """Return "POSITIVE" if the network's output for `review` exceeds 0.5, else "NEGATIVE".

        NOTE(review): the text is lower-cased here, whereas `pre_process_data`
        and `train` use the text exactly as given - so the training reviews are
        presumably already lower-case; confirm against the data files.
        """
        self.update_input_layer(review.lower())

        _, layer_2 = self._forward()

        if(layer_2[0] > 0.5):
            return "POSITIVE"
        else:
            return "NEGATIVE"

In [4]:
# Build the network; its vocabulary is collected from every review and label.
# The constructor only iterates over its arguments, so the lists are passed directly.
mlp2 = SentimentNetwork(reviews, labels, learning_rate=0.1)

In [5]:
# Train the network on every review except the last 1,000, which are left out of training.
mlp2.train(reviews[:-1000],labels[:-1000])

Progress:0.0% Speed(reviews/sec):0.0 #Correct:0 #Trained:1 Training Accuracy:0.0%
Progress:10.4% Speed(reviews/sec):196.9 #Correct:1793 #Trained:2501 Training Accuracy:71.6%
Progress:20.8% Speed(reviews/sec):191.8 #Correct:3788 #Trained:5001 Training Accuracy:75.7%
Progress:31.2% Speed(reviews/sec):194.8 #Correct:5872 #Trained:7501 Training Accuracy:78.2%
Progress:41.6% Speed(reviews/sec):190.9 #Correct:8006 #Trained:10001 Training Accuracy:80.0%
Progress:52.0% Speed(reviews/sec):192.6 #Correct:10144 #Trained:12501 Training Accuracy:81.1%
Progress:62.5% Speed(reviews/sec):192.6 #Correct:12284 #Trained:15001 Training Accuracy:81.8%
Progress:72.9% Speed(reviews/sec):194.4 #Correct:14404 #Trained:17501 Training Accuracy:82.3%
Progress:83.3% Speed(reviews/sec):196.2 #Correct:16594 #Trained:20001 Training Accuracy:82.9%
Progress:93.7% Speed(reviews/sec):197.5 #Correct:18777 #Trained:22501 Training Accuracy:83.4%
Progress:99.9% Speed(reviews/sec):198.0 #Correct:20099 #Trained:24000 Training 

In [6]:
# Evaluate the trained model on the last 1,000 reviews, i.e. exactly the ones
# excluded from training by the reviews[:-1000] slice.
mlp2.test(reviews[-1000:],labels[-1000:])


Progress:0.0% Speed(reviews/sec):0.0% #Correct:1 #Tested:1 Testing Accuracy:100.%Progress:0.1% Speed(reviews/sec):340.8% #Correct:1 #Tested:2 Testing Accuracy:50.0%Progress:0.2% Speed(reviews/sec):456.6% #Correct:1 #Tested:3 Testing Accuracy:33.3%Progress:0.3% Speed(reviews/sec):522.9% #Correct:2 #Tested:4 Testing Accuracy:50.0%Progress:0.4% Speed(reviews/sec):556.4% #Correct:3 #Tested:5 Testing Accuracy:60.0%Progress:0.5% Speed(reviews/sec):593.4% #Correct:4 #Tested:6 Testing Accuracy:66.6%Progress:0.6% Speed(reviews/sec):609.5% #Correct:5 #Tested:7 Testing Accuracy:71.4%Progress:0.7% Speed(reviews/sec):626.7% #Correct:6 #Tested:8 Testing Accuracy:75.0%Progress:0.8% Speed(reviews/sec):644.0% #Correct:7 #Tested:9 Testing Accuracy:77.7%Progress:0.9% Speed(reviews/sec):612.2% #Correct:8 #Tested:10 Testing Accuracy:80.0%Progress:1.0% Speed(reviews/sec):628.2% #Correct:9 #Tested:11 Testing Accuracy:81.8%Progress:1.1% Speed(reviews/sec):645.5% #Correct:10 #Tested:12 Testing Accu

Progress:13.8% Speed(reviews/sec):677.4% #Correct:123 #Tested:139 Testing Accuracy:88.4%Progress:13.9% Speed(reviews/sec):676.1% #Correct:124 #Tested:140 Testing Accuracy:88.5%Progress:14.0% Speed(reviews/sec):677.6% #Correct:125 #Tested:141 Testing Accuracy:88.6%Progress:14.1% Speed(reviews/sec):676.6% #Correct:126 #Tested:142 Testing Accuracy:88.7%Progress:14.2% Speed(reviews/sec):676.7% #Correct:127 #Tested:143 Testing Accuracy:88.8%Progress:14.3% Speed(reviews/sec):675.5% #Correct:128 #Tested:144 Testing Accuracy:88.8%Progress:14.4% Speed(reviews/sec):674.8% #Correct:129 #Tested:145 Testing Accuracy:88.9%Progress:14.5% Speed(reviews/sec):675.4% #Correct:130 #Tested:146 Testing Accuracy:89.0%Progress:14.6% Speed(reviews/sec):674.9% #Correct:131 #Tested:147 Testing Accuracy:89.1%Progress:14.7% Speed(reviews/sec):675.2% #Correct:132 #Tested:148 Testing Accuracy:89.1%Progress:14.8% Speed(reviews/sec):676.1% #Correct:133 #Tested:149 Testing Accuracy:89.2%Progress:14.9% Speed

Progress:30.2% Speed(reviews/sec):744.5% #Correct:269 #Tested:303 Testing Accuracy:88.7%Progress:30.3% Speed(reviews/sec):744.8% #Correct:270 #Tested:304 Testing Accuracy:88.8%Progress:30.4% Speed(reviews/sec):745.6% #Correct:271 #Tested:305 Testing Accuracy:88.8%Progress:30.5% Speed(reviews/sec):746.0% #Correct:271 #Tested:306 Testing Accuracy:88.5%Progress:30.6% Speed(reviews/sec):743.7% #Correct:272 #Tested:307 Testing Accuracy:88.5%Progress:30.7% Speed(reviews/sec):744.2% #Correct:272 #Tested:308 Testing Accuracy:88.3%Progress:30.8% Speed(reviews/sec):744.2% #Correct:273 #Tested:309 Testing Accuracy:88.3%Progress:30.9% Speed(reviews/sec):743.1% #Correct:273 #Tested:310 Testing Accuracy:88.0%Progress:31.0% Speed(reviews/sec):743.5% #Correct:274 #Tested:311 Testing Accuracy:88.1%Progress:31.1% Speed(reviews/sec):743.3% #Correct:275 #Tested:312 Testing Accuracy:88.1%Progress:31.2% Speed(reviews/sec):743.6% #Correct:276 #Tested:313 Testing Accuracy:88.1%Progress:31.3% Speed

Progress:46.1% Speed(reviews/sec):757.4% #Correct:407 #Tested:462 Testing Accuracy:88.0%Progress:46.2% Speed(reviews/sec):755.5% #Correct:408 #Tested:463 Testing Accuracy:88.1%Progress:46.3% Speed(reviews/sec):755.4% #Correct:408 #Tested:464 Testing Accuracy:87.9%Progress:46.4% Speed(reviews/sec):755.5% #Correct:409 #Tested:465 Testing Accuracy:87.9%Progress:46.5% Speed(reviews/sec):755.4% #Correct:410 #Tested:466 Testing Accuracy:87.9%Progress:46.6% Speed(reviews/sec):755.8% #Correct:411 #Tested:467 Testing Accuracy:88.0%Progress:46.7% Speed(reviews/sec):755.5% #Correct:412 #Tested:468 Testing Accuracy:88.0%Progress:46.8% Speed(reviews/sec):755.5% #Correct:413 #Tested:469 Testing Accuracy:88.0%Progress:46.9% Speed(reviews/sec):755.8% #Correct:413 #Tested:470 Testing Accuracy:87.8%Progress:47.0% Speed(reviews/sec):756.2% #Correct:414 #Tested:471 Testing Accuracy:87.8%Progress:47.1% Speed(reviews/sec):756.1% #Correct:415 #Tested:472 Testing Accuracy:87.9%Progress:47.2% Speed

Progress:61.2% Speed(reviews/sec):752.4% #Correct:535 #Tested:613 Testing Accuracy:87.2%Progress:61.3% Speed(reviews/sec):752.4% #Correct:536 #Tested:614 Testing Accuracy:87.2%Progress:61.4% Speed(reviews/sec):752.6% #Correct:537 #Tested:615 Testing Accuracy:87.3%Progress:61.5% Speed(reviews/sec):752.7% #Correct:537 #Tested:616 Testing Accuracy:87.1%Progress:61.6% Speed(reviews/sec):752.5% #Correct:538 #Tested:617 Testing Accuracy:87.1%Progress:61.7% Speed(reviews/sec):752.6% #Correct:539 #Tested:618 Testing Accuracy:87.2%Progress:61.8% Speed(reviews/sec):752.6% #Correct:539 #Tested:619 Testing Accuracy:87.0%Progress:61.9% Speed(reviews/sec):752.6% #Correct:539 #Tested:620 Testing Accuracy:86.9%Progress:62.0% Speed(reviews/sec):752.3% #Correct:539 #Tested:621 Testing Accuracy:86.7%Progress:62.1% Speed(reviews/sec):752.4% #Correct:540 #Tested:622 Testing Accuracy:86.8%Progress:62.2% Speed(reviews/sec):752.5% #Correct:540 #Tested:623 Testing Accuracy:86.6%Progress:62.3% Speed

Progress:77.9% Speed(reviews/sec):767.5% #Correct:663 #Tested:780 Testing Accuracy:85.0%Progress:78.0% Speed(reviews/sec):767.2% #Correct:664 #Tested:781 Testing Accuracy:85.0%Progress:78.1% Speed(reviews/sec):767.5% #Correct:665 #Tested:782 Testing Accuracy:85.0%Progress:78.2% Speed(reviews/sec):767.3% #Correct:665 #Tested:783 Testing Accuracy:84.9%Progress:78.3% Speed(reviews/sec):767.4% #Correct:666 #Tested:784 Testing Accuracy:84.9%Progress:78.4% Speed(reviews/sec):767.2% #Correct:667 #Tested:785 Testing Accuracy:84.9%Progress:78.5% Speed(reviews/sec):767.2% #Correct:667 #Tested:786 Testing Accuracy:84.8%Progress:78.6% Speed(reviews/sec):767.1% #Correct:668 #Tested:787 Testing Accuracy:84.8%Progress:78.7% Speed(reviews/sec):766.9% #Correct:669 #Tested:788 Testing Accuracy:84.8%Progress:78.8% Speed(reviews/sec):767.1% #Correct:669 #Tested:789 Testing Accuracy:84.7%Progress:78.9% Speed(reviews/sec):766.7% #Correct:670 #Tested:790 Testing Accuracy:84.8%Progress:79.0% Speed

Progress:90.1% Speed(reviews/sec):740.5% #Correct:770 #Tested:902 Testing Accuracy:85.3%Progress:90.2% Speed(reviews/sec):740.7% #Correct:771 #Tested:903 Testing Accuracy:85.3%Progress:90.3% Speed(reviews/sec):741.0% #Correct:772 #Tested:904 Testing Accuracy:85.3%Progress:90.4% Speed(reviews/sec):741.0% #Correct:773 #Tested:905 Testing Accuracy:85.4%Progress:90.5% Speed(reviews/sec):741.0% #Correct:774 #Tested:906 Testing Accuracy:85.4%Progress:90.6% Speed(reviews/sec):739.1% #Correct:775 #Tested:907 Testing Accuracy:85.4%Progress:90.7% Speed(reviews/sec):737.2% #Correct:776 #Tested:908 Testing Accuracy:85.4%Progress:90.8% Speed(reviews/sec):737.1% #Correct:777 #Tested:909 Testing Accuracy:85.4%Progress:90.9% Speed(reviews/sec):737.1% #Correct:777 #Tested:910 Testing Accuracy:85.3%Progress:91.0% Speed(reviews/sec):737.3% #Correct:778 #Tested:911 Testing Accuracy:85.4%Progress:91.1% Speed(reviews/sec):737.3% #Correct:778 #Tested:912 Testing Accuracy:85.3%Progress:91.2% Speed

Progress:99.5% Speed(reviews/sec):700.8% #Correct:848 #Tested:996 Testing Accuracy:85.1%Progress:99.6% Speed(reviews/sec):701.0% #Correct:849 #Tested:997 Testing Accuracy:85.1%Progress:99.7% Speed(reviews/sec):698.8% #Correct:850 #Tested:998 Testing Accuracy:85.1%Progress:99.8% Speed(reviews/sec):697.6% #Correct:851 #Tested:999 Testing Accuracy:85.1%Progress:99.9% Speed(reviews/sec):697.4% #Correct:852 #Tested:1000 Testing Accuracy:85.2%

In [13]:
import os 
def isBlank(myString):
    """Return True when `myString` is None, empty, or whitespace only; otherwise False."""
    has_content = bool(myString) and bool(myString.strip())
    return not has_content

# Classify every non-blank line of review.txt with the trained network and
# report the share of POSITIVE predictions as a rating out of 10.
# (To classify a single piece of text directly, call mlp2.run("<review text>").)
with open('review.txt', 'r', encoding='windows-1251') as g:  # What we know!
    # Strip only the newline terminator. Slicing off the last character
    # (x[:-1]) would truncate a final line that has no trailing newline.
    review = [line.rstrip('\n') for line in g]

j = 0         # number of non-blank lines that were classified
positive = 0  # lines predicted POSITIVE
negative = 0  # lines predicted NEGATIVE
for text in review:
    if isBlank(text):
        continue
    j += 1
    if mlp2.run(text) == 'POSITIVE':
        positive += 1
    else:
        negative += 1

print("Total = " + str(j))
print("Positive = " + str(positive))
print("Negative = " + str(negative))

if j:
    print("Raiting (out of 10 )" + str((positive / j) * 10))
else:
    # Every line was blank: positive / j would raise ZeroDivisionError.
    print("No non-blank reviews found in review.txt; rating not computed")

# NOTE(review): deleting the input file means this cell raises
# FileNotFoundError if it is run again before review.txt is recreated -
# presumably the file is regenerated elsewhere before each run; confirm.
os.remove("review.txt")

FileNotFoundError: [Errno 2] No such file or directory: 'review.txt'

In [10]:
import sys
# Print the value reported by sys.getdefaultencoding() for this interpreter.
print(sys.getdefaultencoding())

utf-8
