# Sentiment Analysis Using RNN

In [2]:
import numpy as np
from string import punctuation
from collections import Counter, OrderedDict
import itertools

import torch 
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn

import nltk
from nltk.corpus import stopwords

In [3]:
with open('deep-learning-v2-pytorch/sentiment-analysis-network/reviews.txt', 'r') as f:
    reviews = f.read()
with open('deep-learning-v2-pytorch/sentiment-analysis-network/labels.txt', 'r') as f:
    labels = f.read()

In [4]:
# this shows that the files were imported character by character
labels[:100]

'positive\nnegative\npositive\nnegative\npositive\nnegative\npositive\nnegative\npositive\nnegative\npositive\nn'

In [5]:
sample = reviews[:3000]

In [6]:
# This function will recieve the imported reviews (ch by ch) and return 
def clean_text(text):
    ''' This Function recieves reviews (ch by ch) and returns a list of
    reviews without punctuation and stopwords'''
    # remove punctuation
    s = ''.join(ch.lower() for ch in text if ch not in punctuation)
    
    # separate each review and add to a list so that I have a list of reviews
    separated_reviews = []

    for review in s.split('\n'):
        review = ''.join(review)
        separated_reviews.append(review)
        
        
    # remove stopwords and return a list of reviews
    clean_text = []
    for review in separated_reviews:
        review_no_stopwords = []
        for word in review.split():
            if word not in stopwords.words('english'):
                review_no_stopwords.append(word)
        clean_text.append(' '.join(review_no_stopwords))
        
    return clean_text

In [7]:
# map each word to a number 

In [8]:
labels = clean_text(labels)

In [9]:
reviews = clean_text(reviews)

In [119]:
sample = reviews[:4]

In [120]:
sample[:2]

['bromwell high cartoon comedy ran time programs school life teachers years teaching profession lead believe bromwell high satire much closer reality teachers scramble survive financially insightful students see right pathetic teachers pomp pettiness whole situation remind schools knew students saw episode student repeatedly tried burn school immediately recalled high classic line inspector sack one teachers student welcome bromwell high expect many adults age think bromwell high far fetched pity',
 'story man unnatural feelings pig starts opening scene terrific example absurd comedy formal orchestra audience turned insane violent mob crazy chantings singers unfortunately stays absurd whole time general narrative eventually making putting even era turned cryptic dialogue would make shakespeare seem easy third grader technical level better might think good cinematography future great vilmos zsigmond future stars sally kirkland frederic forrest seen briefly']

In [None]:
def map_words_to_numbers(text):
    
    all_reviews = ''.join(text)
    all_reviews = all_reviews.split()
    
    count = dict(Counter(all_reviews))
    
     = {}
    index = 1

    for key, value in count.items():
        word_to_num[index] = key
        index += 1
        
    return word_to_num

In [121]:
all_reviews = ''.join(sample)

In [122]:
all_reviews = all_reviews.split()


In [131]:
count = Counter(all_reviews)

In [132]:
type(count)

collections.Counter

In [133]:
dict(count)

{'bromwell': 4,
 'high': 5,
 'cartoon': 1,
 'comedy': 3,
 'ran': 1,
 'time': 6,
 'programs': 1,
 'school': 3,
 'life': 2,
 'teachers': 4,
 'years': 2,
 'teaching': 1,
 'profession': 1,
 'lead': 1,
 'believe': 1,
 'satire': 1,
 'much': 5,
 'closer': 1,
 'reality': 1,
 'scramble': 1,
 'survive': 1,
 'financially': 1,
 'insightful': 1,
 'students': 2,
 'see': 5,
 'right': 3,
 'pathetic': 1,
 'pomp': 1,
 'pettiness': 1,
 'whole': 2,
 'situation': 1,
 'remind': 1,
 'schools': 1,
 'knew': 1,
 'saw': 1,
 'episode': 1,
 'student': 2,
 'repeatedly': 1,
 'tried': 1,
 'burn': 1,
 'immediately': 1,
 'recalled': 1,
 'classic': 2,
 'line': 1,
 'inspector': 1,
 'sack': 1,
 'one': 6,
 'welcome': 1,
 'expect': 1,
 'many': 2,
 'adults': 1,
 'age': 1,
 'think': 4,
 'far': 3,
 'fetched': 1,
 'pitystory': 1,
 'man': 2,
 'unnatural': 1,
 'feelings': 1,
 'pig': 1,
 'starts': 2,
 'opening': 2,
 'scene': 1,
 'terrific': 1,
 'example': 1,
 'absurd': 2,
 'formal': 1,
 'orchestra': 1,
 'audience': 1,
 'turned': 2

In [139]:
word_to_num = {}
index = 1

for key, value in count.items():
    word_to_num[index] = key
    index += 1


In [140]:
word_to_num

{1: 'bromwell',
 2: 'high',
 3: 'cartoon',
 4: 'comedy',
 5: 'ran',
 6: 'time',
 7: 'programs',
 8: 'school',
 9: 'life',
 10: 'teachers',
 11: 'years',
 12: 'teaching',
 13: 'profession',
 14: 'lead',
 15: 'believe',
 16: 'satire',
 17: 'much',
 18: 'closer',
 19: 'reality',
 20: 'scramble',
 21: 'survive',
 22: 'financially',
 23: 'insightful',
 24: 'students',
 25: 'see',
 26: 'right',
 27: 'pathetic',
 28: 'pomp',
 29: 'pettiness',
 30: 'whole',
 31: 'situation',
 32: 'remind',
 33: 'schools',
 34: 'knew',
 35: 'saw',
 36: 'episode',
 37: 'student',
 38: 'repeatedly',
 39: 'tried',
 40: 'burn',
 41: 'immediately',
 42: 'recalled',
 43: 'classic',
 44: 'line',
 45: 'inspector',
 46: 'sack',
 47: 'one',
 48: 'welcome',
 49: 'expect',
 50: 'many',
 51: 'adults',
 52: 'age',
 53: 'think',
 54: 'far',
 55: 'fetched',
 56: 'pitystory',
 57: 'man',
 58: 'unnatural',
 59: 'feelings',
 60: 'pig',
 61: 'starts',
 62: 'opening',
 63: 'scene',
 64: 'terrific',
 65: 'example',
 66: 'absurd',
 6