# 3. SENTENCE - BASED ANALYSIS

In [1]:
import csv
import json
import sys
import numpy as np
from __future__ import division   # Only for Python 2.7, comment out if you're using Python 3.6

In [2]:
from nltk.corpus import wordnet as wn

In [3]:
# Files and inputs
CATEGORIES_FILE = 'categories.json'
DATA_FILE = 'output_data_sentences.csv'
ANALYSIS_FILE = 'analysis_output_sentences.json'
OUTPUT_FILE = 'topics.json'
WORD_BUFFER_SIZE = 400
WORD_OFFSET = 200

---- 

## EMOTIONS

- **Assignes a numerical value for the *EMOTIONS* of each *sentence* in each review.**
- **Creates a .json file with these values**

In [4]:
# Init
categories = {}
input_data = []
max_values = {}
data = []

In [5]:
# Read categories
with open(CATEGORIES_FILE) as f:
    categories = json.load(f)
    emotions = categories['emotion']

In [6]:
# Init max
for c in categories:
    print(c)
    max_values[c] = 0

emotion
color
orientation
sentiment
subjectivity


In [7]:
# Read data file
# elımınates some sentences (empty rows)
input_data = []
with open(DATA_FILE, 'r') as f:
    all_rows = csv.reader(f, delimiter=',')
    headers = next(all_rows, None) # remove header
    rows = []
    for row in all_rows:
        if row != []:
            rows.append(row)
        else:
            continue
    for row in rows:
        entry = {}
        for i, h in enumerate(headers):
            if len(row[i])>1:
                entry[h] = row[i]
            else:
                entry[h] = int(row[i])
        input_data.append(entry)

In [8]:
# list of lists of dictionaries(word + emotions + review number) of each review 
list_of_reviews = []

amount_reviews = set()
for row in input_data:
    i = int(row['review'])
    amount_reviews.add(i)

for i in range(len(amount_reviews)):
    review = []
    for row in input_data:
        if int(row['review']) == i:
            review.append(row)
    list_of_reviews.append(review)

In [9]:
amount_sentences_per_review = []
for review in list_of_reviews:
    amount_sentences = set()
    for row in review:
        i = int(row['sentence'])
        amount_sentences.add(i)
    amount_sentences_per_review.append(max(amount_sentences))

In [10]:
lol_of_reviews = [] # List of lists of reviews

for i, rev in enumerate(list_of_reviews):
    review = []
    for j in range(amount_sentences_per_review[i]):
        sentence = []
        for word in rev:
            if int(word['review']) == i and int(word['sentence']) == j:
                sentence.append(word)
        review.append(sentence)
    lol_of_reviews.append(review)

In [11]:
reviewlist = []

for review in lol_of_reviews:
    sentences = []
    for i, sentence in enumerate(review):
        if len(sentence) != 0:
            sentences.append(sentence)
        else:
            continue
    reviewlist.append(sentences)

In [12]:
scores_all_reviews = []

for review in reviewlist:
    anger_score = []
    fear_score = []
    anticipation_score = []
    trust_score = []
    surprise_score = []
    sadness_score = []
    joy_score = []
    disgust_score = []
    scores = []
    for sentence in review:
        amount_of_words = len(sentence)
        anger = []
        fear = []
        anticipation = []
        trust = []
        surprise = []
        sadness = []
        joy = []
        disgust = []
        for i, word in enumerate(sentence):
            anger.append(sentence[i]['anger'])
            fear.append(sentence[i]['fear'])
            anticipation.append(sentence[i]['anticipation'])
            trust.append(sentence[i]['trust'])
            surprise.append(sentence[i]['surprise'])
            sadness.append(sentence[i]['sadness'])
            joy.append(sentence[i]['joy'])
            disgust.append(sentence[i]['disgust'])

        anger_score.append(sum(anger)/amount_of_words)
        fear_score.append(sum(fear)/amount_of_words)
        anticipation_score.append(sum(anticipation)/amount_of_words)
        trust_score.append(sum(trust)/amount_of_words)
        surprise_score.append(sum(surprise)/amount_of_words)
        sadness_score.append(sum(sadness)/amount_of_words)
        joy_score.append(sum(joy)/amount_of_words)
        disgust_score.append(sum(disgust)/amount_of_words)

    scores.append(anger_score)
    scores.append(fear_score)
    scores.append(anticipation_score)
    scores.append(trust_score)
    scores.append(surprise_score)
    scores.append(sadness_score)
    scores.append(joy_score)
    scores.append(disgust_score)
    
    scores_all_reviews.append(scores)

In [13]:
scores_transposed = []

for review in scores_all_reviews:
    scores_transposed.append(np.transpose(review))

In [14]:
scores_total = []

for review in scores_transposed:
    for sentence in review:
        scores_total.append(list(sentence))

In [15]:
with open(ANALYSIS_FILE, 'w') as f:
    json.dump(scores_total, f)
    print('Successfully wrote '+str(len(scores_total))+' entries to file: '+ANALYSIS_FILE)

Successfully wrote 18185 entries to file: analysis_output_sentences.json


## TOPICS

- **Assignes a numerical value for the *TOPICS* of each *sentence* in each review.**
- **Creates a .json file with these values**

In [16]:
all_reviews=[]
for review in reviewlist:
    reviews=[]
    for sentence in review:
        sentences=[]
        for word in sentence:
            for i,j in word.items():
                if i=="english word":
                    sentences.append(j)
            #sentences.append(words)
        reviews.append(sentences)
    all_reviews.append(reviews)

In [17]:
cleanliness = []
for ss in wn.synsets('clean'):
    for lemma in ss.lemma_names():
        cleanliness.append(lemma)

In [18]:
service = []
for ss in wn.synsets('service'):
    for lemma in ss.lemma_names():
        service.append(lemma)
for ss in wn.synsets('serve'):
    for lemma in ss.lemma_names():
        service.append(lemma)
for ss in wn.synsets('food'):
    for lemma in ss.lemma_names():
        service.append(lemma)
for ss in wn.synsets('breakfast'):
    for lemma in ss.lemma_names():
        service.append(lemma)
for ss in wn.synsets('lunch'):
    for lemma in ss.lemma_names():
        service.append(lemma)
for ss in wn.synsets('dinner'):
    for lemma in ss.lemma_names():
        service.append(lemma)

In [19]:
value = []
for ss in wn.synsets('value'):
    for lemma in ss.lemma_names():
        value.append(lemma)
for ss in wn.synsets('price'):
    for lemma in ss.lemma_names():
        value.append(lemma)

In [20]:
location = []
for ss in wn.synsets('location'):
    for lemma in ss.lemma_names():
        location.append(lemma)
for ss in wn.synsets('transportation'):
    for lemma in ss.lemma_names():
        location.append(lemma)

In [21]:
sleep = []
for ss in wn.synsets('sleep'):
    for lemma in ss.lemma_names():
        sleep.append(lemma)
for ss in wn.synsets('pillow'):
    for lemma in ss.lemma_names():
        sleep.append(lemma)

In [22]:
room = []
for ss in wn.synsets('room'):
    for lemma in ss.lemma_names():
        room.append(lemma)
for ss in wn.synsets('suite'):
    for lemma in ss.lemma_names():
        room.append(lemma)
for ss in wn.synsets('bedroom'):
    for lemma in ss.lemma_names():
        room.append(lemma)

In [23]:
topic_all_reviews = []

for review in all_reviews:
    topic_review = []
    for sentence in review:
        topic_sentence = [0]*6
        for word in sentence:
            if word in cleanliness:
                topic_sentence[0] += 1
            if word in service:
                topic_sentence[1] += 1
            if word in value:
                topic_sentence[2] += 1
            if word in location:
                topic_sentence[3] += 1
            if word in sleep:
                topic_sentence[4] += 1
            if word in room:
                topic_sentence[5] += 1
        topic_review.append(topic_sentence)
    topic_all_reviews.append(topic_review)

In [24]:
topics_total = []

for review in topic_all_reviews: 
    for sentence in review:
        topics_total.append(sentence)

In [25]:
with open(OUTPUT_FILE, 'w') as f:
    json.dump(topics_total, f)
    print('Successfully wrote '+str(len(topics_total))+' entries to file: '+OUTPUT_FILE)

Successfully wrote 18185 entries to file: topics.json
