In [1]:
# Imports the Google Cloud client library
from google.cloud import language
from google.cloud.language import enums
from google.cloud.language import types

# Instantiates a client; this single instance is reused for every API request
# made in the cells below.
# NOTE(review): presumably relies on Google Cloud application-default credentials
# being configured in the environment — confirm before a fresh run.
client = language.LanguageServiceClient()

## Sentiment analysis on the posts

We read the posts file and use the Google Cloud Natural Language API to get a sentiment score for each document (an initial post or one of its comments), as well as for each entity (word) mentioned within each document.

In [2]:
import pandas as pd

df = pd.read_json('data/1000_posts.json')

# Both lists are appended to once per analysed text, in the same order, so
# position k in each list refers to the same text.
entities_result = []
annotations_result = []
# Number of posts fully processed so far; printed as a progress indicator.
counter = 0
# Only the first 100 posts are analysed; every text triggers two API requests.
for index, row in df.iloc[0:100,:].iterrows():
    # The initial post and each of its comments are analysed as separate documents.
    texts = [row['init_post']] + row['comments']
    for text in texts:
        document = types.Document(
                    content=text,
                    type=enums.Document.Type.PLAIN_TEXT,
                    language='en')
        # Entity-level sentiment: one entry per entity found in the text.
        entities = client.analyze_entity_sentiment(document=document).entities
        entities_result.append(entities)

        # Document-level sentiment: overall score and magnitude of the text.
        annotations = client.analyze_sentiment(document=document)
        annotations_result.append({"score": annotations.document_sentiment.score,
               "magnitude": annotations.document_sentiment.magnitude})

    # The counter was initialised but never advanced or shown; the output recorded
    # for this cell (1..100, one line per post) indicates it was meant as progress.
    counter += 1
    print(counter)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100


We group the entity-level results by word, collecting every score and magnitude recorded for that word across the entire corpus.

In [3]:
# Map each entity name to the lists of sentiment scores and magnitudes recorded
# for it across all analysed texts (one list entry per occurrence, in order).
processed_docword_sentiment = {}
for doc_entities in entities_result:
    for entity in doc_entities:
        # Create the two empty lists the first time a name is seen.
        collected = processed_docword_sentiment.setdefault(
            entity.name, {'score': [], 'magnitude': []})
        collected['score'].append(entity.sentiment.score)
        collected['magnitude'].append(entity.sentiment.magnitude)

In [4]:
import json

# Persist both result sets as JSON. Because ensure_ascii=False writes non-ASCII
# characters verbatim, the files are opened explicitly as UTF-8 instead of relying
# on the platform's default encoding (which may be unable to encode them).
with open('data/document_entity_sentiment.json', 'w', encoding='utf-8') as outfile:
    json.dump(processed_docword_sentiment, outfile, indent=4, ensure_ascii=False)

with open('data/document_sentiment.json', 'w', encoding='utf-8') as outfile:
    json.dump(annotations_result, outfile, indent=4, ensure_ascii=False)

In [31]:
# Display the entry stored for the word 'Classic'.
# NOTE(review): `word_data` is not defined in any cell visible here — presumably it
# is data/document_entity_sentiment.json loaded back into a dict; confirm, and make
# sure the loading cell runs before this one on a fresh kernel.
word_data['Classic']

{'score': [-0.20000000298023224,
  0.30000001192092896,
  -0.30000001192092896,
  0.5,
  -0.6000000238418579,
  -0.10000000149011612,
  0.4000000059604645,
  0.0,
  0.800000011920929,
  0.5,
  0.10000000149011612,
  0.5,
  0.800000011920929,
  0.5,
  0.8999999761581421,
  0.6000000238418579,
  0.8999999761581421,
  0.6000000238418579,
  0.10000000149011612,
  0.10000000149011612,
  0.699999988079071,
  0.6000000238418579,
  0.699999988079071,
  0.0,
  0.20000000298023224,
  0.10000000149011612,
  0.8999999761581421,
  -0.10000000149011612,
  0.0,
  0.0,
  0.4000000059604645,
  0.10000000149011612,
  0.20000000298023224,
  -0.699999988079071,
  0.10000000149011612,
  0.10000000149011612,
  0.800000011920929,
  -0.10000000149011612,
  0.800000011920929,
  0.699999988079071,
  0.6000000238418579,
  0.20000000298023224,
  0.20000000298023224,
  0.0,
  0.8999999761581421,
  0.8999999761581421,
  0.699999988079071,
  -0.800000011920929,
  0.0,
  0.5,
  0.30000001192092896,
  -0.1000000014901

## Sentiment analysis results

We start by computing the average sentiment score over the documents, weighting each document's score by its magnitude.

In [46]:
# Accumulate score * magnitude over every document, then report the mean.
# NOTE(review): `doc_data` is not defined in any cell visible here — presumably it
# is data/document_sentiment.json loaded back into a list of dicts; confirm.
score_sum = 0
for item in doc_data:
    score_sum += item['score']*item['magnitude']

# The previous spec "{:2f}" set a minimum field width of 2 (a no-op), not a
# precision; six decimals are requested explicitly so the printed value is unchanged.
print("Average sentiment score over the posts is {:.6f}".format(score_sum/len(doc_data)))

Average sentiment score over the posts is -0.067428


We can see that the average sentiment score is slightly below 0, which indicates that the average sentiment is rather negative. However, the value is still very close to 0, which indicates mixed feelings among the posts. This corresponds to the fact that although people do complain about the game, they are still trying to improve it by providing feedback and are not only writing to express dissatisfaction.

In [43]:
import numpy as np

# Build (word, mean of score * magnitude) pairs.
# Only words with more than 50 recorded scores are kept.
aggregated_score = [
    (word, sum(np.array(stats['score'])*np.array(stats['magnitude']))/len(stats['score']))
    for word, stats in word_data.items()
    if len(stats['score']) > 50
]

In [44]:
# Rank the (word, value) pairs by value and show the ten entries at each extreme.
print("The top-10 most positive words are:")
highest_first = sorted(aggregated_score, key=lambda pair: -pair[1])
print(highest_first[:10])
print("The top-10 most negative words are:")
lowest_first = sorted(aggregated_score, key=lambda pair: pair[1])
print(lowest_first[:10])

The top-10 most positive words are:
[('fun', 0.528315775651681), ('Classic', 0.37642857332314783), ('zones', 0.13692307485697375), ('example', 0.10652173530796306), ('class', 0.0950000011435513), ('power', 0.07753246661130486), ('games', 0.06279569776788839), ('life', 0.06215384385677486), ('ability', 0.05508196753556618), ('gear', 0.044363634153523235)]
The top-10 most negative words are:
[('problem', -0.42785714148410725), ('damage', -0.36709302032582997), ('issues', -0.3392727232683791), ('nothing', -0.2863398719466981), ('issue', -0.2135294121942101), ('People', -0.16950980554754835), ('Alliance', -0.15937500143423686), ('post', -0.1508333312205616), ('Blizzard', -0.14409836148873698), ('blizzard', -0.13682539564039978)]


The results are interesting: "Classic" is among the top-10 most positive words within the posts (second only to "fun"), which shows the enthusiasm of the players for the newly launched Classic WoW game. On the contrary, Blizzard has a very low score (you can actually find Blizzard twice in the bottom-10 words, as "Blizzard" and "blizzard").

Besides, it is interesting to note that 'zones', 'gear' and 'class' are among the highest-scoring words as well, which might indicate that those elements are the most-liked features of the game.