## Sentiment Analysis 

Se realizará el análisis de sentimientos a partir de las reseñas (reviews) escritas por los usuarios.
Para esto se utilizará la librería NLTK junto con su analizador de sentimientos VADER (`nltk.sentiment`).

In [1]:
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')
import pyarrow

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/ingridbarriosv/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Load the previously created dataframe in which the reviews are already split out.
Reviews_sep = pd.read_parquet(path='DB Steam/Reviews_sep.parquet')

In [3]:
# Preview the first five rows of the loaded dataframe.
Reviews_sep.head(n=5)

Unnamed: 0,user_id,user_url,reviews,review,recommend,item_id,review_date,review_year
0,76561197970982479,http://steamcommunity.com/profiles/76561197970...,"{'funny': '', 'helpful': 'No ratings yet', 'it...",Simple yet with great replayability. In my opi...,True,1250,2011-11-05,2011
1,76561197970982479,http://steamcommunity.com/profiles/76561197970...,"{'funny': '', 'helpful': 'No ratings yet', 'it...",It's unique and worth a playthrough.,True,22200,2011-07-15,2011
2,76561197970982479,http://steamcommunity.com/profiles/76561197970...,"{'funny': '', 'helpful': 'No ratings yet', 'it...",Great atmosphere. The gunplay can be a bit chu...,True,43110,2011-04-21,2011
3,js41637,http://steamcommunity.com/id/js41637,"{'funny': '', 'helpful': '15 of 20 people (75%...",I know what you think when you see this title ...,True,251610,2014-06-24,2014
4,js41637,http://steamcommunity.com/id/js41637,"{'funny': '', 'helpful': '0 of 1 people (0%) f...",For a simple (it's actually not all that simpl...,True,227300,2013-09-08,2013


In [4]:
# Sentiment-analysis helper built on NLTK's VADER analyzer.
# Each review text is mapped to a polarity label returned as a string:
# '0' for negative, '1' for neutral and '2' for positive.
# '1' is also returned when there is no written review.

# Shared analyzer instance, created on first use so that it is not rebuilt
# for every single review when the function is applied row by row.
_vader_analyzer = None


def _get_vader_analyzer():
    """Return the shared SentimentIntensityAnalyzer, creating it on first use."""
    global _vader_analyzer
    if _vader_analyzer is None:
        _vader_analyzer = SentimentIntensityAnalyzer()
    return _vader_analyzer


def SentimentAnalysis(text):
    """Classify the polarity of a review text.

    Parameters
    ----------
    text : str or None
        Review text to score. Anything that is not a string (None, NaN,
        pd.NA, ...) and any blank string is treated as "no review written".

    Returns
    -------
    str
        '2' if the VADER compound score is >= 0.05 (positive),
        '0' if it is <= -0.05 (negative),
        '1' otherwise (neutral), including the "no review written" case.
    """
    # Missing values may arrive as None or as a float NaN depending on how the
    # column was stored; neither can be scored, so both count as neutral.
    # Blank text is short-circuited here as well, so the analyzer is only
    # touched when there is something to score.
    if not isinstance(text, str) or not text.strip():
        return '1'

    compound = _get_vader_analyzer().polarity_scores(text)['compound']

    if compound >= 0.05:
        return '2'
    if compound <= -0.05:
        return '0'
    return '1'

In [5]:
# Score every entry of the 'review' column with SentimentAnalysis, store the
# resulting labels in a new 'sentiment_analysis' column and preview the frame.
review_texts = Reviews_sep['review']
Reviews_sep['sentiment_analysis'] = review_texts.apply(SentimentAnalysis)
Reviews_sep.head(n=5)

Unnamed: 0,user_id,user_url,reviews,review,recommend,item_id,review_date,review_year,sentiment_analysis
0,76561197970982479,http://steamcommunity.com/profiles/76561197970...,"{'funny': '', 'helpful': 'No ratings yet', 'it...",Simple yet with great replayability. In my opi...,True,1250,2011-11-05,2011,2
1,76561197970982479,http://steamcommunity.com/profiles/76561197970...,"{'funny': '', 'helpful': 'No ratings yet', 'it...",It's unique and worth a playthrough.,True,22200,2011-07-15,2011,2
2,76561197970982479,http://steamcommunity.com/profiles/76561197970...,"{'funny': '', 'helpful': 'No ratings yet', 'it...",Great atmosphere. The gunplay can be a bit chu...,True,43110,2011-04-21,2011,2
3,js41637,http://steamcommunity.com/id/js41637,"{'funny': '', 'helpful': '15 of 20 people (75%...",I know what you think when you see this title ...,True,251610,2014-06-24,2014,2
4,js41637,http://steamcommunity.com/id/js41637,"{'funny': '', 'helpful': '0 of 1 people (0%) f...",For a simple (it's actually not all that simpl...,True,227300,2013-09-08,2013,2


In [6]:
# Drop the columns that are no longer needed.
# drop(..., errors='ignore') is used instead of `del` so that this cell can be
# re-run safely: columns that were already removed are skipped instead of
# raising KeyError.
Reviews_sep = Reviews_sep.drop(
    columns=['reviews', 'review', 'user_url'],
    errors='ignore',
)

In [7]:
# Sort by the sentiment label to get a clearer view of the data,
# and preview the first five rows of the sorted result.
(
    Reviews_sep
    .sort_values('sentiment_analysis')
    .head(5)
)

Unnamed: 0,user_id,recommend,item_id,review_date,review_year,sentiment_analysis
36414,76561198086722947,False,242860,2015-09-02,2015,0
54666,76561198081933171,True,221100,2014-01-04,2014,0
54665,76561198081924175,False,49520,2014-07-25,2014,0
54664,76561198081923658,True,570,2014-09-09,2014,0
27536,CowOnWeed,True,220,2015-04-16,2015,0


In [8]:
# Persist the Reviews_sep dataframe in parquet format, without the index.
Reviews_sep.to_parquet(
    'DB Steam/Reviews_sep_sentiment.parquet',
    index=False,
)
