In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


import os
# Walk /kaggle/input recursively and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Introduction

Sentiment analysis refers to the use of natural language processing, text analysis, computational linguistics, and related techniques to identify and quantify the sentiment expressed in a piece of text or audio.


There are two major techniques for sentiment analysis:

• Supervised machine learning

• Unsupervised lexicon-based 


Often, you may not have the convenience of a well-labeled training dataset. 
In those situations, you need to use unsupervised techniques for predicting the sentiment by using knowledgebases, ontologies, databases, and lexicons that have detailed information specially curated and prepared just for sentiment analysis.


Here we use an unsupervised, lexicon-based approach built on the VADER lexicon for sentiment analysis.

In [None]:
# install vader if not already available
!pip install vaderSentiment

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Single analyser instance shared by every scoring cell and helper below.
analyser = SentimentIntensityAnalyzer()

In [None]:
def sentiment_analyzer_scores(sentence):
    """Print `sentence` next to its VADER polarity scores.

    The sentence is left-aligned and padded with '-' to 40 characters,
    followed by the score dict returned by `analyser.polarity_scores`.
    Nothing is returned; the function only prints.
    """
    polarity = analyser.polarity_scores(sentence)
    print(f"{sentence:-<40} {polarity}")

In [None]:
# First example: score a single short sentence.
sentiment_analyzer_scores("I am doing good!.")

# Punctuation
The use of an exclamation mark (!) increases the magnitude of the intensity without modifying the semantic orientation. 
For example, “The food here is good!” is more intense than “The food here is good.”, and increasing the number of exclamation marks increases the magnitude accordingly.

In [None]:
# Same sentence with one, two and three exclamation marks.
sentiment_analyzer_scores("The phone is super cool!")
sentiment_analyzer_scores("The phone is super cool!!")
sentiment_analyzer_scores("The phone is super cool!!!")

# Capitalization

Using upper-case letters to emphasize a sentiment-relevant word in the presence of other non-capitalized words increases the magnitude of the sentiment intensity. 
For example, “The food here is GREAT!” conveys more intensity than “The food here is great!”

In [None]:
# Sentiment word written in upper case (COOL) among lower-case words.
sentiment_analyzer_scores("The phone is super COOL!")

# Degree Modifiers

Also called intensifiers, they impact the sentiment intensity by either increasing or decreasing the intensity. For example, “The service here is extremely good” is more intense than “The service here is good”, whereas “The service here is marginally good” reduces the intensity.

In [None]:
# Same sentence without a modifier, then with "moderately" and "extremely".
sentiment_analyzer_scores("Food here is good.")
sentiment_analyzer_scores("Food here is moderately good.")
sentiment_analyzer_scores("Food here is extremely good.")

# Conjunctions

Use of conjunctions like “but” signals a shift in sentiment polarity, with the sentiment of the text following the conjunction being dominant. “The food here is great, but the service is horrible” has mixed sentiment, with the latter half dictating the overall rating.

In [None]:
# Two clauses joined by "but".
sentiment_analyzer_scores("Food here is extremely good but service is horrible.")

# Preceding Tri-gram

By examining the tri-gram preceding a sentiment-laden lexical feature, we catch nearly 90% of cases where negation flips the polarity of the text. A negated sentence would be “The food here isn’t really all that great”.

In [None]:
# Negated variants of "great", with and without intervening words.
# NOTE(review): the first two sentences use a typographic apostrophe (’) in
# "isn’t"; VADER's contraction check presumably looks for the ASCII "n't",
# so these may not be treated as negated — confirm against the output.
sentiment_analyzer_scores("The food here isn’t really all that great")
sentiment_analyzer_scores("The food here isn’t that great")

sentiment_analyzer_scores("The food here is not really all that great")
sentiment_analyzer_scores("The food here is not that great")


# Handling Emojis, Slangs and Emoticons

VADER performs very well with emojis, slang, acronyms and emoticons in sentences. 

Let us see each with an example.

In [None]:
# sentiment_analyzer_scores prints its own output and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
sentiment_analyzer_scores('I am 😄 today')
sentiment_analyzer_scores('😊')
sentiment_analyzer_scores('😥')
sentiment_analyzer_scores('☹️')

# Slangs

In [None]:
# sentiment_analyzer_scores prints its own output and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
sentiment_analyzer_scores("Today SUX!")
sentiment_analyzer_scores("Today only kinda sux! But I'll get by, lol")

# Emoticons

In [None]:
# sentiment_analyzer_scores prints its own output and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
sentiment_analyzer_scores("Make sure you :) or :D today!")

# Processing Multiple Sentences

In [None]:
import pandas as pd

sentences = ["A really bad, horrible book.","A good, awesome, wonderful, cool book !!!  :)"]

# One VADER score dict per sentence, in input order
scores = [analyser.polarity_scores(text) for text in sentences]

# Each dict becomes a row; its keys become the columns
dataFrame = pd.DataFrame(scores)

print(dataFrame)

print("Overall Sentiment Score for the multiple sentences :- ",dataFrame.mean())

In [None]:
# Load the two CSV files of the Gameloft dataset: `df` from data.csv and
# `r` from reviews.csv. Later cells read the 'appId' column of both and the
# 'reviews' column of `r`.
df=pd.read_csv("/kaggle/input/gameloft-android-games-collection-2022/data.csv")
r=pd.read_csv("/kaggle/input/gameloft-android-games-collection-2022/reviews.csv")

In [None]:
# App ID of the game inspected in the next cells. Written as a string: the
# bare dotted name is evaluated as an attribute lookup on an undefined `com`
# and raises NameError.
"com.gameloft.android.ANMP.GloftA8HM"

In [None]:
# 'reviews' entries of the rows whose appId matches this game
as8 = r.loc[r['appId'] == "com.gameloft.android.ANMP.GloftA8HM", 'reviews']

In [None]:
import ast

In [None]:
# `data` was never assigned anywhere in the notebook, so this cell raised
# NameError on a fresh kernel. Build it from the reviews selected in `as8`,
# mirroring get_reviews(): take the first matching row and parse it with
# ast.literal_eval (assumes the cell holds a string-encoded list of reviews).
data = ast.literal_eval(as8.iloc[0])

scores =[]
sentences = data

for sentence in sentences:
    score = analyser.polarity_scores(sentence)
    scores.append((sentence,score))
    
# Converting the list of (sentence, score dict) tuples into a DataFrame:
# column 0 holds the text, column 1 the score dict
dataF= pd.DataFrame(scores)

In [None]:
# Score dict of the first review (row label 0, column label 1)
dataF.loc[0, 1]

In [None]:
# Expand each score dict into its own columns, indexed by the review text,
# then move the text back into a regular column (labelled 0).
new_df = pd.DataFrame(list(dataF[1]), index=dataF[0]).reset_index()
new_df = new_df.reset_index(drop=True)
new_df

In [None]:
def Process(df,reviews):
    """Score the reviews of every app listed in `df` and stack the results.

    Parameters
    ----------
    df : DataFrame with an 'appId' column; one score frame is built per entry.
    reviews : passed unchanged to get_reviews() together with each app ID.

    Returns
    -------
    DataFrame with the per-app frames from get_scores() concatenated, the
    review-text column (label 0) renamed to "reviews", and the per-app row
    position exposed as an "index" column. An empty DataFrame is returned
    when there is nothing to concatenate.
    """
    frames = []
    for appId in df['appId']:
        scores = get_scores(appId, get_reviews(appId, reviews))
        # Skip apps without any scored review so they do not affect the concat
        if not scores.empty:
            frames.append(scores)

    if not frames:
        # pd.concat raises ValueError on an empty list
        return pd.DataFrame()

    # Concatenate once: DataFrame.append was removed in pandas 2.0 and
    # growing a frame inside the loop is quadratic.
    result = pd.concat(frames)
    return result.rename(columns={0: "reviews"}).reset_index()

In [None]:
def get_reviews(appId,reviews):
    """Return the parsed reviews of one app.

    Selects the 'reviews' entries of the rows in `reviews` whose 'appId'
    equals `appId`, takes the first one and evaluates it with
    ast.literal_eval. Raises IndexError when no row matches.
    """
    matching = reviews.loc[reviews['appId'] == appId, 'reviews']
    return ast.literal_eval(matching.iloc[0])

In [None]:
def get_scores(appId,review_list):
    """Score every review of one app with the shared VADER analyser.

    Parameters
    ----------
    appId : value written to the 'appId' column of every row.
    review_list : iterable of review texts.

    Returns
    -------
    DataFrame with one row per review: the text in the column labelled 0,
    one column per key of the score dict (neg, neu, pos, compound), and
    'appId'. For an empty `review_list` an empty frame with those columns
    is returned instead of failing on the missing score column.
    """
    sentences = list(review_list)
    if not sentences:
        return pd.DataFrame(columns=[0, "neg", "neu", "pos", "compound", "appId"])

    scores = [analyser.polarity_scores(sentence) for sentence in sentences]

    new_df = pd.DataFrame(scores)
    # Review text goes first under the integer label 0, which Process()
    # later renames to "reviews".
    new_df.insert(0, 0, sentences)
    new_df['appId'] = appId

    return new_df

In [None]:
# Score the reviews of every app listed in `df` and collect them in one frame.
fggg=Process(df,r)
fggg

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Display the combined per-review score table again before plotting.
fggg

In [None]:
# One stacked panel per VADER score column, coloured by app.
sns.set(rc={'figure.figsize':(25,30)})
for i, column in enumerate(["neg","neu","pos","compound"], 1):
    plt.subplot(4,1,i)
    p=sns.histplot(x=column,data=fggg,hue="appId",stat='count',kde=True,palette='magma')
    # Reposition seaborn's hue legend with move_legend: calling p.legend()
    # without handles builds a new legend from labelled artists only and
    # would discard the hue entries.
    sns.move_legend(p, loc='center left', bbox_to_anchor=(1, 1), ncol=4)

In [None]:
# Mean of each score column per app. The columns are selected with a list:
# selecting several columns with a bare tuple of keys was removed in pandas 2.0.
fggg.groupby("appId")[["neg","neu","pos","compound"]].mean()

## Note

Please share, upvote and comment to help me create and share more content for the community.
Thank you all.