# Sentiment analysis 



### Load necessary libraries

In [1]:
# Import the libraries used in this notebook
# NOTE(review): nltk and os are not referenced in the cells visible here —
# confirm they are still needed before keeping them.
import boto3
# Uncomment the next line to install nltk if it is missing from the kernel environment
#!pip install nltk
import nltk
# Download the 'punkt' package into the local nltk_data directory
# (nltk reports "already up-to-date" when it is present)
nltk.download('punkt')
import pandas as pd
import os

[nltk_data] Downloading package punkt to /home/sagemaker-
[nltk_data]     user/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


### Load the input file from S3

In [2]:
"""Accessing the S3 buckets using boto3 client"""
# Location of the tweet dataset in S3.
# Data taken from https://www.kaggle.com/cosmos98/twitter-and-reddit-sentimental-analysis-dataset?select=Twitter_Data.csv
s3_bucket_name = 'jpg-sentimentanalysis'
key_name = 'Twitter_Data.csv'

# boto3 client used for the S3 request below
s3_client = boto3.client('s3')

In [13]:
# Fetch the CSV object from the configured bucket/key
obj = s3_client.get_object(Bucket=s3_bucket_name, Key=key_name)

# Parse only the first 10 rows of the streamed body into a DataFrame,
# then preview it
df = pd.read_csv(obj['Body'], nrows=10)
df.head()

Unnamed: 0,clean_text,category
0,when modi promised “minimum government maximum...,-1
1,talk all the nonsense and continue all the dra...,0
2,what did just say vote for modi welcome bjp t...,1
3,asking his supporters prefix chowkidar their n...,1
4,answer who among these the most powerful world...,1


### Prepare the file

In [47]:
# Drop rows that contain missing values
df_nona = df.dropna()

# From the NA-free rows, drop those labelled with category -1.
# (Previously the filter was applied to the unfiltered `df`, so the dropna()
# result was discarded and rows with missing values survived.)
df = df_nona[df_nona.category != -1]

# Show (up to) the last 10 remaining rows
df.tail(10)

Unnamed: 0,clean_text,category
1,talk all the nonsense and continue all the dra...,0
2,what did just say vote for modi welcome bjp t...,1
3,asking his supporters prefix chowkidar their n...,1
4,answer who among these the most powerful world...,1
5,kiya tho refresh maarkefir comment karo,0
6,surat women perform yagna seeks divine grace f...,0
7,this comes from cabinet which has scholars lik...,0
8,with upcoming election india saga going import...,1
9,gandhi was gay does modi,1


In [48]:
# Display the (rows, columns) shape of the prepared DataFrame
df.shape

(9, 2)

### Initialise the AWS Comprehend client

In [29]:
# Create the AWS Comprehend client (eu-west-1) used for sentiment detection
comprehend = boto3.client(
    service_name='comprehend',
    region_name='eu-west-1',
)

#### Test the Comprehend sentiment detector

In [49]:
# Smoke test: score a single Spanish word using the Spanish language code.
# (An English check looks the same with Text='A Happy life', LanguageCode='en'.)
comprehend.detect_sentiment(
    Text='Enhorabuena',
    LanguageCode='es',
)

{'Sentiment': 'POSITIVE',
 'SentimentScore': {'Positive': 0.9997196793556213,
  'Negative': 0.00011877580982400104,
  'Neutral': 0.0001478759804740548,
  'Mixed': 1.362422790407436e-05},
 'ResponseMetadata': {'RequestId': '34aae987-2b7a-4da5-8391-e0be74ca20ad',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '34aae987-2b7a-4da5-8391-e0be74ca20ad',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '166',
   'date': 'Fri, 23 Jul 2021 07:54:27 GMT'},
  'RetryAttempts': 0}}

### Create a function to get the sentiment

In [53]:
def get_sentiment(tweets_df, client=None, language_code='en'):
    """Score every row of ``tweets_df`` with Comprehend sentiment detection.

    Parameters
    ----------
    tweets_df : pandas.DataFrame
        Must provide a ``clean_text`` column (the text sent for scoring) and a
        ``category`` column (copied through to the result unchanged).
    client : optional
        Object exposing ``detect_sentiment(Text=..., LanguageCode=...)``.
        When omitted, the notebook-level ``comprehend`` client is used.
    language_code : str, optional
        Language code forwarded to ``detect_sentiment``. Defaults to ``'en'``.

    Returns
    -------
    pandas.DataFrame
        One row per input row, in input order and re-indexed from 0, with the
        columns ``tweet``, ``category``, ``sentiment``, ``score_pos``,
        ``score_neg``, ``score_mxd`` and ``score_ntrl``. An empty input yields
        an empty frame with the same columns.

    Raises
    ------
    KeyError
        If ``tweets_df`` lacks the ``clean_text`` or ``category`` column.
    Any exception raised by ``client.detect_sentiment`` propagates unchanged.
    """
    # Resolve the default lazily so the notebook-level client is only needed
    # when the caller does not inject one.
    if client is None:
        client = comprehend

    columns = ['tweet', 'category', 'sentiment',
               'score_pos', 'score_neg', 'score_mxd', 'score_ntrl']

    # Collect plain records and build the frame once at the end; enlarging a
    # DataFrame one cell at a time is slow and leaves every column as object.
    records = []
    for text, category in zip(tweets_df['clean_text'], tweets_df['category']):
        # One service call per tweet
        response = client.detect_sentiment(Text=text, LanguageCode=language_code)
        scores = response['SentimentScore']
        records.append({
            'tweet': text,
            'category': category,
            'sentiment': response['Sentiment'],
            'score_pos': scores['Positive'],
            'score_neg': scores['Negative'],
            'score_mxd': scores['Mixed'],
            'score_ntrl': scores['Neutral'],
        })

    # Passing `columns` fixes the column order and keeps the schema for empty input
    return pd.DataFrame(records, columns=columns)

### Get the sentiment

In [54]:
# Score every prepared tweet. get_sentiment returns a fresh DataFrame, so no
# placeholder frame is created first (one would only hide a failed call
# behind an empty result).
results = get_sentiment(df)

In [55]:
# Preview the first rows of the sentiment results
results.head()

Unnamed: 0,tweet,category,sentiment,score_pos,score_neg,score_mxd,score_ntrl
0,talk all the nonsense and continue all the dra...,0,NEGATIVE,0.019644,0.670815,0.029622,0.279919
1,what did just say vote for modi welcome bjp t...,1,NEUTRAL,0.028299,0.027591,0.001225,0.942885
2,asking his supporters prefix chowkidar their n...,1,MIXED,0.015248,0.175266,0.772895,0.036591
3,answer who among these the most powerful world...,1,NEUTRAL,0.161099,0.149261,0.098815,0.590826
4,kiya tho refresh maarkefir comment karo,0,NEUTRAL,0.002847,0.025781,1.2e-05,0.97136
5,surat women perform yagna seeks divine grace f...,0,NEUTRAL,0.260554,0.011546,0.005131,0.722768
6,this comes from cabinet which has scholars lik...,0,NEUTRAL,0.095931,0.193855,0.047301,0.662914
7,with upcoming election india saga going import...,1,NEUTRAL,0.03936,0.040302,0.002582,0.917755
8,gandhi was gay does modi,1,NEUTRAL,0.160447,0.053508,0.000785,0.785261
