# Sentiment analysis using Amazon Comprehend

This notebook contains the script that performs a sentiment analysis using Amazon Comprehend.
We run the sentiment analysis on all the clean tweets (tweets whose keywords all belong to a single category) to extract the sentiment towards that birth control (BC) category.

When running this notebook, we saved the output in the "AWScomprenhend_output" directory (inside the current working directory), as one CSV file per category.

In [1]:
# Standard library
import os
import timeit
from collections import OrderedDict

# Third-party
import boto3 
import pandas as pd
import requests

# Amazon Comprehend client (region us-east-1) used for every sentiment request.
comprehend = boto3.client(
    'comprehend',
    region_name='us-east-1',
)

# Working directory at the time this cell runs; output CSV paths are built from it.
path = os.getcwd()

**Note that the average time to process one tweet is 0.0591 seconds**

In [17]:
###########
# LNG IUD #
###########

# Score every non-missing tweet of this category with Amazon Comprehend and
# save one row per tweet (text, dominant sentiment, four class scores) as CSV.
df = pd.read_csv('/home/ec2-user/SageMaker/CleanAndAggregateTweets/LNG-IUD_CleanTweets.txt',
                 sep = '\t' )

# Create the output folder up front so a missing directory is handled before
# the long-running API loop instead of failing at to_csv after it.
out_dir = os.path.join(path, "AWScomprenhend_output")
os.makedirs(out_dir, exist_ok=True)

start = timeit.default_timer()

# Collect rows in a plain list and build the frame once: DataFrame.append
# copied the whole frame on every call and was removed in pandas 2.0.
rows = []
for tweet in df["text"]:
    if pd.notna(tweet):
        res = comprehend.detect_sentiment(Text=tweet, LanguageCode='en')
        scores = res.get('SentimentScore')
        rows.append({"tweets": tweet, "sentiments": res.get('Sentiment'),
                     "positive": scores['Positive'], "negative": scores['Negative'],
                     "neutral": scores['Neutral'], "mixed": scores['Mixed']})

# Passing `columns` keeps the header and column order even when no tweet was scored.
dfTweet = pd.DataFrame(rows, columns=["tweets", "sentiments", "positive", "negative", "neutral", "mixed"])

stop = timeit.default_timer()

print('Time: ', stop - start)

dfTweet.to_csv(path_or_buf=os.path.join(out_dir, "LNG-IUD_AWScomprehend_complete.csv"))

Time:  699.2440515314229


In [19]:
##############
# Copper IUD #
##############

# Score every non-missing tweet of this category with Amazon Comprehend and
# save one row per tweet (text, dominant sentiment, four class scores) as CSV.
df = pd.read_csv('/home/ec2-user/SageMaker/CleanAndAggregateTweets/copperIUD_CleanTweets.txt',
                 sep = '\t' )

# Create the output folder up front so a missing directory is handled before
# the long-running API loop instead of failing at to_csv after it.
out_dir = os.path.join(path, "AWScomprenhend_output")
os.makedirs(out_dir, exist_ok=True)

start = timeit.default_timer()

# Collect rows in a plain list and build the frame once: DataFrame.append
# copied the whole frame on every call and was removed in pandas 2.0.
rows = []
for tweet in df["text"]:
    if pd.notna(tweet):
        res = comprehend.detect_sentiment(Text=tweet, LanguageCode='en')
        scores = res.get('SentimentScore')
        rows.append({"tweets": tweet, "sentiments": res.get('Sentiment'),
                     "positive": scores['Positive'], "negative": scores['Negative'],
                     "neutral": scores['Neutral'], "mixed": scores['Mixed']})

# Passing `columns` keeps the header and column order even when no tweet was scored.
dfTweet = pd.DataFrame(rows, columns=["tweets", "sentiments", "positive", "negative", "neutral", "mixed"])

stop = timeit.default_timer()

print('Time: ', stop - start)

dfTweet.to_csv(path_or_buf=os.path.join(out_dir, "copperIUD_AWScomprehend_complete.csv"))

Time:  1074.938692579046


In [20]:
#############
# The Patch #
#############

# Score every non-missing tweet of this category with Amazon Comprehend and
# save one row per tweet (text, dominant sentiment, four class scores) as CSV.
df = pd.read_csv('/home/ec2-user/SageMaker/CleanAndAggregateTweets/Patch_CleanTweets.txt',
                 sep = '\t' )

# Create the output folder up front so a missing directory is handled before
# the long-running API loop instead of failing at to_csv after it.
out_dir = os.path.join(path, "AWScomprenhend_output")
os.makedirs(out_dir, exist_ok=True)

start = timeit.default_timer()

# Collect rows in a plain list and build the frame once: DataFrame.append
# copied the whole frame on every call and was removed in pandas 2.0.
rows = []
for tweet in df["text"]:
    if pd.notna(tweet):
        res = comprehend.detect_sentiment(Text=tweet, LanguageCode='en')
        scores = res.get('SentimentScore')
        rows.append({"tweets": tweet, "sentiments": res.get('Sentiment'),
                     "positive": scores['Positive'], "negative": scores['Negative'],
                     "neutral": scores['Neutral'], "mixed": scores['Mixed']})

# Passing `columns` keeps the header and column order even when no tweet was scored.
dfTweet = pd.DataFrame(rows, columns=["tweets", "sentiments", "positive", "negative", "neutral", "mixed"])

stop = timeit.default_timer()

print('Time: ', stop - start)

dfTweet.to_csv(path_or_buf=os.path.join(out_dir, "Patch_AWScomprehend_complete.csv"))

Time:  846.8812921629287


In [21]:
############
# The Ring #
############

# Score every non-missing tweet of this category with Amazon Comprehend and
# save one row per tweet (text, dominant sentiment, four class scores) as CSV.
df = pd.read_csv('/home/ec2-user/SageMaker/CleanAndAggregateTweets/Ring_CleanTweets.txt',
                 sep = '\t' )

# Create the output folder up front so a missing directory is handled before
# the long-running API loop instead of failing at to_csv after it.
out_dir = os.path.join(path, "AWScomprenhend_output")
os.makedirs(out_dir, exist_ok=True)

start = timeit.default_timer()

# Collect rows in a plain list and build the frame once: DataFrame.append
# copied the whole frame on every call and was removed in pandas 2.0.
rows = []
for tweet in df["text"]:
    if pd.notna(tweet):
        res = comprehend.detect_sentiment(Text=tweet, LanguageCode='en')
        scores = res.get('SentimentScore')
        rows.append({"tweets": tweet, "sentiments": res.get('Sentiment'),
                     "positive": scores['Positive'], "negative": scores['Negative'],
                     "neutral": scores['Neutral'], "mixed": scores['Mixed']})

# Passing `columns` keeps the header and column order even when no tweet was scored.
dfTweet = pd.DataFrame(rows, columns=["tweets", "sentiments", "positive", "negative", "neutral", "mixed"])

stop = timeit.default_timer()

print('Time: ', stop - start)

dfTweet.to_csv(path_or_buf=os.path.join(out_dir, "Ring_AWScomprehend_complete.csv"))

Time:  3390.8262455840595


In [22]:
###########
# Implant #
###########

# Score every non-missing tweet of this category with Amazon Comprehend and
# save one row per tweet (text, dominant sentiment, four class scores) as CSV.
df = pd.read_csv('/home/ec2-user/SageMaker/CleanAndAggregateTweets/Implant_CleanTweets.txt',
                 sep = '\t' )

# Create the output folder up front so a missing directory is handled before
# the long-running API loop instead of failing at to_csv after it.
out_dir = os.path.join(path, "AWScomprenhend_output")
os.makedirs(out_dir, exist_ok=True)

start = timeit.default_timer()

# Collect rows in a plain list and build the frame once: DataFrame.append
# copied the whole frame on every call and was removed in pandas 2.0.
rows = []
for tweet in df["text"]:
    if pd.notna(tweet):
        res = comprehend.detect_sentiment(Text=tweet, LanguageCode='en')
        scores = res.get('SentimentScore')
        rows.append({"tweets": tweet, "sentiments": res.get('Sentiment'),
                     "positive": scores['Positive'], "negative": scores['Negative'],
                     "neutral": scores['Neutral'], "mixed": scores['Mixed']})

# Passing `columns` keeps the header and column order even when no tweet was scored.
dfTweet = pd.DataFrame(rows, columns=["tweets", "sentiments", "positive", "negative", "neutral", "mixed"])

stop = timeit.default_timer()

print('Time: ', stop - start)

dfTweet.to_csv(path_or_buf=os.path.join(out_dir, "Implant_AWScomprehend_complete.csv"))

Time:  4820.4766345359385


In [23]:
############
# The pill #
############

# Score every non-missing tweet of this category with Amazon Comprehend and
# save one row per tweet (text, dominant sentiment, four class scores) as CSV.
df = pd.read_csv('/home/ec2-user/SageMaker/CleanAndAggregateTweets/Pill_CleanTweets.txt',
                 sep = '\t' )

# Create the output folder up front so a missing directory is handled before
# the long-running API loop instead of failing at to_csv after it.
out_dir = os.path.join(path, "AWScomprenhend_output")
os.makedirs(out_dir, exist_ok=True)

start = timeit.default_timer()

# Collect rows in a plain list and build the frame once: DataFrame.append
# copied the whole frame on every call and was removed in pandas 2.0.
rows = []
for tweet in df["text"]:
    if pd.notna(tweet):
        res = comprehend.detect_sentiment(Text=tweet, LanguageCode='en')
        scores = res.get('SentimentScore')
        rows.append({"tweets": tweet, "sentiments": res.get('Sentiment'),
                     "positive": scores['Positive'], "negative": scores['Negative'],
                     "neutral": scores['Neutral'], "mixed": scores['Mixed']})

# Passing `columns` keeps the header and column order even when no tweet was scored.
dfTweet = pd.DataFrame(rows, columns=["tweets", "sentiments", "positive", "negative", "neutral", "mixed"])

stop = timeit.default_timer()

print('Time: ', stop - start)

dfTweet.to_csv(path_or_buf=os.path.join(out_dir, "Pill_AWScomprehend_complete.csv"))

Time:  5920.48179143481


In [None]:
###########
# TheShot #
###########

# Score every non-missing tweet of this category with Amazon Comprehend and
# save one row per tweet (text, dominant sentiment, four class scores) as CSV.
df = pd.read_csv('/home/ec2-user/SageMaker/CleanAndAggregateTweets/Shot_CleanTweets.txt',
                 sep = '\t' )

# Create the output folder up front so a missing directory is handled before
# the long-running API loop instead of failing at to_csv after it.
out_dir = os.path.join(path, "AWScomprenhend_output")
os.makedirs(out_dir, exist_ok=True)

start = timeit.default_timer()

# Collect rows in a plain list and build the frame once: DataFrame.append
# copied the whole frame on every call and was removed in pandas 2.0.
rows = []
for tweet in df["text"]:
    if pd.notna(tweet):
        res = comprehend.detect_sentiment(Text=tweet, LanguageCode='en')
        scores = res.get('SentimentScore')
        rows.append({"tweets": tweet, "sentiments": res.get('Sentiment'),
                     "positive": scores['Positive'], "negative": scores['Negative'],
                     "neutral": scores['Neutral'], "mixed": scores['Mixed']})

# Passing `columns` keeps the header and column order even when no tweet was scored.
dfTweet = pd.DataFrame(rows, columns=["tweets", "sentiments", "positive", "negative", "neutral", "mixed"])

stop = timeit.default_timer()

print('Time: ', stop - start)

dfTweet.to_csv(path_or_buf=os.path.join(out_dir, "Shot_AWScomprehend_complete.csv"))


Time:  7641.2541323899995


In [None]:
#######
# IUD #
#######

# Score every non-missing tweet of this category with Amazon Comprehend and
# save one row per tweet (text, dominant sentiment, four class scores) as CSV.
df = pd.read_csv('/home/ec2-user/SageMaker/CleanAndAggregateTweets/IUD_CleanTweets.txt',
                 sep = '\t' )

# Create the output folder up front so a missing directory is handled before
# the long-running API loop instead of failing at to_csv after it.
out_dir = os.path.join(path, "AWScomprenhend_output")
os.makedirs(out_dir, exist_ok=True)

start = timeit.default_timer()

# Collect rows in a plain list and build the frame once: DataFrame.append
# copied the whole frame on every call and was removed in pandas 2.0.
rows = []
for tweet in df["text"]:
    if pd.notna(tweet):
        res = comprehend.detect_sentiment(Text=tweet, LanguageCode='en')
        scores = res.get('SentimentScore')
        rows.append({"tweets": tweet, "sentiments": res.get('Sentiment'),
                     "positive": scores['Positive'], "negative": scores['Negative'],
                     "neutral": scores['Neutral'], "mixed": scores['Mixed']})

# Passing `columns` keeps the header and column order even when no tweet was scored.
dfTweet = pd.DataFrame(rows, columns=["tweets", "sentiments", "positive", "negative", "neutral", "mixed"])

stop = timeit.default_timer()

print('Time: ', stop - start)

dfTweet.to_csv(path_or_buf=os.path.join(out_dir, "IUD_AWScomprehend_complete.csv"))

Time:  20650.387392189
