# Sentiment Analysis


In [1]:
# Import the necessary packages

import os

from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson import ApiException
from ibm_watson import ToneAnalyzerV3
import pandas as pd
from textblob import TextBlob 

In [2]:
# Load the tweet dataset (tab-separated file)

df = pd.read_csv(
    "Required Data.csv",
    sep="\t",
)

## Find the sentiments

In [3]:
def _label_polarity(score):
    """Translate a polarity score into 'Positive', 'Neutral' or 'Negative'."""
    if score > 0:
        return "Positive"
    if score == 0:
        return "Neutral"
    return "Negative"


# TextBlob yields a (polarity, subjectivity) pair per tweet; str() makes non-string cells analysable
sentiment_data = df['processed_text'].apply(lambda text: TextBlob(str(text)).sentiment)

# First element of the pair drives the label, second element is stored unchanged
df['sentiment'] = sentiment_data.apply(lambda pair: _label_polarity(pair[0]))
df['subjectivity'] = sentiment_data.apply(lambda pair: pair[1])

Here, subjectivity is a score between 0 and 1 that shows whether the tweet is based on factual information or on personal opinion.

A value closer to zero shows factual information.

A value closer to one shows public opinion.

In [4]:
df.head(n=5)  # Preview the first five rows, including the new sentiment columns

Unnamed: 0,id,processed_text,sentiment,subjectivity
0,1242602093501800448,yeah missing freedom life covid19,Negative,0.05
1,1242602237571919872,contribute cm relief fund help delhi govt figh...,Neutral,0.0
2,1242602411962912769,bhai assalamualaikum possible please call bhai...,Negative,0.441667
3,1242602425023787008,bold adress nation activity banned except esse...,Positive,0.473333
4,1242602501284585472,please understand important stay home responsi...,Positive,0.538889


In [5]:
# Persist the labelled tweets as a tab-separated UTF-8 file, leaving out the row index

polarity_csv_options = dict(sep='\t', encoding='utf-8', index=False)
df.to_csv("Sentiment Polarity Data.csv", **polarity_csv_options)

## IBM Tone Analyzer

The free tier of IBM Tone Analyzer only lets us analyse 2500 tweets.

So we sample 500 tweets from each of the 5 phases (Lockdown 1 to 4 and Unlock 1), which gives 2500 tweets in total.

In [15]:
# Load the full processed tweet table (tab-separated); note this rebinds `df`

df = pd.read_csv(
    "Processed Data.csv",
    sep="\t",
)

In [16]:
df.head(n=5)  # Preview the first five rows of the processed data

Unnamed: 0,date,time,username,to,replies,retweets,favorites,text,mentions,hashtags,id,permalink,processed_text
0,2020-03-25,00:00:00,Jaja_Ricky,annamzep,0,0,0,Yeah.. Missing my freedom life with COVID-19 ...,,,1242602093501800448,https://twitter.com/Jaja_Ricky/status/12426020...,yeah missing freedom life covid19
1,2020-03-25,00:00:34,RitikGupta1999,,0,7,9,You can contribute to CM Relief Fund and help ...,,#coronavirus,1242602237571919872,https://twitter.com/RitikGupta1999/status/1242...,contribute cm relief fund help delhi govt figh...
2,2020-03-25,00:01:16,ShaikhNadir5,BeingSalmanKhan,0,0,0,Bhai Assalamualaikum if possible please call b...,,,1242602411962912769,https://twitter.com/ShaikhNadir5/status/124260...,bhai assalamualaikum possible please call bhai...
3,2020-03-25,00:01:19,Vasu38,Dev15Sehgal,0,0,1,After PM’s bold Adress to Nation all activitie...,,,1242602425023787008,https://twitter.com/Vasu38/status/124260242502...,bold adress nation activity banned except esse...
4,2020-03-25,00:01:37,reemag1985,,0,0,0,"Please understand, why it is important to stay...",,#Stayhomestaysafe,1242602501284585472,https://twitter.com/reemag1985/status/12426025...,please understand important stay home responsi...


In [18]:
# Divide the data on the basis of lockdown phases and take 500 samples from each of them

SAMPLES_PER_PHASE = 500  # 5 phases x 500 tweets = the 2500 tweets we can analyse for free
SAMPLE_SEED = 42         # fixed seed so a re-run draws the same tweets (reproducible results)


def sample_phase(frame, start, end, n=SAMPLES_PER_PHASE, seed=SAMPLE_SEED):
    """Return `n` randomly drawn rows of `frame` whose 'date' lies in [start, end].

    Both bounds are inclusive. The comparison is done on the values as stored in the
    'date' column; with the ISO `YYYY-MM-DD` strings used here, string order equals
    chronological order (assumes the column was not parsed into datetimes).

    Raises ValueError (from pandas) if the phase holds fewer than `n` rows.
    """
    in_phase = frame[(frame['date'] >= start) & (frame['date'] <= end)]
    return in_phase.sample(n, random_state=seed)


lockdown1 = sample_phase(df, '2020-03-25', '2020-04-14')

lockdown2 = sample_phase(df, '2020-04-15', '2020-05-03')

lockdown3 = sample_phase(df, '2020-05-04', '2020-05-17')

lockdown4 = sample_phase(df, '2020-05-18', '2020-05-31')

unlock1 = sample_phase(df, '2020-06-01', '2020-06-14')

In [19]:
lockdown1  # Inspect the Lockdown 1 sample; the original row labels from df are kept

Unnamed: 0,date,time,username,to,replies,retweets,favorites,text,mentions,hashtags,id,permalink,processed_text
59783,2020-03-30,05:32:00,haritsinhgohil,,0,0,0,@timesofindia @akshaykumar Over Obsession of A...,@timesofindia @akshaykumar,,1244497585462640640,https://twitter.com/haritsinhgohil/status/1244...,USER_MENTION USER_MENTION obsession america an...
184690,2020-04-12,11:46:03,kobikashok,,0,1,1,CORONAVIRUS PANDEMIC COVIT-19 LIVE WORLD MAP/C...,,,1249302757925773312,https://twitter.com/kobikashok/status/12493027...,coronavirus pandemic covit19 live world india ...
18261,2020-03-26,12:46:49,mishra_sbr,,0,0,0,@mishra_sbr What To do in21 days Do meditation...,@mishra_sbr,,1243157458899456000,https://twitter.com/mishra_sbr/status/12431574...,USER_MENTION in21 day meditation pranayam sing...
117379,2020-04-05,07:08:31,Nitishah45,,9,0,78,My cooking skill is getting better #lockdown #...,,#lockdown #Quarantine,1246696200159391744,https://twitter.com/Nitishah45/status/12466962...,cooking skill getting better lockdown quarantine
111251,2020-04-04,11:44:06,mohamed65962472,,0,0,0,Dear @narendramodi im seeking your reply to my...,@narendramodi @SushmaSwaraj @asadowaisi,#TabligiJamaat #Quarantined,1246403167362920449,https://twitter.com/mohamed65962472/status/124...,dear USER_MENTION im seeking reply tweet uncle...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
40955,2020-03-28,12:57:53,ANILBKAPUR,,0,0,0,.@RahulGandhi Highly impressed to see your con...,@RahulGandhi @narendramodi @INCIndia,,1243885019212349440,https://twitter.com/ANILBKAPUR/status/12438850...,USER_MENTION highly impressed see concern poor...
190499,2020-04-13,07:06:53,mike_hardman,,0,0,0,Trump really trying very hard to keep the numb...,,,1249594895062327297,https://twitter.com/mike_hardman/status/124959...,trump really trying hard keep number covid19 i...
205206,2020-04-14,15:41:13,rahari2k,ranganaathan,0,0,0,"Well said sir. If Congi + DMK in Power, then t...",,,1250086718734123008,https://twitter.com/rahari2k/status/1250086718...,well said sir congi dmk power prevented first ...
115045,2020-04-05,00:10:47,dubaisoccer,piersmorgan,0,0,0,FFS-just copy everything South Korea and China...,,,1246591073326993408,https://twitter.com/dubaisoccer/status/1246591...,ffsjust copy everything south korea china done...


In [21]:
# Stack the five phase samples into one frame with a fresh 0..N-1 index,
# so every tweet can be sent for tone analysis in a single pass
phase_samples = [lockdown1, lockdown2, lockdown3, lockdown4, unlock1]
combined_df = pd.concat(phase_samples, ignore_index=True)

In [22]:
combined_df  # Inspect the combined sample; rows are renumbered from 0 after the concat

Unnamed: 0,date,time,username,to,replies,retweets,favorites,text,mentions,hashtags,id,permalink,processed_text
0,2020-03-30,05:32:00,haritsinhgohil,,0,0,0,@timesofindia @akshaykumar Over Obsession of A...,@timesofindia @akshaykumar,,1244497585462640640,https://twitter.com/haritsinhgohil/status/1244...,USER_MENTION USER_MENTION obsession america an...
1,2020-04-12,11:46:03,kobikashok,,0,1,1,CORONAVIRUS PANDEMIC COVIT-19 LIVE WORLD MAP/C...,,,1249302757925773312,https://twitter.com/kobikashok/status/12493027...,coronavirus pandemic covit19 live world india ...
2,2020-03-26,12:46:49,mishra_sbr,,0,0,0,@mishra_sbr What To do in21 days Do meditation...,@mishra_sbr,,1243157458899456000,https://twitter.com/mishra_sbr/status/12431574...,USER_MENTION in21 day meditation pranayam sing...
3,2020-04-05,07:08:31,Nitishah45,,9,0,78,My cooking skill is getting better #lockdown #...,,#lockdown #Quarantine,1246696200159391744,https://twitter.com/Nitishah45/status/12466962...,cooking skill getting better lockdown quarantine
4,2020-04-04,11:44:06,mohamed65962472,,0,0,0,Dear @narendramodi im seeking your reply to my...,@narendramodi @SushmaSwaraj @asadowaisi,#TabligiJamaat #Quarantined,1246403167362920449,https://twitter.com/mohamed65962472/status/124...,dear USER_MENTION im seeking reply tweet uncle...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2495,2020-06-01,22:04:17,furqantsiddiqui,,0,2,3,Moderated a webinar on Safety & precautions am...,@UPAPakistan @smubashirimam,#Covid19,1267577738359119875,https://twitter.com/furqantsiddiqui/status/126...,moderated webinar safety precaution amid covid...
2496,2020-06-03,17:14:45,LadakhTimes,,0,0,1,"Press Release Containment of Shenam area Leh, ...",,,1268229650083348480,https://twitter.com/LadakhTimes/status/1268229...,press release containment shenam area leh june...
2497,2020-06-11,23:43:02,Hire_Prashanth,,0,0,0,Twitter shuts down huge Chinese network with a...,,,1271226466844282883,https://twitter.com/Hire_Prashanth/status/1271...,twitter shuts huge chinese network coronavirus...
2498,2020-06-07,14:19:37,RahulGiri143245,,0,0,0,LOCKDOWN 4.0 song #out_now full video only on ...,,#out_now #Lahari_film_production #B_Brand_Team...,1269635127186989057,https://twitter.com/RahulGiri143245/status/126...,lockdown song out_now full video youtube chann...


In [29]:
# Initialise Tone Analyzer.
# The credentials are read from the environment instead of being pasted into the
# notebook, so they are never saved (or committed) together with it. Set both
# variables before starting the kernel; a missing one raises KeyError right here.
tone_api_key = os.environ["TONE_ANALYZER_APIKEY"]
tone_api_url = os.environ["TONE_ANALYZER_URL"]

tone_analyzer = ToneAnalyzerV3(version='2017-09-21', authenticator=IAMAuthenticator(tone_api_key))

tone_analyzer.set_service_url(tone_api_url)
tone_analyzer.set_disable_ssl_verification(False)  # "disable" flag is False, i.e. SSL verification is not turned off

In [30]:
# Ask IBM Tone Analyzer for the tone of every tweet.
# `tones` receives exactly one entry per row of combined_df: the parsed response,
# or None when the request failed.
tones = []
for tweet_text in combined_df['processed_text']:
    payload = {'text': str(tweet_text)}
    try:
        response = tone_analyzer.tone(payload, content_type='application/json')
        analysis = response.get_result()
    except ApiException as api_error:
        # Keep a placeholder so the list stays aligned with the tweets, then report the failure reason
        tones.append(None)
        print ("Method failed with status code " + str(api_error.code) + ": " + api_error.message)
    else:
        tones.append(analysis)

In [31]:
tones[:10]  # Show only the first few responses; echoing the whole list floods the notebook output

[{'document_tone': {'tones': [{'score': 0.589357,
     'tone_id': 'sadness',
     'tone_name': 'Sadness'}]}},
 {'document_tone': {'tones': [{'score': 0.912588,
     'tone_id': 'confident',
     'tone_name': 'Confident'}]}},
 {'document_tone': {'tones': []}},
 {'document_tone': {'tones': [{'score': 0.526812,
     'tone_id': 'joy',
     'tone_name': 'Joy'}]}},
 {'document_tone': {'tones': [{'score': 0.735644,
     'tone_id': 'confident',
     'tone_name': 'Confident'}]}},
 {'document_tone': {'tones': [{'score': 0.696169,
     'tone_id': 'joy',
     'tone_name': 'Joy'}]}},
 {'document_tone': {'tones': [{'score': 0.69054,
     'tone_id': 'sadness',
     'tone_name': 'Sadness'},
    {'score': 0.653099, 'tone_id': 'analytical', 'tone_name': 'Analytical'}]}},
 {'document_tone': {'tones': [{'score': 0.783196,
     'tone_id': 'joy',
     'tone_name': 'Joy'}]}},
 {'document_tone': {'tones': [{'score': 0.618842,
     'tone_id': 'anger',
     'tone_name': 'Anger'},
    {'score': 0.579436, 'tone_id

In [72]:
# Create a pandas dataframe from the data.
# A failed request left None in `tones`. None has no 'document_tone' key, so building
# the frame (or the tone extraction that follows) would fail. Each None is therefore
# replaced by a response with an empty tone list: the failed tweet simply gets no tone
# flags, and the rows stay aligned one-to-one with combined_df.
tone_df = pd.DataFrame(
    [entry if entry is not None else {'document_tone': {'tones': []}} for entry in tones]
)

In [73]:
# Pull the list of detected tones out of each response into its own column,
# then remove the now-redundant wrapper column from the frame (in place)
tone_df['document_tones'] = [response['tones'] for response in tone_df['document_tone']]
del tone_df['document_tone']

In [74]:
tone_df.head(n=5)  # Preview the first five rows of the tone table

Unnamed: 0,sentences_tone,document_tones
0,,"[{'score': 0.589357, 'tone_id': 'sadness', 'to..."
1,,"[{'score': 0.912588, 'tone_id': 'confident', '..."
2,,[]
3,,"[{'score': 0.526812, 'tone_id': 'joy', 'tone_n..."
4,,"[{'score': 0.735644, 'tone_id': 'confident', '..."


In [75]:
# Flag each detected tone: one column per tone_id, set to 1 on the rows where that tone appears.
# Columns are created the first time a tone is met; rows without the tone stay empty (NaN).

for row in range(len(tone_df)):
    for detected in tone_df.loc[row, 'document_tones']:
        tone_df.loc[row, detected['tone_id']] = 1

tone_df.head()

Unnamed: 0,sentences_tone,document_tones,sadness,confident,joy,analytical,anger,tentative,fear
0,,"[{'score': 0.589357, 'tone_id': 'sadness', 'to...",1.0,,,,,,
1,,"[{'score': 0.912588, 'tone_id': 'confident', '...",,1.0,,,,,
2,,[],,,,,,,
3,,"[{'score': 0.526812, 'tone_id': 'joy', 'tone_n...",,,1.0,,,,
4,,"[{'score': 0.735644, 'tone_id': 'confident', '...",,1.0,,,,,


In [83]:
# Put the tone columns next to the tweets they belong to (rows are matched on the index)
frames_side_by_side = [combined_df, tone_df]
result = pd.concat(frames_side_by_side, axis=1)
result

Unnamed: 0,date,time,username,to,replies,retweets,favorites,text,mentions,hashtags,...,processed_text,sentences_tone,document_tones,sadness,confident,joy,analytical,anger,tentative,fear
0,2020-03-30,05:32:00,haritsinhgohil,,0,0,0,@timesofindia @akshaykumar Over Obsession of A...,@timesofindia @akshaykumar,,...,USER_MENTION USER_MENTION obsession america an...,,"[{'score': 0.589357, 'tone_id': 'sadness', 'to...",1.0,,,,,,
1,2020-04-12,11:46:03,kobikashok,,0,1,1,CORONAVIRUS PANDEMIC COVIT-19 LIVE WORLD MAP/C...,,,...,coronavirus pandemic covit19 live world india ...,,"[{'score': 0.912588, 'tone_id': 'confident', '...",,1.0,,,,,
2,2020-03-26,12:46:49,mishra_sbr,,0,0,0,@mishra_sbr What To do in21 days Do meditation...,@mishra_sbr,,...,USER_MENTION in21 day meditation pranayam sing...,,[],,,,,,,
3,2020-04-05,07:08:31,Nitishah45,,9,0,78,My cooking skill is getting better #lockdown #...,,#lockdown #Quarantine,...,cooking skill getting better lockdown quarantine,,"[{'score': 0.526812, 'tone_id': 'joy', 'tone_n...",,,1.0,,,,
4,2020-04-04,11:44:06,mohamed65962472,,0,0,0,Dear @narendramodi im seeking your reply to my...,@narendramodi @SushmaSwaraj @asadowaisi,#TabligiJamaat #Quarantined,...,dear USER_MENTION im seeking reply tweet uncle...,,"[{'score': 0.735644, 'tone_id': 'confident', '...",,1.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2495,2020-06-01,22:04:17,furqantsiddiqui,,0,2,3,Moderated a webinar on Safety & precautions am...,@UPAPakistan @smubashirimam,#Covid19,...,moderated webinar safety precaution amid covid...,,"[{'score': 0.687768, 'tone_id': 'analytical', ...",,,,1.0,,,
2496,2020-06-03,17:14:45,LadakhTimes,,0,0,1,"Press Release Containment of Shenam area Leh, ...",,,...,press release containment shenam area leh june...,,"[{'score': 0.592138, 'tone_id': 'confident', '...",,1.0,,1.0,,,
2497,2020-06-11,23:43:02,Hire_Prashanth,,0,0,0,Twitter shuts down huge Chinese network with a...,,,...,twitter shuts huge chinese network coronavirus...,,[],,,,,,,
2498,2020-06-07,14:19:37,RahulGiri143245,,0,0,0,LOCKDOWN 4.0 song #out_now full video only on ...,,#out_now #Lahari_film_production #B_Brand_Team...,...,lockdown song out_now full video youtube chann...,,"[{'score': 0.538448, 'tone_id': 'analytical', ...",,,,1.0,,,


In [84]:
# Persist the tweets together with their tone flags (tab-separated, UTF-8, no row index)

result.to_csv(
    "Sentiment Data.csv",
    sep="\t",
    encoding="utf-8",
    index=False,
)

In [88]:
# Split the data and save it for different lockdown phases

def _export_phase(frame, first_day, last_day, out_path):
    """Select rows dated within [first_day, last_day], order them by date and time,
    write them to `out_path` as a tab-separated UTF-8 file, and return them."""
    dates = frame['date']
    phase_rows = frame[(dates >= first_day) & (dates <= last_day)]
    phase_rows = phase_rows.sort_values(by=['date', 'time'])
    phase_rows.to_csv(out_path, sep='\t', encoding='utf-8', index=False)
    return phase_rows


lockdown1tone = _export_phase(result, '2020-03-25', '2020-04-14', "Lockdown 1 Tone.csv")

lockdown2tone = _export_phase(result, '2020-04-15', '2020-05-03', "Lockdown 2 Tone.csv")

lockdown3tone = _export_phase(result, '2020-05-04', '2020-05-17', "Lockdown 3 Tone.csv")

lockdown4tone = _export_phase(result, '2020-05-18', '2020-05-31', "Lockdown 4 Tone.csv")

unlock1tone = _export_phase(result, '2020-06-01', '2020-06-14', "Unlock 1 Tone.csv")