In [None]:
# Requirements (install once before running this notebook)

# pip install -U textblob
# pip install vaderSentiment
# pip install wordcloud pyecharts pandas numpy pillow

In [None]:
# import SentimentIntensityAnalyzer class
# from vaderSentiment.vaderSentiment module.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
import pandas as pd
import re

In [None]:
# create word cloud
from wordcloud import WordCloud, STOPWORDS
from PIL import Image
import numpy as np
def create_wordcloud(text,name):
    """Render a masked word cloud from ``text`` and save it as ``<name>.png``.

    Args:
        text: Either a single string, or an iterable of texts (for example the
            ``.values`` array of a tweet column). Iterables are joined with
            spaces so that every element contributes to the cloud.
        name: Output file name without extension; ``".png"`` is appended.

    The mask image is read from ``sentiment_analysis/cloud.png`` relative to
    the current working directory.
    """
    # Load the mask and release the file handle as soon as the pixels are copied.
    with Image.open("sentiment_analysis/cloud.png") as mask_image:
        mask = np.array(mask_image)

    stopwords = set(STOPWORDS)
    stopwords.update({'s', 'S'})

    # str() of a large NumPy array is the *summarised* repr ("[a b c ... x y z]"),
    # which silently drops almost all tweets. Join the elements explicitly instead.
    if isinstance(text, str):
        corpus = text
    else:
        try:
            corpus = " ".join(str(item) for item in text)
        except TypeError:
            # Not iterable (e.g. a single number): fall back to its string form.
            corpus = str(text)

    wc = WordCloud(
        background_color="white",
        mask=mask,
        random_state=40,
        max_font_size=150,
        max_words=2500,
        stopwords=stopwords,
        repeat=True,
    )
    wc.generate(corpus)
    wc.to_file(name+".png")
    print("Word Cloud Saved Successfully")

In [None]:
# clean text
def clean_text(content):
    """Strip Twitter noise from a tweet and return the cleaned string.

    Steps, in order: remove ``@mentions``, remove links, drop ``#`` signs,
    turn underscores into spaces, and replace line breaks / tabs with a
    single space.

    Args:
        content: The raw tweet. Non-string values are converted with ``str``;
            ``None`` and float NaN (pandas' missing-value marker) yield ``""``
            instead of the literal words "None" / "nan".

    Returns:
        The cleaned text as a ``str``.
    """
    # Missing values carry no text; NaN is the only float that differs from itself.
    if content is None or (isinstance(content, float) and content != content):
        return ""
    content = str(content)
    # remove @mentions
    content = re.sub(r"@\w+", "", content)
    # remove links (and any leftover '@' fragments)
    content = re.sub(r"(?:\@|http?\://|https?\://|www)\S+", "", content)
    # remove hashtag sign and split snake_case tags into words
    content = content.replace("#", "").replace("_", " ")
    # replace line breaks / tabs with a space so adjacent words are not fused
    content = re.sub(r"[\r\n\t]+", " ", content)
    return content



In [None]:
# textblob
def sentiment_scores_txtblob(file):
    """Label every tweet in ``file['tweet_clean']`` using TextBlob polarity.

    Adds (or overwrites) the column ``sentiment_txtblob`` on ``file`` in place:
    ``'positive'`` for polarity > 0, ``'neutral'`` for polarity == 0 and
    ``'negative'`` otherwise. Nothing is returned.

    Args:
        file: DataFrame with a ``tweet_clean`` column of strings
            (``re.sub`` raises ``TypeError`` on non-string values).
    """
    # Raw string: the same pattern as before, without invalid "\w" / "\S" escapes.
    noise = re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")

    labels = []
    # Plain iteration instead of Series.iteritems(), which pandas 2.0 removed.
    for tweet in file['tweet_clean']:
        # Evaluate the sentiment once per tweet rather than once per branch.
        polarity = TextBlob(noise.sub("", tweet)).sentiment.polarity
        if polarity > 0:
            labels.append('positive')
        elif polarity == 0:
            labels.append('neutral')
        else:
            labels.append('negative')

    # One positional column assignment: the column also exists for an empty frame.
    file['sentiment_txtblob'] = labels

In [None]:

# VADER

def sentiment_scores_vader(file, analyzer=None):
    """Score every tweet in ``file['tweet']`` with VADER and label it.

    Adds (or overwrites) these columns on ``file`` in place; nothing is returned:

    * ``neg``, ``neu``, ``pos`` - the corresponding entries of the score dict
    * ``comp`` - the ``compound`` entry of the score dict
    * ``sentiment_vader`` - ``'positive'`` if compound >= 0.05,
      ``'negative'`` if compound <= -0.05, otherwise ``'neutral'``

    Args:
        file: DataFrame with a ``tweet`` column.
        analyzer: Optional object exposing ``polarity_scores(text)`` that
            returns a dict with ``neg``/``neu``/``pos``/``compound`` keys.
            Defaults to a fresh ``SentimentIntensityAnalyzer``; pass one in to
            reuse the same analyzer across several frames.
    """
    if analyzer is None:
        # Build the analyzer once per call instead of once per tweet.
        analyzer = SentimentIntensityAnalyzer()

    # Plain iteration instead of Series.iteritems(), which pandas 2.0 removed.
    scores = [analyzer.polarity_scores(tweet) for tweet in file['tweet']]

    labels = []
    for score in scores:
        compound = score['compound']
        if compound >= 0.05:
            labels.append('positive')
        elif compound <= -0.05:
            labels.append('negative')
        else:
            labels.append('neutral')

    # Whole-column positional assignments: the columns also exist when the
    # frame is empty, so later filters on 'sentiment_vader' do not KeyError.
    file['neg'] = [score['neg'] for score in scores]
    file['neu'] = [score['neu'] for score in scores]
    file['pos'] = [score['pos'] for score in scores]
    file['comp'] = [score['compound'] for score in scores]
    file['sentiment_vader'] = labels


In [None]:
# Change `filename` to the CSV you want to analyse.
# The path is relative to the project root, like the other data paths in this
# notebook, e.g. filename = "data/top10_country/early/873_Colombia.csv"

filename = "data/top10_country/early/873_Colombia.csv"
# on_bad_lines="warn" replaces error_bad_lines=False (removed in pandas 2.0):
# malformed rows are reported and skipped instead of aborting the load.
file = pd.read_csv(filename,index_col=False, on_bad_lines="warn", engine='python',encoding="utf-8")
# Keep only the tweet-level columns; user metadata is not used below.
file = file.drop(columns=['user_id', 'user_created_at', 'user_name', 'user_source',
                                  'user_verified', 'user_favourites_count', 'user_followers_count',
                                  'user_friends_count','tweet_is_retweet'])
# Clean every tweet (mentions, links, hashtag signs, line breaks).
file['tweet'] = file['tweet'].map(clean_text)


In [None]:
# Score every tweet with VADER, then show the scored frame
sentiment_scores_vader(file)
print(file)

# Split the scored tweets into one frame per sentiment label
list_positive = file.loc[file["sentiment_vader"].eq("positive")]
list_neutral = file.loc[file["sentiment_vader"].eq("neutral")]
list_negative = file.loc[file["sentiment_vader"].eq("negative")]

# One word cloud image per sentiment label: pos.png, neu.png, neg.png
create_wordcloud(list_positive['tweet'].to_numpy(), "pos")
create_wordcloud(list_neutral['tweet'].to_numpy(), "neu")
create_wordcloud(list_negative['tweet'].to_numpy(), "neg")

Result

In [None]:
from pyecharts import options as opts

# count percentage for sentiment
def count_values_in_column(data,feature):
    """Tabulate how often each value occurs in ``data[feature]``.

    Missing values are counted as their own category. The result has one row
    per distinct value and two columns: ``Total`` (absolute count) and
    ``Percentage`` (share of all rows, in percent, rounded to 2 decimals).
    """
    column = data.loc[:, feature]
    counts = column.value_counts(dropna=False)
    shares = column.value_counts(dropna=False, normalize=True).mul(100).round(2)
    return pd.concat([counts, shares], axis=1, keys=["Total", "Percentage"])
# Print the count and percentage of each value in the "sentiment_vader" column
print(count_values_in_column(file,"sentiment_vader"))


Visualizations

In [None]:
# create bar by countries
from pyecharts.charts import Bar
from pyecharts import options as opts
from pyecharts.globals import ThemeType
from pyecharts.render import make_snapshot

def createSentiment(positive ,negative,neutral,name):
    """Build a grouped bar chart of sentiment values for one country.

    The x axis has the three categories 'Early', 'Middle' and 'Late'; each
    argument supplies the values of one series across those categories.

    Args:
        positive: Values for the "Positive" series.
        negative: Values for the "Negative" series.
        neutral: Values for the "Neutral" series.
        name: Country name, shown in the subtitle as ``"Country:" + name``.

    Returns:
        The configured pyecharts ``Bar`` chart (call ``.render(path)`` to save it).
    """
    bar = (
        Bar(init_opts=opts.InitOpts(width="1000px", theme=ThemeType.INFOGRAPHIC))
        .add_xaxis(['Early', 'Middle', 'Late'])
        # Series label fixed: it previously read "Position".
        .add_yaxis("Positive", positive)
        .add_yaxis("Negative", negative)
        .add_yaxis("Neutral", neutral)
        .set_series_opts(
            label_opts=opts.LabelOpts(
                font_size=12,
                font_weight="bold",
            )
        )
        .set_global_opts(
            legend_opts=opts.LegendOpts(
                # Legend position: may be given with left/center/right style
                # keywords or as a percentage.
                pos_left='right',
                pos_top='top',
                # Legend layout: 'horizontal' or 'vertical'.
                orient='vertical',
                padding=30,
                textstyle_opts=opts.TextStyleOpts(
                    font_size=12,
                    font_weight='bold',
                    color="#942c47",
                ),
            ),
            title_opts=opts.TitleOpts(
                title="Sentiment Analysis(Percentage)",
                subtitle="Country:"+name,
                pos_left='center',
                padding=20,
                subtitle_textstyle_opts=opts.TextStyleOpts(
                    color="#eb4c49",
                    font_weight='bold',
                    font_size=15,
                ),
                title_textstyle_opts=opts.TextStyleOpts(
                    color="#942c47",
                    font_weight='bold',
                    font_size=20,
                ),
            ),
            xaxis_opts=opts.AxisOpts(
                axistick_opts=opts.AxisTickOpts(
                    is_show=False,
                ),
                axislabel_opts=opts.LabelOpts(
                    font_weight="bold",
                    font_size=12,
                    color="#eb4c49",
                    rotate=30,
                ),
            ),
            yaxis_opts=opts.AxisOpts(
                axistick_opts=opts.AxisTickOpts(
                    is_show=False,
                ),
                axislabel_opts=opts.LabelOpts(
                    font_weight="bold",
                    font_size=12,
                    color="#eb4c49",
                ),
            ),
        )
    )
    return bar
# Render one sentiment bar chart per country, plus the overall chart.
# Arguments are [early, middle, late] values for positive, negative, neutral.
bar_us = createSentiment([45.92, 37.61, 43.23], [28.35, 31.37, 31.05], [25.73, 31.01, 25.72], 'United States')
bar_us.render('sentiment_US.html')

# NOTE(review): the early (53.83 + 50.61 + 20.21) and middle (37.61 + 39.13 + 10.26)
# values do not add up to ~100 like every other row - please re-check the UK numbers.
bar_uk = createSentiment([53.83, 37.61, 46.11], [50.61, 39.13, 32.73], [20.21, 10.26, 21.16], 'United Kingdom')
bar_uk.render('sentiment_UK.html')

bar_th = createSentiment([35.11, 49.49, 37.41], [27.19, 28.08, 34.15], [37.71, 22.42, 28.44], 'Thailand')
bar_th.render('sentiment_TH.html')

bar_sa = createSentiment([43.7, 40.41, 42.38], [30.99, 37.19, 32.7], [25.31, 22.68, 24.92], 'South Africa')
bar_sa.render('sentiment_SA.html')

bar_sb = createSentiment([40.11, 44.85, 39.52], [39.16, 34.96, 35.38], [20.73, 20.19, 25.11], 'Serbia')
bar_sb.render('sentiment_SB.html')

bar_ng = createSentiment([49.41, 44.75, 43.09], [24.86, 29.71, 28.46], [25.73, 25.54, 28.46], 'Nigeria')
bar_ng.render('sentiment_NG.html')

bar_ia = createSentiment([47.59, 37.42, 42.42], [29.11, 37.00, 34.03], [23.30, 25.58, 23.54], 'India')
bar_ia.render('sentiment_Inda.html')

# NOTE(review): an earlier 'Colombia' chart used exactly these Canada numbers and
# was written to sentiment_Co.html, only to be overwritten by the Colombia chart
# below. That dead duplicate has been removed; please confirm which country
# these values really belong to.
bar_ca = createSentiment([35.05, 37.15, 38.92], [35.17, 34.39, 34.58], [29.78, 28.46, 26.50], 'Canada')
bar_ca.render('sentiment_Ca.html')

bar_co = createSentiment([50.21, 37.47, 42.55], [25.41, 32.47, 31.03], [24.38, 30.06, 26.42], 'Colombia')
bar_co.render('sentiment_Co.html')

bar_au = createSentiment([42.4, 37.35, 39.36], [29.55, 33.96, 35.28], [28.04, 28.69, 25.37], 'Australia')
bar_au.render('sentiment_Au.html')

bar = createSentiment([45.93, 34.53, 40.78], [26.78, 36.04, 31.20], [27.29, 29.43, 28.02], 'Overall')
bar.render('sentiment.html')

In [None]:
# create heat map for each stage by countries
from pyecharts.charts import Map
import json
import statistics

def createWordlMap(country_name,country_data):
    """Build a world map coloured by one value per country.

    ``country_name`` and ``country_data`` are paired element by element into
    ``[name, value]`` entries; each value is coloured by the fixed piecewise
    bands below, which span -0.09 to 0.23.

    Returns the configured pyecharts ``Map`` (call ``.render(path)`` to save it).
    """
    pairs = [[label, value] for label, value in zip(country_name, country_data)]

    # (lower bound, upper bound, legend label, fill colour) for every band.
    # NOTE(review): the last band reuses the colour of 0.11~0.15 - confirm this is intended.
    bands = (
        (-0.09, -0.05, "-0.09~-0.05", "#3d707a"),
        (-0.05, -0.01, "-0.05~-0.01", "#4c8c99"),
        (-0.01, 0.03, "-0.01~0.03", "#69b8c7"),
        (0.03, 0.07, "0.03~0.07", "#f6dc7a"),
        (0.07, 0.11, "0.07~0.11", "#ecc641"),
        (0.11, 0.15, "0.11~0.15", "#e38931"),
        (0.15, 0.19, "0.15~0.19", "#e36331"),
        (0.19, 0.23, "0.19~0.23", "#e38931"),
    )
    pieces = [
        {"min": low, "max": high, "label": caption, 'color': fill}
        for low, high, caption, fill in bands
    ]

    canvas = opts.InitOpts(width="1200px", height="800px", theme=ThemeType.ESSOS)
    heading = opts.TitleOpts(
        title="The Overall Sentiment Average Score",
        pos_left='center',
        padding=20,
        item_gap=10,
        title_textstyle_opts=opts.TextStyleOpts(font_weight='bold', font_size=25),
        subtitle_textstyle_opts=opts.TextStyleOpts(font_weight='bold', font_size=20),
    )
    colour_scale = opts.VisualMapOpts(
        range_text=["High", "Low"],
        is_piecewise=True,
        pieces=pieces,
        pos_top="bottom",
        pos_left="left",
        orient="vertical",
        textstyle_opts=opts.TextStyleOpts(font_weight="bold", font_size=15),
    )

    # Named world_map so the builtin map() is not shadowed inside this function.
    world_map = (
        Map(init_opts=canvas)
        .add(
            "COVID_Tweet_Map",
            pairs,
            maptype="world",
            is_map_symbol_show=False,
            itemstyle_opts=opts.ItemStyleOpts(border_color="#fdfcf4"),
        )
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(
            title_opts=heading,
            legend_opts=opts.LegendOpts(is_show=False),
            visualmap_opts=colour_scale,
        )
    )
    return world_map
# Load the per-country results. The path is relative to the project root,
# like the other data paths in this notebook.
with open('sentiment_analysis/result.json', 'r', encoding='utf-8') as f:
    country_data = json.load(f)

# Each entry's 'avg_list' is read as [early, middle, late].
country_name = list(country_data.keys())
early_avg = [entry['avg_list'][0] for entry in country_data.values()]
middle_avg = [entry['avg_list'][1] for entry in country_data.values()]
late_avg = [entry['avg_list'][2] for entry in country_data.values()]
print(early_avg)
print(middle_avg)
print(late_avg)
print(country_name)

# Mean of the country values per stage
print(statistics.mean(early_avg))
print(statistics.mean(middle_avg))
print(statistics.mean(late_avg))

# Sample standard deviation per stage (needs at least two countries)
print(statistics.stdev(early_avg))
print(statistics.stdev(middle_avg))
print(statistics.stdev(late_avg))

# World sentiment map for the early stage
Early_map = createWordlMap(country_name,early_avg)
Early_map.render('early_senti_Map.html')
# World sentiment map for the middle stage
Middle_map = createWordlMap(country_name,middle_avg)
Middle_map.render('middle_senti_Map.html')
# World sentiment map for the late stage
Late_map = createWordlMap(country_name,late_avg)
Late_map.render('late_senti_Map.html')

Statistics

In [None]:
# Count positive / negative / neutral tweets over every country CSV of one stage.
# Change `path` to the folder of the stage you want to analyse.
import os
path = "data/top10_country/early"  # folder holding the CSV files of one stage
# All entries in the folder whose name contains '.csv'
files = [name for name in os.listdir(path) if '.csv' in name]
pos = 0
neg = 0
neu = 0
# The loop variable is not called `file`, so the DataFrame `file` used by the
# earlier cells is not replaced by a file name.
for csv_name in files:
    # on_bad_lines="warn" replaces error_bad_lines=False (removed in pandas 2.0):
    # malformed rows are reported and skipped.
    db = pd.read_csv(os.path.join(path, csv_name), index_col=False, on_bad_lines="warn", engine='python', encoding="utf-8")
    db = db.drop(columns=['user_id', 'user_created_at', 'user_name', 'user_source',
                          'user_verified', 'user_favourites_count', 'user_followers_count',
                          'user_friends_count', 'tweet_is_retweet'])
    db['tweet'] = db['tweet'].map(clean_text)
    sentiment_scores_vader(db)
    # Tally the labels without overwriting the list_* frames of the earlier cell.
    label_counts = db["sentiment_vader"].value_counts()
    pos += int(label_counts.get("positive", 0))
    neg += int(label_counts.get("negative", 0))
    neu += int(label_counts.get("neutral", 0))

In [None]:
# Compute the average VADER compound score of every CSV, folder by folder.
# Change `path` to the directory that holds one sub-folder per country.
import os
path = "sentiment_analysis/sentiment_data"
# Sub-folders to process (skips the macOS '.DS_Store' entry)
folders = [entry for entry in os.listdir(path) if entry != '.DS_Store']
for folder in folders:
    # Join with a separator: path+folder pointed at a non-existent
    # "sentiment_datafolder"-style path.
    folder_path = os.path.join(path, folder)
    files = [name for name in os.listdir(folder_path) if '.csv' in name]

    # os.listdir returns entries in arbitrary order, so the order of the
    # values in `avg` is not guaranteed to follow any stage order.
    avg = []
    # The loop variable is not called `file`, so the DataFrame `file` used by
    # the earlier cells is not replaced by a file name.
    for csv_name in files:
        # Read from inside the folder (the folder was previously left out of the path).
        # on_bad_lines="warn" replaces error_bad_lines=False (removed in pandas 2.0).
        db = pd.read_csv(os.path.join(folder_path, csv_name), index_col=False, on_bad_lines="warn", engine='python', encoding="utf-8")
        db = db.drop(columns=['user_id', 'user_created_at', 'user_name', 'user_source',
                              'user_verified', 'user_favourites_count', 'user_followers_count',
                              'user_friends_count', 'tweet_is_retweet'])
        db['tweet'] = db['tweet'].map(clean_text)
        sentiment_scores_vader(db)
        compound_avg = db['comp'].mean()
        avg.append(compound_avg)
    print(avg)
