In [1]:
import pandas as pd
import numpy as np
from itertools import combinations 
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

### Functions

In [2]:
############### sentiment analysis
def sentiment_analyzer_scores(sentence, analyzer=None):
    """Score one piece of text and return its polarity scores as a list.

    Parameters
    ----------
    sentence
        The text to score. It is passed to the analyzer unchanged and is
        echoed back as the last element of the result.
    analyzer : optional
        Any object exposing ``polarity_scores(text)`` that returns a mapping
        with the keys ``'neg'``, ``'pos'``, ``'neu'`` and ``'compound'``.
        When omitted, the notebook-level ``analyser`` is used, which must
        have been created before this function is called.

    Returns
    -------
    list
        ``[neg, pos, neu, compound, sentence]``
    """
    if analyzer is None:
        # Backward-compatible default: use the analyzer instantiated at
        # notebook level. It is looked up at call time, not definition time.
        analyzer = analyser

    score = analyzer.polarity_scores(sentence)
    return [score['neg'], score['pos'], score['neu'], score['compound'], sentence]

############### splitting hashtag groupings
def splitTags(x,y):
    """Pair the single value ``x`` with every element of the iterable ``y``.

    Returns a list of ``(x, element)`` tuples in the order the elements
    appear in ``y``; an empty ``y`` gives an empty list.
    """
    paired = []
    for element in y:
        paired.append((x, element))
    return paired

### Sentiment Analysis

In [3]:
# Read the tweet data and blank out missing values so that the text columns
# used later (e.g. TEXT, HASHTAGS) can be handled as plain strings.
# fillna('') replaces the earlier replace(np.nan, '', regex=True): regex
# matching only applies to string patterns, so it was meaningless for NaN.
tweets = pd.read_csv('Covid_Twitter_City_Data.csv', delimiter=',').fillna('')

# Instantiate the VADER analyzer that sentiment_analyzer_scores() relies on.
analyser = SentimentIntensityAnalyzer()

In [6]:
# score every tweet's text and collect the results in a dataframe
text = tweets['TEXT'].tolist()
sentiments = list(map(sentiment_analyzer_scores, text))
sentiments_df = pd.DataFrame(
    sentiments,
    columns=['NEGATIVE_SCORE', 'POSITIVE_SCORE', 'NEUTRAL_SCORE', 'COMPOUND', 'SENTENCE'],
)

# attach the scores to the tweets (joined on the row index), then slice off
# the first and last columns by position; the last one is SENTENCE, which
# just repeats the scored text
# NOTE(review): the first column is presumably a leftover index column from the CSV - confirm
finalFrame = tweets.join(sentiments_df).iloc[:, 1:-1]

### Creating a Network
* **Nodes**: Cities. **TODO:** color nodes by sentiment; size nodes by the portion of the population affected by COVID.
* **Edges**: Shared hashtags. **TODO:** weight edges by the number of shared hashtags.

In [8]:
# per-record coordinate values and hashtag strings
coords = tweets['COORDS'].tolist()
hashtags = tweets['HASHTAGS'].tolist()

# split each record's hashtag string on whitespace into a list of tags
# (a record with no hashtags becomes an empty list)
sepHash = [tagString.split() for tagString in hashtags]
sepHash[1:5]

[[], [], [], ['#CX', '#COVID', '#custserv']]

In [12]:
# pair each record's coordinates with each of its hashtags,
# skipping records that have no hashtags at all
coordTag = [splitTags(coord, tags) for coord, tags in zip(coords, sepHash) if tags]

# flatten the per-record lists into one list of (coordinates, hashtag) tuples
flattened = []
for recordPairs in coordTag:
    flattened.extend(recordPairs)

# de-duplicate: keep each (coordinates, hashtag) combination only once
finalHash = set(flattened)

In [18]:
# map every hashtag to the list of city coordinates it was seen with
tempDict = {}
for coord, tag in finalHash:
    tempDict.setdefault(tag, []).append(coord)

# drop the covid hashtags themselves; a tag that is not present is ignored
tagsToRemove = ['#covid_19', '#COVID19', '#COVID2019', '#COVID_19', '#COVID__19', '#COVID', '#COVD19', '#Covid_19']
for covidTag in tagsToRemove:
    tempDict.pop(covidTag, None)

print('Example output from the hashtag #FollowTheScience:')
tempDict['#FollowTheScience']

Example output from the hashtag #FollowTheScience:


['39.7392358,-104.990251',
 '34.7464809,-92.2895948',
 '34.0522342,-118.2436849',
 '39.9611755,-82.9987942',
 '47.6062095,-122.3320708',
 '41.8781136,-87.6297982',
 '38.9071923,-77.0368707',
 '39.9525839,-75.1652215']

In [19]:
#### Final Edges
# for every hashtag seen with more than one set of coordinates, build all
# 2-element combinations of those coordinates; each combination is one edge
coordPairs = list(tempDict.values())
productList = [list(combinations(group, 2)) for group in coordPairs if len(group) > 1]

# flatten the per-hashtag edge lists into a single list of edges
finalPairs = []
for edgeGroup in productList:
    finalPairs.extend(edgeGroup)

print('Example Edge:', finalPairs[1])

### Edge Weights
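Reference for counting duplicate edges: [Python program to count duplicates in a list of tuples (GeeksforGeeks)](https://www.geeksforgeeks.org/python-program-to-count-duplicates-in-a-list-of-tuples/)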