In [1]:
pip install datetime

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install snscrape

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install spacy

Note: you may need to restart the kernel to use updated packages.


In [4]:
from datetime import date
import snscrape.modules.twitter as sntwitter
import pandas as pd
import numpy as np
from textblob import TextBlob
from wordcloud import WordCloud
import re
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('fivethirtyeight')
import nltk
from nltk.stem.snowball import SnowballStemmer
from nltk.corpus import stopwords
import spacy
import en_core_web_sm
nlp = spacy.load("en_core_web_sm")

In [5]:
#Create a list to append tweet data
tweets_list = []
maxTweets = 1000

#Using TwitterSearchScraper to scrape data and append tweets to list
for i, tweet in enumerate(sntwitter.TwitterSearchScraper('paris climate agreement since:2017-06-01 until:2017-07-01').get_items()):
    if i > maxTweets:
        break
    tweets_list.append([tweet.content])

#Creating a dataframe from the tweets the list above
tweets_to_df = pd.DataFrame(tweets_list, columns=['Tweets'])    

In [6]:
tweets_to_df.head(5) #lists first five tweets

Unnamed: 0,Tweets
0,Union County Freeholders Opt in to Paris Clima...
1,Encourage Governor Ducey to Support the Paris ...
2,Encourage Governor Ducey to Support the Paris ...
3,Did you pull out of the Paris Climate agreeme...
4,What the Paris Climate Agreement Was Really Ab...


In [7]:
#clean the tweets with a re (regular experssion) function

def cleanTweets(text):
    text = re.sub('@[A-Za-z0-9_]+', '', text) #removes @mentions
    text = re.sub('#', '', text) #removes hastag symbol
    text = re.sub('RT[\s]+', '', text)
    text = re.sub('https?:\/\/\S+', '', text)
    text = re.sub('\n', ' ', text)
    return text

tweets_to_df['cleanedTweets'] = tweets_to_df['Tweets'].apply(cleanTweets) #apply cleantweet function to the tweet

tweets_to_df.head()

Unnamed: 0,Tweets,cleanedTweets
0,Union County Freeholders Opt in to Paris Clima...,Union County Freeholders Opt in to Paris Clima...
1,Encourage Governor Ducey to Support the Paris ...,Encourage Governor Ducey to Support the Paris ...
2,Encourage Governor Ducey to Support the Paris ...,Encourage Governor Ducey to Support the Paris ...
3,Did you pull out of the Paris Climate agreeme...,Did you pull out of the Paris Climate agreeme...
4,What the Paris Climate Agreement Was Really Ab...,What the Paris Climate Agreement Was Really Ab...


In [8]:
tweets_to_df.to_csv('tweets_climate change.csv') #write dataframe into csv file

savedTweets = pd.read_csv('tweets_climate change.csv', index_col=0) #reads csv file

In [9]:
savedTweets.shape

(1005, 2)

In [10]:
savedTweets

Unnamed: 0,Tweets,cleanedTweets
0,Union County Freeholders Opt in to Paris Clima...,Union County Freeholders Opt in to Paris Clima...
1,Encourage Governor Ducey to Support the Paris ...,Encourage Governor Ducey to Support the Paris ...
2,Encourage Governor Ducey to Support the Paris ...,Encourage Governor Ducey to Support the Paris ...
3,Did you pull out of the Paris Climate agreeme...,Did you pull out of the Paris Climate agreeme...
4,What the Paris Climate Agreement Was Really Ab...,What the Paris Climate Agreement Was Really Ab...
...,...,...
996,@TalkAboutTopeka - Would you adopt the #parisc...,- Would you adopt the parisclimateagreement f...
997,Letter to the Editor » Checking the facts on P...,Letter to the Editor » Checking the facts on P...
998,Discussed withdrawal from the Paris Climate Ag...,Discussed withdrawal from the Paris Climate Ag...
999,Wisconsin: Support the Paris Climate Agreement...,Wisconsin: Support the Paris Climate Agreement


In [11]:
#get subjectivity and polarity of tweets with a function

def getSubjectivity(text):
    return TextBlob(text).sentiment.subjectivity

#get polarity with a function
def getPolarity(text):
    return TextBlob(text).sentiment.polarity

In [12]:
savedTweets['Subjectivity'] = savedTweets['cleanedTweets'].apply(getSubjectivity)
savedTweets['Polarity'] = savedTweets['cleanedTweets'].apply(getPolarity)

TypeError: The `text` argument passed to `__init__(text)` must be a string, not <class 'float'>

In [None]:
savedTweets.drop('Tweets', axis=1).head(10) #shows polarity and subjectivity of each tweet and drops tweets column

In [None]:
#Create a function to check negative, neutral, and positive analysis
def getAnalysis(score):
    if score<0:
        return 'Negative'
    elif score == 0:
        return 'Neutral'
    else:
        return 'Positive'

savedTweets['Analysis'] = savedTweets['Polarity'].apply(getAnalysis)

In [None]:
savedTweets.drop('Tweets', axis = 1).head(10)

In [None]:
savedTweets[savedTweets['Analysis']=='Positive']

In [None]:
savedTweets[savedTweets['Analysis']=='Negative']

In [None]:
savedTweets[savedTweets['Analysis']=='Neutral']

In [None]:
savedTweets['Analysis'].value_counts() #shows the counts of tweets' polarity

In [None]:
#plot a bar graph to show count of tweet sentiment
fig = plt.figure(figsize=(7,5))
#xlabel = ['Positive, 'Negative', 'Neutral']
#plt.bar(xlabel, savedTweets['Analysis'].value_counts(), color = color)
color = ['green', 'grey', 'red']
savedTweets['Analysis'].value_counts().plot(kind='bar', color = color)
plt.title('Value count of tweet polarity')
plt.ylabel('Count')
plt.xlabel('Polarity')
plt.grid(False)
plt.show()

In [None]:
#pie chart to show percentage distribution of polarity
fig = plt.figure(figsize = (7,7))
colors = ('green', 'grey', 'red')
wp = {'linewidth':2, 'edgecolor': 'black'}
tags = savedTweets['Analysis'].value_counts()
explode = (0.1,0.1,0.1)
tags.plot(kind = 'pie', autopct = '%1.1f%%', shadow = True, colors = colors, wedgeprops = wp, explode = explode, label = '')
plt.title('Distribution of Polarity')

In [None]:
#plot the polarity and subjectivity on a scatter plot
plt.figure(figsize=(9,7))
for i in range(0,savedTweets.shape[0]):
    plt.scatter(savedTweets['Polarity'][i], savedTweets['Subjectivity'][i], color='blue')
plt.title('Sentiment Analysis on Climate Change')
plt.xlabel('Polarity')
plt.ylabel('Subjectivity')
plt.show()

In [None]:
#create a function for wordcloud
def create_wordcloud(text):
    allWords = ' '.join([tweets for tweets in text])
    wordCloud = WordCloud(background_color='white', width=800, height=500, random_state=21, max_font_size=130).generate(allWords)
    plt.figure(figsize=(10,10))
    plt.imshow(wordCloud)
    plt.axis('off')
    plt.show()

In [None]:
#plot wordcloud for all tweets
allTweets = savedTweets['cleanedTweets']
create_wordcloud(allTweets)

In [None]:
#create wordcloud for positive tweets
posTweets = savedTweets.loc[savedTweets['Analysis']=='Positive', 'cleanedTweets']
create_wordcloud(posTweets)

In [None]:
#create wordcloud for negative tweets
negTweets = savedTweets.loc[savedTweets['Analysis']=='Negative', 'cleanedTweets']
create_wordcloud(negTweets)

In [None]:
#create wordcloud for neutral tweets
neuTweets = savedTweets.loc[savedTweets['Analysis']=='Neutral', 'cleanedTweets']
create_wordcloud(neuTweets)

In [None]:
savedTweets.shape[0] #shows total rows of tweets

In [None]:
# get the percentage of positive tweets
pTweets = savedTweets[savedTweets['Analysis']=='Positive']
pTweets = pTweets['cleanedTweets']
percentage = round((pTweets.shape[0]/savedTweets.shape[0]) *100, 1)
print('Percentage of positive tweets: {0}%'.format(percentage))

In [None]:
# get the percentage of negative tweets
nTweets = savedTweets[savedTweets['Analysis']=='Negative']
nTweets = nTweets['cleanedTweets']
percentage = round((nTweets.shape[0]/savedTweets.shape[0]) *100, 1)
print('Percentage of negative tweets: {0}%'.format(percentage))

In [None]:
# get the percentage of neutral tweets
neutTweets = savedTweets[savedTweets['Analysis']=='Neutral']
neutTweets = neutTweets['cleanedTweets']
percentage = round((neutTweets.shape[0]/savedTweets.shape[0]) *100, 1)
print('Percentage of neutral tweets: {0}%'.format(percentage))

In [None]:
#break each tweet sentence into words
sentences = []
for word in savedTweets['cleanedTweets']:
    sentences.append(word)
sentences

lines = list()
for line in sentences:
    words = line.split()
    for w in words:
        lines.append(w)
lines[:10]

In [None]:
#stemming all the words to their root word
stemmer = SnowballStemmer(language='english')
stem=[]
for word in lines:
    stem.append(stemmer.stem(word))
stem[:20]    

In [None]:
#removes stopwords (very common words in a sentence)
stem2 = []
for word in stem:
    if word not in nlp.Defaults.stop_words:
        stem2.append(word)
#stem2[:20]

In [None]:
#creates a new dataframe for the stem
df = pd.DataFrame(stem2)
df = df[0].value_counts()
df

In [None]:
#plot the top 20 used words
import warnings
warnings.filterwarnings('ignore')
df = df[:20]
plt.figure(figsize=(10,5))
sns.barplot(df.values, df.index, alpha=0.8)
plt.title('Top Words Overall')
plt.xlabel('Counts of words', fontsize=12)
plt.ylabel('Words from Tweets', fontsize=12)
plt.show()