In [None]:
# Import 
import os
import tweepy
from dotenv import load_dotenv
from textblob import TextBlob
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
from pylab import rcParams
rcParams['figure.figsize'] = 12, 8

In [None]:
# Load environment variables - Twitter API keys
load_dotenv()
consumer_key = os.getenv('CONSUMER_KEY')
consumer_secret = os.getenv('CONSUMER_SECRET')
access_token = os.getenv('ACCESS_TOKEN')
access_token_secret = os.getenv('ACCESS_TOKEN_SECRET')

# os.getenv returns None for an unset variable; fail here with a readable
# message instead of letting the missing value surface later as an opaque
# authentication error on the first API call.
missing_credentials = [
    name
    for name, value in (
        ('CONSUMER_KEY', consumer_key),
        ('CONSUMER_SECRET', consumer_secret),
        ('ACCESS_TOKEN', access_token),
        ('ACCESS_TOKEN_SECRET', access_token_secret),
    )
    if not value
]
if missing_credentials:
    raise RuntimeError(
        'Missing Twitter API credentials: ' + ', '.join(missing_credentials)
        + '. Define them in the environment or in a .env file.'
    )

# Authenticated client; wait_on_rate_limit=True makes Tweepy wait instead of
# raising when the rate limit is hit.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)


# Account to analyse and the maximum number of tweets to collect.
username = 'LibranTechie'
count = 100

# tweet_mode='extended' makes the API return the complete tweet in
# `full_text`; the default compatibility mode truncates `text`, which would
# feed incomplete sentences to the sentiment analysis.
# NOTE(review): the text of retweets can still be truncated in extended mode
# (the complete text is on `retweeted_status`) - confirm whether that matters.
# `count` is the per-request page size (the endpoint accepts up to 200), while
# `.items(count)` caps the total number of tweets yielded by the cursor.
tweets = tweepy.Cursor(
    api.user_timeline,
    screen_name=username,
    count=min(count, 200),
    trim_user=True,
    exclude_replies=True,
    contributor_details=False,
    include_entities=False,
    tweet_mode='extended',
).items(count)

# One row per tweet, keeping only the text.
df = pd.DataFrame(data=[tweet.full_text for tweet in tweets], columns=['Tweet'])
df.head()

In [None]:
# Remove mentions, hashtags, links, retweets
def clean_tweets(txt):
    """Strip Twitter-specific noise from a tweet's text.

    Removes URLs, @mentions, the '#' marker of hashtags (the hashtag word
    itself is kept) and the 'RT : ' prefix left behind once the retweeted
    user's mention has been removed. Whitespace runs are then collapsed and
    the result is trimmed, so a tweet made only of mentions/links becomes ''.

    Args:
        txt: Raw tweet text.

    Returns:
        The cleaned text as a string.
    """
    # Remove urls first and up to the next whitespace, so characters such as
    # '-', '_', '?', '=' or '&' inside a link do not leave fragments behind.
    txt = re.sub(r'https?://\S+', '', txt)
    # Remove mentions
    txt = re.sub(r'@[A-Za-z0-9_]+', '', txt)
    # Remove the hashtag marker, keeping the word
    txt = re.sub(r'#', '', txt)
    # Remove the retweet prefix ("RT @user: " is "RT : " by this point)
    txt = re.sub(r'RT : ', '', txt)
    # Collapse the gaps left by the removals and trim the ends
    return re.sub(r'\s+', ' ', txt).strip()

# Run every tweet through clean_tweets, overwriting the raw text
df['Tweet'] = df['Tweet'].map(clean_tweets)

In [None]:
# Assign polarity and subjectivity to each tweet
def get_subjectivity(txt):
    """Return the subjectivity component of TextBlob's sentiment for ``txt``."""
    blob = TextBlob(txt)
    return blob.sentiment.subjectivity

def get_polarity(txt):
    """Return the polarity component of TextBlob's sentiment for ``txt``."""
    blob = TextBlob(txt)
    return blob.sentiment.polarity

# Score each tweet's text and store the results in two new columns
df['Subjectivity'] = df['Tweet'].map(get_subjectivity)
df['Polarity'] = df['Tweet'].map(get_polarity)

df.head(100)
    

In [None]:
# Replace empty (or whitespace-only) tweets with NaN, then drop them.
# The result is assigned back instead of calling replace(..., inplace=True)
# on df['Tweet']: that chained in-place call acts on a column selection,
# which pandas warns about and which leaves df untouched under Copy-on-Write.
df['Tweet'] = df['Tweet'].replace(r'^\s*$', np.nan, regex=True)
df = df.dropna(subset=['Tweet'])
# Preview the remaining rows instead of printing the whole frame
df.head()

In [None]:
# Classify tweets as positive, negative or neutral
def classify_tweets(a):
    """Map a polarity score to a sentiment label.

    Returns 'Positive' when ``a`` is above zero, 'Neutral' when it equals
    zero, and 'Negative' for every other value.
    """
    if a > 0:
        return 'Positive'
    return 'Neutral' if a == 0 else 'Negative'

# Label every tweet from its polarity score
df['Score'] = df['Polarity'].map(classify_tweets)
df.head(100)

In [None]:
# Calculate % of positive tweets
positive = df[df['Score'] == 'Positive']

# An empty frame (nothing fetched, or every tweet dropped) would otherwise
# raise ZeroDivisionError; report 0 % in that case.
total_tweets = df.shape[0]
positive_pct = positive.shape[0] / total_tweets * 100 if total_tweets else 0.0
# Rounded so the output reads "33.33" rather than "33.33333333333333"
print(str(round(positive_pct, 2)) + " % of positive tweets")

In [None]:
# Visualise the polarity of the tweets as the share of each sentiment class.
# The title announces percentages, so the per-class counts are converted to
# a percentage of all tweets before plotting (groupby sorts the labels).
score_counts = df.groupby('Score').size()
labels = score_counts.index.values
values = (score_counts / score_counts.sum() * 100).values
plt.title('Sentiment Analysis: % of Negative, Neutral and Positive Tweets')
plt.ylabel('% of tweets')
plt.bar(labels, values)
plt.show()

In [None]:
# Scatter of polarity against subjectivity, one colour per sentiment class.
# Each class is drawn with a single scatter call (instead of one call per
# row), which also gives every class a legend entry.
score_colors = {'Positive': 'green', 'Negative': 'red', 'Neutral': 'blue'}
for score, color in score_colors.items():
    tweets_with_score = df[df['Score'] == score]
    plt.scatter(
        tweets_with_score['Polarity'],
        tweets_with_score['Subjectivity'],
        color=color,
        label=score,
    )

plt.title('Twitter Sentiment Analysis')
plt.xlabel('Polarity')
plt.ylabel('Subjectivity')
plt.legend(title='Score')
plt.show()

In [None]:
# Percentage of objective tweets (subjectivity score of exactly 0)
objective = df[df['Subjectivity'] == 0]

# An empty frame (nothing fetched, or every tweet dropped) would otherwise
# raise ZeroDivisionError; report 0 % in that case.
total_tweets = df.shape[0]
objective_pct = objective.shape[0] / total_tweets * 100 if total_tweets else 0.0
# Rounded so the output reads "33.33" rather than "33.33333333333333"
print(str(round(objective_pct, 2)) + " % of objective tweets")

In [None]:
# Build a word cloud from the text of every tweet left in the frame
words = ' '.join(df['Tweet'])
wordCloud = WordCloud(width=600, height=400)
wordCloud.generate(words)  # generate() fills in and returns the same object
plt.imshow(wordCloud)
plt.title('Sentiment Analysis : Word Cloud')
plt.show()