In [None]:
# Import required libraries
import tweepy
from textblob import TextBlob
from wordcloud import WordCloud
import pandas as pd
import re
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
# Render matplotlib figures directly below the cell that creates them
%matplotlib inline
# Display options: show up to 100 characters per cell and never hide columns
pd.set_option('display.max_colwidth', 100)
pd.set_option("display.max_columns", None)
# Use the 'fivethirtyeight' style sheet for every figure in this notebook
plt.style.use('fivethirtyeight')


In [None]:
# Read the four Twitter API credentials from the local credentials CSV.
# Values are picked purely by position: second column, first four data rows
# (NOTE(review): the file layout is not visible here - confirm row order).
log_cred = pd.read_csv("login_credentials.csv")
credential_values = log_cred.iloc[:, 1]
consumer_key = credential_values.iloc[0]
consumer_secret = credential_values.iloc[1]
access_token = credential_values.iloc[2]
access_token_secret = credential_values.iloc[3]


In [None]:
# Create the authentication object and the API object
# The handler is built from the consumer key/secret pair ...
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
# ... and then given the access token/secret pair loaded from the CSV.
auth.set_access_token(access_token, access_token_secret)
# `api` is the client used by every request below; wait_on_rate_limit=True
# asks tweepy to wait rather than fail when a rate limit is reached.
api = tweepy.API(auth, wait_on_rate_limit=True)

In [None]:
def get_tweets_by_search_term(search_term, num_tweets):
    """Fetch up to ``num_tweets`` English tweets matching ``search_term``.

    Retweets are excluded by appending ``-filter:retweets`` to the query.
    The request is made through the module-level ``api`` client.

    Parameters
    ----------
    search_term : str
        Search term or hashtag to look for.
    num_tweets : int
        Maximum number of tweets to collect. Values of zero or less return
        an empty frame without contacting the API.

    Returns
    -------
    pandas.DataFrame
        One row per tweet with the columns ``UserId`` (the author's
        ``user.name``), ``TweetId``, ``tweet`` (full text), ``location``
        and ``created`` (formatted as ``DD-Mon-YYYY``). The columns are
        present even when no tweet was found, so downstream cells that
        select or drop them keep working.
    """
    columns = ["UserId", "TweetId", "tweet", "location", "created"]
    if num_tweets <= 0:
        return pd.DataFrame(columns=columns)

    query_term = f"{search_term} -filter:retweets"
    cursor = tweepy.Cursor(
        api.search_tweets,
        q=query_term,
        # `count` is the page size, not the total; the standard search
        # endpoint documents a maximum of 100 results per page.
        count=min(num_tweets, 100),
        lang="en",
        tweet_mode="extended",
    )
    # Let the cursor enforce the overall limit instead of a manual counter.
    data = [
        {
            "UserId": tweet.user.name,
            "TweetId": tweet.id,
            "tweet": tweet.full_text,
            "location": tweet.user.location,
            "created": tweet.created_at.strftime("%d-%b-%Y"),
        }
        for tweet in cursor.items(num_tweets)
    ]
    return pd.DataFrame(data, columns=columns)

In [None]:
# Ask the user what to search for and how many tweets to collect.
# Use AND or OR in the search term to search for multiple terms.
search_term =  input("Enter the search term or hashtag here: ")
# int() raises ValueError if the entry is not a whole number.
no_of_tweets = int(input("Enter the number of tweets to be searched: "))
# Fetch the tweets and preview the first rows of the resulting frame.
data_df = get_tweets_by_search_term(search_term, no_of_tweets)
data_df.head()

In [None]:
# First look at the fetched data: dimensions, column labels, then dtypes/null counts
for label, value in (('Dataset shape:', data_df.shape), ('Dataset columns:', data_df.columns)):
    print(label, value)
data_df.info()

In [None]:
# Build the working frame for the sentiment analysis:
# drop the metadata columns listed below and keep the rest of data_df
metadata_columns = ['UserId', 'TweetId', 'location', 'created']
tweet_df = data_df.drop(columns=metadata_columns)
tweet_df.head()

In [None]:
# Clean Data - Remove special characters, links, and other elements
# Create a function to clean the data
def clean_tweet(text):
    """Return ``text`` with links, @mentions, '#' signs and 'RT' markers removed.

    Parameters
    ----------
    text : str
        Raw tweet text.

    Returns
    -------
    str
        The cleaned text with leading/trailing whitespace stripped.
    """
    text = re.sub(r'https?:\/\/\S+', '', text) # Remove hyperlinks first so their '@'/'#' characters are not touched
    text = re.sub(r'@\w+', '', text) # Remove @mentions (\w also covers '_' in handles)
    text = re.sub(r'#', '', text) # Remove the # in #hashtag, keeping the word
    text = re.sub(r'\bRT\s+', '', text) # Remove the standalone RT marker, not "RT" inside a word

    # Removing a leading mention or trailing link leaves stray whitespace behind
    return text.strip()

In [None]:
# Run every tweet through clean_tweet, overwriting the raw text column
tweet_df['tweet'] = tweet_df['tweet'].map(clean_tweet)
# Preview the first ten cleaned tweets
tweet_df.head(10)

In [None]:
# Create subjectivity and polarity then save to dataframe in new columns
def get_subjectivity(text):
    """Return the TextBlob subjectivity score computed for ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.subjectivity

def get_polarity(text):
    """Return the TextBlob polarity score computed for ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity

# Score each cleaned tweet and store the results in two new columns
tweet_df['Subjectivity'] = tweet_df['tweet'].map(get_subjectivity)
tweet_df['Polarity'] = tweet_df['tweet'].map(get_polarity)
tweet_df.head(10)

In [None]:
# Word cloud of the most frequently used words across all cleaned tweets.
# MAX_WORDS drives both the cloud and its title so the two cannot disagree.
MAX_WORDS = 50
all_words = ' '.join(tweet_df['tweet'])

fig, ax = plt.subplots(figsize=(12, 16))
word_cloud = WordCloud(max_font_size=50, max_words=MAX_WORDS, random_state=21).generate(all_words)

ax.imshow(word_cloud, interpolation='bilinear')
ax.set_title(f'WordCloud of the {MAX_WORDS} Most Used Words in Recent Tweets - {search_term}', fontsize=12)
# Hide the axes; the trailing ';' keeps the return value out of the cell output
ax.axis("off");

In [None]:
# Compute negative, neutral, positive analysis of the tweets
def get_analysis(score):
    """Map a polarity score to 'Negative' (< 0), 'Neutral' (== 0) or 'Positive'."""
    if score < 0:
        return 'Negative'
    return 'Neutral' if score == 0 else 'Positive'

# Label every tweet by its polarity score
tweet_df['Analysis'] = tweet_df['Polarity'].map(get_analysis)

In [None]:
# Bar chart of the number of tweets per sentiment label.
# value_counts() orders labels by frequency, so a bare colour list would be
# assigned to whichever label happens to come first. Fix the label order and
# take the colours from the same mapping so each sentiment keeps its colour.
sentiment_colors = {'Positive': 'green', 'Neutral': 'blue', 'Negative': 'red'}
sentiment_counts = (
    tweet_df['Analysis']
    .value_counts()
    .reindex(list(sentiment_colors), fill_value=0)  # labels with no tweets show as 0
)
ax1 = sentiment_counts.plot(kind='bar', color=list(sentiment_colors.values()), figsize=(10, 8))
ax1.set_xlabel('Sentiment')
ax1.set_ylabel('Number of tweets')
ax1.set_title(f'Analysis of Tweets by Search Term: {search_term}', fontsize=20);

In [None]:
def get_subjectivity_analysis(score):
    """Label a subjectivity score: 'Objective' below 0.5, otherwise 'Subjective'."""
    return 'Objective' if score < 0.5 else 'Subjective'

# Label every tweet by its subjectivity score, then preview the result
tweet_df['Subjectivity_Analysis'] = tweet_df['Subjectivity'].map(get_subjectivity_analysis)
tweet_df.head(10)

In [None]:
# Pie chart of the share of tweets per subjectivity label
# (slice labels show percentages with one decimal place)
ax2 = tweet_df['Subjectivity_Analysis'].value_counts().plot(kind='pie', autopct='%1.1f%%', figsize=(10, 8))
ax2.set_title(f'Subjectivity Analysis of Tweets - {search_term}', fontsize=10)

In [None]:
# Pie chart of the share of tweets per sentiment label in the 'Analysis' column
# (slice labels show percentages with one decimal place)
ax3 = tweet_df['Analysis'].value_counts().plot(kind='pie', autopct='%1.1f%%', figsize=(10, 8))
ax3.set_title(f'% of Positive, Negative and Neutral Tweets - {search_term}', fontsize=10)


In [None]:
# Scatter plot of polarity (x) against subjectivity (y), one point per tweet
ax4 = tweet_df.plot(kind='scatter', x='Polarity', y='Subjectivity', color='blue', figsize=(16,8))
# Report the number of tweets actually plotted instead of a hard-coded figure
ax4.set_title(f'Sentiment Analysis of {len(tweet_df)} Tweets Mentioning - {search_term}');