In [1]:
!pip install tweepy --quiet

# Import Libraries

In [2]:
import os
import re
import string
import warnings
from datetime import datetime

import altair as alt
import nltk
import pandas as pd
import tweepy as tw
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from textblob import TextBlob
warnings.filterwarnings("ignore", category=UserWarning)



# API

In the development of our Twitter Sentiment Analysis project, an essential initial step involves retrieving tweets from the Twitter API, which requires a methodical authentication process to securely access Twitter's resources. This process is pivotal for ensuring the confidentiality and integrity of both the user's credentials and the data accessed. Below, we detail the steps undertaken to authenticate and fetch tweets using the Twitter API, leveraging the Tweepy library—a widely acclaimed Python library that simplifies the interaction with Twitter's API.

In [3]:
# Read the Twitter API credentials from environment variables instead of
# pasting them into the notebook, so that secrets are never saved or shared
# together with the notebook file. Export these variables before starting the
# kernel; a missing variable raises KeyError naming the variable.
bearer_token = os.environ["TWITTER_BEARER_TOKEN"]
api_key = os.environ["TWITTER_API_KEY"]
api_secret_key = os.environ["TWITTER_API_SECRET_KEY"]
access_token = os.environ["TWITTER_ACCESS_TOKEN"]
access_token_secret = os.environ["TWITTER_ACCESS_TOKEN_SECRET"]

# Authenticate: build the handler from the API key pair, then attach the
# user's access token pair.
auth = tw.OAuthHandler(api_key, api_secret_key)
auth.set_access_token(access_token, access_token_secret)

api = tw.API(auth)


In [4]:
# Build the auth handler from the API key pair and attach the access token pair
auth = tw.OAuthHandler(api_key, api_secret_key)
auth.set_access_token(access_token, access_token_secret)

# API object created with wait_on_rate_limit enabled
api = tw.API(auth, wait_on_rate_limit=True)

# Check the credentials; any failure is reported on stdout instead of being
# raised, so the notebook keeps running either way.
try:
    api.verify_credentials()
except Exception as auth_error:
    print("Error during authentication")
    print(auth_error)
else:
    print("Authentication OK")


Authentication OK


In [5]:
# Read the consumer key pair from environment variables rather than hardcoding
# secrets in the notebook. A missing variable raises KeyError naming it.
consumer_key = os.environ["TWITTER_CONSUMER_KEY"]
consumer_secret = os.environ["TWITTER_CONSUMER_SECRET"]

# client_id / client_secret are not referenced anywhere else in the visible
# notebook, so they are optional here (None when the variable is unset).
client_id = os.environ.get("TWITTER_CLIENT_ID")
client_secret = os.environ.get("TWITTER_CLIENT_SECRET")

auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# This rebinds `api`; keep wait_on_rate_limit=True so the replacement object
# is configured the same way as the one that was verified in the cell above.
api = tw.API(auth, wait_on_rate_limit=True)

# Extract Tweets

This section of code demonstrates the process of fetching tweets from a specific user's timeline, in this case, tweets by the user with the username elonmusk, using Twitter's API v2 with the Tweepy library. It is a straightforward yet powerful example of how to interact with the Twitter API to retrieve and display user-specific data. 

In [6]:
# Account whose timeline is analysed, and the number of tweets requested
# from it in the single call below.
TARGET_USERNAME = "elonmusk"
MAX_TWEETS = 100

client = tw.Client(bearer_token=bearer_token)

# Fetch user information by username to get the user ID
user_info = client.get_user(username=TARGET_USERNAME)
if user_info.data is None:
    # Without this check the next line fails with an opaque AttributeError.
    raise LookupError(
        f"Could not look up user '{TARGET_USERNAME}': {user_info.errors}"
    )
user_id = user_info.data.id

# Fetch up to MAX_TWEETS tweets from the user's timeline
tweets = client.get_users_tweets(
    id=user_id,
    max_results=MAX_TWEETS,
    tweet_fields=["created_at", "text"],
)
if tweets.data is None:
    # Iterating over None would raise a TypeError here and in later cells.
    raise LookupError(f"No tweets were returned for user '{TARGET_USERNAME}'.")

for tweet in tweets.data:
    print(f"{tweet.created_at} - {tweet.text}\n")

2024-04-11 22:55:43+00:00 - Community Notes will now show faster https://t.co/QuFdcln6wf

2024-04-11 19:07:08+00:00 - Glad Tesla FSD was there to help and that you’re feeling well! https://t.co/8krLuAMEaj

2024-04-11 18:10:21+00:00 - @jack @ChrisJBakke 🔥🤣

2024-04-11 18:09:29+00:00 - @thackerpd This makes a mockery of FOIA

2024-04-11 18:05:48+00:00 - @jdotarnold Thanks for doing this. While tried hard to do the right thing.

2024-04-11 17:59:07+00:00 - @CollinRugg Wow

2024-04-11 15:38:15+00:00 - @jk_rowling 🔥😂

2024-04-11 15:35:36+00:00 - Congrats SpaceX Team &amp; @SpaceForceDoD on completing 3 orbital Vandenberg launches in 11 days!

Might be a record. https://t.co/72OQELpXyj

2024-04-11 15:28:52+00:00 - Tyranny https://t.co/r3Se5J542z

2024-04-11 15:25:17+00:00 - @TexasLindsay_ !!

2024-04-11 15:17:21+00:00 - @JudiciaryGOP 💯

2024-04-11 15:16:43+00:00 - @VivekGRamaswamy Yup, they proudly so say on their website!

2024-04-11 15:03:16+00:00 - @MarioNawfal !

2024-04-11 15:01:16+00:0

In [7]:
# One "<created_at> - <text>\n" string per fetched tweet
elon_tweets = [
    f"{tweet.created_at} - {tweet.text}\n"
    for tweet in tweets.data
]

In [8]:
# Turn every "<timestamp> - <text>" string back into a timestamp and a text
# field, then collect the records into a DataFrame.
def _parse_tweet_line(tweet_line):
    """Split one formatted tweet string into a {"datetime", "text"} record."""
    # Split only on the first " - " so the tweet text itself may contain it.
    stamp, body = tweet_line.strip().split(" - ", 1)
    return {
        "datetime": datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S%z'),
        "text": body,
    }


parsed_tweets = [_parse_tweet_line(line) for line in elon_tweets]

df_tweets = pd.DataFrame(parsed_tweets)

# Sentiment Analysis

This section enriches a DataFrame df_tweets, containing tweet data, with new insights extracted through custom functions. The extract_mentions function identifies the user mentions in each tweet and compiles them into a new 'mentions' column, using a regular expression that captures each '@' symbol together with the word characters (letters, digits, and underscores) that follow it; multiple mentions are joined into a single comma-separated string, and tweets without mentions get an empty string. Simultaneously, the analyze_sentiment function applies the TextBlob library to evaluate each tweet's sentiment, and the result is appended to the DataFrame as two new columns: 'polarity', indicating the emotional tone ranging from negative to positive, and 'subjectivity', quantifying the presence of personal opinion versus factual information. These enhancements facilitate a deeper analysis of the social interactions and emotional undertones present in the tweet corpus.

In [9]:
# Function to extract mentions from text
def extract_mentions(text):
    """Return the @mentions found in ``text`` as a comma-separated string.

    A mention is an '@' followed by one or more word characters. An '@' that
    directly follows a word character is ignored, so the domain part of an
    e-mail address such as ``name@example.com`` is not reported as a mention.

    Args:
        text: The tweet text. Non-string values (e.g. None or NaN) are
            treated as containing no mentions.

    Returns:
        The mentions in order of appearance joined by ', ', or '' when there
        are none.
    """
    if not isinstance(text, str):
        return ''
    # (?<!\w) rejects an '@' glued to a preceding word character (e-mails).
    return ', '.join(re.findall(r'(?<!\w)@\w+', text))

# Create the 'mentions' column by running extract_mentions on every tweet text
tweet_texts = df_tweets['text']
df_tweets['mentions'] = tweet_texts.map(extract_mentions)

df_tweets.head()

Unnamed: 0,datetime,text,mentions
0,2024-04-11 22:55:43+00:00,Community Notes will now show faster https://t...,
1,2024-04-11 19:07:08+00:00,Glad Tesla FSD was there to help and that you’...,
2,2024-04-11 18:10:21+00:00,@jack @ChrisJBakke 🔥🤣,"@jack, @ChrisJBakke"
3,2024-04-11 18:09:29+00:00,@thackerpd This makes a mockery of FOIA,@thackerpd
4,2024-04-11 18:05:48+00:00,@jdotarnold Thanks for doing this. While tried...,@jdotarnold


In [10]:
# Function to analyze sentiment of each tweet
def analyze_sentiment(text):
    """Return the TextBlob ``sentiment`` of ``text``.

    The caller in this notebook unpacks the result into a polarity value and
    a subjectivity value, in that order.
    """
    return TextBlob(text).sentiment

# Score every tweet, then transpose the per-tweet (polarity, subjectivity)
# pairs into one sequence per measure and store each as its own column.
sentiment_pairs = df_tweets['text'].apply(analyze_sentiment)
polarity_values, subjectivity_values = zip(*sentiment_pairs)

df_tweets['polarity'] = polarity_values
df_tweets['subjectivity'] = subjectivity_values

df_tweets.head()

Unnamed: 0,datetime,text,mentions,polarity,subjectivity
0,2024-04-11 22:55:43+00:00,Community Notes will now show faster https://t...,,0.0,0.0
1,2024-04-11 19:07:08+00:00,Glad Tesla FSD was there to help and that you’...,,0.625,1.0
2,2024-04-11 18:10:21+00:00,@jack @ChrisJBakke 🔥🤣,"@jack, @ChrisJBakke",0.0,0.0
3,2024-04-11 18:09:29+00:00,@thackerpd This makes a mockery of FOIA,@thackerpd,0.0,0.0
4,2024-04-11 18:05:48+00:00,@jdotarnold Thanks for doing this. While tried...,@jdotarnold,0.064683,0.425794


**Polarity**<br>
- Polarity is a measure of the sentiment expressed in the text, ranging from -1 to 1. <br>
- A positive value (greater than 0) indicates a positive sentiment, suggesting the text expresses positive emotions, opinions, or attitudes.<br>
- A negative value (less than 0) indicates a negative sentiment, suggesting the text expresses negative emotions, opinions, or attitudes.<br>
- A value of 0 indicates a neutral sentiment, implying the text is neither explicitly positive nor negative in emotion or opinion.<br>

**Subjectivity** <br>
- Subjectivity quantifies the amount of personal opinion and subjective judgment in the text, with a range from 0 to 1.<br>
- A higher value (closer to 1) signifies that the text contains more personal opinions, emotions, or subjective assessments.<br>
- A lower value (closer to 0) signifies that the text is more objective, factual, or devoid of personal emotion or opinion.<br>
- Subjectivity is crucial for understanding how much of the sentiment expressed is influenced by personal views versus being a reflection of objective facts or statements.<br>

# Emotion Category

In [11]:
def categorize_emotion(row):
    """Label a row as 'Positive', 'Negative' or 'Neutral' from its polarity.

    Args:
        row: A mapping-like object with a 'polarity' entry.

    Returns:
        'Positive' when the polarity is above zero, 'Negative' when it is
        below zero, and 'Neutral' in every other case.
    """
    score = row['polarity']
    if score > 0:
        return 'Positive'
    if score < 0:
        return 'Negative'
    return 'Neutral'

# Label every row with categorize_emotion and store the labels as 'emotion'
emotion_labels = df_tweets.apply(categorize_emotion, axis=1)
df_tweets['emotion'] = emotion_labels

# Show the first few rows to confirm the emotion column
df_tweets.head()


Unnamed: 0,datetime,text,mentions,polarity,subjectivity,emotion
0,2024-04-11 22:55:43+00:00,Community Notes will now show faster https://t...,,0.0,0.0,Neutral
1,2024-04-11 19:07:08+00:00,Glad Tesla FSD was there to help and that you’...,,0.625,1.0,Positive
2,2024-04-11 18:10:21+00:00,@jack @ChrisJBakke 🔥🤣,"@jack, @ChrisJBakke",0.0,0.0,Neutral
3,2024-04-11 18:09:29+00:00,@thackerpd This makes a mockery of FOIA,@thackerpd,0.0,0.0,Neutral
4,2024-04-11 18:05:48+00:00,@jdotarnold Thanks for doing this. While tried...,@jdotarnold,0.064683,0.425794,Positive


In [12]:
# Tally how many tweets fall into each emotion category
emotion_column = df_tweets['emotion']
emotion_counts = emotion_column.value_counts()

# The dominant emotion is the category with the largest tally
dominant_emotion = emotion_counts.idxmax()

# Build a one-sentence summary of the overall emotion (stored, not displayed)
conclusion_statement = (
    f"The overall emotion of the tweets is predominantly '{dominant_emotion}' "
    f"with a count of {emotion_counts[dominant_emotion]}."
)

In [13]:
# Mean polarity and mean subjectivity across all tweets
average_polarity = df_tweets['polarity'].mean()
average_subjectivity = df_tweets['subjectivity'].mean()

# Word for the mean polarity: above zero, below zero, or neither
if average_polarity > 0:
    polarity_label = 'positive'
elif average_polarity < 0:
    polarity_label = 'negative'
else:
    polarity_label = 'neutral'

# Word for the mean subjectivity relative to the 0.5 midpoint
if average_subjectivity > 0.5:
    subjectivity_label = 'mostly subjective'
elif average_subjectivity < 0.5:
    subjectivity_label = 'mostly objective'
else:
    subjectivity_label = 'equally subjective and objective'

# Extend the conclusion with both averages and their interpretation
conclusion_statement_updated = (
    f"The overall emotion of the tweets is predominantly '{dominant_emotion}' "
    f"with a count of {emotion_counts[dominant_emotion]}. "
    f"The average polarity of the tweets is {average_polarity:.2f}, "
    f"indicating a generally {polarity_label} sentiment. "
    f"The average subjectivity is {average_subjectivity:.2f}, "
    f"suggesting that tweets are {subjectivity_label} on average."
)
conclusion_statement_updated

"The overall emotion of the tweets is predominantly 'Neutral' with a count of 56. The average polarity of the tweets is 0.09, indicating a generally positive sentiment. The average subjectivity is 0.29, suggesting that tweets are mostly objective on average."

# Visualizations

In our Twitter sentiment analysis, we leveraged both a bar chart for emotion distribution and a conceptual pie chart to dissect sentiment polarity and subjectivity, providing a holistic view of user engagement. The bar chart vividly illustrated the prevalence of various emotions, notably showing a significant lean towards Neutral sentiments, which suggests a substantial portion of tweets convey information or comments without strong emotional undertones. Conversely, the pie chart aimed to offer a broader perspective on sentiment polarity, distinguishing between Positive, Neutral, and (hypothetically) Highly Subjective tweets based on polarity and subjectivity scores. Note that these three slices are not mutually exclusive: the Subjective slice counts every tweet with a subjectivity score above 0.5 regardless of its polarity, and tweets with negative polarity have no slice of their own, so the pie chart should be read as a side-by-side comparison of three counts rather than as a partition of the dataset. This contrast underscored the dominance of Neutral sentiments, implying a predominant trend of tweets that refrain from conveying marked positivity or negativity, possibly reflecting a preference for objective discourse on the platform. Together, these visualizations paint a comprehensive picture of the emotional and sentiment landscape within the analyzed Twitter dataset, offering valuable insights into public mood and engagement trends.

In [14]:
# Bar chart of how many tweets fall into each emotion category.
# x: one bar per emotion, ordered by descending bar height; y: row count.
emotion_axis = alt.X('emotion', sort='-y', title='Emotion')
count_axis = alt.Y('count():Q', title='Count')

emotion_chart = (
    alt.Chart(df_tweets)
    .mark_bar()
    .encode(x=emotion_axis, y=count_axis, color='emotion:N')
    .properties(
        title='Distribution of Emotions in Tweets',
        width=600,
        height=400,
    )
)

# Display the chart
emotion_chart


In [15]:
# Row masks for the three categories. The subjectivity mask is independent of
# the two polarity masks, so one tweet can be counted in more than one bucket.
is_positive = df_tweets['polarity'] > 0
is_neutral = df_tweets['polarity'] == 0
# Highly subjective: subjectivity score > 0.5
is_highly_subjective = df_tweets['subjectivity'] > 0.5

# Number of tweets selected by each mask
positive_count = len(df_tweets[is_positive])
neutral_count = len(df_tweets[is_neutral])
highly_subjective_count = len(df_tweets[is_highly_subjective])

positive_count, neutral_count, highly_subjective_count

(38, 56, 28)

In [16]:
# Summary table: one row per category with its tweet count
data = dict(
    Category=['Positive', 'Neutral', 'Subjective'],
    Count=[positive_count, neutral_count, highly_subjective_count],
)
df_summary = pd.DataFrame(data)

# Encodings for the pie chart: slice angle from Count, slice colour from Category
slice_angle = alt.Theta(field="Count", type="quantitative")
slice_color = alt.Color(
    field="Category",
    type="nominal",
    legend=alt.Legend(title="Categories"),
)
slice_tooltip = [
    alt.Tooltip(field="Category", type="nominal", title="Category"),
    alt.Tooltip(field="Count", type="quantitative", title="Count"),
]

# Create a pie chart
pie_chart = (
    alt.Chart(df_summary)
    .mark_arc()
    .encode(theta=slice_angle, color=slice_color, tooltip=slice_tooltip)
    .properties(title="Polarity vs Neutral vs Subjectivity")
)

pie_chart
