<a href="https://colab.research.google.com/github/arpitamangal/twitter-topic-sentiment-analysis/blob/main/TwitterTopicScraping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!pip install -q tweepy matplotlib wordcloud

import tweepy
import time
import requests
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from wordcloud import STOPWORDS


## Scraping

In [None]:
# Twitter API credentials: replace each placeholder with your own value
# (avoid committing real keys to version control)
consumer_key, consumer_secret = 'YOUR_CONSUMER_KEY', 'YOUR_CONSUMER_SECRET'
access_token, access_token_secret = 'YOUR_ACCESS_TOKEN', 'YOUR_ACCESS_TOKEN_SECRET'

In [None]:
# Handling authentication with Twitter

# Build the OAuth handler from the consumer key/secret pair, then attach the
# access token and its secret to that handler
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Create the API wrapper used for every later request.
# wait_on_rate_limit=True: per tweepy's documentation the client waits for the
# rate limit to replenish instead of raising -- confirm against the installed
# tweepy version
api = tweepy.API(auth, wait_on_rate_limit=True)

In [None]:
# Helper that drains a (paginated) cursor and backs off when tweepy raises
def limit_handled(cursor, wait_seconds=60 * 15, max_retries=None):
    """Yield the items of *cursor*, pausing and retrying on tweepy errors.

    Args:
        cursor: Iterator to drain, e.g. ``tweepy.Cursor(...).items(n)``.
            Any object supporting the iterator protocol is accepted.
        wait_seconds: Seconds to sleep after a failed fetch before retrying.
            Defaults to 15 minutes.
        max_retries: Number of consecutive failed fetches tolerated before
            the error is re-raised. ``None`` (the default) retries
            indefinitely.

    Yields:
        Each item produced by *cursor*, in order.

    Raises:
        tweepy.errors.TweepyException: Re-raised once more than
            ``max_retries`` consecutive fetches have failed.
    """
    failures = 0
    while True:
        try:
            # Only the fetch lives inside the try, so errors raised by the
            # consumer of this generator are never mistaken for API errors.
            item = next(cursor)
        except StopIteration:
            # Cursor exhausted: end the generator normally.
            return
        except tweepy.errors.TweepyException as exc:
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            # Report the cause so a persistent failure (for example bad
            # credentials) is visible instead of silently sleeping forever.
            print("sleeping....", exc)
            time.sleep(wait_seconds)
            continue
        failures = 0
        yield item

# Search query: the term of interest followed by the `-filter:retweets`
# search operator
query = " ".join(["Snapdragon", "-filter:retweets"])


In [None]:
# Upper bound on the number of tweets requested from the Twitter API
count = 18000

# Page through the search results with Tweepy; limit_handled wraps the item
# iterator so that API errors trigger a pause and a retry
search = limit_handled(
    tweepy.Cursor(
        api.search_tweets,
        q=query,
        lang='en',
        result_type="recent",
        tweet_mode='extended',
    ).items(count)
)


In [None]:
# Walk the search results and keep the full text of every tweet
tweets = []
for status in search:
    text = status.full_text
    print(text)
    # Only the text is stored; other attributes of each result (date, number
    # of retweets, ...) could be collected here as well.
    tweets.append(text)
    
    

sleeping....
sleeping....
sleeping....
sleeping....


In [None]:
# Write the raw tweet texts to a CSV file
pd.DataFrame(data=tweets).to_csv(path_or_buf=r"SnapdragonRawData20220604.csv")

## Analysis

In [None]:
# Configuration for the Inference API call that performs sentiment analysis:
# model id, access token, endpoint URL and the bearer-token request header
model = "cardiffnlp/twitter-roberta-base-sentiment-latest"
hf_token = "YOUR_ACCESS_TOKEN"
API_URL = f"https://api-inference.huggingface.co/models/{model}"
headers = {"Authorization": f"Bearer {hf_token}"}

def analysis(data, timeout=120):
    """POST *data* to the Inference API and return the decoded JSON reply.

    Args:
        data: Value sent as the ``inputs`` field of the request payload.
        timeout: Seconds to wait for the HTTP response before giving up, so
            a stalled connection cannot hang the caller indefinitely.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.exceptions.HTTPError: If the API answers with a 4xx/5xx
            status.
        requests.exceptions.Timeout: If no response arrives within *timeout*.
    """
    payload = dict(inputs=data, options=dict(wait_for_model=True))
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    # Surface HTTP errors here rather than handing an error payload back to
    # the caller as if it were a classification result.
    response.raise_for_status()
    return response.json()

# Run the sentiment analysis on each tweet and keep the top-scoring label
tweets_analysis = []
for tweet in tweets:
    try:
        response = analysis(tweet)
        # A successful reply is indexed with [0] below; anything else (for
        # example an error payload) is reported as such instead of surfacing
        # as an opaque KeyError/IndexError.
        if not isinstance(response, list) or not response:
            raise ValueError(f"unexpected API response: {response!r}")
        sentiment_result = response[0]
        top_sentiment = max(sentiment_result, key=lambda x: x['score'])  # Get the sentiment with the higher score
        tweets_analysis.append({'tweet': tweet, 'sentiment': top_sentiment['label']})
    except Exception as e:
        # Best effort: a single failing tweet must not abort the whole run,
        # but say which tweet was skipped and why.
        print(f"Skipping tweet {tweet[:50]!r}: {e!r}")
        
        
# Load the data in a dataframe
# Display options: do not truncate column contents and allow very wide lines
pd.set_option('max_colwidth', None)
pd.set_option('display.width', 3000)
# Name the columns explicitly so they exist even when no tweet was analysed
# successfully; an empty list would otherwise produce a frame without columns
# and every df["sentiment"] lookup below would raise KeyError.
df = pd.DataFrame(tweets_analysis, columns=['tweet', 'sentiment'])

# Show a tweet for each sentiment.
# Labels are compared case-insensitively so the filter works whichever
# capitalisation the model's labels use.
for label in ('Positive', 'Neutral', 'Negative'):
    print(df[df["sentiment"].str.lower() == label.lower()].head(1))


# Let's count the number of tweets by sentiments
sentiment_counts = df.groupby(['sentiment']).size()
print(sentiment_counts)

# Pie chart of the number of tweets per sentiment
fig, ax = plt.subplots(figsize=(6, 6), dpi=100)
sentiment_counts.plot.pie(
    ax=ax,
    label="",
    autopct='%1.1f%%',
    startangle=270,
    fontsize=12,
)

# Word clouds for the positive and the negative tweets
stop_words = ["https", "co", "RT"] + list(STOPWORDS)


def _sentiment_wordcloud(frame, label, title):
    """Plot a word cloud of the tweets in *frame* classified as *label*.

    Args:
        frame: DataFrame with a ``tweet`` and a ``sentiment`` column.
        label: Sentiment label to select; matched case-insensitively.
        title: Title shown above the plot.

    Returns:
        A ``(tweets, wordcloud)`` tuple: the selected ``tweet`` Series and
        the generated WordCloud, or ``None`` in place of the word cloud when
        no tweet carries that label.
    """
    selected = frame['tweet'][frame["sentiment"].str.lower() == label.lower()]
    if selected.empty:
        # Nothing to draw for this sentiment; say so and skip the plot.
        print(f"No {label} tweets - skipping word cloud")
        return selected, None
    # Join the actual tweet texts. str(Series) would instead yield the
    # Series' printed representation (index labels, a "Name: ..., dtype: ..."
    # footer, and rows elided beyond pandas' display row limit), so the cloud
    # would not reflect the full set of tweets.
    text = " ".join(selected)
    cloud = WordCloud(max_font_size=50, max_words=50, background_color="white", stopwords=stop_words).generate(text)
    plt.figure()
    plt.title(title)
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()
    return selected, cloud


# Wordcloud with positive tweets
positive_tweets, positive_wordcloud = _sentiment_wordcloud(df, 'Positive', "Positive Tweets - Wordcloud")

# Wordcloud with negative tweets
negative_tweets, negative_wordcloud = _sentiment_wordcloud(df, 'Negative', "Negative Tweets - Wordcloud")

# Write the raw tweet texts to CSV (same file name as the export made right
# after scraping)
pd.DataFrame(data=tweets).to_csv(path_or_buf=r"SnapdragonRawData20220604.csv")
