In [13]:
pip install tweepy

Note: you may need to restart the kernel to use updated packages.


In [14]:
import tweepy
import pandas as pd
import numpy as np

In [15]:
import json
import os

# Path to the credentials JSON file. Set the TWITTER_CREDENTIALS_PATH
# environment variable to point at your own file; when it is not set, the
# original hard-coded location is used as a fallback:
credentials = os.environ.get(
    "TWITTER_CREDENTIALS_PATH",
    "\\Users\\hasan\\Downloads\\credentials1_yt.json",
)

# Read the file as UTF-8 explicitly so the result does not depend on the
# platform's default encoding:
with open(credentials, "r", encoding="utf-8") as keys:
    api_tokens = json.load(keys)

In [16]:
# Pull every token out of the loaded credentials in one step. The order of the
# key names below matches the order of the names being assigned:
API_KEY, API_SECRET, BEARER_TOKEN, ACCESS_TOKEN, ACCESS_SECRET = (
    api_tokens[token_name]
    for token_name in (
        "api_key",
        "api_secret",
        "bearer_token",
        "access_token",
        "access_secret",
    )
)

In [17]:
# Step 1: build the OAuth handler from the consumer (API) key pair.
auth = tweepy.OAuthHandler(consumer_key=API_KEY, consumer_secret=API_SECRET)

# Step 2: attach the user-level access token pair to that handler.
auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)

# Step 3: create the Twitter API client. Every later cell talks to
# Twitter through this `api` object.
api = tweepy.API(auth)

In [18]:
# Cursor over the search results for "#covid19": pages of 100 tweets,
# capped at 500 items in total.
tweets = (
    tweepy.Cursor(api.search_tweets, q="#covid19", count=100)
    .items(500)
)

In [19]:
# We can use logical search operators (such as OR) in our query text so that
# a tweet may match any of several search terms. For now we search for a
# single hashtag:
query = "#cobalt"

# We will also add a new parameter that limits us to English
# results only:
lang = "en"

# Request extended mode (see the `tweet_mode` parameter of tweet_scraper):
tweet_mode = "extended"

# Let's limit ourselves to 100 tweets per page:
count = 100 

# Let's grab only 10 tweets:
tweet_limit = 10


In [20]:
def tweet_scraper(query='#cobalt', lang="en", tweet_mode="extended", count=100, tweet_limit=10):
    """
    Search tweets with Tweepy's `search_tweets` and return them as a Pandas dataframe.

    The function uses the notebook-level `api` object (the authenticated
    `tweepy.API` client), so the authentication cell must have been run first.

    NOTE: the run recorded in this notebook failed with "403 Forbidden / 453"
    because the account's access level did not include this endpoint; the
    function itself cannot work around that.

    :param query: a keyword search phrase (string)
    :param lang: limit results by language (default: English)
    :param tweet_mode: choose whether to extend tweets to full 280 characters.
    :param count: the number of tweets to return per page (default: 100; max: 100)
    :param tweet_limit: the maximum number of tweets to return (default: 10).
    :return: a dataframe with one row per tweet and the columns `user_id`,
        `screen_name`, `name`, `verified`, `id`, `created_at` and `full_text`.
    """

    # Column order of the returned dataframe. Passing it explicitly keeps the
    # columns present (and in this order) even when the search returns nothing.
    columns = [
        "user_id",
        "screen_name",
        "name",
        "verified",
        "id",
        "created_at",
        "full_text",
    ]

    # One dictionary per tweet. Building complete records guarantees that every
    # column ends up with the same number of entries.
    records = []

    # `lang` is forwarded to the search so the language filter actually applies.
    cursor = tweepy.Cursor(
        api.search_tweets,
        q=query,
        lang=lang,
        tweet_mode=tweet_mode,
        count=count,
    )

    for tweet in cursor.items(tweet_limit):
        user = tweet.user

        # The untruncated text is stored under `full_text`; fall back to `text`
        # when that attribute is absent.
        text = getattr(tweet, "full_text", None)
        if text is None:
            text = getattr(tweet, "text", None)

        records.append({
            # User level data:
            "user_id": user.id,
            "screen_name": user.screen_name,
            "name": user.name,
            "verified": user.verified,
            # Tweet level data:
            "id": tweet.id,
            "created_at": tweet.created_at,
            "full_text": text,
        })

    # To collect more fields (retweet/favorite counts, hashtags, user mentions,
    # reply or retweet details), add the key to `columns` and to the record above.

    return pd.DataFrame(records, columns=columns)



In [21]:

# Search settings for this run:
query = "#cobalt"          # search phrase
lang = "en"                # language filter
tweet_mode = "extended"    # tweet mode passed to the search
count = 100                # tweets per page
tweet_limit = 10           # maximum number of tweets to collect

# Run the scraper with the settings above and keep the resulting dataframe:
df = tweet_scraper(
    query=query,
    lang=lang,
    tweet_mode=tweet_mode,
    count=count,
    tweet_limit=tweet_limit,
)


Forbidden: 403 Forbidden
453 - You currently have access to a subset of Twitter API v2 endpoints and limited v1.1 endpoints (e.g. media post, oauth) only. If you need access to this endpoint, you may need a different access level. You can learn more here: https://developer.twitter.com/en/portal/product