In [None]:
import tweepy
import pandas as pd
import time
from APIKeys import API_KEY, API_SECRET_KEY, ACCESS_TOKEN, BEARER_TOKEN, SECRET_ACCESS_TOKEN
def setup_api():
    """Create the tweepy v2 ``Client`` used by the rest of this notebook.

    All credentials are read from the names imported from the local
    ``APIKeys`` module at the top of the file.

    Returns:
        tweepy.Client: client constructed with the bearer token, the consumer
        key/secret pair, the access token/secret pair and
        ``wait_on_rate_limit=True``.
    """
    credentials = {
        "bearer_token": BEARER_TOKEN,
        "consumer_key": API_KEY,
        "consumer_secret": API_SECRET_KEY,
        "access_token": ACCESS_TOKEN,
        "access_token_secret": SECRET_ACCESS_TOKEN,
    }
    return tweepy.Client(wait_on_rate_limit=True, **credentials)

In [None]:
def fetchTweetsByHashTags(client, maxResult, sincewhen, hashtags):
    """Search recent tweets that contain any of the given hashtags.

    Args:
        client: Object exposing ``search_recent_tweets`` (the tweepy client
            returned by ``setup_api`` in this notebook).
        maxResult: Forwarded unchanged as ``max_results`` to the search call.
        sincewhen: Currently unused by this function; kept in the signature so
            existing callers keep working.
        hashtags: Iterable of hashtag names without the leading ``#``. They
            are OR-ed together into a single query.

    Returns:
        list[dict]: One dict per tweet, as produced by ``processTweetData``.
        Empty when no hashtags were given, nothing matched, or the request
        failed (failures are printed, not raised).
    """
    query = " OR ".join(f"#{tag}" for tag in (hashtags or []))
    if not query:
        # Nothing to search for; avoid spending a rate-limited request on an empty query.
        print("No hashtags given, skipping search")
        return []

    tweet_fields=['id','author_id','created_at','text','public_metrics','entities']   ## ref https://docs.x.com/x-api/fundamentals/data-dictionary#space
    user_fields=['id','username']
    allTweets = []
    try:
        tweets = client.search_recent_tweets(
            query=query, tweet_fields=tweet_fields, user_fields=user_fields, max_results=maxResult)

        ##.data bcoz https://stackoverflow.com/questions/72186517/tweepy-for-twitter-api-v2-extracting-additional-fields-for-tweet-search
        # .data is None rather than an empty list when the search matches
        # nothing, so fall back to [] instead of failing on iteration.
        for tweet in tweets.data or []:
            allTweets.append(processTweetData(tweet))
        print(f"Found {len(allTweets)} tweets")
    except Exception as e:
        # Best-effort: report the failure and return whatever was collected so far.
        print(f"Exception occured fetchTweetsByHashTags : {e}")
    return allTweets

In [None]:
def processTweetData(tweet):
    """Flatten a tweet object into a plain dict.

    Args:
        tweet: Object with ``text``, ``id``, ``created_at`` and ``author_id``
            attributes. ``public_metrics`` and ``entities`` are optional and
            are read as mappings when present and non-empty.

    Returns:
        dict: Always contains ``text``, ``id``, ``created_at`` and
        ``author_id``. When ``public_metrics`` is present, adds
        ``retweet_count``, ``quote_count``, ``like_count`` and ``reply_count``
        (each defaulting to 0). When ``entities`` is present, adds
        ``hashtags`` and/or ``mentions`` as lists of strings for whichever of
        those keys exist.
    """
    tweetData = {
        'text' : tweet.text,
        'id' : tweet.id,
        'created_at' : tweet.created_at,
        'author_id' : tweet.author_id
        }

    ## Extract:  engagement metrics

    # getattr with a string name: the attribute may be missing or None.
    metrics = getattr(tweet, 'public_metrics', None)
    if metrics:
        tweetData.update({
            'retweet_count': metrics.get('retweet_count', 0),
            'quote_count': metrics.get('quote_count', 0),
            'like_count': metrics.get('like_count', 0),
            'reply_count': metrics.get('reply_count', 0)
        })

    ## Extract: hashtags, mentions

    entities = getattr(tweet, 'entities', None)
    if entities:
        if 'hashtags' in entities:
            tweetData['hashtags'] = [tag['tag'] for tag in entities['hashtags']]
        if 'mentions' in entities:
            tweetData['mentions'] = [mention['username'] for mention in entities['mentions']]

    return tweetData

In [None]:
def getUserData(client, userIds, batchSize=100, delaySeconds=120):
    """Look up users by id in batches and index the results by user id.

    Args:
        client: Object exposing ``get_users(ids=..., user_fields=...)`` (the
            tweepy client returned by ``setup_api`` in this notebook).
        userIds: Iterable of user ids to look up.
        batchSize: Maximum number of ids sent per ``get_users`` call. Defaults
            to 100, the per-call limit noted for the free account.
        delaySeconds: Seconds to sleep before each ``get_users`` call.
            Defaults to 120; pass 0 to disable the pause.

    Returns:
        dict: ``{user_id: {'user_id': ..., 'username': ...}}`` for every user
        returned. Empty when ``userIds`` is empty. If a request fails, the
        error is printed and the users collected so far are returned.

    Raises:
        ValueError: If ``batchSize`` is smaller than 1.
    """
    # Materialise once so sets/generators can be sliced into batches.
    userIds = list(userIds or [])
    if not userIds:
        return {}
    if batchSize < 1:
        raise ValueError("batchSize must be at least 1")

    # 'name' is requested from the API but not copied into the result below.
    userFields = ['id', 'username', 'name']

    usersData = {}

    try:
        for start in range(0, len(userIds), batchSize):
            batchUserIds = userIds[start:start + batchSize]
            if delaySeconds > 0:
                time.sleep(delaySeconds)
            users = client.get_users(ids=batchUserIds, user_fields=userFields)
            ##.data bcoz https://docs.x.com/x-api/users/get-users-by-ids
            for user in users.data or []:
                usersData[user.id] = {
                    'user_id' : user.id,
                    'username' : user.username
                }
    except Exception as e:
        # Best-effort: report the failure and keep the batches already fetched.
        print(f"Exception occured getUserData : {e}")
    return usersData

In [None]:
def concateTweetAndUserData(userData, tweetData):
    """Join each tweet record with the record of its author.

    Args:
        userData: Mapping from user id to a dict of user fields.
        tweetData: List of tweet dicts; each must contain ``'author_id'``.

    Returns:
        pandas.DataFrame: One row per tweet. Rows whose ``author_id`` is a key
        of ``userData`` also carry that user's fields. An empty DataFrame is
        returned (after printing a notice) when ``tweetData`` is empty.
    """
    if not tweetData:
        print("No tweets found")
        return pd.DataFrame()

    def _withAuthor(tweetRow):
        # Work on a copy so the caller's tweet dicts are left untouched.
        merged = tweetRow.copy()
        authorId = tweetRow['author_id']
        if authorId in userData:
            merged.update(userData[authorId])
        return merged

    return pd.DataFrame([_withAuthor(tweetRow) for tweetRow in tweetData])

In [None]:
# Driver cell: fetch tweets, look up their authors, and combine both into one frame.
client = setup_api()
tweetData = fetchTweetsByHashTags(client, 11, 7, ["NASDAQ", "AAPL", "GOLD", "FED"]) ## free plan allows, 1 requests / 15 mins & total post allowed 100
# dict.fromkeys de-duplicates while keeping first-seen order, so the id list
# (and therefore the lookup batches) is the same on every run.
userIdsFromTweets = list(dict.fromkeys(tweet['author_id'] for tweet in tweetData))
usersData = getUserData(client, userIdsFromTweets)
combinedData = concateTweetAndUserData(usersData, tweetData)


# info() prints its own summary; printing the un-called method would dump the
# bound-method repr (including the whole frame) instead.
combinedData.info()
print(combinedData.size)
print(combinedData.head(10))