# Step 1 - Setting up the Environment

In [1]:
import tweepy
import re
import pandas as pd
import numpy as np

Setting up authentication for Twitter (AppCred is a separate .py file with my credentials):

In [2]:
from AppCred import BEARER_TOKEN
from AppCred import CONSUMER_KEY, CONSUMER_SECRET
from AppCred import ACCESS_TOKEN, ACCESS_TOKEN_SECRET

client = tweepy.Client(bearer_token = BEARER_TOKEN,
                       consumer_key = CONSUMER_KEY,
                       consumer_secret = CONSUMER_SECRET,
                       access_token = ACCESS_TOKEN,
                       access_token_secret = ACCESS_TOKEN_SECRET,
                       return_type = dict,         # return the response as a Python dictionary
                       wait_on_rate_limit = True)  # wait once the rate limit is reached

# Step 2 - Getting User Data _(trial and error phase)_

Loading user:

In [38]:
user_id = client.get_user(username = 'RishiSunak')['data']['id']

Checking the number of tweets:

In [66]:
user_metrics = client.get_user(username = 'RishiSunak', user_fields = ['public_metrics'])
user_number_of_tweets = int(user_metrics['data']['public_metrics']['tweet_count'])
user_number_of_tweets

2801

In [67]:
user_id = user_metrics['data']['id']
user_id

'1168968080690749441'

Collecting the last 3200 tweets:

In [40]:
all_user_tweets = []
while (len(all_user_tweets) < 3200) & (len(all_user_tweets) < user_number_of_tweets):
    if len(all_user_tweets) == 0:        
        # getting the first 100 tweets
        tweets = client.get_users_tweets(user_id, tweet_fields = ['created_at', 'public_metrics', 'referenced_tweets'], max_results = 100)
        all_user_tweets = tweets['data']
    else:
        # getting subsequent groups of 100 tweets
        last_tweet = all_user_tweets[-10]['id']
        tweets = client.get_users_tweets(user_id, tweet_fields = ['created_at', 'public_metrics', 'referenced_tweets'], max_results = 100, until_id = last_tweet)
        all_user_tweets = all_user_tweets + tweets['data']
len(all_user_tweets)

2891

In [68]:
user_id = []
text = []
tweet_id = []
edit_history_tweet_ids = []
created_at = []
retweeted = []
retweets = []
replies = []
likes = []
impression = []


for tweet in all_user_tweets:

    user_id.append(user_metrics['data']['id'])
    text.append(tweet['text'])
    tweet_id.append(tweet['id'])
    edit_history_tweet_ids.append(tweet['edit_history_tweet_ids'])
    created_at.append(tweet['created_at'])
    retweets.append(tweet['public_metrics']['retweet_count'])
    replies.append(tweet['public_metrics']['reply_count'])
    likes.append(tweet['public_metrics']['like_count'])
    impression.append(tweet['public_metrics']['impression_count'])

    if ('referenced_tweets' in tweet):
        retweeted.append(1)
    else:
        retweeted.append(0)

user_tweets_df = pd.DataFrame([user_id, text, tweet_id, edit_history_tweet_ids, created_at, 
                               retweeted, retweets, replies, likes, impression]).transpose()
user_tweets_df.columns = ['id', 'text', 'tweet_id', 'edit_history_tweet_ids', 'created_at', 
                          'retweeted', 'retweets', 'replies', 'likes', 'impression']

user_tweets_df.head()

Unnamed: 0,id,text,tweet_id,edit_history_tweet_ids,created_at,retweeted,retweets,replies,likes,impression
0,1168968080690749441,RT @RishiSunak: God Save The King!,1654827289446686726,[1654827289446686726],2023-05-06T12:35:45.000Z,1,754,0,0,1
1,1168968080690749441,RT @RoyalFamily: 𝐓𝐡𝐞 𝐂𝐫𝐨𝐰𝐧𝐢𝐧𝐠 𝐨𝐟 𝐓𝐡𝐞 𝐊𝐢𝐧𝐠\n\nT...,1654820674249129984,[1654820674249129984],2023-05-06T12:09:28.000Z,1,6836,0,0,0
2,1168968080690749441,🇬🇧 God Save The King! 🇬🇧 https://t.co/FfQWH5bpXU,1654814746300776448,[1654814746300776448],2023-05-06T11:45:55.000Z,0,561,725,4098,181026
3,1168968080690749441,RT @RishiSunak: Today’s #Coronation is a momen...,1654775374981521408,[1654775374981521408],2023-05-06T09:09:28.000Z,1,1832,0,0,0
4,1168968080690749441,👏 THANK YOU!\n\nThank you to every voter and v...,1654230099233841153,[1654230099233841153],2023-05-04T21:02:44.000Z,0,165,2092,729,1193660


An alternative way of saving (actually saved further below)

from pathlib import Path
i = 'BBC'
filepath = Path(f'{i}_tweets.csv')  
filepath.parent.mkdir(parents=True, exist_ok=True)  
user_df.to_csv(filepath)

# Step 3 - Getting Multiple User Data

In [63]:
user_list = ['Conservatives', 'RishiSunak', 'KwasiKwarteng', 
            'KirstySNP', 'MhairiBlack', 'SarahLudford', 'LordStorey', 
            'munirawilson', 'Stuart_McDonald', 'ASollowayUK']
tweets_list = []

In [69]:
for i in user_list:
    # define user
    user_metrics = client.get_user(username = i, user_fields = ['public_metrics'])
    user_id = user_metrics['data']['id']
    user_number_of_tweets = int(user_metrics['data']['public_metrics']['tweet_count'])

    # get last 3200 tweets
    all_user_tweets = []
    while (len(all_user_tweets) < 3200) & (len(all_user_tweets) < user_number_of_tweets):
        if len(all_user_tweets) == 0:        
            # getting the first 100 tweets
            tweets = client.get_users_tweets(user_id, 
                                             tweet_fields = ['created_at', 'public_metrics', 'referenced_tweets'], 
                                             max_results = 100)
            all_user_tweets = tweets['data']
        else:
            # getting subsequent groups of 100 tweets
            last_tweet = all_user_tweets[-10]['id']
            tweets = client.get_users_tweets(user_id, 
                                             tweet_fields = ['created_at', 'public_metrics', 'referenced_tweets'], 
                                             max_results = 100, until_id = last_tweet)
            all_user_tweets += tweets['data']

    # create dataframe
    user_id = []
    text = []
    tweet_id = []
    edit_history_tweet_ids = []
    created_at = []
    retweeted = []
    retweets = []
    replies = []
    likes = []
    impression = []

    for tweet in all_user_tweets:

        user_id.append(user_id)
        text.append(str(tweet['text']))
        tweet_id.append(str(tweet['id']))
        edit_history_tweet_ids.append(str(tweet['edit_history_tweet_ids']))
        created_at.append(str(tweet['created_at']))
        retweets.append(str(tweet['public_metrics']['retweet_count']))
        replies.append(str(tweet['public_metrics']['reply_count']))
        likes.append(str(tweet['public_metrics']['like_count']))
        impression.append(str(tweet['public_metrics']['impression_count']))

        if ('referenced_tweets' in tweet):
            retweeted.append(1)
        else:
            retweeted.append(0)

    user_tweets_df = pd.DataFrame([user_id, text, tweet_id, edit_history_tweet_ids, created_at, 
                                   retweeted, retweets, replies, likes, impression]).transpose()
    user_tweets_df.columns = ['id', 'text', 'tweet_id', 'edit_history_tweet_ids', 'created_at', 
                              'retweeted', 'retweets', 'replies', 'likes', 'impression']
  
    # save variable under a new name
    tweets_list.append(user_tweets_df)

In [74]:
tweets_list[9]

Unnamed: 0,id,text,tweet_id,edit_history_tweet_ids,created_at,retweeted,retweets,replies,likes,impression
0,4439444062,🇬🇧 God Save The King! 🇬🇧 https://t.co/BiOOX5rz4z,1654822880247525377,['1654822880247525377'],2023-05-06T12:18:14.000Z,0,2,0,18,655
1,4439444062,So fantastic to be here! 🇬🇧👑🎉🤴 #Coronation htt...,1654792556956221443,['1654792556956221443'],2023-05-06T10:17:44.000Z,0,3,0,19,864
2,4439444062,"🇬🇧👑 Today, history is made as we come together...",1654789730028978176,['1654789730028978176'],2023-05-06T10:06:30.000Z,0,2,1,18,2073
3,4439444062,Really excited for the Coronation tomorrow! 🇬🇧...,1654581684744904705,['1654581684744904705'],2023-05-05T20:19:48.000Z,0,0,2,5,1118
4,4439444062,RT @RoyalSchoolDD: Happy #DeafAwarenessWeek ev...,1654572873346891782,['1654572873346891782'],2023-05-05T19:44:48.000Z,1,4,0,0,0
...,...,...,...,...,...,...,...,...,...,...
3195,4439444062,"The Hub is so welcoming, feels so safe and is ...",1368276553419943938,['1368276553419943938'],2021-03-06T19:05:26.000Z,1,0,1,1,0
3196,4439444062,Visiting the fantastic team at Royal Derby Hos...,1368276551972831236,['1368276551972831236'],2021-03-06T19:05:25.000Z,0,0,2,1,0
3197,4439444062,It's vitally important you don't neglect any o...,1368189642739187713,['1368189642739187713'],2021-03-06T13:20:05.000Z,1,0,1,0,0
3198,4439444062,"This morning, I visited the Vaccination Hub at...",1368189641002717188,['1368189641002717188'],2021-03-06T13:20:04.000Z,0,0,1,1,0


In [75]:
for i, user in zip(range(len(user_list)), user_list):
    user_id = client.get_user(username = user)['data']['id']
    tweets_list[i].to_csv(f'{user_id}_tweets_csilla.csv')