# Replies
Get as many replies as possible for each top tweet.

Imports

In [160]:
from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterRequestError, TwitterConnectionError, TwitterPager
import pandas as pd
from tqdm import tqdm

from dotenv import load_dotenv
import os
import glob

Authentication

In [161]:
load_dotenv()
consumer_key = os.getenv("CONSUMER_KEY")
consumer_secret = os.getenv("CONSUMER_SECRET")
access_token_key = os.getenv("ACCESS_TOKEN_KEY")
access_token_secret = os.getenv("ACCESS_TOKEN_SECRET")

api = TwitterAPI(consumer_key, consumer_secret, access_token_key, access_token_secret, api_version='2')

Fetch replies (inspired by [Adithya Narayanan](https://towardsdatascience.com/mining-replies-to-tweets-a-walkthrough-9a936602c4d6))

In [162]:
def retrieve_replies(tweet_id, max_replies=31):
    """Fetch recent replies belonging to the conversation of ``tweet_id``.

    Queries the ``tweets/search/recent`` endpoint for tweets whose
    ``conversation_id`` equals ``tweet_id`` and collects the author ID and
    text of each result, stopping once ``max_replies`` items were gathered
    or the pager runs out of results.

    This is best effort: errors are printed, never raised, and whatever was
    collected before the error is returned (possibly an empty frame). After
    a ``TwitterRequestError`` the function sleeps for 15 minutes before
    returning; it does not retry the failed tweet.

    Args:
        tweet_id: ID of the parent tweet; used as the ``conversation_id``.
        max_replies: Maximum number of replies to collect. Expected to be a
            positive integer. The default of 31 is the number of items the
            previous hard-coded cutoff (``tweet_ct > 30``) let through.

    Returns:
        pandas.DataFrame with two columns: ``id`` (the ``author_id`` of each
        reply, not the reply's own tweet ID) and ``tweet`` (the reply text).
    """
    # Imported locally so the function does not depend on a later notebook
    # cell having imported ``time`` first.
    import time

    # Created before the ``try`` so the final ``return`` always has them,
    # even when the very first API call fails.
    id_list = []
    tweet_list = []

    try:
        pager = TwitterPager(
            api,
            'tweets/search/recent',
            {
                'query': f'conversation_id:{tweet_id}',
                'tweet.fields': 'author_id,conversation_id,created_at,in_reply_to_user_id',
            },
        )

        for item in pager.get_iterator(wait=5):
            # Read both fields before appending so the two lists can never
            # end up with different lengths if one key is missing.
            author_id = item['author_id']
            text = item['text']
            id_list.append(author_id)
            tweet_list.append(text)

            if len(tweet_list) >= max_replies:
                break

    except TwitterRequestError as e:
        # Typically hit when the rate limit is exceeded (HTTP 429), but any
        # request error takes this path: log it, then back off 15 minutes.
        print(e.status_code)
        for msg in iter(e):
            print(msg)
        time.sleep(15 * 60)

    except TwitterConnectionError as e:
        print(e)

    except Exception as e:
        # Deliberate catch-all: one bad tweet must not abort a long crawl.
        print(e)

    return pd.DataFrame({'id': id_list, 'tweet': tweet_list})

This function takes a tweet ID and returns a dataframe with the ID of each reply's author and the reply text (with tabs, newlines and carriage returns removed)

In [163]:
def get_replies(tweet_id):
    """Return the replies to ``tweet_id`` with tab/newline characters removed.

    Delegates the fetching to ``retrieve_replies`` and then deletes, across
    the whole dataframe, both escaped whitespace sequences (a backslash
    followed by ``t``, ``n`` or ``r``) and the real tab, newline and
    carriage-return characters. Matches are removed outright, not replaced
    by a space.
    """
    # A backslash followed by t / n / r, i.e. whitespace that arrived escaped.
    escaped_whitespace = r"\\t|\\n|\\r"
    # The actual tab, newline and carriage-return characters.
    literal_whitespace = "\t|\n|\r"

    frame = retrieve_replies(tweet_id)
    frame.replace(
        to_replace=[escaped_whitespace, literal_whitespace],
        value=["", ""],
        regex=True,
        inplace=True,
    )
    return frame

Create folders for each username (do this after fetching the replies – can be done easily using `parent_tweets.csv`)

In [164]:
# usernames = [os.path.basename(f).split('.')[0] for f in glob.glob('../data/tweets/*.csv')]
# for username in usernames:
#     os.makedirs(f'../data/replies/{username}', exist_ok=True)

Iterate over all parent tweets and fetch replies for each one

In [167]:
import time

# NOTE: `usernames` must already exist in the session; the cell that builds
# it from ../data/tweets/*.csv is commented out above.
# NOTE(review): the start index 23 presumably resumes an interrupted run --
# confirm before re-running from scratch.
user_total = len(usernames)

for i in tqdm(range(23, user_total)):
    username = usernames[i]
    tweets = pd.read_csv(f'../data/tweets/{username}.csv')

    # Fetch the replies of every parent tweet of this user and store them in
    # one CSV per parent tweet, counting how many replies came back in total.
    reply_ct = 0
    for _, row in tweets.iterrows():
        replies = get_replies(row['id'])
        replies.to_csv(f'../data/replies/{row["id"]}.csv', index=False)
        reply_ct += len(replies)

    # Throttle: pause two seconds per reply retrieved for this user. Intended
    # to respect the 450 requests / 15 minutes limit (1 request / 2 seconds).
    time.sleep(reply_ct * 2)

    # Append a progress line so an interrupted run can be resumed by hand.
    with open('../data/progress.txt', 'a') as f:
        print(f'User: {username}, Progress: {i}/{len(usernames)}, Replies: {reply_ct}', file=f)

  8%|▊         | 21/251 [08:49<2:03:31, 32.22s/it]

429
Too Many Requests


 16%|█▋        | 41/251 [26:15<1:27:45, 25.07s/it]  

429
Too Many Requests


 21%|██        | 52/251 [1:18:51<3:29:14, 63.09s/it]  

429
Too Many Requests


 23%|██▎       | 57/251 [1:35:48<5:11:44, 96.42s/it]  

429
Too Many Requests


 27%|██▋       | 67/251 [1:59:11<4:06:21, 80.33s/it]  

429
Too Many Requests


 32%|███▏      | 80/251 [2:22:53<1:30:37, 31.80s/it]  

429
Too Many Requests


 37%|███▋      | 92/251 [2:40:02<22:19,  8.43s/it]    

429
Too Many Requests


 38%|███▊      | 95/251 [3:01:13<9:01:54, 208.43s/it] 

429
Too Many Requests


 46%|████▌     | 115/251 [3:35:06<38:07, 16.82s/it]   

429
Too Many Requests


 47%|████▋     | 118/251 [4:00:19<11:26:22, 309.64s/it]

429
Too Many Requests


 50%|████▉     | 125/251 [4:21:15<3:48:16, 108.71s/it] 

429
Too Many Requests
429
Too Many Requests


 51%|█████▏    | 129/251 [6:12:29<25:07:15, 741.28s/it] 

429
Too Many Requests


 59%|█████▊    | 147/251 [6:35:11<1:03:42, 36.75s/it]  

429
Too Many Requests


 65%|██████▍   | 162/251 [6:52:26<05:50,  3.94s/it]   

429
Too Many Requests


 69%|██████▉   | 173/251 [7:16:07<28:25, 21.86s/it]   

429
Too Many Requests


 73%|███████▎  | 183/251 [7:38:02<21:18, 18.80s/it]   

429
Too Many Requests


 78%|███████▊  | 196/251 [7:55:39<20:55, 22.82s/it]   

429
Too Many Requests


 84%|████████▎ | 210/251 [8:13:09<16:44, 24.49s/it]   

429
Too Many Requests


 85%|████████▍ | 213/251 [8:32:12<1:56:48, 184.43s/it]

429
Too Many Requests


 90%|████████▉ | 225/251 [8:59:59<07:19, 16.89s/it]   

429
Too Many Requests


 90%|█████████ | 226/251 [9:15:28<2:01:02, 290.50s/it]

429
Too Many Requests


 93%|█████████▎| 233/251 [9:51:30<38:26, 128.12s/it]  

429
Too Many Requests


100%|██████████| 251/251 [10:13:06<00:00, 146.56s/it]  
