### Setup

In [None]:
# IPython magics: load the autoreload extension and put it in mode 2, so that
# edited modules are re-imported before each cell executes.
%load_ext autoreload
%autoreload 2

In [None]:
import requests
import os
import csv

import pandas as pd

from datetime import datetime
from dotenv import load_dotenv
from random import randint
from time import sleep

In [None]:
# Read the local ".env" file so its entries become environment variables
# (presumably this is where BEARER_TOKEN comes from — confirm the file defines it).
load_dotenv(dotenv_path=".env")

In [None]:
# API bearer token taken from the environment; None when the variable is unset.
BEARER_TOKEN = os.getenv("BEARER_TOKEN")
# Id of the account whose timeline is fetched.
MAYU_USER_ID = "1574902811703541760"

# Directory that receives every file this notebook writes.
OUTPUT_FOLDER = "out"
# Run date as YYYYMMDD, used to stamp the conversation-id backup file.
TODAY_DT = f"{datetime.today():%Y%m%d}"
CONV_ID_CSV_FILE = "conversationId_" + TODAY_DT + ".csv"

In [None]:
# Make sure the output directory exists before anything is written to it.
# exist_ok=True avoids the check-then-create race and keeps re-runs a no-op.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

### Twitter API Methods

In [37]:
def create_tweet_retrieval_url(user_id):
    """Build the v2 timeline URL for ``user_id``.

    The query string asks for the conversation_id, in_reply_to_user_id and
    entities tweet fields, excludes retweets and replies, and requests 100
    results.
    """
    query = "&".join(
        (
            "tweet.fields=conversation_id,in_reply_to_user_id,entities",
            "exclude=retweets,replies",
            "max_results=100",
        )
    )
    return f"https://api.twitter.com/2/users/{user_id}/tweets?{query}"

def bearer_oauth(r):
    """Add the bearer-token Authorization and User-Agent headers to ``r``.

    Used as the ``auth=`` callable in connect_to_endpoint; the request object
    is modified in place and handed back.
    """
    r.headers.update(
        {
            "Authorization": f"Bearer {BEARER_TOKEN}",
            "User-Agent": "v2TweetLookupPython",
        }
    )
    return r

def connect_to_endpoint(url, timeout=30):
    """Send an authenticated GET to ``url`` and return the decoded JSON body.

    Args:
        url: Fully built endpoint URL.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on a stalled connection,
            which would hang the notebook.

    Returns:
        The parsed JSON payload of the response.

    Raises:
        Exception: If the response status is not 200; the message carries the
            status code and the response text.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    response = requests.request("GET", url, auth=bearer_oauth, timeout=timeout)
    # Echo the status so progress is visible in the cell output.
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(
            f"Request returned an error: {response.status_code} {response.text}"
        )
    return response.json()

def get_tweets(user_id):
    """Fetch the timeline response for ``user_id`` and return the parsed JSON."""
    return connect_to_endpoint(create_tweet_retrieval_url(user_id=user_id))

### Retrieve Main Tweets

In [None]:
main_tweets = get_tweets(user_id=MAYU_USER_ID)
# The response may carry no "data" key when the timeline yields zero tweets;
# fall back to an empty list instead of failing with KeyError.
conv_ids = {tweet["conversation_id"] for tweet in main_tweets.get("data", [])}

# backup: one CSV row holding every conversation id found in this run
with open(os.path.join(OUTPUT_FOLDER, CONV_ID_CSV_FILE), "w", newline="") as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    # Sets iterate in arbitrary order; sort so the backup file is reproducible.
    wr.writerow(sorted(conv_ids))


### Retrieve Replies

In [None]:
# Note that this can only retrieve replies from the past 7 days

count = 0

for conv_id in conv_ids:
    output_file_path = os.path.join(OUTPUT_FOLDER, f"{conv_id}_replies.json")
    !twarc2 conversation {conv_id} > {output_file_path}
    count = count + 1
    print(f"Completed {count}/{len(conv_ids)}")
    sleep(randint(5,15))