# Explore Retweets and Replies using Tweepy

In [1]:
%load_ext lab_black

In [2]:
import datetime
import os
import re

import numpy as np
import pandas as pd
import tweepy

## About

This notebook explores how retweets and replies differ in terms of the following fields:
- `is_quote_status`
- `retweeted`
- `in_reply_to_screen_name`

These fields were captured during streaming with the `Tweepy` library.

## User Inputs

In [3]:
# Paging controls for the timeline download: `num_tweets_per_page` is passed
# as `count` to `api.user_timeline`, and at most `num_pages_wanted` pages are
# requested from the cursor.
num_tweets_per_page = 200
num_pages_wanted = 6

# Screen name of the account whose timeline is explored
user = "RegVickers"

In [4]:
# Twitter API credentials are read from the environment so that no secret is
# stored in the notebook; each value is None when its variable is not set.
api_key, api_key_secret, access_token, access_token_secret = map(
    os.getenv,
    (
        "TWITTER_API_KEY",
        "TWITTER_API_KEY_SECRET",
        "TWITTER_ACCESS_TOKEN",
        "TWITTER_ACCESS_TOKEN_SECRET",
    ),
)

# Nullable pandas dtypes applied (via `.astype`) to the tweet DataFrame
dtypes_dict = {
    "in_reply_to_status_id": pd.Float64Dtype(),
    "page": pd.Int32Dtype(),
    "tweet": pd.Int32Dtype(),
}

## Authenticate Twitter Client

In [5]:
# Build the OAuth 1.0a handler from the consumer (app) credentials, attach the
# user's access token pair, then create the API client used by later cells.
auth = tweepy.OAuthHandler(consumer_key=api_key, consumer_secret=api_key_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

## Get Tweets from a Single User

In [6]:
%%time
# Search using pagination
pages = tweepy.Cursor(
    api.user_timeline,
    screen_name=user,
    tweet_mode='extended',
    count=num_tweets_per_page,
).pages(num_pages_wanted)

# Iterate through the pages and print the text of the tweets
data = []
# Loop over individual pages
for page_num, page in enumerate(pages):
    # Loop over tweets on single page
    # for i in range(len(page)):
    print(f"Reading {len(page):,} tweets on page number {page_num+1}...", end="")
    for tweet_num, tweet in enumerate(page):
        # if page_no == 0 and i == 0:
        #     print(dir(page[i]))
        if hasattr(tweet, "retweeted_status"):
            try:
                text = tweet.retweeted_status.extended_tweet["full_text"]
            except AttributeError:
                text = tweet.retweeted_status.full_text
        else :
            try:
                text = tweet.extended_tweet["full_text"]
            except AttributeError:
                text = tweet.full_text
        
        text_no_usernames = re.sub('@[^\s]+','', text.replace("\n", " "))
        text_no_usernames_urls = re.sub('http[^\s]+','', text_no_usernames)
        data.append(
            {
                "page": page_num,
                "tweet": tweet_num,
                "created_at": tweet.created_at,
                "is_quote_status": tweet.is_quote_status,
                "favorited": tweet.favorited,
                "retweeted": tweet.retweeted,
                "in_reply_to_status_id": tweet.in_reply_to_status_id,
                'in_reply_to_screen_name': tweet.in_reply_to_screen_name,
                "user_screen_name": tweet.user.screen_name,
                "text_no_usernames": text_no_usernames,
                "text": text_no_usernames_urls,
                "url": f"https://twitter.com/twitter/statuses/{tweet.id}"
            }
        )
    print("Done.")

Reading 200 tweets on page number 1...Done.
Reading 200 tweets on page number 2...Done.
Reading 200 tweets on page number 3...Done.
Reading 200 tweets on page number 4...Done.
Reading 199 tweets on page number 5...Done.
CPU times: user 616 ms, sys: 22.3 ms, total: 638 ms
Wall time: 5.51 s


In [7]:
%%time
df = (
    pd.DataFrame.from_records(data)
    .astype(dtypes_dict)
    .dropna(subset=['user_screen_name'])
    .sort_values(by=["in_reply_to_screen_name", "created_at"], ignore_index=True)
    .assign(num_urls=lambda df: df["text_no_usernames"].apply(lambda x: re.findall(r"(https?://\S+)", x)).str.len())
    .assign(is_reply=lambda df: ~df["in_reply_to_screen_name"].isna())
)
df["in_reply_to_status_id"] = (
    df["in_reply_to_status_id"]
    .astype("float64")
    .fillna(0)
    .astype("int64")
    .astype(str)
    .replace("0", None)
)
with pd.option_context("display.max_colwidth", None):
    display(
        pd.concat([df.head(), df.tail()])
        .style
        .set_caption(f"Retrieved {len(df):,} tweets")
    )

Unnamed: 0,page,tweet,created_at,is_quote_status,favorited,retweeted,in_reply_to_status_id,in_reply_to_screen_name,user_screen_name,text_no_usernames,text,url,num_urls,is_reply
0,0,146,2022-10-18 22:57:01+00:00,False,False,False,1.582476618907648e+18,4everNeverTrump,RegVickers,The MAGATs are trolling and calling him a failure. Don't think his star is rising in the Fox sky.,The MAGATs are trolling and calling him a failure. Don't think his star is rising in the Fox sky.,https://twitter.com/twitter/statuses/1582506064229986304,0,True
1,3,2,2022-10-17 16:24:13+00:00,False,False,False,1.5816959497429563e+18,62WalterP,RegVickers,https://t.co/vWamnhPMGA,,https://twitter.com/twitter/statuses/1582044824336990210,1,True
2,4,162,2022-10-16 00:51:43+00:00,False,False,False,1.5814157026986844e+18,ACTBrigitte,RegVickers,"Weren’t there Chinese spies running around Mar a Lago while Donnie the traitor was hiding Top Secret, Classified Materials in the basement?","Weren’t there Chinese spies running around Mar a Lago while Donnie the traitor was hiding Top Secret, Classified Materials in the basement?",https://twitter.com/twitter/statuses/1581447762746318848,0,True
3,3,83,2022-10-16 22:15:13+00:00,False,False,False,1.5817579747200614e+18,ACTBrigitte,RegVickers,"If you stand with Trump, you stand with Putin.🛑","If you stand with Trump, you stand with Putin.🛑",https://twitter.com/twitter/statuses/1581770767611482112,0,True
4,3,44,2022-10-16 23:46:48+00:00,False,False,False,1.5817830656966943e+18,ACTBrigitte,RegVickers,"He has always put himself first, unless Putin is in the house.","He has always put himself first, unless Putin is in the house.",https://twitter.com/twitter/statuses/1581793814251196416,0,True
994,0,8,2022-10-19 16:54:15+00:00,False,False,False,,,RegVickers,👏👏👏👏,👏👏👏👏,https://twitter.com/twitter/statuses/1582777156697874436,0,False
995,0,6,2022-10-19 17:01:54+00:00,False,False,False,,,RegVickers,If you know who Captain Kangaroo is then say hey.,If you know who Captain Kangaroo is then say hey.,https://twitter.com/twitter/statuses/1582779080772325376,0,False
996,0,3,2022-10-19 17:04:29+00:00,False,False,False,,,RegVickers,"If investigated, how many of tRumps accusations do you think are actually confessions? I am betting 95% or more.","If investigated, how many of tRumps accusations do you think are actually confessions? I am betting 95% or more.",https://twitter.com/twitter/statuses/1582779731455270914,0,False
997,0,2,2022-10-19 17:05:05+00:00,False,False,False,,,RegVickers,"I’m pinning my most earnest hopes on the new voter registrations and the doctrine of, “hell hath no fury like women scorned.”","I’m pinning my most earnest hopes on the new voter registrations and the doctrine of, “hell hath no fury like women scorned.”",https://twitter.com/twitter/statuses/1582779884056633344,0,False
998,0,0,2022-10-19 17:11:26+00:00,False,False,False,,,RegVickers,I've decided to go to a Halloween costume party dressed as a cranky old geezer. Costume will be easy. I have a whole closet full of them..,I've decided to go to a Halloween costume party dressed as a cranky old geezer. Costume will be easy. I have a whole closet full of them..,https://twitter.com/twitter/statuses/1582781481524793355,0,False


CPU times: user 57.6 ms, sys: 0 ns, total: 57.6 ms
Wall time: 55.4 ms


## Manual Checking of Tweets on Twitter.com Using Web Browser

In [8]:
# Tweets inspected by hand in a web browser. Each row holds a timestamp, the
# link that was opened, and a note describing what kind of tweet it is; rows
# are expanded into dicts keyed by `created_at`, `link` and `description`.
tweet_links = [
    dict(zip(("created_at", "link", "description"), row))
    for row in (
        (
            "2022-10-17 17:42:24+00:00",
            "https://twitter.com/RegVickers/status/1582064497933836294",
            "reply to original tweet (only added text)",
        ),
        (
            "2022-10-17 17:40:04+00:00",
            "https://twitter.com/Welshsprout/status/1582063183769935872",
            "retweet (original tweet was a reply with a quote so there is a URL in the tweet text)",
        ),
        (
            "2022-10-17 17:31:19+00:00",
            "https://twitter.com/AWeissmann_/status/1582060352149532673",
            "retweet (original tweet was not a reply and did not contain an attachment so there is no URL in the tweet text)",
        ),
        (
            "2022-10-17 17:20:43+00:00",
            "https://twitter.com/PoliticusSarah/status/1581802895724675072",
            "retweet (original tweet was not a reply but contained an attachment so there is a URL in the tweet text)",
        ),
        (
            "2022-10-17 17:20:29+00:00",
            "https://twitter.com/kitchen5203/status/1582057702615678976",
            "retweet (original tweet was a reply with a quote so there is a URL in the tweet text)",
        ),
        (
            "2022-10-17 17:20:09+00:00",
            "https://twitter.com/JerasIkehorn/status/1581825640734478336",
            "retweet (original tweet was not a reply but contained an attachment so there is a URL in the tweet text)",
        ),
        (
            "2022-10-17 17:19:58+00:00",
            "https://twitter.com/4a_of/status/1582057653231968260",
            "retweet (original tweet was a reply without a quote so there is no URL in the tweet text)",
        ),
        (
            "2022-10-17 17:13:38+00:00",
            "https://twitter.com/RegVickers/status/1582057259302993920",
            "reply to original tweet (only added picture, no text)",
        ),
    )
]
# Turn the hand-checked samples into a frame whose `created_at` is a parsed
# timestamp, so it can be matched against the downloaded tweets.
df_samples = (
    pd.DataFrame.from_records(tweet_links)
    .assign(created_at=lambda samples: pd.to_datetime(samples["created_at"]))
)

# Right join on the timestamp: every hand-checked sample is kept, with the
# downloaded tweet's fields attached where the timestamps match.
df_samples_checked = (
    df.merge(df_samples, on=["created_at"], how="right")
    .sort_values(by=["description", "created_at"], ignore_index=True)
)

# Display the full text of every column without truncation
with pd.option_context("display.max_colwidth", None):
    display(df_samples_checked)

Unnamed: 0,page,tweet,created_at,is_quote_status,favorited,retweeted,in_reply_to_status_id,in_reply_to_screen_name,user_screen_name,text_no_usernames,text,url,num_urls,is_reply,link,description
0,2,186,2022-10-17 17:13:38+00:00,False,False,False,1.5820254102582272e+18,MairScott3,RegVickers,https://t.co/8ZICk2CBw9,,https://twitter.com/twitter/statuses/1582057259302993920,1,True,https://twitter.com/RegVickers/status/1582057259302993920,"reply to original tweet (only added picture, no text)"
1,2,166,2022-10-17 17:42:24+00:00,False,False,False,1.5820624721395384e+18,Welshsprout,RegVickers,Looks like an animated toilet brush.,Looks like an animated toilet brush.,https://twitter.com/twitter/statuses/1582064497933836294,0,True,https://twitter.com/RegVickers/status/1582064497933836294,reply to original tweet (only added text)
2,2,174,2022-10-17 17:20:29+00:00,True,False,False,,,RegVickers,This! https://t.co/qsA4CmbofN,This!,https://twitter.com/twitter/statuses/1582058984785076234,1,False,https://twitter.com/kitchen5203/status/1582057702615678976,retweet (original tweet was a reply with a quote so there is a URL in the tweet text)
3,2,167,2022-10-17 17:40:04+00:00,True,False,False,,,RegVickers,Just seen this! https://t.co/qzfALa3jjl,Just seen this!,https://twitter.com/twitter/statuses/1582063911993765888,1,False,https://twitter.com/Welshsprout/status/1582063183769935872,retweet (original tweet was a reply with a quote so there is a URL in the tweet text)
4,2,176,2022-10-17 17:19:58+00:00,False,False,False,,,RegVickers,"But those trips to Delaware are killing us, right","But those trips to Delaware are killing us, right",https://twitter.com/twitter/statuses/1582058853780574208,0,False,https://twitter.com/4a_of/status/1582057653231968260,retweet (original tweet was a reply without a quote so there is no URL in the tweet text)
5,2,170,2022-10-17 17:31:19+00:00,False,False,False,,,RegVickers,DOJ Bannon brief: The rioters who overran the Capitol on [J6] did not just attack a building-they assaulted the rule of law upon which this country was built and through which it endures. By flouting the Select Committee’s subpoena and its authority [he] exacerbated that assault.,DOJ Bannon brief: The rioters who overran the Capitol on [J6] did not just attack a building-they assaulted the rule of law upon which this country was built and through which it endures. By flouting the Select Committee’s subpoena and its authority [he] exacerbated that assault.,https://twitter.com/twitter/statuses/1582061707714625536,0,False,https://twitter.com/AWeissmann_/status/1582060352149532673,retweet (original tweet was not a reply and did not contain an attachment so there is no URL in the tweet text)
6,2,175,2022-10-17 17:20:09+00:00,False,False,False,,,RegVickers,Be like Bruce…👇 https://t.co/9kkDxJHq1g,Be like Bruce…👇,https://twitter.com/twitter/statuses/1582058897656799237,1,False,https://twitter.com/JerasIkehorn/status/1581825640734478336,retweet (original tweet was not a reply but contained an attachment so there is a URL in the tweet text)
7,2,173,2022-10-17 17:20:43+00:00,False,False,False,,,RegVickers,Worried about high gas prices? Probably wanna vote Democrat. https://t.co/iFlyoJ7JmO,Worried about high gas prices? Probably wanna vote Democrat.,https://twitter.com/twitter/statuses/1582059043656699909,1,False,https://twitter.com/PoliticusSarah/status/1581802895724675072,retweet (original tweet was not a reply but contained an attachment so there is a URL in the tweet text)


## Links

1. [Get text from retweets](https://splunktool.com/how-to-get-full-text-of-tweets-using-tweepy-in-python)
2. [Remove username and url from tweet text](https://www.datasnips.com/59/remove-usernames-http-links-from-tweet-data/)
3. [Get tweet URL](https://www.anycodings.com/1questions/3137307/get-tweet-url-or-tweet-id-using-tweepy)
4. [Count number of URLs in tweet text](https://stackoverflow.com/a/50937560/4057186)