In [1]:
import html
import re

import pandas as pd
import tweepy

## Read the Bearer Token for Authentication

In [2]:
# Load the API bearer token from a local file so the secret is never
# hard-coded in the notebook.
# `.strip()` matters: text files usually end with a newline, and a token with
# trailing whitespace would be sent verbatim in the authorization header.
with open("./bearer_token.txt", encoding="utf-8") as file:
    BEARER_TOKEN = file.read().strip()

## Connect to the Twitter Client

In [3]:
# Create the Twitter API client, authenticating with the bearer token.
client = tweepy.Client(
    bearer_token=BEARER_TOKEN,
)

## Read Tweets
This retrieves up to 100 of the most recent tweets from the past week that are written in English, contain the hashtag "#iphone" or "#apple", and are not replies. The query can be at most 512 characters long.

In [4]:
# Search filter: tweets tagged #iphone or #Apple, in English, that carry
# hashtags and are not replies.
search_query = '(#iphone OR #Apple) lang:en has:hashtags -is:reply'

# Extra per-tweet fields to request alongside the default id and text.
requested_fields = ['created_at', 'possibly_sensitive', 'public_metrics', 'entities']

tweets = client.search_recent_tweets(
    query=search_query,
    tweet_fields=requested_fields,
    max_results=100,
)

## Convert the Tweets to a Pandas DataFrame

In [5]:
def preprocess_tweet(sen):
    '''Strip Twitter markup from a tweet, leaving plain alphanumeric words.

    Processing steps, in order:
      1. Decode HTML entities (e.g. "&amp;" -> "&") so that escaped
         punctuation does not survive as stray words such as "amp".
      2. Remove a leading-style "RT @user: " retweet tag and any "@user"
         mention.
      3. Replace URLs, and every character that is not an ASCII letter,
         digit, space or tab, with a space.
      4. Collapse whitespace runs into a single space and trim both ends.

    Letter case is preserved, digits are kept, and no stop-word filtering
    is performed.

    Args:
        sen: Raw tweet text.

    Returns:
        The cleaned text as a string (possibly empty).
    '''
    # Tweet text can arrive HTML-escaped; decode before stripping punctuation,
    # otherwise "&amp;" would be turned into the bogus token "amp".
    sentence = html.unescape(sen)

    # Remove RT tag and "@" username mentions.
    # Raw strings keep "\w" a regex escape rather than an invalid str escape.
    sentence = re.sub(r"(RT @\w+: )|(@\w+)", " ", sentence)

    # Remove special characters and URLs. Both alternatives stay in a single
    # pattern so the left-to-right matching order is unchanged.
    sentence = re.sub(r"([^0-9A-Za-z \t])|(\w+://\S+)", " ", sentence)

    # Collapse the runs of whitespace left behind by the substitutions above.
    sentence = re.sub(r"\s{2,}", " ", sentence)

    # The substitutions may leave a space at either end of the sentence.
    return sentence.strip()

In [6]:
# `tweets.data` is None (not an empty list) when the search matched nothing;
# fall back to an empty list so this cell still yields an empty, correctly
# shaped DataFrame instead of raising TypeError.
found_tweets = tweets.data or []

# One row per tweet, in the same order as the column names given below.
raw_tweets = [
    [
        t.id,
        t.text,
        preprocess_tweet(t.text),
        t.created_at,
        t.possibly_sensitive,
        t.public_metrics['retweet_count'],
        t.public_metrics['reply_count'],
        t.public_metrics['like_count'],
        t.public_metrics['quote_count'],
        # `entities` may be None or lack a "hashtags" key; treat both as
        # "no hashtags" rather than failing on the membership test.
        [tag['tag'] for tag in (t.entities or {}).get("hashtags", [])],
    ]
    for t in found_tweets
]

df = pd.DataFrame(raw_tweets, columns=[
    "id", "text", "clean_text", "created_at", "is_sensitive",
    "retweet_count", "reply_count", "like_count", "quote_count",
    "hashtags"
])

### Preview the DataFrame

In [7]:
# Bare last expression: the notebook renders the DataFrame as this cell's output.
df

Unnamed: 0,id,text,clean_text,created_at,is_sensitive,retweet_count,reply_count,like_count,quote_count,hashtags
0,1593098408189120514,RT @groundzerofm: #NowPlaying: University of T...,NowPlaying University of Texas at Austin Earth...,2022-11-17 04:27:13+00:00,False,1,0,0,0,"[NowPlaying, Alexa, Android, Apple]"
1,1593098273359007745,RT @orfonline: 🚨 #COP27 PolicyPod: Is the worl...,COP27 PolicyPod Is the world climate disaster ...,2022-11-17 04:26:41+00:00,False,25,0,0,0,"[COP27, climate]"
2,1593098232405831681,"The controversy of #MLS and #Apple +, my opini...",The controversy of MLS and Apple my opinion via,2022-11-17 04:26:31+00:00,False,0,0,0,0,"[MLS, Apple]"
3,1593098049509031936,"Pixel7pro is big mistake?\nNot solved, indian ...",Pixel7pro is big mistake Not solved indian cus...,2022-11-17 04:25:47+00:00,False,0,0,0,0,"[teampixel, sunderpichai, googlepixel7pro, goo..."
4,1593097989958291456,RT @Tian_A1: BrainKids Educative Game Now avai...,BrainKids Educative Game Now available Apple A...,2022-11-17 04:25:33+00:00,False,3,0,0,0,"[Apple, Google]"
...,...,...,...,...,...,...,...,...,...,...
95,1593088146547380225,#Apple (@Apple) Watch : #SteveWozniak (@stevew...,Apple Watch SteveWozniak est un fan,2022-11-17 03:46:26+00:00,False,0,0,0,0,"[Apple, SteveWozniak]"
96,1593087913214238721,That’s how #Apple so wealthy. https://t.co/zlK...,That s how Apple so wealthy,2022-11-17 03:45:31+00:00,False,0,0,1,0,[Apple]
97,1593087783010082816,. @Apple rolls out #iPhone emergency SOS satel...,rolls out iPhone emergency SOS satellite alert...,2022-11-17 03:45:00+00:00,False,0,0,1,0,"[iPhone, Mobile, Technology]"
98,1593087425072746497,RT @TechInRL: How to Find your Apple Watch! (U...,How to Find your Apple Watch Updated applewatc...,2022-11-17 03:43:34+00:00,False,2,0,0,0,"[applewatchseries7, AppleWatch, AppleWatchSeri..."


### Save the Tweets DataFrame for Testing

In [8]:
# Write the frame to disk as a JSON array with one object per tweet.
df.to_json(path_or_buf="test_data.json", orient="records")