https://towardsdatascience.com/an-extensive-guide-to-collecting-tweets-from-twitter-api-v2-for-academic-research-using-python-3-518fcb71df2a

In [1]:
# For sending GET requests from the API
import requests
# For saving access tokens and for file management when creating and adding to the dataset
import os
# For dealing with json responses we receive from the API
import json
# For displaying the data after
import pandas as pd
# For saving the response data in CSV format
import csv
# For parsing the dates received from twitter in readable formats
import datetime
import dateutil.parser
import unicodedata
#To add wait time between requests
import time

In [2]:
os.environ['TOKEN'] = 'AAAAAAAAAAAAAAAAAAAAAKVLfwEAAAAA9U65FTL2ZrQgx1u1kgA94uCDzt8%3D9g59OKwN90CZ3BeWlqsMQfoFkLzUWhk7oYfz5917X90ygMxium'


In [3]:
def auth():
    return os.getenv('TOKEN')

In [4]:
def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers

In [5]:
def create_url(keyword, start_date, end_date, max_results = 10):
    
    search_url = "https://api.twitter.com/2/tweets/search/recent" #Change to the endpoint you want to collect data from

    #change params based on the endpoint you are using
    query_params = {'query': keyword,
                    'start_time': start_date,
                    'end_time': end_date,
                    'max_results': max_results,
                    'expansions': 'author_id,in_reply_to_user_id,geo.place_id',
                    'tweet.fields': 'id,text,author_id,in_reply_to_user_id,geo,conversation_id,created_at,lang,public_metrics,referenced_tweets,reply_settings,source',
                    'user.fields': 'id,name,username,created_at,description,public_metrics,verified',
                    'place.fields': 'full_name,id,country,country_code,geo,name,place_type',
                    'next_token': {}}
    return (search_url, query_params)

In [6]:
def connect_to_endpoint(url, headers, params, next_token = None):
    params['next_token'] = next_token   #params object received from create_url function
    response = requests.request("GET", url, headers = headers, params = params)
    print("Endpoint Response Code: " + str(response.status_code))
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [47]:
#Inputs for the request
bearer_token = auth()
headers = create_headers(bearer_token)
keyword = "@aerie lang:en -is:retweet"
start_time = "2022-08-06T00:00:00.000Z"
end_time = "2022-08-11T00:00:00.000Z"
max_results = 15

In [48]:
url = create_url(keyword, start_time,end_time, max_results)

In [49]:
json_response = connect_to_endpoint(url[0], headers, url[1])

Endpoint Response Code: 200


In [50]:
print(json.dumps(json_response, indent=4, sort_keys=True))

{
    "data": [
        {
            "author_id": "19499305",
            "conversation_id": "1557516176409956353",
            "created_at": "2022-08-10T23:56:08.000Z",
            "id": "1557516176409956353",
            "in_reply_to_user_id": "28668072",
            "lang": "en",
            "public_metrics": {
                "like_count": 0,
                "quote_count": 0,
                "reply_count": 1,
                "retweet_count": 0
            },
            "reply_settings": "everyone",
            "source": "Twitter for iPhone",
            "text": "@Aerie I ordered a Henley online in a size large. It got delivered yesterday and I opened it today and the price tag says L but the actual shirt is a M. What do. I need to do? I\u2019m super sad. https://t.co/wrEAZpODf9"
        },
        {
            "author_id": "18108977",
            "conversation_id": "1557493407135801345",
            "created_at": "2022-08-10T23:47:55.000Z",
            "id": "1557514111067504640

In [51]:
json_response['data'][0]['created_at']

'2022-08-10T23:56:08.000Z'

In [52]:
# Create file
tweets = open("data.csv", "a", newline="", encoding='utf-8')
csvWriter = csv.writer(tweets)

#Create headers for the data you want to save, in this example, we only want save these columns in our dataset
csvWriter.writerow(['author id', 'created_at', 'geo', 'id','lang', 'like_count', 'quote_count', 'reply_count','retweet_count','source','tweet'])
tweets.close()

In [53]:
def append_to_csv(json_response, fileName):

    #A counter variable
    counter = 0

    #Open OR create the target CSV file
    tweets = open(fileName, "a", newline="", encoding='utf-8')
    csvWriter = csv.writer(tweets)

    #Loop through each tweet
    for tweet in json_response['data']:
        
        # We will create a variable for each since some of the keys might not exist for some tweets
        # So we will account for that

        # 1. Author ID
        author_id = tweet['author_id']

        # 2. Time created
        created_at = dateutil.parser.parse(tweet['created_at'])

        # 3. Geolocation
        if ('geo' in tweet):   
            geo = tweet['geo']['place_id']
        else:
            geo = " "

        # 4. Tweet ID
        tweet_id = tweet['id']

        # 5. Language
        lang = tweet['lang']

        # 6. Tweet metrics
        retweet_count = tweet['public_metrics']['retweet_count']
        reply_count = tweet['public_metrics']['reply_count']
        like_count = tweet['public_metrics']['like_count']
        quote_count = tweet['public_metrics']['quote_count']

        # 7. source
        source = tweet['source']

        # 8. Tweet text
        text = tweet['text']
        
        # Assemble all data in a list
        res = [author_id, created_at, geo, tweet_id, lang, like_count, quote_count, reply_count, retweet_count, source, text]
        
        # Append the result to the CSV file
        csvWriter.writerow(res)
        counter += 1

    # When done, close the CSV file
    tweets.close()

    # Print the number of tweets for this iteration
    print("# of Tweets added from this response: ", counter) 


In [54]:
append_to_csv(json_response, "tweets.csv")

# of Tweets added from this response:  15


In [55]:
df = pd.read_csv('tweets.csv')

In [56]:
pd.set_option('display.max_colwidth', None)

In [57]:
df

Unnamed: 0,849670936924172289,2022-08-08 23:59:43+00:00,Unnamed: 3,1556792300998639618,en,1,0,0.1,0.2,Twitter for iPhone,same! she dresses like rich me would 😭😍 https://t.co/adQU7hKk1w
0,1174554679822307329,2022-08-08 23:59:30+00:00,,1556792249106644992,en,1,0,0,1,Twitter for iPhone,@LionBrosPodcast Summer dresses
1,1236458791962644480,2022-08-08 23:59:22+00:00,,1556792215120265217,en,2,0,1,0,Twitter for Android,"one direction, all time low, 5sos, the hunger games, the best marvel era, frozen, skater dresses and skinny jeans, everyone had the same white samsung galaxy s3..."
2,1306498176854175744,2022-08-08 23:59:19+00:00,,1556792202524872704,en,0,0,0,0,Twitter for iPhone,I bought 2 dresses for 1 gala bc I just couldn't decide &amp; one might feel better on. It's my logic &amp; I won't apologize for it 😌
3,1357815347991183366,2022-08-08 23:58:34+00:00,,1556792011734261763,en,0,0,0,0,Twitter for iPhone,@gabrielafotoo @EmmetCohen He always dresses so well
4,1376947999901937671,2022-08-08 23:58:29+00:00,,1556791994327781377,en,0,0,0,0,Poshmark,Check out this listing I just added to my #Poshmark closet: Zara T-Shirt Maxi Dress Beige. https://t.co/gJUwbAKhWK #shopmycloset @poshmarkapp
...,...,...,...,...,...,...,...,...,...,...,...
81,2870708554,2022-08-10 18:37:53+00:00,,1557436088410464256,en,13,0,1,0,Twitter for Android,"When I told people I wanted to establish a library in an area that is still terrorized by Turkish airstrikes, people looked at me like I was insane. Then @Aerie wrote me a $20k check. Shoot your shot, &amp; don't fall into the trap of self-rejecting your wildest dreams. /fin"
82,1059265803974008833,2022-08-10 16:24:47+00:00,,1557402591880945664,en,0,0,0,0,Twitter for iPhone,@Aerie pls restock the patchwork triangle bikini top 🥲
83,1277261987840503808,2022-08-10 14:57:27+00:00,,1557380612150988802,en,0,0,0,0,Twitter Web App,@Aerie When did you stop using normal sized models? Very disappointed you decided to stop being body positive
84,1536361441539854336,2022-08-10 14:04:34+00:00,,1557367302978420739,en,0,0,0,0,Twitter Web App,@ameliexrose @Aerie Are you as bored as I am in this quaraantine?


In [None]:
#explored 15 tweets at ZARA, Everlane, Madewell, and Aerie, all clothing brands that are targeted towards women...
#tweets are mess/not high quality