https://towardsdatascience.com/an-extensive-guide-to-collecting-tweets-from-twitter-api-v2-for-academic-research-using-python-3-518fcb71df2a

In [26]:
# For sending GET requests from the API
import requests
# For saving access tokens and for file management when creating and adding to the dataset
import os
# For dealing with json responses we receive from the API
import json
# For displaying the data after
import pandas as pd
# For saving the response data in CSV format
import csv
# For parsing the dates received from twitter in readable formats
import datetime
import dateutil.parser
import unicodedata
#To add wait time between requests
import time

In [27]:
os.environ['TOKEN'] = 'AAAAAAAAAAAAAAAAAAAAAKVLfwEAAAAA9U65FTL2ZrQgx1u1kgA94uCDzt8%3D9g59OKwN90CZ3BeWlqsMQfoFkLzUWhk7oYfz5917X90ygMxium'


In [28]:
def auth():
    return os.getenv('TOKEN')

In [29]:
def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers

In [30]:
def create_url(keyword, start_date, end_date, max_results = 10):
    
    search_url = "https://api.twitter.com/2/tweets/search/recent" #Change to the endpoint you want to collect data from

    #change params based on the endpoint you are using
    query_params = {'query': keyword,
                    'start_time': start_date,
                    'end_time': end_date,
                    'max_results': max_results,
                    'expansions': 'author_id,in_reply_to_user_id,geo.place_id',
                    'tweet.fields': 'id,text,author_id,in_reply_to_user_id,geo,conversation_id,created_at,lang,public_metrics,referenced_tweets,reply_settings,source',
                    'user.fields': 'id,name,username,created_at,description,public_metrics,verified',
                    'place.fields': 'full_name,id,country,country_code,geo,name,place_type',
                    'next_token': {}}
    return (search_url, query_params)

In [31]:
def connect_to_endpoint(url, headers, params, next_token = None):
    params['next_token'] = next_token   #params object received from create_url function
    response = requests.request("GET", url, headers = headers, params = params)
    print("Endpoint Response Code: " + str(response.status_code))
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [44]:
#Inputs for the request
bearer_token = auth()
headers = create_headers(bearer_token)
keyword = "@hm (dress OR dresses OR shirt OR shirts OR pants OR skirt OR skirts OR clothes) lang:en -is:retweet"
start_time = "2022-08-06T00:00:00.000Z"
end_time = "2022-08-11T00:00:00.000Z"
max_results = 15

In [45]:
url = create_url(keyword, start_time,end_time, max_results)

In [46]:
json_response = connect_to_endpoint(url[0], headers, url[1])

Endpoint Response Code: 200


In [47]:
print(json.dumps(json_response, indent=4, sort_keys=True))

{
    "data": [
        {
            "author_id": "536143002",
            "conversation_id": "1557447648801693696",
            "created_at": "2022-08-10T19:23:50.000Z",
            "id": "1557447648801693696",
            "lang": "en",
            "public_metrics": {
                "like_count": 1,
                "quote_count": 0,
                "reply_count": 0,
                "retweet_count": 0
            },
            "reply_settings": "everyone",
            "source": "Twitter for iPhone",
            "text": "Guys @hm just charged me 20p for a small paper bag \ud83d\udc80\ud83d\udc80\ud83d\udc80 what I\u2019m supposed to do, buy clothes and carry em in my hands?"
        },
        {
            "author_id": "1473410868612378704",
            "conversation_id": "1557285552004677642",
            "created_at": "2022-08-10T08:39:43.000Z",
            "id": "1557285552004677642",
            "lang": "en",
            "public_metrics": {
                "like_count": 1,
     

In [48]:
# Create file
tweets = open("data.csv", "a", newline="", encoding='utf-8')
csvWriter = csv.writer(tweets)

#Create headers for the data you want to save, in this example, we only want save these columns in our dataset
csvWriter.writerow(['author id', 'created_at', 'geo', 'id','lang', 'like_count', 'quote_count', 'reply_count','retweet_count','source','tweet'])
tweets.close()

In [49]:
def append_to_csv(json_response, fileName):

    #A counter variable
    counter = 0

    #Open OR create the target CSV file
    tweets = open(fileName, "a", newline="", encoding='utf-8')
    csvWriter = csv.writer(tweets)

    #Loop through each tweet
    for tweet in json_response['data']:
        
        # We will create a variable for each since some of the keys might not exist for some tweets
        # So we will account for that

        # 1. Author ID
        author_id = tweet['author_id']

        # 2. Time created
        created_at = dateutil.parser.parse(tweet['created_at'])

        # 3. Geolocation
        if ('geo' in tweet):   
            geo = tweet['geo']['place_id']
        else:
            geo = " "

        # 4. Tweet ID
        tweet_id = tweet['id']

        # 5. Language
        lang = tweet['lang']

        # 6. Tweet metrics
        retweet_count = tweet['public_metrics']['retweet_count']
        reply_count = tweet['public_metrics']['reply_count']
        like_count = tweet['public_metrics']['like_count']
        quote_count = tweet['public_metrics']['quote_count']

        # 7. source
        source = tweet['source']

        # 8. Tweet text
        text = tweet['text']
        
        # Assemble all data in a list
        res = [author_id, created_at, geo, tweet_id, lang, like_count, quote_count, reply_count, retweet_count, source, text]
        
        # Append the result to the CSV file
        csvWriter.writerow(res)
        counter += 1

    # When done, close the CSV file
    tweets.close()

    # Print the number of tweets for this iteration
    print("# of Tweets added from this response: ", counter) 


In [50]:
append_to_csv(json_response, "tweets.csv")

# of Tweets added from this response:  15


In [51]:
json_response['data']

[{'created_at': '2022-08-10T19:23:50.000Z',
  'conversation_id': '1557447648801693696',
  'source': 'Twitter for iPhone',
  'public_metrics': {'retweet_count': 0,
   'reply_count': 0,
   'like_count': 1,
   'quote_count': 0},
  'lang': 'en',
  'author_id': '536143002',
  'text': 'Guys @hm just charged me 20p for a small paper bag 💀💀💀 what I’m supposed to do, buy clothes and carry em in my hands?',
  'id': '1557447648801693696',
  'reply_settings': 'everyone'},
 {'created_at': '2022-08-10T08:39:43.000Z',
  'conversation_id': '1557285552004677642',
  'source': 'Twitter for iPhone',
  'public_metrics': {'retweet_count': 0,
   'reply_count': 1,
   'like_count': 1,
   'quote_count': 0},
  'lang': 'en',
  'author_id': '1473410868612378704',
  'text': 'Where there is no struggle, there is absolutely no progress \n.\n.\nShirt - @hm \nShorts - @hm \nShades - @hm \nWatch - @michaelkors \nShoes - @nike \n.\n.\n#iwillbegreat #motivated #dedicated #struggle #progress #H&amp;M #michaelkors #nike htt

In [52]:
df = pd.read_csv('tweets.csv')

In [53]:
pd.set_option('display.max_colwidth', None)

In [56]:
df[-35:]

Unnamed: 0,849670936924172289,2022-08-08 23:59:43+00:00,Unnamed: 3,1556792300998639618,en,1,0,0.1,0.2,Twitter for iPhone,same! she dresses like rich me would 😭😍 https://t.co/adQU7hKk1w
86,25786908,2022-08-10 21:18:03+00:00,,1557476394418028545,en,7,0,2,2,Twitter Web App,Please Join my free only fans account for exclusive content each week. Thank you. 💋❤️\nJillian Jordan Xo\n\nkittycash7 on only fans 💕\n❣️🍒🌸🎵\n\n@ZARA Satin Dress 👗\n@SuicideSquadWB @suicidesquadRS\n@HarleyQuinnAC ❣️❤️‍🩹\n#onlyfans #Legs #Feet #Tootsies 👣\n\nhttps://t.co/R7R34fVoy1 https://t.co/m70FcWzlRb
87,1033953826192216064,2022-08-10 18:32:59+00:00,1d9a5370a355ab0c,1557434855620853766,en,0,0,1,0,Twitter for iPhone,My favorite work pants are too big on me now I’m going to cry I don’t have new work pant money @ZARA send me a new pair for free
88,462883774,2022-08-10 15:24:32+00:00,,1557387428574498816,en,0,0,0,0,Twitter for iPhone,Keep making your clothes in Turkey @benetton @ZARA @skims etc https://t.co/zQKlwdj2q0
89,412500781,2022-08-10 06:33:12+00:00,,1557253713089150981,en,3,0,1,0,Twitter for Android,lol @ZARA it's the same dress https://t.co/9bwmNhPCRL
90,196630146,2022-08-09 17:24:47+00:00,,1557055304663179267,en,0,0,1,0,Twitter for iPhone,@ZARA help me out bought this khaki shirt and the same one in like a cream colour after one wash the colour has came proper out. Bad batch ? Do we know ? https://t.co/hCYXe4JWV6
91,2298364800,2022-08-09 12:14:31+00:00,29732b67b6d40135,1556977222602440706,en,0,0,0,0,Instagram,Jus Kewl 🐼\n#shilpajoshi \n\n#shirt #whiteshirt @zara \n#denimjeans @zara \n#bag @dior \n#glasses @rayban \n#shoes #vansonshoes \n\nMahadev 💫 @ The Sky Patio Faridabad https://t.co/Cv3fDXN416
92,4510623076,2022-08-09 11:11:00+00:00,,1556961236687982594,en,0,0,0,0,Twitter for iPhone,@TheSocialCTV @cinqaseptnyc @MaisonValentino @ted_baker @ZARA @ALDO_Shoes Loved the green detailing on Jess’s dress.
93,1459121136168579073,2022-08-09 08:31:14+00:00,,1556921030018482176,en,0,0,0,0,Twitter for Android,"@ZARA just wondered if you had any nice fairy dresses in? Do you post to Darlington?\n\nHahaha win for the little people 1, win for the for greedy billionaires 0. BOOOOM x"
94,1156502233644785665,2022-08-09 06:25:07+00:00,,1556889290369548290,en,19,0,2,1,Twitter Web App,Harry for @ZARA 🙌🏻 Awesome inclusion-having a seamstress on set altering Harry’s clothes so he could ‘be himself with ease’ and feel confident 💙Thank you @ZARA 😭 #InclusionMatters #HisHeartSettled @ZebedeeMan @luckyfinproject @ReachCharity https://t.co/nqBfyiYgJV
95,1245158946,2022-08-08 20:00:00+00:00,,1556731976731840518,en,13,0,2,1,Twitter Web App,New week. Let's get it!\n\nMel \nDress: @cinqaseptnyc\n\nLainey \nDress: Moon River\nShoes: @MaisonValentino\n\nCynthia \nDress: @ted_baker\nShoes: @ZARA\n\nJess\nDress: MARELLA\nShoes: @ALDO_Shoes\n\n#WhatWeWore #OOTD https://t.co/yPo1ZD5Cd0


In [None]:
#explored 15 tweets at ZARA, Everlane, Madewell, and Aerie, all clothing brands that are targeted towards women...
#tweets are mess/not high quality

In [62]:
#tried refining tweets by adding additional paramenters to query, such as asking for different clothing types; not bad
#results with ZARA and H&M

In [57]:
#creating test set of tweets to run though my model

In [58]:
df2= df[-35:]

In [59]:
df2

Unnamed: 0,849670936924172289,2022-08-08 23:59:43+00:00,Unnamed: 3,1556792300998639618,en,1,0,0.1,0.2,Twitter for iPhone,same! she dresses like rich me would 😭😍 https://t.co/adQU7hKk1w
86,25786908,2022-08-10 21:18:03+00:00,,1557476394418028545,en,7,0,2,2,Twitter Web App,Please Join my free only fans account for exclusive content each week. Thank you. 💋❤️\nJillian Jordan Xo\n\nkittycash7 on only fans 💕\n❣️🍒🌸🎵\n\n@ZARA Satin Dress 👗\n@SuicideSquadWB @suicidesquadRS\n@HarleyQuinnAC ❣️❤️‍🩹\n#onlyfans #Legs #Feet #Tootsies 👣\n\nhttps://t.co/R7R34fVoy1 https://t.co/m70FcWzlRb
87,1033953826192216064,2022-08-10 18:32:59+00:00,1d9a5370a355ab0c,1557434855620853766,en,0,0,1,0,Twitter for iPhone,My favorite work pants are too big on me now I’m going to cry I don’t have new work pant money @ZARA send me a new pair for free
88,462883774,2022-08-10 15:24:32+00:00,,1557387428574498816,en,0,0,0,0,Twitter for iPhone,Keep making your clothes in Turkey @benetton @ZARA @skims etc https://t.co/zQKlwdj2q0
89,412500781,2022-08-10 06:33:12+00:00,,1557253713089150981,en,3,0,1,0,Twitter for Android,lol @ZARA it's the same dress https://t.co/9bwmNhPCRL
90,196630146,2022-08-09 17:24:47+00:00,,1557055304663179267,en,0,0,1,0,Twitter for iPhone,@ZARA help me out bought this khaki shirt and the same one in like a cream colour after one wash the colour has came proper out. Bad batch ? Do we know ? https://t.co/hCYXe4JWV6
91,2298364800,2022-08-09 12:14:31+00:00,29732b67b6d40135,1556977222602440706,en,0,0,0,0,Instagram,Jus Kewl 🐼\n#shilpajoshi \n\n#shirt #whiteshirt @zara \n#denimjeans @zara \n#bag @dior \n#glasses @rayban \n#shoes #vansonshoes \n\nMahadev 💫 @ The Sky Patio Faridabad https://t.co/Cv3fDXN416
92,4510623076,2022-08-09 11:11:00+00:00,,1556961236687982594,en,0,0,0,0,Twitter for iPhone,@TheSocialCTV @cinqaseptnyc @MaisonValentino @ted_baker @ZARA @ALDO_Shoes Loved the green detailing on Jess’s dress.
93,1459121136168579073,2022-08-09 08:31:14+00:00,,1556921030018482176,en,0,0,0,0,Twitter for Android,"@ZARA just wondered if you had any nice fairy dresses in? Do you post to Darlington?\n\nHahaha win for the little people 1, win for the for greedy billionaires 0. BOOOOM x"
94,1156502233644785665,2022-08-09 06:25:07+00:00,,1556889290369548290,en,19,0,2,1,Twitter Web App,Harry for @ZARA 🙌🏻 Awesome inclusion-having a seamstress on set altering Harry’s clothes so he could ‘be himself with ease’ and feel confident 💙Thank you @ZARA 😭 #InclusionMatters #HisHeartSettled @ZebedeeMan @luckyfinproject @ReachCharity https://t.co/nqBfyiYgJV
95,1245158946,2022-08-08 20:00:00+00:00,,1556731976731840518,en,13,0,2,1,Twitter Web App,New week. Let's get it!\n\nMel \nDress: @cinqaseptnyc\n\nLainey \nDress: Moon River\nShoes: @MaisonValentino\n\nCynthia \nDress: @ted_baker\nShoes: @ZARA\n\nJess\nDress: MARELLA\nShoes: @ALDO_Shoes\n\n#WhatWeWore #OOTD https://t.co/yPo1ZD5Cd0


In [60]:
# Rename columns
df2.columns = ['author_id', 'created_at', 'geo_location', 'tweet_id', 'lang', 'retweet_count', 'reply_count', 'like_count', 'quote_count', 'source', 'text']

In [61]:
df2

Unnamed: 0,author_id,created_at,geo_location,tweet_id,lang,retweet_count,reply_count,like_count,quote_count,source,text
86,25786908,2022-08-10 21:18:03+00:00,,1557476394418028545,en,7,0,2,2,Twitter Web App,Please Join my free only fans account for exclusive content each week. Thank you. 💋❤️\nJillian Jordan Xo\n\nkittycash7 on only fans 💕\n❣️🍒🌸🎵\n\n@ZARA Satin Dress 👗\n@SuicideSquadWB @suicidesquadRS\n@HarleyQuinnAC ❣️❤️‍🩹\n#onlyfans #Legs #Feet #Tootsies 👣\n\nhttps://t.co/R7R34fVoy1 https://t.co/m70FcWzlRb
87,1033953826192216064,2022-08-10 18:32:59+00:00,1d9a5370a355ab0c,1557434855620853766,en,0,0,1,0,Twitter for iPhone,My favorite work pants are too big on me now I’m going to cry I don’t have new work pant money @ZARA send me a new pair for free
88,462883774,2022-08-10 15:24:32+00:00,,1557387428574498816,en,0,0,0,0,Twitter for iPhone,Keep making your clothes in Turkey @benetton @ZARA @skims etc https://t.co/zQKlwdj2q0
89,412500781,2022-08-10 06:33:12+00:00,,1557253713089150981,en,3,0,1,0,Twitter for Android,lol @ZARA it's the same dress https://t.co/9bwmNhPCRL
90,196630146,2022-08-09 17:24:47+00:00,,1557055304663179267,en,0,0,1,0,Twitter for iPhone,@ZARA help me out bought this khaki shirt and the same one in like a cream colour after one wash the colour has came proper out. Bad batch ? Do we know ? https://t.co/hCYXe4JWV6
91,2298364800,2022-08-09 12:14:31+00:00,29732b67b6d40135,1556977222602440706,en,0,0,0,0,Instagram,Jus Kewl 🐼\n#shilpajoshi \n\n#shirt #whiteshirt @zara \n#denimjeans @zara \n#bag @dior \n#glasses @rayban \n#shoes #vansonshoes \n\nMahadev 💫 @ The Sky Patio Faridabad https://t.co/Cv3fDXN416
92,4510623076,2022-08-09 11:11:00+00:00,,1556961236687982594,en,0,0,0,0,Twitter for iPhone,@TheSocialCTV @cinqaseptnyc @MaisonValentino @ted_baker @ZARA @ALDO_Shoes Loved the green detailing on Jess’s dress.
93,1459121136168579073,2022-08-09 08:31:14+00:00,,1556921030018482176,en,0,0,0,0,Twitter for Android,"@ZARA just wondered if you had any nice fairy dresses in? Do you post to Darlington?\n\nHahaha win for the little people 1, win for the for greedy billionaires 0. BOOOOM x"
94,1156502233644785665,2022-08-09 06:25:07+00:00,,1556889290369548290,en,19,0,2,1,Twitter Web App,Harry for @ZARA 🙌🏻 Awesome inclusion-having a seamstress on set altering Harry’s clothes so he could ‘be himself with ease’ and feel confident 💙Thank you @ZARA 😭 #InclusionMatters #HisHeartSettled @ZebedeeMan @luckyfinproject @ReachCharity https://t.co/nqBfyiYgJV
95,1245158946,2022-08-08 20:00:00+00:00,,1556731976731840518,en,13,0,2,1,Twitter Web App,New week. Let's get it!\n\nMel \nDress: @cinqaseptnyc\n\nLainey \nDress: Moon River\nShoes: @MaisonValentino\n\nCynthia \nDress: @ted_baker\nShoes: @ZARA\n\nJess\nDress: MARELLA\nShoes: @ALDO_Shoes\n\n#WhatWeWore #OOTD https://t.co/yPo1ZD5Cd0


In [63]:
df2.to_csv('./data/sample_tweet_dataset.csv', index=False)