https://towardsdatascience.com/an-extensive-guide-to-collecting-tweets-from-twitter-api-v2-for-academic-research-using-python-3-518fcb71df2a

In [2]:
# For sending GET requests from the API
import requests
# For saving access tokens and for file management when creating and adding to the dataset
import os
# For dealing with json responses we receive from the API
import json
# For displaying the data after
import pandas as pd
# For saving the response data in CSV format
import csv
# For parsing the dates received from twitter in readable formats
import datetime
import dateutil.parser
import unicodedata
#To add wait time between requests
import time

In [3]:
# SECURITY: the bearer token is a secret and must not be stored in the notebook.
# Use the TOKEN environment variable when the kernel already has it; otherwise
# prompt for it without echoing. Any token that was ever committed in this cell
# should be treated as leaked and regenerated.
if not os.environ.get('TOKEN'):
    from getpass import getpass  # only needed for the interactive fallback
    os.environ['TOKEN'] = getpass('Twitter API bearer token: ')


In [4]:
def auth():
    """Return the bearer token stored in the ``TOKEN`` environment variable.

    Returns ``None`` when the variable is not set.
    """
    return os.environ.get('TOKEN')

In [5]:
def create_headers(bearer_token):
    """Build the request headers dict carrying ``bearer_token`` as a Bearer authorization."""
    return {"Authorization": f"Bearer {bearer_token}"}

In [6]:
def create_url(keyword, start_date, end_date, max_results = 10):
    """Return the search endpoint URL together with its query parameters.

    Args:
        keyword: Search query, sent as the ``query`` parameter.
        start_date: Sent as ``start_time``.
        end_date: Sent as ``end_time``.
        max_results: Sent as ``max_results``; defaults to 10.

    Returns:
        tuple: ``(search_url, query_params)`` where ``query_params`` is a dict.
    """
    # Change to the endpoint you want to collect data from
    search_url = "https://api.twitter.com/2/tweets/search/recent"

    # Field lists per parameter; change them based on the endpoint you are using
    expansions = ("author_id", "in_reply_to_user_id", "geo.place_id")
    tweet_fields = (
        "id", "text", "author_id", "in_reply_to_user_id", "geo",
        "conversation_id", "created_at", "lang", "public_metrics",
        "referenced_tweets", "reply_settings", "source",
    )
    user_fields = (
        "id", "name", "username", "created_at", "description",
        "public_metrics", "verified",
    )
    place_fields = (
        "full_name", "id", "country", "country_code", "geo", "name",
        "place_type",
    )

    query_params = {
        'query': keyword,
        'start_time': start_date,
        'end_time': end_date,
        'max_results': max_results,
        'expansions': ",".join(expansions),
        'tweet.fields': ",".join(tweet_fields),
        'user.fields': ",".join(user_fields),
        'place.fields': ",".join(place_fields),
        # Placeholder only; connect_to_endpoint supplies the real value per request
        'next_token': {},
    }
    return search_url, query_params

In [7]:
def connect_to_endpoint(url, headers, params, next_token = None, timeout = 30):
    """Send a GET request to ``url`` and return the decoded JSON body.

    Args:
        url: Endpoint URL to request.
        headers: HTTP headers sent with the request.
        params: Query parameters (e.g. the dict returned by ``create_url``).
            The dict is copied and never modified.
        next_token: Value sent as the ``next_token`` query parameter; when
            ``None`` the parameter is left out of the request.
        timeout: Seconds to wait for the server before giving up, passed to
            ``requests``. Without it a stalled connection blocks forever.

    Returns:
        The response body parsed from JSON.

    Raises:
        Exception: with ``(status_code, response_text)`` when the response
            status is not 200.
    """
    # Work on a copy so repeated calls do not leak a previous next_token
    # (or any other change) into the caller's params dict.
    query = dict(params)
    if next_token is None:
        query.pop('next_token', None)
    else:
        query['next_token'] = next_token

    response = requests.request("GET", url, headers = headers, params = query, timeout = timeout)
    print("Endpoint Response Code: " + str(response.status_code))
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [8]:
# Inputs for the request
bearer_token = auth()
headers = create_headers(bearer_token)
# Search query: the @hm handle combined with a group of clothing terms,
# followed by the lang:en and -is:retweet operators.
keyword = (
    "@hm "
    "(dress OR dresses OR shirt OR shirts OR pants OR skirt OR skirts OR clothes) "
    "lang:en -is:retweet"
)
# NOTE(review): fixed dates - confirm the endpoint still serves this window.
start_time, end_time = "2022-08-12T00:00:00.000Z", "2022-08-16T00:00:00.000Z"
max_results = 10

In [9]:
# Build the (endpoint URL, query parameters) tuple for the inputs above
url = create_url(keyword, start_time, end_time, max_results=max_results)

In [10]:
# url[0] is the endpoint and url[1] the query parameters returned by create_url
json_response = connect_to_endpoint(url[0], headers=headers, params=url[1])

Endpoint Response Code: 200


In [11]:
# Pretty-print the whole response with sorted keys and 4-space indentation
print(
    json.dumps(json_response, sort_keys=True, indent=4)
)

{
    "data": [
        {
            "author_id": "1525515243912298499",
            "conversation_id": "1559284907788083204",
            "created_at": "2022-08-15T21:04:26.000Z",
            "id": "1559284907788083204",
            "lang": "en",
            "public_metrics": {
                "like_count": 4,
                "quote_count": 0,
                "reply_count": 0,
                "retweet_count": 1
            },
            "reply_settings": "everyone",
            "source": "Twitter for iPhone",
            "text": "Corinna Brown - Short twill skirt in black/white checked by @hm - https://t.co/sDY2CJMMWV https://t.co/ZDirTLevCR"
        },
        {
            "author_id": "912295575697203201",
            "conversation_id": "1559270690381086721",
            "created_at": "2022-08-15T20:07:57.000Z",
            "id": "1559270690381086721",
            "in_reply_to_user_id": "14399483",
            "lang": "en",
            "public_metrics": {
                "like_co

In [13]:
# Inspect the top-level keys of the parsed response
json_response.keys()

dict_keys(['data', 'includes', 'meta'])

In [15]:
# The 'data' entry holds the list of tweet records
json_response['data']

[{'id': '1559284907788083204',
  'created_at': '2022-08-15T21:04:26.000Z',
  'author_id': '1525515243912298499',
  'text': 'Corinna Brown - Short twill skirt in black/white checked by @hm - https://t.co/sDY2CJMMWV https://t.co/ZDirTLevCR',
  'public_metrics': {'retweet_count': 1,
   'reply_count': 0,
   'like_count': 4,
   'quote_count': 0},
  'reply_settings': 'everyone',
  'lang': 'en',
  'source': 'Twitter for iPhone',
  'conversation_id': '1559284907788083204'},
 {'id': '1559270690381086721',
  'in_reply_to_user_id': '14399483',
  'created_at': '2022-08-15T20:07:57.000Z',
  'author_id': '912295575697203201',
  'text': '@hm when will the pants shown here be available online? https://t.co/pxlPkhUcKr',
  'public_metrics': {'retweet_count': 0,
   'reply_count': 0,
   'like_count': 0,
   'quote_count': 0},
  'reply_settings': 'everyone',
  'lang': 'en',
  'source': 'Twitter for iPhone',
  'conversation_id': '1559270690381086721'},
 {'id': '1559246999165206530',
  'created_at': '2022-08-

In [16]:
# The 'includes' entry holds the expanded objects (the output shows a 'users' list)
json_response['includes']

{'users': [{'id': '1525515243912298499',
   'name': 'fashion of heartstopper',
   'created_at': '2022-05-14T16:36:30.000Z',
   'public_metrics': {'followers_count': 179,
    'following_count': 19,
    'tweet_count': 144,
    'listed_count': 1},
   'username': 'fashionofhs',
   'description': '#1 source for all heartstopper cast and character clothing needs 👕👖👗👟',
   'verified': False},
  {'id': '912295575697203201',
   'name': 'Abigail',
   'created_at': '2017-09-25T12:39:49.000Z',
   'public_metrics': {'followers_count': 4,
    'following_count': 6,
    'tweet_count': 263,
    'listed_count': 0},
   'username': 'Abigail91127920',
   'description': 'Student & Muma ✨',
   'verified': False},
  {'id': '14399483',
   'name': 'H&M',
   'created_at': '2008-04-15T18:34:00.000Z',
   'public_metrics': {'followers_count': 8011196,
    'following_count': 327,
    'tweet_count': 14001,
    'listed_count': 10192},
   'username': 'hm',
   'description': 'Welcome to our #HM world – we’re all about f

In [17]:
# The 'meta' entry holds result metadata (the output shows result_count and a next_token)
json_response['meta']

{'newest_id': '1559284907788083204',
 'oldest_id': '1558647641663279108',
 'result_count': 10,
 'next_token': 'b26v89c19zqg8o3fpz5nho3o2sarmyw82839y6zf73ji5'}

In [18]:
# Check the container type of the tweet records
type(json_response['data'])

list

In [19]:
# Check the type of a single tweet record
type(json_response['data'][0])

dict

In [20]:
# One row per tweet record returned for the @hm query
df = pd.DataFrame(data=json_response['data'])

In [21]:
# Display the DataFrame built from the response
df

Unnamed: 0,id,created_at,author_id,text,public_metrics,reply_settings,lang,source,conversation_id,in_reply_to_user_id,referenced_tweets,geo
0,1559284907788083204,2022-08-15T21:04:26.000Z,1525515243912298499,Corinna Brown - Short twill skirt in black/whi...,"{'retweet_count': 1, 'reply_count': 0, 'like_c...",everyone,en,Twitter for iPhone,1559284907788083204,,,
1,1559270690381086721,2022-08-15T20:07:57.000Z,912295575697203201,@hm when will the pants shown here be availabl...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",everyone,en,Twitter for iPhone,1559270690381086721,14399483.0,,
2,1559246999165206530,2022-08-15T18:33:48.000Z,1549124854812639232,#outfitoftheday @Macys jewelry @Coach handbag ...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",everyone,en,Twitter for Android,1559246999165206530,,,
3,1559185096284016641,2022-08-15T14:27:49.000Z,1443595240137449472,Hot tip: @hm has a Garment Collection program ...,"{'retweet_count': 1, 'reply_count': 0, 'like_c...",everyone,en,Twitter Web App,1559185096284016641,,,
4,1559123491324309504,2022-08-15T10:23:02.000Z,2283241081,My sister who loves @hm shops every month from...,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",everyone,en,Twitter for iPhone,1559122718385385472,2283241081.0,"[{'type': 'replied_to', 'id': '155912271838538...",
5,1558984167085355008,2022-08-15T01:09:24.000Z,2426396108,3/ We gladly accept hand-me-ups from family &a...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",everyone,en,Twitter Web App,1558977104275918848,2426396108.0,"[{'type': 'replied_to', 'id': '155898135753084...",
6,1558872612540907520,2022-08-14T17:46:07.000Z,87229885,"Here for linen, fresh whites and sandals 🕊\n\n...","{'retweet_count': 0, 'reply_count': 0, 'like_c...",everyone,en,Instagram,1558872612540907520,,,"{'place_id': '315b740b108481f6', 'coordinates'..."
7,1558830828259385344,2022-08-14T15:00:05.000Z,419663786,"Life's a party, so dress for it!\n\nShirt, pan...","{'retweet_count': 0, 'reply_count': 0, 'like_c...",everyone,en,Emplifi,1558830828259385344,,,
8,1558765315416174596,2022-08-14T10:39:46.000Z,1557223542625669121,Won the contest of the prettiest chick wearing...,"{'retweet_count': 5, 'reply_count': 1, 'like_c...",everyone,en,Twitter for Android,1558765315416174596,,,
9,1558647641663279108,2022-08-14T02:52:10.000Z,321152335,@hm your baby clothes are going to be the reas...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",everyone,en,Twitter for iPhone,1558647641663279108,14399483.0,,


In [22]:
# Next request: repeat the same search for @zara

In [26]:
# Inputs for the request
bearer_token = auth()
headers = create_headers(bearer_token)
# Search query: the @zara handle combined with a group of clothing terms,
# followed by the lang:en and -is:retweet operators.
keyword = (
    "@zara "
    "(dress OR dresses OR shirt OR shirts OR pants OR skirt OR skirts OR clothes) "
    "lang:en -is:retweet"
)
# NOTE(review): fixed dates - confirm the endpoint still serves this window.
start_time, end_time = "2022-08-12T00:00:00.000Z", "2022-08-16T00:00:00.000Z"
max_results = 10

In [27]:
# Build the (endpoint URL, query parameters) tuple for the inputs above
url = create_url(keyword, start_time, end_time, max_results=max_results)

In [28]:
# url[0] is the endpoint and url[1] the query parameters returned by create_url
json_response = connect_to_endpoint(url[0], headers=headers, params=url[1])

Endpoint Response Code: 200


In [29]:
# The 'data' entry holds the list of tweet records for this request
json_response['data']

[{'public_metrics': {'retweet_count': 0,
   'reply_count': 1,
   'like_count': 0,
   'quote_count': 0},
  'lang': 'en',
  'author_id': '243868052',
  'in_reply_to_user_id': '346742249',
  'created_at': '2022-08-15T23:31:49.000Z',
  'conversation_id': '1559321996667895809',
  'text': '@ZARA I ordered the 1st dress (only offered in paisley) and received the 2nd. Customer support refused to waive the return fee for a dress I did not order and told me that I “received the correct dress.” 🤯 Accusing your customers of lying isn’t a good look - please fix this. https://t.co/sJjM4PxuwG',
  'source': 'Twitter for iPhone',
  'id': '1559321996667895809',
  'reply_settings': 'everyone'},
 {'public_metrics': {'retweet_count': 0,
   'reply_count': 0,
   'like_count': 0,
   'quote_count': 0},
  'lang': 'en',
  'author_id': '18142537',
  'referenced_tweets': [{'type': 'replied_to', 'id': '1559315661565730817'}],
  'in_reply_to_user_id': '18142537',
  'created_at': '2022-08-15T23:10:42.000Z',
  'conver

In [30]:
# One row per tweet record returned for the @zara query
df2 = pd.DataFrame(data=json_response['data'])

In [31]:
# Display the DataFrame built from the second response
df2

Unnamed: 0,public_metrics,lang,author_id,in_reply_to_user_id,created_at,conversation_id,text,source,id,reply_settings,referenced_tweets
0,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",en,243868052,346742249.0,2022-08-15T23:31:49.000Z,1559321996667895809,@ZARA I ordered the 1st dress (only offered in...,Twitter for iPhone,1559321996667895809,everyone,
1,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",en,18142537,18142537.0,2022-08-15T23:10:42.000Z,1559315661565730817,.@Zara. The second thing that maybe you should...,Twitter Web App,1559316683453448192,everyone,"[{'type': 'replied_to', 'id': '155931566156573..."
2,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",en,1245158946,,2022-08-15T20:00:01.000Z,1559268696824193024,This is us kicking off the last week of Season...,Twitter Web App,1559268696824193024,everyone,
3,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",en,117858927,346742249.0,2022-08-15T18:22:32.000Z,1559244161659932672,@ZARA ilsym for my kids clothes but pls pls pl...,Twitter for iPhone,1559244161659932672,everyone,
4,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",en,1482476401747976194,1.482476401747976e+18,2022-08-15T16:46:17.000Z,1559219376108306432,1/\nWeb3 Fashion by day 💃🏻 and NFT Fashion by...,Twitter Web App,1559219940368027652,everyone,"[{'type': 'replied_to', 'id': '155921937610830..."
5,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",en,248489436,346742249.0,2022-08-15T16:35:15.000Z,1559217165387046914,@ZARA I deserve a discount code or something. ...,Twitter for iPhone,1559217165387046914,everyone,
6,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",en,248489436,,2022-08-15T16:33:24.000Z,1559216697743216641,Ordered a dress for my vacation from @zara … t...,Twitter for iPhone,1559216697743216641,everyone,
7,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",en,1502689995991269385,,2022-08-15T16:31:04.000Z,1559216111031304192,Look at shirt in @ZARA when @goodmindsnft TV’s...,Twitter for iPhone,1559216111031304192,everyone,
8,"{'retweet_count': 0, 'reply_count': 2, 'like_c...",en,21354066,346742249.0,2022-08-15T13:24:24.000Z,1559169135317319681,@ZARA shirt arriving creased to hell is one th...,Twitter for iPhone,1559169135317319681,everyone,
9,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",en,847306909266788355,,2022-08-15T06:17:55.000Z,1559061807125262337,"Let your spirit Fly with Peace, Happiness and ...",Twitter for Android,1559061807125262337,everyone,


In [33]:
# Combine the two DataFrames (df and df2) into one

In [32]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is
# the supported equivalent. ignore_index renumbers the rows 0..n-1 and sort=False
# keeps the column order as first encountered.
# NOTE: this cell is not idempotent - re-running it appends df2 to df again.
df = pd.concat([df, df2], ignore_index=True, sort=False)


In [34]:
# Display the combined DataFrame
df

Unnamed: 0,id,created_at,author_id,text,public_metrics,reply_settings,lang,source,conversation_id,in_reply_to_user_id,referenced_tweets,geo
0,1559284907788083204,2022-08-15T21:04:26.000Z,1525515243912298499,Corinna Brown - Short twill skirt in black/whi...,"{'retweet_count': 1, 'reply_count': 0, 'like_c...",everyone,en,Twitter for iPhone,1559284907788083204,,,
1,1559270690381086721,2022-08-15T20:07:57.000Z,912295575697203201,@hm when will the pants shown here be availabl...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",everyone,en,Twitter for iPhone,1559270690381086721,14399483.0,,
2,1559246999165206530,2022-08-15T18:33:48.000Z,1549124854812639232,#outfitoftheday @Macys jewelry @Coach handbag ...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",everyone,en,Twitter for Android,1559246999165206530,,,
3,1559185096284016641,2022-08-15T14:27:49.000Z,1443595240137449472,Hot tip: @hm has a Garment Collection program ...,"{'retweet_count': 1, 'reply_count': 0, 'like_c...",everyone,en,Twitter Web App,1559185096284016641,,,
4,1559123491324309504,2022-08-15T10:23:02.000Z,2283241081,My sister who loves @hm shops every month from...,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",everyone,en,Twitter for iPhone,1559122718385385472,2283241081.0,"[{'type': 'replied_to', 'id': '155912271838538...",
5,1558984167085355008,2022-08-15T01:09:24.000Z,2426396108,3/ We gladly accept hand-me-ups from family &a...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",everyone,en,Twitter Web App,1558977104275918848,2426396108.0,"[{'type': 'replied_to', 'id': '155898135753084...",
6,1558872612540907520,2022-08-14T17:46:07.000Z,87229885,"Here for linen, fresh whites and sandals 🕊\n\n...","{'retweet_count': 0, 'reply_count': 0, 'like_c...",everyone,en,Instagram,1558872612540907520,,,"{'place_id': '315b740b108481f6', 'coordinates'..."
7,1558830828259385344,2022-08-14T15:00:05.000Z,419663786,"Life's a party, so dress for it!\n\nShirt, pan...","{'retweet_count': 0, 'reply_count': 0, 'like_c...",everyone,en,Emplifi,1558830828259385344,,,
8,1558765315416174596,2022-08-14T10:39:46.000Z,1557223542625669121,Won the contest of the prettiest chick wearing...,"{'retweet_count': 5, 'reply_count': 1, 'like_c...",everyone,en,Twitter for Android,1558765315416174596,,,
9,1558647641663279108,2022-08-14T02:52:10.000Z,321152335,@hm your baby clothes are going to be the reas...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",everyone,en,Twitter for iPhone,1558647641663279108,14399483.0,,


In [35]:
# to_csv does not create missing directories, so make sure the target folder
# exists before writing; exist_ok keeps this safe to re-run.
os.makedirs('../data', exist_ok=True)
# index=False leaves the row index out of the CSV
df.to_csv('../data/tweet_dataset.csv', index=False)