In [None]:
# For sending GET requests from the API
import requests
# For saving access tokens and for file management when creating and adding to the dataset
import os
# For dealing with json responses we receive from the API
import json
# For displaying the data after
import pandas as pd
# For saving the response data in CSV format
import csv
# For parsing the dates received from twitter in readable formats
import datetime
import dateutil.parser
import unicodedata
#To add wait time between requests
import time

In [None]:
def auth():
    """Return the Twitter API bearer token used to authorise requests.

    The token is read from the ``TWITTER_BEARER_TOKEN`` environment variable so
    the credential never has to be written into the notebook.

    Returns:
        The token string, or an empty string when the variable is not set.
    """
    return os.environ.get("TWITTER_BEARER_TOKEN", "")

In [None]:
def create_headers(bearer_token):
    """Build the HTTP headers that carry the bearer token.

    Args:
        bearer_token: Token placed after the ``Bearer`` scheme.

    Returns:
        A dict with a single ``Authorization`` entry.
    """
    return {"Authorization": f"Bearer {bearer_token}"}

In [None]:
def create_url(keyword, start_date, end_date, max_results = 10):
    """Return the search endpoint and the query parameters for one request.

    Args:
        keyword: Search query, sent as the 'query' parameter.
        start_date: Value sent as 'start_time'.
        end_date: Value sent as 'end_time'.
        max_results: Value sent as 'max_results' (default 10).

    Returns:
        A ``(search_url, query_params)`` tuple. 'next_token' starts out as an
        empty dict placeholder.
    """
    # Change to the endpoint you want to collect data from
    endpoint = "https://api.twitter.com/2/tweets/search/recent"

    # Field lists are kept as tuples and joined into the comma-separated form the API expects
    expansions = ('author_id', 'in_reply_to_user_id', 'geo.place_id')
    tweet_fields = ('id', 'text', 'author_id', 'in_reply_to_user_id', 'geo',
                    'conversation_id', 'created_at', 'lang', 'public_metrics',
                    'referenced_tweets', 'reply_settings', 'source')
    user_fields = ('id', 'name', 'username', 'created_at', 'description',
                   'public_metrics', 'verified')
    place_fields = ('full_name', 'id', 'country', 'country_code', 'geo',
                    'name', 'place_type')

    # Change these parameters based on the endpoint you are using
    request_args = {}
    request_args['query'] = keyword
    request_args['start_time'] = start_date
    request_args['end_time'] = end_date
    request_args['max_results'] = max_results
    request_args['expansions'] = ','.join(expansions)
    request_args['tweet.fields'] = ','.join(tweet_fields)
    request_args['user.fields'] = ','.join(user_fields)
    request_args['place.fields'] = ','.join(place_fields)
    request_args['next_token'] = {}
    return (endpoint, request_args)

In [None]:
def connect_to_endpoint(url, headers, params, next_token = None, timeout = 30):
    """Send one GET request to the API and return the decoded JSON body.

    Args:
        url: Endpoint to call.
        headers: HTTP headers, including the Authorization header.
        params: Query parameters (the params object received from create_url).
            The dict is copied, so the caller's object is never modified.
        next_token: Pagination token for the page to fetch, or None for the
            first page.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The parsed JSON response.

    Raises:
        Exception: With ``(status_code, response_text)`` when the response
            status is not 200.
    """
    # Work on a copy: writing next_token into the caller's dict would leak state between calls
    request_params = {**params, 'next_token': next_token}
    response = requests.request("GET", url, headers = headers, params = request_params, timeout = timeout)
    print("Endpoint Response Code: " + str(response.status_code))
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [None]:
def append_to_csv(json_response, fileName):
    """Append the tweets of one API response to a CSV file.

    One row is written per tweet, in the column order: author id, created_at,
    geo (place id), tweet id, lang, like_count, quote_count, reply_count,
    retweet_count, source, text.

    Args:
        json_response: Decoded API response. A response without a 'data' key is
            treated as containing zero tweets.
        fileName: Path of the CSV file; it is created if it does not exist and
            appended to otherwise.
    """
    # Placeholder written for optional fields that a tweet does not carry
    missing = " "

    tweets = json_response.get('data', [])

    # Open OR create the target CSV file; the with-block closes it even if a row fails
    with open(fileName, "a", newline="", encoding='utf-8') as csvFile:
        csvWriter = csv.writer(csvFile)

        # Loop through each tweet
        for tweet in tweets:
            metrics = tweet['public_metrics']

            # 'geo' may be absent, or present without a 'place_id' entry
            geo = (tweet.get('geo') or {}).get('place_id', missing)

            # Assemble all data in a list and append it to the CSV file
            csvWriter.writerow([
                tweet['author_id'],
                dateutil.parser.parse(tweet['created_at']),
                geo,
                tweet['id'],
                tweet['lang'],
                metrics['like_count'],
                metrics['quote_count'],
                metrics['reply_count'],
                metrics['retweet_count'],
                tweet.get('source', missing),
                tweet['text'],
            ])

    # Print the number of tweets for this iteration
    print("# of Tweets added from this response: ", len(tweets))

In [None]:
# Total number of tweets we collected from the loop
total_tweets = 0

# Output file; rows are appended, so the results of earlier runs are kept
output_file = "ek_tweets6.csv"

# Create the file if needed. A brand-new (or empty) file gets the header row once,
# because the cells that read it back select columns such as 'created_at' and 'tweet' by name.
needs_header = not os.path.exists(output_file) or os.path.getsize(output_file) == 0
with open(output_file, "a", newline="", encoding='utf-8') as csvFile:
    csvWriter = csv.writer(csvFile)
    if needs_header:
        csvWriter.writerow(['author id', 'created_at', 'geo', 'id','lang', 'like_count', 'quote_count', 'reply_count','retweet_count','source','tweet'])

# Inputs
count = 0 # Counting tweets per time period
max_count = 100 # Max tweets per time period
max_results = 100
flag = True
next_token = None
# Inputs for the request
bearer_token = auth()
headers = create_headers(bearer_token)
keyword = "to:EmiratesSupport lang:en"
start_time = "2022-11-17T02:00:00Z"
end_time = "2022-11-19T19:00:00Z"

# Keep requesting pages until the last page was read or max_count is reached
while flag:
    # Check if max_count reached
    if count >= max_count:
        break
    print("-------------------")
    print("Token: ", next_token)
    url = create_url(keyword, start_time,end_time, max_results)
    print(url)
    json_response = connect_to_endpoint(url[0], headers, url[1], next_token)
    result_count = json_response['meta']['result_count']

    # Save the token to use for the next call; it is missing on the last page
    next_token = json_response['meta'].get('next_token')
    if next_token is not None:
        print("Next Token: ", next_token)

    if result_count is not None and result_count > 0:
        print("Start Date: ", start_time)
        append_to_csv(json_response, output_file)
        count += result_count
        total_tweets += result_count
        print("Total # of Tweets added: ", total_tweets)
        print("-------------------")

    if next_token is None:
        # No further page: stop here. Looping again would re-request the same
        # page and append the same tweets a second time.
        flag = False
    else:
        # Pause between requests
        time.sleep(5)



-------------------
Token:  None
('https://api.twitter.com/2/tweets/search/recent', {'query': 'to:EmiratesSupport lang:en', 'start_time': '2022-11-17T02:00:00Z', 'end_time': '2022-11-19T19:00:00Z', 'max_results': 100, 'expansions': 'author_id,in_reply_to_user_id,geo.place_id', 'tweet.fields': 'id,text,author_id,in_reply_to_user_id,geo,conversation_id,created_at,lang,public_metrics,referenced_tweets,reply_settings,source', 'user.fields': 'id,name,username,created_at,description,public_metrics,verified', 'place.fields': 'full_name,id,country,country_code,geo,name,place_type', 'next_token': {}})
Endpoint Response Code: 200
-------------------
Start Date:  2022-11-17T02:00:00Z
# of Tweets added from this response:  56
Total # of Tweets added:  56
-------------------
-------------------
Token:  None
('https://api.twitter.com/2/tweets/search/recent', {'query': 'to:EmiratesSupport lang:en', 'start_time': '2022-11-17T02:00:00Z', 'end_time': '2022-11-19T19:00:00Z', 'max_results': 100, 'expansio

In [None]:
# Load the collected tweets back from disk for inspection
df = pd.read_csv(filepath_or_buffer='ek_tweets6.csv')

In [None]:
# Display the loaded frame as the cell output
df

Unnamed: 0,author id,created_at,geo,id,lang,like_count,quote_count,reply_count,retweet_count,source,tweet
0,1544151176,2022-10-12 11:17:35+00:00,,1.58E+18,en,0,0,0,0,Twitter for Android,@EmiratesSupport Thank you for your quick resp...
1,128183810,2022-10-12 11:14:33+00:00,,1.58E+18,en,0,0,0,1,Twitter for Android,RT @smartykapu: @EmiratesSupport We have alrea...
2,66006408,2022-10-12 10:56:58+00:00,,1.58E+18,en,0,0,0,0,Twitter for iPhone,@EmiratesSupport So being silver member where ...
3,304316064,2022-10-12 10:26:11+00:00,,1.58E+18,en,1,1,0,0,Twitter for Android,@EmiratesSupport @emirates have attempted book...
4,1.17E+18,2022-10-12 10:01:02+00:00,,1.58E+18,en,0,0,0,0,Twitter for Android,@EmiratesSupport did thanks
...,...,...,...,...,...,...,...,...,...,...,...
551,53265812,2022-10-23 06:57:31+00:00,,1584076534905655297,en,0,0,1,0,Twitter for iPhone,@EmiratesSupport unimpressed with business cla...
552,231900594,2022-10-23 05:04:39+00:00,,1584048133691973632,en,0,0,1,0,Twitter for iPhone,@EmiratesSupport @MbalulaFikile I have sent th...
553,1246743462990565377,2022-10-23 04:42:49+00:00,,1584042638499401728,en,1,0,1,0,Twitter for Android,@EmiratesSupport @Charlieweeks9 Hi. I have dm'...
554,85753326,2022-10-23 03:21:27+00:00,,1584022161982779392,en,0,0,0,0,Twitter for iPhone,@EmiratesSupport @EmiratesSupport @emirates @a...


In [None]:
# Drop rows that are exact duplicates across every column, keeping the first occurrence
_df = df.drop_duplicates(keep='first')

In [None]:
# (rows, columns) of the de-duplicated frame
_df.shape

(934, 11)

In [None]:
# Save the de-duplicated tweets to a new file, without the pandas index column
_df.to_csv(path_or_buf='ek_tweets7.csv', index=False)

In [None]:
# Rows whose 'created_at' text contains the date 2022-10-19
_df.loc[lambda frame: frame['created_at'].str.contains("2022-10-19")]

Unnamed: 0,author id,created_at,geo,id,lang,like_count,quote_count,reply_count,retweet_count,source,tweet
597,1293530507309965320,2022-10-19 12:35:05+00:00,,1582711934431817733,en,0,0,0,0,Twitter for iPhone,@EmiratesSupport It’s like boomerang. You guys...
598,35179551,2022-10-19 11:39:17+00:00,,1582697894200496131,en,0,0,1,0,Twitter for iPhone,@EmiratesSupport Been days since I did as you ...
599,1222267527063121922,2022-10-19 11:38:47+00:00,,1582697768899923968,en,0,0,0,0,Twitter for Android,@EmiratesSupport @NatashasLegacy Appreciate ca...
600,2935263391,2022-10-19 10:56:28+00:00,,1582687120015077376,en,0,0,1,0,Twitter for Android,@EmiratesSupport Hello i am traveling from spa...
601,885003445765378048,2022-10-19 05:08:13+00:00,,1582599476447178752,en,0,0,1,0,Twitter for Android,@EmiratesSupport Hello. Good morning .Next mo...
602,261261302,2022-10-19 04:34:30+00:00,,1582590991034613760,en,0,0,1,0,Twitter Web App,"@EmiratesSupport Hi team, I have sent you the ..."
603,1556824246579609603,2022-10-19 03:53:37+00:00,,1582580702360797185,en,0,0,1,0,Twitter for iPhone,"@EmiratesSupport No, I do not need help with a..."
604,1283966208,2022-10-19 02:33:20+00:00,,1582560499539267585,en,0,0,1,0,Twitter for iPhone,@EmiratesSupport can i fly to Dubai from ghan...
605,65670150,2022-10-19 02:31:43+00:00,,1582560094126211072,en,0,0,1,0,Twitter for iPhone,"@EmiratesSupport hi emirates, please kindly re..."
606,1328264822350176256,2022-10-19 02:02:32+00:00,,1582552750063173633,en,0,0,1,0,Twitter Web App,@EmiratesSupport are your check-in staff updat...


In [None]:
# Tweet text column of the rows that remain after dropping exact duplicates
df.drop_duplicates().loc[:, 'tweet']

0      @EmiratesSupport Thank you for your quick resp...
1      RT @smartykapu: @EmiratesSupport We have alrea...
2      @EmiratesSupport So being silver member where ...
3      @EmiratesSupport @emirates have attempted book...
4                            @EmiratesSupport did thanks
                             ...                        
463    @EmiratesSupport I'd like to talk about my dis...
464    @EmiratesSupport @AWPatel Do you ever resolve ...
465    @EmiratesSupport My baggage is lost from 1 mon...
466    @EmiratesSupport Check DMs please @EmiratesSup...
467    @EmiratesSupport Hi I am unable to get confirm...
Name: tweet, Length: 238, dtype: object

In [None]:
# Read the bearer token from the TWITTER_BEARER_TOKEN environment variable rather than
# embedding the secret in the notebook. A token that has been saved in a shared or
# committed notebook should be treated as compromised and regenerated.
token = os.environ.get('TWITTER_BEARER_TOKEN', '')

In [None]:
# Recent-search request for tweets addressed to EmiratesSupport within a fixed time window
url = (
    'https://api.twitter.com/2/tweets/search/recent'
    '?query=to:EmiratesSupport'
    '&max_results=100'
    '&start_time=2022-10-12T00:00:00Z'
    '&end_time=2022-10-12T11:25:00Z'
)

In [None]:
# Request headers: JSON content type plus the bearer token for authorisation
headers = dict([
    ('Content-Type', 'application/json'),
    ('Authorization', 'Bearer ' + token),
])

In [None]:
# The timeout keeps this cell from hanging indefinitely when the server never answers
response = requests.get(url, headers=headers, timeout=30)

ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))

In [None]:
# Fail here on an HTTP error status instead of treating an error payload as search results
response.raise_for_status()
# Decode the JSON body, the same way connect_to_endpoint does
json_data = response.json()

In [None]:
# Collect just the text of every tweet in the response
tweets = [tweet['text'] for tweet in json_data['data']]

98

In [None]:
# 'next_token' is only present while more pages remain; None marks the last page
pagination_token = json_data['meta'].get('next_token')

'b26v89c19zqg8o3fpzbn1g6ns3cxbn2zgz3rlqctkxf25'

In [None]:
# URL of a follow-up page request. A bare URL is not valid Python, so it is kept as a string.
next_page_url = (
    'https://api.twitter.com/2/tweets/search/recent'
    '?query=to:EmiratesSupport'
    '&max_results=100'
    '&start_time=2022-10-06T00:00:00Z'
    '&end_time=2022-10-12T11:25:00Z'
    '&pagination_token=b26v89c19zqg8o3fpzbn1g6ns3cxbn2zgz3rlqctkxf25'
)

In [None]:
# NOTE(review): looks like a quick connectivity check unrelated to the Twitter API — confirm it is still needed.
# The timeout keeps the cell from hanging indefinitely when the host does not answer.
x = requests.get('https://w3schools.com', timeout=30)