In [1]:
import configparser

# For sending GET requests from the API
import requests
# For saving access tokens and for file management when creating and adding to the dataset
import os
# For dealing with json responses we receive from the API
import json
# For displaying the data after
import pandas as pd
# For saving the response data in CSV format
import csv
# For parsing the dates received from twitter in readable formats
import datetime
import dateutil.parser
import unicodedata
#To add wait time between requests
import time

In [2]:
# setting token variable
config = configparser.RawConfigParser()
config.read('config.ini')
os.environ['TOKEN'] = config['twitter']['bearer_token']


# function to retrieve the token
def auth():
    return os.getenv('TOKEN')


# function to create headers
def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers


# function to create URL
def create_url(keywords, max_results = 100):
    
    search_url = "https://api.twitter.com/2/tweets/search/recent" #Change to the endpoint you want to collect data from

    #change params based on the endpoint you are using
    query_params = {'query': keywords,
                    #'start_time': start_date,
                    #'end_time': end_date,
                    'max_results': max_results,
                    'expansions': 'author_id,in_reply_to_user_id,geo.place_id',
                    'tweet.fields': 'id,text,author_id,in_reply_to_user_id,geo,context_annotations,conversation_id,created_at,lang,public_metrics,referenced_tweets,reply_settings,source,possibly_sensitive,entities,attachments',
                    'user.fields': 'id,name,username,created_at,description,public_metrics,verified',
                    'place.fields': 'full_name,id,country,country_code,geo,name,place_type',
                    'next_token': {}}
    return (search_url, query_params)


# function to connect to endpoint
def connect_to_endpoint(url, headers, params, next_token = None):
    params['next_token'] = next_token   #params object received from create_url function
    response = requests.request("GET", url, headers = headers, params = params)
    print("Endpoint Response Code: " + str(response.status_code))

    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
        
    return response.json()


##### EXPLORING THE DATA WITH A 1ST QUERY

In [3]:
# FIRST QUERY

# setting the keywords as a variable
keywords = '''terror OR alarm OR panic OR unease OR scare OR afraid of OR careful about \ lang:en'''

# inputs for the request
bearer_token = auth()
headers = create_headers(bearer_token)
keyword = keywords
max_results = 100

# getting and printing the response from the API
url = create_url(keyword, max_results)
print(url)
json_response = connect_to_endpoint(url[0], headers, url[1])
print(json.dumps(json_response, indent=4, sort_keys=True))

# getting the next token
next_token = json_response['meta']['next_token']

('https://api.twitter.com/2/tweets/search/recent', {'query': 'terror OR alarm OR panic OR unease OR scare OR afraid of OR careful about \\ lang:en', 'max_results': 100, 'expansions': 'author_id,in_reply_to_user_id,geo.place_id', 'tweet.fields': 'id,text,author_id,in_reply_to_user_id,geo,context_annotations,conversation_id,created_at,lang,public_metrics,referenced_tweets,reply_settings,source,possibly_sensitive,entities,attachments', 'user.fields': 'id,name,username,created_at,description,public_metrics,verified', 'place.fields': 'full_name,id,country,country_code,geo,name,place_type', 'next_token': {}})
Endpoint Response Code: 200
{
    "data": [
        {
            "attachments": {
                "media_keys": [
                    "3_1563562866556276736"
                ]
            },
            "author_id": "1555323426784456704",
            "context_annotations": [
                {
                    "domain": {
                        "description": "Named people in the wo

In [11]:
# first tweet
json_response['data'][0]

# useful to keep: author_id, id, text, created_at, context_annotations, public_metrics?, conversation_id?

{'text': '@RonFilipkowski @joncoopertweets This is Domestic Terror: Donald Trump, the biggest CON MAN in American History. His people bought it hook, line, and sinker. https://t.co/8zHzE3aJ0u',
 'conversation_id': '1562459481031581697',
 'lang': 'en',
 'entities': {'annotations': [{'start': 58,
    'end': 69,
    'probability': 0.9914,
    'type': 'Person',
    'normalized_text': 'Donald Trump'}],
  'mentions': [{'start': 0,
    'end': 15,
    'username': 'RonFilipkowski',
    'id': '1298372735383605249'},
   {'start': 16, 'end': 32, 'username': 'joncoopertweets', 'id': '27493883'}],
  'urls': [{'start': 158,
    'end': 181,
    'url': 'https://t.co/8zHzE3aJ0u',
    'expanded_url': 'https://twitter.com/TTj76447322/status/1563562921644343296/photo/1',
    'display_url': 'pic.twitter.com/8zHzE3aJ0u',
    'media_key': '3_1563562866556276736'}]},
 'context_annotations': [{'domain': {'id': '10',
    'name': 'Person',
    'description': 'Named people in the world like Nelson Mandela'},
   'e

In [12]:
json_response.keys()

dict_keys(['data', 'includes', 'meta'])

In [13]:
json_response['includes']['users'][0]

{'name': 'T J',
 'verified': False,
 'created_at': '2022-08-04T22:43:19.000Z',
 'id': '1555323426784456704',
 'description': 'Arizona Hardworking Individual',
 'public_metrics': {'followers_count': 5,
  'following_count': 40,
  'tweet_count': 387,
  'listed_count': 0},
 'username': 'TTj76447322'}

In [10]:
json_response['meta']

{'newest_id': '1563562921644343296',
 'oldest_id': '1563562790719143938',
 'result_count': 100,
 'next_token': 'b26v89c19zqg8o3fpz8l5pz0ro12g5kk7x9snhgikcg3h'}

In [14]:
json_response['meta']['next_token']

'b26v89c19zqg8o3fpz8l5pz0ro12g5kk7x9snhgikcg3h'

In [15]:
json_response['data'][0].keys()

dict_keys(['text', 'conversation_id', 'lang', 'entities', 'context_annotations', 'attachments', 'referenced_tweets', 'source', 'id', 'public_metrics', 'possibly_sensitive', 'in_reply_to_user_id', 'author_id', 'created_at', 'reply_settings'])

In [16]:
json_response['data'][0]['public_metrics'].keys()

dict_keys(['retweet_count', 'reply_count', 'like_count', 'quote_count'])

In [17]:
json_response["data"][0]["context_annotations"]

[{'domain': {'id': '10',
   'name': 'Person',
   'description': 'Named people in the world like Nelson Mandela'},
  'entity': {'id': '799022225751871488',
   'name': 'Donald Trump',
   'description': '45th US President, Donald Trump'}},
 {'domain': {'id': '10',
   'name': 'Person',
   'description': 'Named people in the world like Nelson Mandela'},
  'entity': {'id': '1138106947234779137', 'name': 'Jon Cooper'}},
 {'domain': {'id': '35',
   'name': 'Politician',
   'description': 'Politicians in the world, like Joe Biden'},
  'entity': {'id': '799022225751871488',
   'name': 'Donald Trump',
   'description': '45th US President, Donald Trump'}},
 {'domain': {'id': '35',
   'name': 'Politician',
   'description': 'Politicians in the world, like Joe Biden'},
  'entity': {'id': '1138106947234779137', 'name': 'Jon Cooper'}},
 {'domain': {'id': '131',
   'name': 'Unified Twitter Taxonomy',
   'description': 'A taxonomy view into the Semantic Core knowledge graph'},
  'entity': {'id': '799022

######## creating dataframe

column_names = [col for col in json_response['data'][0].keys() if col != 'public_metrics'] + [col for col in json_response['data'][0]['public_metrics'].keys()]
print(column_names)

data = pd.DataFrame(columns=column_names)
data.head()

In [9]:
# creating dataframe
data_1 = pd.DataFrame()

raw_data = json_response['data']

for tweet in raw_data:
    data_1 = data_1.append(tweet, ignore_index=True)

#data.drop(['public_metrics'], axis=1, inplace=True)
print(data.shape)
data_1.head()

(100, 16)


Unnamed: 0,text,conversation_id,lang,entities,context_annotations,attachments,referenced_tweets,source,id,public_metrics,possibly_sensitive,in_reply_to_user_id,author_id,created_at,reply_settings,geo
0,@RonFilipkowski @joncoopertweets This is Domes...,1562459481031581697,en,"{'annotations': [{'start': 58, 'end': 69, 'pro...","[{'domain': {'id': '10', 'name': 'Person', 'de...",{'media_keys': ['3_1563562866556276736']},"[{'type': 'replied_to', 'id': '156245948103158...",Twitter Web App,1563562921644343296,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",0.0,1.2983727353836052e+18,1555323426784456704,2022-08-27T16:23:44.000Z,everyone,
1,RT @ConorGogarty: “The pain of Lily being take...,1563562919924678657,en,"{'annotations': [{'start': 31, 'end': 34, 'pro...",,,"[{'type': 'retweeted', 'id': '1563131038213505...",Twitter Web App,1563562919924678657,"{'retweet_count': 244, 'reply_count': 0, 'like...",0.0,,1102298694743924737,2022-08-27T16:23:44.000Z,everyone,
2,Habitual tax cheats are now in panic mode!!!\n...,1563562919207395331,en,"{'urls': [{'start': 49, 'end': 72, 'url': 'htt...","[{'domain': {'id': '10', 'name': 'Person', 'de...",,"[{'type': 'quoted', 'id': '1563544617555210240'}]",Twitter for iPhone,1563562919207395331,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",0.0,,961113187453165569,2022-08-27T16:23:44.000Z,everyone,
3,RT @calvinrobinson: This is a fight between go...,1563562917106434054,en,"{'mentions': [{'start': 3, 'end': 18, 'usernam...",,,"[{'type': 'retweeted', 'id': '1563226584445636...",Twitter for iPad,1563562917106434054,"{'retweet_count': 2072, 'reply_count': 0, 'lik...",0.0,,1475050585565057035,2022-08-27T16:23:43.000Z,everyone,
4,RT @GizAfg: @farid4035 @ABaerbock @NancyFaeser...,1563562916565368833,en,"{'hashtags': [{'start': 101, 'end': 109, 'tag'...",,,"[{'type': 'retweeted', 'id': '1563555321410617...",Twitter for Android,1563562916565368833,"{'retweet_count': 2, 'reply_count': 0, 'like_c...",0.0,,1474360256016924678,2022-08-27T16:23:43.000Z,everyone,


#### OBTAINING 10000 TWEETS

In [4]:
# function to get ~10000 tweets

def get_10k():
    
    c = 0

    jsons = []

    while c <= 100:

        if c == 0:
            next_token = None
        else:
            next_token = json_response['meta']['next_token']

        # setting the keywords as a variable
        keywords = '''terror OR alarm OR panic OR unease OR scare OR afraid of OR careful about \ lang:en'''

        # inputs for the request
        bearer_token = auth()
        headers = create_headers(bearer_token)
        keyword = keywords
        max_results = 100

        # getting and printing the response from the API
        url = create_url(keyword, max_results)
        
        json_response = connect_to_endpoint(url[0], headers, url[1], next_token = next_token)
        
        jsons.append(json_response)
        
        c += 1

        time.sleep(5)

    return jsons 

In [5]:
# running function
ten_thousand = get_10k()

Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Code: 200
Endpoint Response Co

In [38]:
# creating dataframe for the 10 000 tweets
data = pd.DataFrame()

for i in range(0, len(ten_thousand)-1):
  raw_data_ = ten_thousand[i]['data']

  for tweet in raw_data_:
      data = data.append(tweet, ignore_index=True)

#data.drop(['public_metrics'], axis=1, inplace=True)
print(data.shape)
data.head()

(9985, 17)


Unnamed: 0,public_metrics,entities,lang,text,conversation_id,created_at,possibly_sensitive,author_id,reply_settings,context_annotations,referenced_tweets,source,id,in_reply_to_user_id,attachments,geo,withheld
0,"{'retweet_count': 4, 'reply_count': 0, 'like_c...","{'mentions': [{'start': 3, 'end': 14, 'usernam...",en,RT @MlkzyHywad: @GOPChairwoman The only cause ...,1563562988434780160,2022-08-27T16:24:00.000Z,0.0,1483439745900462091,everyone,"[{'domain': {'id': '10', 'name': 'Person', 'de...","[{'type': 'retweeted', 'id': '1563551195914186...",Twitter for Android,1563562988434780160,,,,
1,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",,en,The true duality of man is living day-to-day i...,1563562986916028419,2022-08-27T16:24:00.000Z,0.0,550210137,everyone,,,Twitter for iPhone,1563562986916028419,,,,
2,"{'retweet_count': 3, 'reply_count': 0, 'like_c...","{'mentions': [{'start': 3, 'end': 19, 'usernam...",en,RT @musclesnnursing: People keep asking when h...,1563562986765033473,2022-08-27T16:24:00.000Z,0.0,22447783,everyone,,"[{'type': 'retweeted', 'id': '1563329867612954...",Twitter for iPhone,1563562986765033473,,,,
3,"{'retweet_count': 444, 'reply_count': 0, 'like...","{'mentions': [{'start': 3, 'end': 16, 'usernam...",en,RT @Logically_JC: If you are afraid of sociali...,1563562986513453061,2022-08-27T16:24:00.000Z,0.0,1504505694837522432,everyone,,"[{'type': 'retweeted', 'id': '1563522431578361...",Twitter Web App,1563562986513453061,,,,
4,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",,en,"what're you doin' tonight, hey, boy?\nset my a...",1563562986182127618,2022-08-27T16:24:00.000Z,0.0,1531878468882923520,everyone,,,"Cheap Bots, Done Quick!",1563562986182127618,,,,


In [39]:
# unpacking the public metrics
df = data.copy()
df = df.public_metrics.dropna().apply(pd.Series)
print(df.shape)
df.head()

(9985, 4)


Unnamed: 0,retweet_count,reply_count,like_count,quote_count
0,4,0,0,0
1,0,0,0,0
2,3,0,0,0
3,444,0,0,0
4,0,0,0,0


In [40]:
# dropping the dict column 
data_ = data.drop('public_metrics', axis=1)
data_.head()

Unnamed: 0,entities,lang,text,conversation_id,created_at,possibly_sensitive,author_id,reply_settings,context_annotations,referenced_tweets,source,id,in_reply_to_user_id,attachments,geo,withheld
0,"{'mentions': [{'start': 3, 'end': 14, 'usernam...",en,RT @MlkzyHywad: @GOPChairwoman The only cause ...,1563562988434780160,2022-08-27T16:24:00.000Z,0.0,1483439745900462091,everyone,"[{'domain': {'id': '10', 'name': 'Person', 'de...","[{'type': 'retweeted', 'id': '1563551195914186...",Twitter for Android,1563562988434780160,,,,
1,,en,The true duality of man is living day-to-day i...,1563562986916028419,2022-08-27T16:24:00.000Z,0.0,550210137,everyone,,,Twitter for iPhone,1563562986916028419,,,,
2,"{'mentions': [{'start': 3, 'end': 19, 'usernam...",en,RT @musclesnnursing: People keep asking when h...,1563562986765033473,2022-08-27T16:24:00.000Z,0.0,22447783,everyone,,"[{'type': 'retweeted', 'id': '1563329867612954...",Twitter for iPhone,1563562986765033473,,,,
3,"{'mentions': [{'start': 3, 'end': 16, 'usernam...",en,RT @Logically_JC: If you are afraid of sociali...,1563562986513453061,2022-08-27T16:24:00.000Z,0.0,1504505694837522432,everyone,,"[{'type': 'retweeted', 'id': '1563522431578361...",Twitter Web App,1563562986513453061,,,,
4,,en,"what're you doin' tonight, hey, boy?\nset my a...",1563562986182127618,2022-08-27T16:24:00.000Z,0.0,1531878468882923520,everyone,,,"Cheap Bots, Done Quick!",1563562986182127618,,,,


In [41]:
# creating the final dataframe that will be used fot EDA and ML
final_df = pd.concat([data_, df], axis=1)
print(final_df.shape)
final_df.head()

(9985, 20)


Unnamed: 0,entities,lang,text,conversation_id,created_at,possibly_sensitive,author_id,reply_settings,context_annotations,referenced_tweets,source,id,in_reply_to_user_id,attachments,geo,withheld,retweet_count,reply_count,like_count,quote_count
0,"{'mentions': [{'start': 3, 'end': 14, 'usernam...",en,RT @MlkzyHywad: @GOPChairwoman The only cause ...,1563562988434780160,2022-08-27T16:24:00.000Z,0.0,1483439745900462091,everyone,"[{'domain': {'id': '10', 'name': 'Person', 'de...","[{'type': 'retweeted', 'id': '1563551195914186...",Twitter for Android,1563562988434780160,,,,,4,0,0,0
1,,en,The true duality of man is living day-to-day i...,1563562986916028419,2022-08-27T16:24:00.000Z,0.0,550210137,everyone,,,Twitter for iPhone,1563562986916028419,,,,,0,0,0,0
2,"{'mentions': [{'start': 3, 'end': 19, 'usernam...",en,RT @musclesnnursing: People keep asking when h...,1563562986765033473,2022-08-27T16:24:00.000Z,0.0,22447783,everyone,,"[{'type': 'retweeted', 'id': '1563329867612954...",Twitter for iPhone,1563562986765033473,,,,,3,0,0,0
3,"{'mentions': [{'start': 3, 'end': 16, 'usernam...",en,RT @Logically_JC: If you are afraid of sociali...,1563562986513453061,2022-08-27T16:24:00.000Z,0.0,1504505694837522432,everyone,,"[{'type': 'retweeted', 'id': '1563522431578361...",Twitter Web App,1563562986513453061,,,,,444,0,0,0
4,,en,"what're you doin' tonight, hey, boy?\nset my a...",1563562986182127618,2022-08-27T16:24:00.000Z,0.0,1531878468882923520,everyone,,,"Cheap Bots, Done Quick!",1563562986182127618,,,,,0,0,0,0


In [44]:
# saving the data in a csv file
final_df.to_csv('C:/Users/camil/Documents/Courses/Projects/NLP/twitter_fear_sentiment_analysis/final_df.csv', sep=",", header=True, index=False)    
