In [1]:
# For sending GET requests from the API
import requests
# For saving access tokens and for file management when creating and adding to the dataset
import os
from dotenv import load_dotenv, find_dotenv
#For dealing with json responses we receive from the API
import json
# For displaying the data after
import pandas as pd
# For saving the response data in CSV format
import csv
# For parsing the dates received from twitter in readable formats
import datetime
import dateutil.parser
import unicodedata
#To add wait time between requests
import time
import flair
import re

In [2]:
load_dotenv(find_dotenv())

True

In [3]:
def auth():
    return os.getenv('TOKEN')

In [4]:
def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers

In [5]:
def create_url(keyword,start_date, end_date, max_results = 10):
    
    search_url = "https://api.twitter.com/2/tweets/search/recent" #Change to the endpoint you want to collect data from

    #change params based on the endpoint you are using
    query_params = {'query': keyword,
                    'start_time': start_date,
                    'end_time': end_date,
                    'max_results': max_results,
                    'tweet.fields': 'id,text,created_at',
                    'next_token': {}}
    return (search_url, query_params)

In [6]:
def connect_to_endpoint(url, headers, params, next_token = None):
    params['next_token'] = next_token   #params object received from create_url function
    response = requests.request("GET", url, headers = headers, params = params)
    print("Endpoint Response Code: " + str(response.status_code))
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [7]:
bearer_token = auth()
headers = create_headers(bearer_token)
keyword = "tesla lang:en"
start_time = "2022-08-24T00:00:00.000Z"
end_time = "2022-08-28T00:00:00.000Z"
max_results = 15

In [8]:
url = create_url(keyword, start_time,end_time, max_results)
json_response = connect_to_endpoint(url[0], headers, url[1])

Endpoint Response Code: 200


In [9]:
pd = pd.DataFrame(json_response['data'])

In [10]:
# Text before it's "cleaned"
pd

Unnamed: 0,created_at,id,text
0,2022-08-27T23:59:59.000Z,1563677741324681217,@krumping4 @legendfietss @agusnox @Bama_Sass @...
1,2022-08-27T23:59:58.000Z,1563677736195047424,@Bama_Sass @elonmusk @Tesla 1 ton death machines.
2,2022-08-27T23:59:56.000Z,1563677725717499906,"@elonmusk Elon, I want my Tesla car to be able..."
3,2022-08-27T23:59:50.000Z,1563677702418092032,RT @Bama_Sass: My grandfather’s #TeslaPlaid ca...
4,2022-08-27T23:59:49.000Z,1563677696231718912,RT @WholeMarsBlog: How is it that the writers ...
5,2022-08-27T23:59:49.000Z,1563677696084811778,@ValueAnalyst1 know full well that legacy auto...
6,2022-08-27T23:59:48.000Z,1563677694969032704,Would you use a smart security surveillance sy...
7,2022-08-27T23:59:45.000Z,1563677681568608257,RT @MikeCarlton01: “Nobody iss making electric...
8,2022-08-27T23:59:43.000Z,1563677673938980864,@agusnox @Bama_Sass @elonmusk @Tesla You cant ...
9,2022-08-27T23:59:42.000Z,1563677668582821888,as a man why do you have a tesla?


In [11]:
sentiment_model = flair.models.TextClassifier.load('en-sentiment')

2022-08-29 23:51:36,018 loading file C:\Users\amy34\.flair\models\sentiment-en-mix-distillbert_4.pt


In [12]:
def clean(tweet):
    whitespace = re.compile(r"\s+")
    web_address = re.compile(r"(?i)http(s):\/\/[a-z0-9.~_\-\/]+")
    tesla = re.compile(r"(?i)@Tesla(?=\b)")
    user = re.compile(r"(?i)@[a-z0-9_]+")

    # we then use the sub method to replace anything matching
    tweet = whitespace.sub(' ', tweet)
    tweet = web_address.sub('', tweet)
    tweet = tesla.sub('Tesla', tweet)
    tweet = user.sub('', tweet)
    return tweet
    

In [13]:
# we will append probability and sentiment preds later
probs = []
sentiments = []

# use regex expressions (in clean function) to clean tweets
pd['text'] = pd['text'].apply(clean)

for tweet in pd['text'].to_list():
    # make prediction
    sentence = flair.data.Sentence(tweet)
    sentiment_model.predict(sentence)
    # extract sentiment prediction
    probs.append(sentence.labels[0].score)  # numerical score 0-1
    sentiments.append(sentence.labels[0].value)  # 'POSITIVE' or 'NEGATIVE'

# add probability and sentiment predictions to tweets dataframe
pd['probability'] = probs
pd['sentiment'] = sentiments

In [14]:
# Text after it's "cleaned"
pd

Unnamed: 0,created_at,id,text,probability,sentiment
0,2022-08-27T23:59:59.000Z,1563677741324681217,Tesla Hi 👋 join chat and thank me later t...,0.93481,POSITIVE
1,2022-08-27T23:59:58.000Z,1563677736195047424,Tesla 1 ton death machines.,0.936145,NEGATIVE
2,2022-08-27T23:59:56.000Z,1563677725717499906,"Elon, I want my Tesla car to be able to trans...",0.965244,POSITIVE
3,2022-08-27T23:59:50.000Z,1563677702418092032,RT : My grandfather’s #TeslaPlaid caught fire....,0.999947,NEGATIVE
4,2022-08-27T23:59:49.000Z,1563677696231718912,RT : How is it that the writers of this show u...,0.999333,NEGATIVE
5,2022-08-27T23:59:49.000Z,1563677696084811778,know full well that legacy auto and most star...,0.640307,POSITIVE
6,2022-08-27T23:59:48.000Z,1563677694969032704,Would you use a smart security surveillance sy...,0.978994,POSITIVE
7,2022-08-27T23:59:45.000Z,1563677681568608257,"RT : “Nobody iss making electric Utess,” ssays...",0.924141,NEGATIVE
8,2022-08-27T23:59:43.000Z,1563677673938980864,Tesla You cant be serious my boy,0.844502,POSITIVE
9,2022-08-27T23:59:42.000Z,1563677668582821888,as a man why do you have a tesla?,0.99888,POSITIVE
