In [1]:
# -------------------------------------------------------------------------------
# Name: main.py
# Purpose: Pull data from twitter, perform sentiment analysis and pull stock data
#
# Author(s):    David Little
#
# Created:      04/26/2021
# Updated:
# Update Comment(s):
#
# TO DO:
#
# -------------------------------------------------------------------------------


import requests
import pandas as pd
import datetime
import regex as re
from datetime import datetime, timedelta

In [3]:
def get_data(tweet):
    data = {
        'id': tweet['id'],
        'created_at': tweet['created_at'],
        'text': tweet['text'],
        'retweet_count': tweet['public_metrics']['retweet_count'],
        'reply_count': tweet['public_metrics']['reply_count'],
        'like_count': tweet['public_metrics']['like_count'],
        'quote_count': tweet['public_metrics']['quote_count']
    }
    return data

In [4]:
whitespace = re.compile(r"\s+")
web_address = re.compile(r"(?i)http(s):\/\/[a-z0-9.~_\-\/]+")
tesla = re.compile(r"(?i)@Tesla(?=\b)")
user = re.compile(r"(?i)@[a-z0-9_]+")

In [5]:
#------------------------------------- Twitter Pull  --------------------------------------------------------

# setup the API request
endpoint = 'https://api.twitter.com/2/tweets/search/recent'  # 'https://api.twitter.com/2/tweets/search/all'
headers = {'authorization': f'Bearer {BEARER_TOKEN}'}
params = {
    'query': '(tesla OR tsla OR elon musk) (lang:en) -is:retweet',
    'max_results': '100',
    'tweet.fields': 'created_at,lang,public_metrics'
        }

In [6]:
dtformat = '%Y-%m-%dT%H:%M:%SZ'  # the date format string required by twitter

# we use this function to subtract 60 mins from our datetime string
def time_travel(now, mins):
    now = datetime.strptime(now, dtformat)
    back_in_time = now - timedelta(minutes=mins)
    return back_in_time.strftime(dtformat)

In [7]:
now = datetime.now()  # get the current datetime, this is our starting point
last_week = now - timedelta(days=6)  # datetime one week ago = the finish line
now = now.strftime(dtformat)  # convert now datetime to format for API

In [8]:
df = pd.DataFrame()  # initialize dataframe to store tweets
while True:
    if datetime.strptime(now, dtformat) < last_week:
        # if we have reached 6 days ago, break the loop
        break
    pre60 = time_travel(now, 60)  # get 60 minutes before 'now'
    # assign from and to datetime parameters for the API
    params['start_time'] = pre60
    params['end_time'] = now
    response = requests.get(endpoint,
                            params=params,
                            headers=headers)  # send the request
    now = pre60  # move the window 60 minutes earlier
    # iteratively append our tweet data to our dataframe
    for tweet in response.json()['data']:
        row = get_data(tweet)  # we defined this function earlier
        if row['like_count'] != 0:
            df = df.append(row, ignore_index=True)
df

Unnamed: 0,created_at,id,like_count,quote_count,reply_count,retweet_count,text
0,2021-08-06T13:13:54.000Z,1423633432811384837,21.0,0.0,2.0,13.0,Brexiteers...more great news...\nhttps://t.co/...
1,2021-08-06T13:13:52.000Z,1423633424338898949,1.0,0.0,1.0,0.0,@ValueAnalyst1 It only takes one part! But I’l...
2,2021-08-06T13:13:52.000Z,1423633421461561345,2.0,0.0,1.0,1.0,Saw some idiot Tesla driver doing a lap on the...
3,2021-08-06T13:13:19.000Z,1423633284257312772,19.0,0.0,1.0,0.0,Elon Musk started this crypto revolution but w...
4,2021-08-06T13:12:46.000Z,1423633147481182210,1.0,0.0,0.0,0.0,$TSLA beautiful multi-day bull flag. Entries w...
...,...,...,...,...,...,...,...
6137,2021-07-31T14:07:53.000Z,1421472688673935365,3.0,0.0,1.0,0.0,@cnunezimages @elonmusk @SpaceX @SpaceIntellig...
6138,2021-07-31T14:07:46.000Z,1421472659687280646,1.0,0.0,1.0,0.0,@aaronbrighton @MFrunker @DirtyTesla @gwestr @...
6139,2021-07-31T14:07:39.000Z,1421472631585480709,3.0,0.0,0.0,0.0,"@PJWheeler83 Haha of course, doing Uber/Lyft f..."
6140,2021-07-31T14:07:32.000Z,1421472602782978048,1.0,0.0,0.0,1.0,Good project expecting bigger outcomes\n$paid ...


In [9]:
#---------------------------------------------- Sentiment Model ------------------------------------------------------

import flair
sentiment_model = flair.models.TextClassifier.load('en-sentiment')

# we will append probability and sentiment preds later
probs = []
sentiments = []
clean_tweets = []
timestamp = []

for time in df['created_at']:
    timestamp.append(datetime.strptime(time, '%Y-%m-%dT%H:%M:%S.%fZ').strftime('%Y-%m-%d %H:%M:%S'))

for tweet in df['text']:
# we then use the sub method to replace anything matching
    tweet = whitespace.sub(' ', tweet)
    tweet = web_address.sub('', tweet)
    tweet = tesla.sub('Tesla', tweet)
    tweet = user.sub('', tweet)
    sentence = flair.data.Sentence(tweet)
    sentiment_model.predict(sentence)
    # extract sentiment prediction
    probs.append(sentence.labels[0].score)  # numerical score 0-1
    sentiments.append(sentence.labels[0].value)  # 'POSITIVE' or 'NEGATIVE'
    clean_tweets.append(tweet)
    # print(tweet)
    # print(' ')

# add probability and sentiment predictions to tweets dataframe
df['text_clean'] = clean_tweets
df['probability'] = probs
df['sentiment'] = sentiments
df['timestamp'] = timestamp
df

2021-08-06 13:19:32,761 loading file C:\Users\djlittle\.flair\models\sentiment-en-mix-distillbert_4.pt


Unnamed: 0,created_at,id,like_count,quote_count,reply_count,retweet_count,text,text_clean,probability,sentiment,timestamp
0,2021-08-06T13:13:54.000Z,1423633432811384837,21.0,0.0,2.0,13.0,Brexiteers...more great news...\nhttps://t.co/...,Brexiteers...more great news...,0.948765,POSITIVE,2021-08-06 13:13:54
1,2021-08-06T13:13:52.000Z,1423633424338898949,1.0,0.0,1.0,0.0,@ValueAnalyst1 It only takes one part! But I’l...,It only takes one part! But I’ll accept that ...,0.985876,POSITIVE,2021-08-06 13:13:52
2,2021-08-06T13:13:52.000Z,1423633421461561345,2.0,0.0,1.0,1.0,Saw some idiot Tesla driver doing a lap on the...,Saw some idiot Tesla driver doing a lap on the...,0.999963,NEGATIVE,2021-08-06 13:13:52
3,2021-08-06T13:13:19.000Z,1423633284257312772,19.0,0.0,1.0,0.0,Elon Musk started this crypto revolution but w...,Elon Musk started this crypto revolution but w...,0.979615,NEGATIVE,2021-08-06 13:13:19
4,2021-08-06T13:12:46.000Z,1423633147481182210,1.0,0.0,0.0,0.0,$TSLA beautiful multi-day bull flag. Entries w...,$TSLA beautiful multi-day bull flag. Entries w...,0.999910,POSITIVE,2021-08-06 13:12:46
...,...,...,...,...,...,...,...,...,...,...,...
6137,2021-07-31T14:07:53.000Z,1421472688673935365,3.0,0.0,1.0,0.0,@cnunezimages @elonmusk @SpaceX @SpaceIntellig...,"""I cannot teach anybody to build starships...",0.949505,POSITIVE,2021-07-31 14:07:53
6138,2021-07-31T14:07:46.000Z,1421472659687280646,1.0,0.0,1.0,0.0,@aaronbrighton @MFrunker @DirtyTesla @gwestr @...,He thrashes it about and complains ab...,0.999333,NEGATIVE,2021-07-31 14:07:46
6139,2021-07-31T14:07:39.000Z,1421472631585480709,3.0,0.0,0.0,0.0,"@PJWheeler83 Haha of course, doing Uber/Lyft f...","Haha of course, doing Uber/Lyft for referral ...",0.979240,POSITIVE,2021-07-31 14:07:39
6140,2021-07-31T14:07:32.000Z,1421472602782978048,1.0,0.0,0.0,1.0,Good project expecting bigger outcomes\n$paid ...,Good project expecting bigger outcomes $paid #...,0.788560,POSITIVE,2021-07-31 14:07:32


In [13]:
#________________________________ Stock Data __________________________________________________________________

import yfinance as yf

tsla = yf.Ticker("TSLA")
tsla_stock = tsla.history(
    start=datetime.strptime(df['created_at'].min(),'%Y-%m-%dT%H:%M:%S.%fZ').strftime('%Y-%m-%d'),
    end=datetime.strptime(df['created_at'].max(),'%Y-%m-%dT%H:%M:%S.%fZ').strftime('%Y-%m-%d'),
    interval='1d'
        ).reset_index()
tsla_stock

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2021-08-02,700.0,726.940002,698.400024,709.669983,33615800,0,0
1,2021-08-03,719.0,722.650024,701.01001,709.73999,21620300,0,0
2,2021-08-04,711.0,724.900024,708.929993,710.919983,17002600,0,0
3,2021-08-05,716.0,720.950012,711.409973,714.630005,12890100,0,0
