In [1]:
import datetime
from datetime import timedelta
from datetime import timezone
import tweepy
import pandas as pd
import re
from flair.models import TextClassifier
from flair.data import Sentence

from dotenv import load_dotenv

load_dotenv()

API_KEY = os.getenv('API_KEY')
TOKEN_SECRET = os.getenv('TOKEN_SECRET')
ACCESS_TOKEN = os.getenv('ACCESS_TOKEN')
ACCESS_TOKEN_SECRET = os.getenv('ACCESS_TOKEN_SECRET')
BEARER_TOKEN = os.getenv('BEARER_TOKEN')



NUM_RESULTS = 10
CLEAN_OUT_HASHTAGS = False



client = tweepy.Client(bearer_token=BEARER_TOKEN)

def CleanText(text):
    if (CLEAN_OUT_HASHTAGS):
        return re.sub(" +", " ", re.sub("#[A-Za-z0-9_]+", "", re.sub("@[A-Za-z0-9_]+", "", text)))
    else:
        return re.sub(" +", " ", re.sub("@[A-Za-z0-9_]+", "", text))



def ProcessTweetsToDf(tweets):
    tweets_df = pd.DataFrame()
    users = {u["id"]: u for u in tweets.includes['users']}
    for tweet in tweets.data:
        # The column selections and this method of creating each data point is taken from Aparna's example
        hashtags = []
        
        username = ""
        desc = ""
        verified = False
        if users[tweet.author_id]:
            user = users[tweet.author_id]
            username = user.username
            desc = user.description
            verified = user.verified

        try:
            for hashtag in tweet.entities["hashtags"]:
                hashtags.append(hashtag["text"])
        except:
            pass
        tweets_df = tweets_df.append(pd.DataFrame({'user_name': username, 
                                                   'user_location': None,
                                                    'user_description': desc,
                                                    'user_verified': verified,
                                                    'date': tweet.created_at,
                                                    'text': tweet.text, 
                                                    'hashtags': [hashtags if hashtags else None],
                                                    'source': tweet.source,
                                                    'cleaned_text': CleanText(tweet.text)}))
        tweets_df = tweets_df.reset_index(drop=True)
    return tweets_df


# Duration is how far back we're checking as a timedelta
def SearchRecentTweets(query, duration=None):
    tweets = None
    if duration is not None:
        tweets = client.search_recent_tweets(query=query, start_time=(datetime.datetime.now() - duration), max_results = NUM_RESULTS, expansions='author_id', place_fields="full_name", tweet_fields=["created_at","source"], user_fields=["description", "verified", "username"])
    else:
        tweets = client.search_recent_tweets(query=query, max_results=NUM_RESULTS, expansions='author_id', place_fields="full_name", tweet_fields=["created_at","source"], user_fields=["description", "verified", "username"])
    
    if (tweets.data != None and len(tweets) != 0):
        return ProcessTweetsToDf(tweets)
    else:
        return None


def SearchTimePeriodTweets(query, start_time, end_time):
    tweets = client.search_recent_tweets(query=query, start_time=start_time, end_time=end_time, max_results=NUM_RESULTS, expansions='author_id', place_fields="full_name", tweet_fields=["created_at","source"], user_fields=["description", "verified", "username"])
    
    if (tweets.data != None and len(tweets) != 0):
        return ProcessTweetsToDf(tweets)
    else:
        return None







  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#print(SearchRecentTweets("Tesla", timedelta(days=2))[1][1])
#print(SearchRecentTweets("Tesla OR Elon Musk OR TSLA", timedelta(days=2))[1][1])
#print(SearchRecentTweets("#Tesla", timedelta(days=2))[1][1])
#print(SearchRecentTweets("#Tesla OR #ElonMusk", datetime.datetime(day=1, month=3, year=2022), datetime.datetime(day=1, month=3, year=2022))
#print(SearchTimePeriodTweets("#Tesla OR #ElonMusk", datetime.datetime(day=1, month=3, year=2022), datetime.datetime(day=1, month=3, year=2022)))
data = SearchRecentTweets("#Tesla", timedelta(days=2))

data.to_csv("./data.csv")

  tweets_df = tweets_df.append(pd.DataFrame({'user_name': username,


In [3]:
from flair.models import TextClassifier
from flair.data import Sentence

CSV_PATH = "./data.csv"

classifier = TextClassifier.load('en-sentiment')

df = pd.read_csv(CSV_PATH)
df["sentiment"] = ""

for i, text in enumerate(df["cleaned_text"]):
    sentence = Sentence(text)
    classifier.predict(sentence)
    sentiment = sentence.labels[0].to_dict()['confidence'] 
    if sentence.labels[0].to_dict()['value'] == 'NEGATIVE':
        sentiment *= -1
    df.at[i, 'sentiment'] = sentiment

df


2022-09-13 13:58:53,169 loading file C:\Users\Christian Classen\.flair\models\sentiment-en-mix-distillbert_4.pt


Unnamed: 0.1,Unnamed: 0,user_name,user_location,user_description,user_verified,date,text,hashtags,source,cleaned_text,sentiment
0,0,Buni_mp3,,Zenitsu Fan Account ⚡\nthey call me vacuum boy...,False,2022-09-13 18:58:33+00:00,RT @BLKMDL3: Tesla white seats after almost 11...,,Twitter Web App,"RT : Tesla white seats after almost 110,000 mi...",0.999718
1,1,NoirrGG,,"Long Live Her Majesty, Queen Elizabeth ll 🇬🇧✨ ...",False,2022-09-13 18:57:23+00:00,RT @BLKMDL3: Tesla white seats after almost 11...,,Twitter for iPhone,"RT : Tesla white seats after almost 110,000 mi...",0.999718
2,2,TeslaradarB,,Bot with global #TeslaRadar events. Make sure ...,False,2022-09-13 18:57:12+00:00,Smashing! Tiger just spotted a 2021 Tesla Mode...,,GlobalFirstViews,Smashing! Tiger just spotted a 2021 Tesla Mode...,0.954602
3,3,TeslaradarB,,Bot with global #TeslaRadar events. Make sure ...,False,2022-09-13 18:57:12+00:00,Well done! Tiger just spotted a 2022 Tesla Mod...,,GlobalFirstViews,Well done! Tiger just spotted a 2022 Tesla Mod...,0.999817
4,4,TheTripleT,,"Positive minded, calm, collected wit a great l...",False,2022-09-13 18:56:20+00:00,RT @BLKMDL3: Tesla white seats after almost 11...,,Twitter for iPhone,"RT : Tesla white seats after almost 110,000 mi...",0.999718
5,5,TSLAgang,,"owner of https://t.co/HqJSvS1B8q, https://t.c...",False,2022-09-13 18:56:08+00:00,@QCompounding @Prof_Kalkyl #Tesla is a safer i...,,Twitter for iPhone,#Tesla is a safer investment than a lot of co...,0.988848
6,6,JNHaldemann,,The Flying Haldemans.\n\nIn memory of Joshua N...,False,2022-09-13 18:56:07+00:00,SPCX @elonmusk SpaceX and T-Mobile team up to ...,,dlvr.it,SPCX SpaceX and T-Mobile team up to use Starli...,0.997039
7,7,JNHaldemann,,The Flying Haldemans.\n\nIn memory of Joshua N...,False,2022-09-13 18:56:06+00:00,SPCX @elonmusk SpaceX gets approval to bring S...,,dlvr.it,SPCX SpaceX gets approval to bring Starlink in...,0.979065
8,8,shane_lat,,Crypto Investor | Father & Family Man | Tesla ...,False,2022-09-13 18:55:22+00:00,@elonmusk @Tesla @TeslaCharging When is the CC...,,Twitter for Android,When is the CCS adapter coming to North Ameri...,-0.999712
9,9,tesladvocate,,Tesla and AI evangelist. The future is inevita...,False,2022-09-13 18:55:04+00:00,Elon Musk changes his name on Twitter after en...,,Twitter for iPhone,Elon Musk changes his name on Twitter after en...,0.886231


In [4]:
yes = Sentence("hi")

classifier.predict(yes)
print(yes.labels[0])

Sentence: "hi" → NEGATIVE (0.5724)


In [5]:
OTHER_PATH = "./cleaned_sentiment_tweets.csv"
df2 = pd.read_csv(OTHER_PATH)
df2['sentiment'] = 0





In [6]:
#t = client.search_recent_tweets(query="Tesla OR Elon Musk OR TSLA", start_time=(datetime.datetime.now() - timedelta(days=6)), max_results = 100, expansions=['author_id', "geo.place_id"], place_fields="full_name", tweet_fields=["created_at", "source", "geo"], user_fields=["description", "verified", "username"])



In [7]:
type(t.includes)
#t.includes.keys()

#t.includes["users"][2].description
t.includes.keys()
#t.data[0].created_at
t.data[0].geo

NameError: name 't' is not defined

In [None]:
from flair.models import SequenceTagger

tagger = SequenceTagger.load('ner')
entity_predictions = []
for text in df["cleaned_text"]:
    sentence = Sentence(text)
    tagger.predict(sentence)
    entity_predictions.append(sentence.labels)
entity_predictions

2022-04-19 19:18:30,229 loading file C:\Users\Christian Classen\.flair\models\ner-english\4f4cdab26f24cb98b732b389e6cebc646c36f54cfd6e0b7d3b90b25656e4262f.8baa8ae8795f4df80b28e7f7b61d788ecbb057d1dc85aacb316f1bd02837a4a4
2022-04-19 19:18:31,913 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>


[[],
 ['Span[1:2]: "Tesla"'/'ORG' (0.969)],
 [],
 ['Span[10:11]: "Tesla"'/'LOC' (0.7981),
  'Span[16:17]: "Congo"'/'LOC' (0.9997),
  'Span[18:19]: "Tesla"'/'ORG' (0.3636)],
 ['Span[3:4]: "Tesla"'/'ORG' (0.9834), 'Span[13:14]: "TSLA"'/'ORG' (0.871)],
 [],
 ['Span[17:19]: "Tesla Insurance"'/'ORG' (0.9661),
  'Span[27:28]: "Tesla"'/'ORG' (0.6714)],
 [],
 ['Span[19:20]: "Tesla"'/'LOC' (0.5688)],
 []]