In [6]:
# !pip3 install twint  # tweet scraping
# !pip3 install nest_asyncio  # lets twint's event loop run inside Jupyter
# !pip3 install vaderSentiment  # sentiment analysis
# !pip3 install nbimporter  # import functions between notebooks
# !pip3 install nbformat


In [2]:
import twint
import nest_asyncio
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import json
import numpy as np
from typing import List
import datetime

In [3]:
class TwitterAggregator():
    """Fetches tweets with twint and scores them with VADER sentiment analysis.

    Also provides helpers that convert between ``datetime`` objects and a
    scaled-down float representation of the Unix timestamp.
    """

    # Epoch seconds are divided by this factor in ``datetime_to_float`` (and
    # multiplied back in ``float_to_datetime``) to keep the floats small.
    REDUCTION_MULTIPLIER = 1e6

    # Naive datetime representing 1970-01-01T00:00:00 UTC. Same value the
    # deprecated ``datetime.utcfromtimestamp(0)`` used to return.
    _EPOCH = datetime.datetime(1970, 1, 1)

    # Number of sentiment columns: neg, neu, pos, compound.
    _SENTIMENT_COLUMNS = 4

    def __init__(self):
        print("Hi from SentimentDataAggregation!")
        # Patch asyncio so nested event loops are allowed; applied once per
        # instance before any twint search is run.
        nest_asyncio.apply()

    def getKeywordTweetsByDates(self, keyword:str, dateFrom:str, dateTo:str, maxTweets:int = 100) -> list:
        """
        Search tweets matching ``keyword`` between two dates (YYYY-MM-DD).

        Several keywords can be combined in ``keyword`` with OR / AND logic.

        Args:
            keyword: search query passed to twint.
            dateFrom: start date, format YYYY-MM-DD.
            dateTo: end date, format YYYY-MM-DD.
            maxTweets: value assigned to twint's ``Limit`` setting.
                NOTE(review): twint may fetch in batches, so the number of
                returned tweets might not equal this value exactly - confirm
                against the twint documentation.

        Returns:
            list: tweet objects collected by twint (empty if nothing matched).
        """
        tweets = []
        config = twint.Config()
        config.Search = keyword
        config.Limit = maxTweets
        config.Since = dateFrom
        config.Until = dateTo
        config.Hide_output = True
        # Collect results in memory: twint appends every tweet object it
        # finds to the list handed over below.
        config.Store_object = True
        config.Store_object_tweets_list = tweets
        # Optional filters that are intentionally left disabled:
        # config.Verified = True
        # config.Popular = True
        twint.run.Search(config)
        return tweets

    def getSentimentsAnalysis(self, tweets:list) -> np.ndarray:
        """
        Score every tweet with VADER and return the scores as a matrix.

        Args:
            tweets: objects exposing the tweet text through a ``tweet``
                attribute.

        Returns:
            np.array: matrix of shape (len(tweets), 4) with the columns
            (neg, neutral, pos, compound); shape (0, 4) when ``tweets`` is
            empty.
        """
        if not tweets:
            # Nothing to score: skip building the analyzer altogether.
            return np.zeros((0, self._SENTIMENT_COLUMNS))

        analyzer = SentimentIntensityAnalyzer()
        rows = []
        for tweetObj in tweets:
            vs = analyzer.polarity_scores(tweetObj.tweet)
            rows.append([vs["neg"], vs["neu"], vs["pos"], vs["compound"]])
        # Convert once at the end; appending to an ndarray inside the loop
        # would copy the whole matrix on every iteration.
        return np.array(rows, dtype=float).reshape(-1, self._SENTIMENT_COLUMNS)

    def datetime_to_float(self, d:datetime.datetime) -> float:
        """
        Convert a datetime into scaled seconds since the Unix epoch.

        Naive datetimes are interpreted as UTC. Timezone-aware datetimes are
        converted to UTC first, so both kinds can be passed.

        Args:
            d: the datetime to convert.

        Returns:
            float: seconds since 1970-01-01 UTC divided by
            ``REDUCTION_MULTIPLIER``.
        """
        if d.tzinfo is not None and d.utcoffset() is not None:
            # Subtracting a naive epoch from an aware datetime raises
            # TypeError, so normalise to naive UTC first.
            d = d.astimezone(datetime.timezone.utc).replace(tzinfo=None)
        total_seconds = (d - self._EPOCH).total_seconds()
        # total_seconds keeps the sub-second part as decimals
        return total_seconds / self.REDUCTION_MULTIPLIER

    def float_to_datetime(self, fl) -> datetime.datetime:
        """
        Inverse of ``datetime_to_float``.

        Args:
            fl: scaled value as produced by ``datetime_to_float``.

        Returns:
            datetime: naive datetime expressed in UTC. Because of floating
            point rounding the result can differ from the original datetime
            by about a microsecond.
        """
        # Undo the scaling and measure from the same UTC epoch that
        # datetime_to_float uses, so the two methods round-trip.
        seconds = fl * self.REDUCTION_MULTIPLIER
        return self._EPOCH + datetime.timedelta(seconds=seconds)