In [None]:
import datetime
import re

import pandas as pd
import tweepy
from textblob import TextBlob

# Use your own token
from keys import api_tokens

## 1) Authenticate

In [None]:
# Each attribute read from `api_tokens` is unpacked into two names, so it must be a 2-item iterable.
# NOTE(review): presumably ordered as (token, secret) and (key, secret) — confirm against the keys module.
access_token,  access_token_secret = api_tokens.access_token
consumer_key, consumer_secret = api_tokens.consumer_key

In [None]:
# Build an OAuth 1a user-context handler from the app credentials, attach the user's access token,
# then create the API client that the helper functions below use as a module-level global.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# wait_on_rate_limit makes the client sleep until the rate-limit window resets instead of raising,
# so long Cursor crawls over several users finish rather than abort part-way.
# Note: the cell may therefore block for several minutes when the limit is hit.
api = tweepy.API(auth, wait_on_rate_limit=True)

## Helper functions

In [None]:
def get_tweets_by_user(screen_name, num_of_tweets=-1):
    """
    Retrieve the timeline tweets of the given screen name(s) as raw JSON dicts.

    Input:
        screen_name: A single screen name (str) or an iterable of screen names
        num_of_tweets: Maximum number of tweets to retrieve per user. A negative
            value or None (the default is -1) means "no limit": everything the
            timeline cursor yields is returned.
            NOTE(review): an earlier version of this docstring said the API
            allows the "last 30 days"; the actual cap is decided by the
            Twitter timeline endpoint — confirm against its documentation.
    Return:
        A flat list holding the `_json` payload of every retrieved tweet,
        grouped by user in the order the screen names were given.
    """
    # Cast to a list if a string is given
    if isinstance(screen_name, str):
        screen_name = [screen_name]

    # Never forward the negative sentinel to Cursor.items(): library versions that
    # treat the limit as a hard cap would stop immediately and return no tweets.
    unlimited = num_of_tweets is None or num_of_tweets < 0

    all_tweets = []
    for user in screen_name:
        cursor = tweepy.Cursor(api.user_timeline, screen_name=user, tweet_mode='extended')
        # items() without an argument uses the library's own "no limit" default
        statuses = cursor.items() if unlimited else cursor.items(num_of_tweets)
        all_tweets.extend(status._json for status in statuses)

    return all_tweets

In [None]:
def get_symbols(tweet):
    """
    Collect the cashtag symbols mentioned in a tweet.

    Each entry of tweet['entities']['symbols'] looks like
        {'text': 'NAK', 'indices': [0, 4]}
    The symbol is sliced out of tweet['full_text'] with the entry's indices
    instead of being read from 'text', so the leading '$' can be verified.

    Input:
        tweet: dict with a 'full_text' string and 'entities' -> 'symbols' entries
    Return:
        A sorted list of unique, upper-cased symbols, each including the '$' prefix
    """
    out = set()
    for entry in tweet['entities']['symbols']:
        begin, stop = entry['indices'][0], entry['indices'][1]
        symbol = tweet['full_text'][begin:stop]
        # Raw string avoids the invalid '\$' escape; [A-Za-z] is letters only, whereas
        # the range 'A-z' also accepts the punctuation between 'Z' and 'a' ([ \ ] ^ _ `).
        if re.match(r'\$[A-Za-z]+', symbol):
            out.add(symbol.upper())

    return sorted(out)

In [None]:
def get_sentiment(text):
    """
    Score a piece of text with TextBlob and attach human-readable labels.

    Polarity is a float in [-1.0, 1.0] (-1: negative, 1: positive).
    Subjectivity is a float in [0.0, 1.0] (0: very objective, 1: very subjective).

    Return a dict with the keys, in this order:
        'sentiment_value', 'subjectivity_value', 'sentiment', 'subjectivity'
    """
    polarity, subjectivity = TextBlob(text).sentiment

    # The cut-offs are a judgement call and not necessarily accurate:
    # anything strictly between -0.2 and 0.1 is treated as neutral.
    if polarity >= 0.1:
        polarity_label = 'positive'
    elif polarity <= -0.2:
        polarity_label = 'negative'
    else:
        polarity_label = 'neutral'

    subjectivity_label = 'subjective' if subjectivity >= 0.5 else 'objective'

    return {
        'sentiment_value': polarity,
        'subjectivity_value': subjectivity,
        'sentiment': polarity_label,
        'subjectivity': subjectivity_label,
    }

## 2) Get tweets

In [None]:
# Suggested users: AdnansArk, StockDweebs
# All spaces are removed and the answer is split on ','. Empty entries (from a trailing or
# doubled comma, or an empty answer) are dropped so no blank screen name is sent to the API.
users = [
    name
    for name in input("Input the usernames you want to get tweets from, separate users with a ',' : ").replace(' ', '').split(',')
    if name
]
print(f"Retrieving tweets from: {users}")

In [None]:
# Fetch the timeline of every requested user; no tweet count is passed, so the function's default applies.
tweets = get_tweets_by_user(users)

## 3) Post-process tweets

In [None]:
# Build one record per (tweet, ticker) pair.
#   all_ticker_tweets: flat list of records, later turned into a DataFrame
#   tickers_mentioned: ticker -> list of the same record objects
tickers_mentioned = {}
all_ticker_tweets = []
for tweet in tweets:
    # removes any https links that are not a part of the tweet
    # ex) They\'re pumping the market too! Goldman Sachs https://t.co/RaOoTgTlJj'
    start, end = tweet['display_text_range']
    content = tweet['full_text'][start:end]
    symbols = get_symbols(tweet)
    if not symbols:
        # A tweet without any ticker produces no records, so skip the sentiment analysis
        continue

    # The sentiment depends only on the tweet text: compute it once per tweet
    # instead of once per ticker mentioned in it.
    sentiment = get_sentiment(content)

    for ticker in symbols:
        obj = {
            'user': tweet['user']['screen_name'],
            'symbol': ticker,
            'created': str(tweet['created_at']),
            'content': content,
            'favourite_count': tweet['favorite_count'],
            'tickers_mentioned': symbols,
        }

        # Append the sentiment columns after the tweet columns
        obj.update(sentiment)

        all_ticker_tweets.append(obj)
        tickers_mentioned.setdefault(ticker.upper(), []).append(obj)

## 4) Write results to Excel

In [None]:
# One row per (tweet, ticker) record; nothing is written when no ticker tweets were found.
df_tweets = pd.DataFrame(all_ticker_tweets)
if not df_tweets.empty:
    # Create filename i.e AdnansArk_<date>
    now = datetime.datetime.today()
    # '%h' and '%s' are platform-specific strftime extensions and are not available everywhere.
    # '%b' is the portable abbreviated month name; the epoch seconds come from timestamp().
    stamp = f"{now.strftime('%Y_%b_%d')}_{int(now.timestamp())}"
    file_name = f"{'_'.join(df_tweets.user.unique())}_{stamp}"
    df_tweets.to_excel(f"{file_name}.xlsx", index=False)