In [None]:
import emoji
import tweepy
import pandas as pd
from os import getenv
from dotenv import load_dotenv

from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig
from scipy.special import softmax
import numpy as np

from helper import tweet_history, strip_all_entities, clean_hashtags, filter_chars, remove_mult_spaces

# Pull credentials from a local .env file into the process environment so the
# getenv() calls below can read them.
load_dotenv()

# Do not wrap wide DataFrames across several blocks when they are displayed.
pd.set_option('display.expand_frame_repr', False)

# tweepy.API handle built from the app key/secret plus the user access token.
# It waits when a rate limit is hit and uses tweepy's JSON parser for responses.
auth = tweepy.OAuthHandler(
    getenv('TWITTER_API_KEY'),
    getenv('TWITTER_API_KEY_SECRET'),
)
auth.set_access_token(
    getenv('TWITTER_ACCESS_TOKEN'),
    getenv('TWITTER_ACCESS_TOKEN_SECRET'),
)
api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
    parser=tweepy.parsers.JSONParser(),
)

# tweepy.Client handle; this is the one used for the reply searches.
client = tweepy.Client(
    bearer_token=getenv('TWITTER_BEARER_TOKEN'),
    access_token=getenv('TWITTER_ACCESS_TOKEN'),
)

# Hugging Face checkpoint used for sentiment scoring. The tokenizer, the config
# (which carries the id -> label mapping) and the weights all come from it.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

In [3]:
def check_replys(tweet_ID):
    """Search recent tweets for replies whose conversation id is ``tweet_ID``.

    Args:
        tweet_ID: Conversation id to look up; it is interpolated verbatim into
            the search query.

    Returns:
        The value returned by ``client.search_recent_tweets`` for the query
        ``conversation_id:<tweet_ID> is:reply``.
    """
    return client.search_recent_tweets(
        query="conversation_id:{} is:reply".format(tweet_ID)
    )

def clean_text(text):
    """Run ``text`` through the full cleaning pipeline and return the result.

    Emoji are first converted to their textual names with ``emoji.demojize``;
    the result is then passed, in this order, through the project helpers
    ``strip_all_entities``, ``clean_hashtags``, ``filter_chars`` and
    ``remove_mult_spaces``.

    Args:
        text: Raw tweet text.

    Returns:
        The output of the last helper in the chain.
    """
    cleaned = emoji.demojize(text)
    # Order matters: each helper receives the output of the previous one.
    for step in (strip_all_entities, clean_hashtags, filter_chars, remove_mult_spaces):
        cleaned = step(cleaned)
    return cleaned

In [23]:
def get_ranked_labels(text, max_length=512):
    """Classify ``text`` and return every label with its probability, best first.

    Args:
        text: The string to score.
        max_length: Maximum number of tokens passed to the model. Longer inputs
            are truncated rather than overflowing the model's position
            embeddings, which would raise inside the forward pass. The default
            of 512 assumes a RoBERTa-base sized checkpoint -- TODO confirm if
            ``MODEL`` is ever changed.

    Returns:
        list[str]: One ``"<label> <probability>"`` string per class, ordered by
        descending probability, with the probability rounded to 4 decimals.
    """
    # Local import keeps this cell self-contained; torch is already required
    # by the 'pt' tensors and the PyTorch model used below.
    import torch

    encoded_input = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        output = model(**encoded_input)

    # output[0] holds the logits; [0] selects the single sequence in the batch.
    scores = softmax(output[0][0].detach().numpy())

    # argsort is ascending, so reverse it to get the most probable class first.
    ranking = np.argsort(scores)[::-1]

    return [
        f"{config.id2label[int(idx)]} {np.round(float(scores[idx]), 4)}"
        for idx in ranking
    ]

In [4]:
# Account handle (no leading "@") passed to tweet_history().
user_name = "BuildOnBase"

In [12]:
# NOTE: the names df, tweets, maybe, reduce and combined are kept as-is because
# later cells may reference them.

# 1. Fetch the account's tweets.
# The literal 10 is presumably the number of tweets to fetch -- confirm against
# helper.tweet_history.
df = tweet_history(user_name, 10, client, api)

# 2. Search for the replies to every tweet. Each response is turned into a
# plain dict and tagged with the id that was queried, so the replies can be
# joined back to their tweet later on.
tweets = []
for tid in df['tweet_id']:
    tid_tweet = check_replys(tid)
    tweet = tid_tweet._asdict()
    tweet['tid'] = tid
    tweets.append(tweet)

# 3. Build one DataFrame per tweet that actually has replies
# (the response's 'data' is None when the search found nothing).
maybe = []
for t in tweets:
    if t['data'] is not None:
        temp_df = pd.DataFrame(t['data'])
        temp_df['original_id'] = t['tid']
        maybe.append(temp_df)

# 4. Stack all replies. pd.concat raises ValueError on an empty list, so fall
# back to an empty frame carrying the columns the steps below read.
if maybe:
    reduce = pd.concat(maybe, ignore_index=True)
else:
    reduce = pd.DataFrame(columns=['id', 'text', 'original_id'])

reduce['id'] = reduce['id'].astype(str)
reduce['original_id'] = reduce['original_id'].astype(str)

# 5. Attach every reply to its originating tweet, then drop the tweets that
# received no replies (their reply 'id' is null after the left merge).
# Assumes df['tweet_id'] holds strings, since 'original_id' was cast to str
# above -- TODO confirm against helper.tweet_history.
combined = pd.merge(df, reduce, left_on='tweet_id', right_on='original_id', how='left')
combined = combined[~combined['id'].isnull()].copy()

# 6. Clean the reply text and discard replies that end up empty.
combined['text_cleaned'] = combined['text'].apply(clean_text)
combined = combined[combined['text_cleaned'] != ''].copy()

# 7. Score every remaining reply with the sentiment model.
combined['sentiment'] = combined['text_cleaned'].apply(get_ranked_labels)