In [1]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from transformers import pipeline

from get_tweets import make_twitter_call
from topic_modeling import do_topic_modeling

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Classifier used to fill the sentiment_label / sentiment_score columns.
sentiment_pipeline = pipeline("text-classification", model='cardiffnlp/twitter-roberta-base-sentiment-latest')
# Classifier used to fill the emotion_label / emotion_score columns.
emotion_pipeline = pipeline("text-classification", model='bhadresh-savani/distilbert-base-uncased-emotion')
# Sentence-embedding model that is handed to do_topic_modeling in a later cell.
topic_modeling_summary_model = SentenceTransformer('sentence-transformers/paraphrase-albert-small-v2')
# Pin the checkpoint and revision that transformers previously picked as the
# implicit default, so the summarizer cannot silently change between
# library versions.
summarization_pipeline = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    revision="a4f8f3e",
)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [3]:
def get_sentiments_and_emotions(data):
    """Annotate every tweet in ``data`` with sentiment and emotion predictions.

    The texts in ``data["clean_text"]`` are passed to the module-level
    ``sentiment_pipeline`` and then to ``emotion_pipeline``. Each prediction
    is read as a dict with ``'label'`` and ``'score'`` entries, which are
    written to the columns ``sentiment_label`` / ``sentiment_score`` and
    ``emotion_label`` / ``emotion_score``.

    Parameters
    ----------
    data : DataFrame-like object with a ``clean_text`` column.

    Returns
    -------
    The same ``data`` object; the four columns are added in place.
    """
    texts = [*data["clean_text"]]

    def _store(predictions, label_column, score_column):
        # Split the prediction dicts into one label column and one score column.
        data[label_column] = [item['label'] for item in predictions]
        data[score_column] = [item['score'] for item in predictions]

    _store(sentiment_pipeline(texts), "sentiment_label", "sentiment_score")
    _store(emotion_pipeline(texts), "emotion_label", "emotion_score")

    return data


def do_text_summarization(data, top_n=16):
    """Summarise the highest-rated tweets in ``data``.

    A ``rating`` column is added to ``data`` in place: the row-wise sum of
    ``retweet_count``, ``reply_count``, ``like_count`` and ``quote_count``
    cast to float. The ``clean_text`` of the ``top_n`` highest-rated rows is
    joined with spaces and passed to the module-level
    ``summarization_pipeline``.

    Parameters
    ----------
    data : DataFrame with ``clean_text`` and the four count columns.
    top_n : int, default 16
        Number of highest-rated tweets to feed to the summarizer.

    Returns
    -------
    (data, tweets_summary)
        ``data`` is the same object with the ``rating`` column added.
        ``tweets_summary`` is whatever the pipeline returns. If there is no
        text to summarise, the pipeline is not called and
        ``[{'summary_text': ''}]`` is returned, mirroring the shape seen in
        normal output.
    """
    engagement_columns = ['retweet_count', 'reply_count', 'like_count', 'quote_count']
    data['rating'] = data[engagement_columns].astype(float).sum(axis=1)

    top_rated = data.nlargest(top_n, "rating")
    # Missing texts (e.g. NaN) would make str.join raise, so drop non-strings.
    texts = [text for text in top_rated["clean_text"] if isinstance(text, str)]
    document = " ".join(texts)

    # Nothing to summarise: do not ask the model to summarise an empty string.
    if not document.strip():
        return data, [{'summary_text': ''}]

    # truncation=True keeps over-long input within the model's maximum length
    # instead of failing inside the model.
    tweets_summary = summarization_pipeline(document, truncation=True)

    return data, tweets_summary

In [4]:
# Comma-separated search terms.
tags = "iphone,apple"

# Trim each term, drop blank entries, and join the rest into one OR query.
tags = " OR ".join(
    term for term in map(str.strip, tags.strip().split(",")) if term
)

In [5]:
# Display the assembled query string.
tags

'iphone OR apple'

In [6]:
# Set to True to build `data` with make_twitter_call(tags) instead of the
# saved sample file.
# NOTE(review): assumes make_twitter_call returns a frame with the same
# columns as the sample file - confirm before enabling.
USE_LIVE_TWITTER_DATA = False
TEST_DATA_PATH = "./Experiments/test_data.json"

if USE_LIVE_TWITTER_DATA:
    data = make_twitter_call(tags)
else:
    data = pd.read_json(TEST_DATA_PATH)
data

Unnamed: 0,id,text,clean_text,created_at,is_sensitive,retweet_count,reply_count,like_count,quote_count,hashtags
0,1593098408189120514,RT @groundzerofm: #NowPlaying: University of T...,NowPlaying University of Texas at Austin Earth...,2022-11-17 04:27:13,False,1,0,0,0,"[NowPlaying, Alexa, Android, Apple]"
1,1593098273359007745,RT @orfonline: 🚨 #COP27 PolicyPod: Is the worl...,COP27 PolicyPod Is the world climate disaster ...,2022-11-17 04:26:41,False,25,0,0,0,"[COP27, climate]"
2,1593098232405831681,"The controversy of #MLS and #Apple +, my opini...",The controversy of MLS and Apple my opinion via,2022-11-17 04:26:31,False,0,0,0,0,"[MLS, Apple]"
3,1593098049509031936,"Pixel7pro is big mistake?\nNot solved, indian ...",Pixel7pro is big mistake Not solved indian cus...,2022-11-17 04:25:47,False,0,0,0,0,"[teampixel, sunderpichai, googlepixel7pro, goo..."
4,1593097989958291456,RT @Tian_A1: BrainKids Educative Game Now avai...,BrainKids Educative Game Now available Apple A...,2022-11-17 04:25:33,False,3,0,0,0,"[Apple, Google]"
...,...,...,...,...,...,...,...,...,...,...
95,1593088146547380225,#Apple (@Apple) Watch : #SteveWozniak (@stevew...,Apple Watch SteveWozniak est un fan,2022-11-17 03:46:26,False,0,0,0,0,"[Apple, SteveWozniak]"
96,1593087913214238721,That’s how #Apple so wealthy. https://t.co/zlK...,That s how Apple so wealthy,2022-11-17 03:45:31,False,0,0,1,0,[Apple]
97,1593087783010082816,. @Apple rolls out #iPhone emergency SOS satel...,rolls out iPhone emergency SOS satellite alert...,2022-11-17 03:45:00,False,0,0,1,0,"[iPhone, Mobile, Technology]"
98,1593087425072746497,RT @TechInRL: How to Find your Apple Watch! (U...,How to Find your Apple Watch Updated applewatc...,2022-11-17 03:43:34,False,2,0,0,0,"[applewatchseries7, AppleWatch, AppleWatchSeri..."


In [7]:
# Add the sentiment_label / sentiment_score and emotion_label / emotion_score
# columns. The function writes them into the frame it is given and returns
# that same frame.
data = get_sentiments_and_emotions(data)
data

Unnamed: 0,id,text,clean_text,created_at,is_sensitive,retweet_count,reply_count,like_count,quote_count,hashtags,sentiment_label,sentiment_score,emotion_label,emotion_score
0,1593098408189120514,RT @groundzerofm: #NowPlaying: University of T...,NowPlaying University of Texas at Austin Earth...,2022-11-17 04:27:13,False,1,0,0,0,"[NowPlaying, Alexa, Android, Apple]",Neutral,0.921415,joy,0.911417
1,1593098273359007745,RT @orfonline: 🚨 #COP27 PolicyPod: Is the worl...,COP27 PolicyPod Is the world climate disaster ...,2022-11-17 04:26:41,False,25,0,0,0,"[COP27, climate]",Neutral,0.737647,joy,0.875336
2,1593098232405831681,"The controversy of #MLS and #Apple +, my opini...",The controversy of MLS and Apple my opinion via,2022-11-17 04:26:31,False,0,0,0,0,"[MLS, Apple]",Neutral,0.874647,anger,0.990544
3,1593098049509031936,"Pixel7pro is big mistake?\nNot solved, indian ...",Pixel7pro is big mistake Not solved indian cus...,2022-11-17 04:25:47,False,0,0,0,0,"[teampixel, sunderpichai, googlepixel7pro, goo...",Negative,0.853460,joy,0.908345
4,1593097989958291456,RT @Tian_A1: BrainKids Educative Game Now avai...,BrainKids Educative Game Now available Apple A...,2022-11-17 04:25:33,False,3,0,0,0,"[Apple, Google]",Neutral,0.697752,joy,0.932771
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1593088146547380225,#Apple (@Apple) Watch : #SteveWozniak (@stevew...,Apple Watch SteveWozniak est un fan,2022-11-17 03:46:26,False,0,0,0,0,"[Apple, SteveWozniak]",Neutral,0.747318,joy,0.613935
96,1593087913214238721,That’s how #Apple so wealthy. https://t.co/zlK...,That s how Apple so wealthy,2022-11-17 03:45:31,False,0,0,1,0,[Apple],Neutral,0.566213,joy,0.993431
97,1593087783010082816,. @Apple rolls out #iPhone emergency SOS satel...,rolls out iPhone emergency SOS satellite alert...,2022-11-17 03:45:00,False,0,0,1,0,"[iPhone, Mobile, Technology]",Neutral,0.848590,joy,0.559165
98,1593087425072746497,RT @TechInRL: How to Find your Apple Watch! (U...,How to Find your Apple Watch Updated applewatc...,2022-11-17 03:43:34,False,2,0,0,0,"[applewatchseries7, AppleWatch, AppleWatchSeri...",Neutral,0.533736,joy,0.940285


In [8]:
# do_topic_modeling is imported in the notebook's top import cell.
# It receives the tweet frame and the sentence-embedding model, and returns
# the frame (displayed below with cluster_id / cluster_x / cluster_y columns)
# together with a `topics` object.
data, topics = do_topic_modeling(data, topic_modeling_summary_model)
data



Unnamed: 0,id,text,clean_text,created_at,is_sensitive,retweet_count,reply_count,like_count,quote_count,hashtags,sentiment_label,sentiment_score,emotion_label,emotion_score,cluster_id,cluster_x,cluster_y
0,1593098408189120514,RT @groundzerofm: #NowPlaying: University of T...,NowPlaying University of Texas at Austin Earth...,2022-11-17 04:27:13,False,1,0,0,0,"[NowPlaying, Alexa, Android, Apple]",Neutral,0.921415,joy,0.911417,7,14.209333,0.877520
1,1593098273359007745,RT @orfonline: 🚨 #COP27 PolicyPod: Is the worl...,COP27 PolicyPod Is the world climate disaster ...,2022-11-17 04:26:41,False,25,0,0,0,"[COP27, climate]",Neutral,0.737647,joy,0.875336,5,7.291540,2.613396
2,1593098232405831681,"The controversy of #MLS and #Apple +, my opini...",The controversy of MLS and Apple my opinion via,2022-11-17 04:26:31,False,0,0,0,0,"[MLS, Apple]",Neutral,0.874647,anger,0.990544,1,10.209354,-2.163689
3,1593098049509031936,"Pixel7pro is big mistake?\nNot solved, indian ...",Pixel7pro is big mistake Not solved indian cus...,2022-11-17 04:25:47,False,0,0,0,0,"[teampixel, sunderpichai, googlepixel7pro, goo...",Negative,0.853460,joy,0.908345,1,11.204018,-0.685552
4,1593097989958291456,RT @Tian_A1: BrainKids Educative Game Now avai...,BrainKids Educative Game Now available Apple A...,2022-11-17 04:25:33,False,3,0,0,0,"[Apple, Google]",Neutral,0.697752,joy,0.932771,7,14.340724,0.735864
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1593088146547380225,#Apple (@Apple) Watch : #SteveWozniak (@stevew...,Apple Watch SteveWozniak est un fan,2022-11-17 03:46:26,False,0,0,0,0,"[Apple, SteveWozniak]",Neutral,0.747318,joy,0.613935,7,14.594671,0.967804
96,1593087913214238721,That’s how #Apple so wealthy. https://t.co/zlK...,That s how Apple so wealthy,2022-11-17 03:45:31,False,0,0,1,0,[Apple],Neutral,0.566213,joy,0.993431,3,14.830197,-3.055272
97,1593087783010082816,. @Apple rolls out #iPhone emergency SOS satel...,rolls out iPhone emergency SOS satellite alert...,2022-11-17 03:45:00,False,0,0,1,0,"[iPhone, Mobile, Technology]",Neutral,0.848590,joy,0.559165,5,6.908204,2.518192
98,1593087425072746497,RT @TechInRL: How to Find your Apple Watch! (U...,How to Find your Apple Watch Updated applewatc...,2022-11-17 03:43:34,False,2,0,0,0,"[applewatchseries7, AppleWatch, AppleWatchSeri...",Neutral,0.533736,joy,0.940285,6,12.538852,-4.064796


In [9]:
# Inspect the second value returned by do_topic_modeling.
topics

{'tweets': [{'id': 1593098408189120514,
   'text': 'RT @groundzerofm: #NowPlaying: University of Texas at Austin - Earth Date - Listen on #Alexa #Android and #Apple https://t.co/Acdz3SEyqJ',
   'clean_text': 'NowPlaying University of Texas at Austin Earth Date Listen on Alexa Android and Apple',
   'created_at': '2022-11-17 04:27:13',
   'is_sensitive': False,
   'retweet_count': 1,
   'reply_count': 0,
   'like_count': 0,
   'quote_count': 0,
   'hashtags': ['NowPlaying', 'Alexa', 'Android', 'Apple'],
   'sentiment_label': 'Neutral',
   'sentiment_score': 0.9214152693748474,
   'emotion_label': 'joy',
   'emotion_score': 0.9114171862602234,
   'cluster_id': 7,
   'cluster_x': 14.209333419799805,
   'cluster_y': 0.8775196075439453},
  {'id': 1593098273359007745,
   'text': 'RT @orfonline: 🚨 #COP27 PolicyPod: Is the world #climate disaster ready? 🚨\n\nWith @RichardJRand, Hugh Hilton Todd (@mfaguyana), @MohamedNash…',
   'clean_text': 'COP27 PolicyPod Is the world climate disaster ready 

In [10]:
# Add the 'rating' column (sum of the retweet, reply, like and quote counts)
# and summarise the clean_text of the highest-rated tweets.
data, tweets_summary = do_text_summarization(data)
data

Unnamed: 0,id,text,clean_text,created_at,is_sensitive,retweet_count,reply_count,like_count,quote_count,hashtags,sentiment_label,sentiment_score,emotion_label,emotion_score,cluster_id,cluster_x,cluster_y,rating
0,1593098408189120514,RT @groundzerofm: #NowPlaying: University of T...,NowPlaying University of Texas at Austin Earth...,2022-11-17 04:27:13,False,1,0,0,0,"[NowPlaying, Alexa, Android, Apple]",Neutral,0.921415,joy,0.911417,7,14.209333,0.877520,1.0
1,1593098273359007745,RT @orfonline: 🚨 #COP27 PolicyPod: Is the worl...,COP27 PolicyPod Is the world climate disaster ...,2022-11-17 04:26:41,False,25,0,0,0,"[COP27, climate]",Neutral,0.737647,joy,0.875336,5,7.291540,2.613396,25.0
2,1593098232405831681,"The controversy of #MLS and #Apple +, my opini...",The controversy of MLS and Apple my opinion via,2022-11-17 04:26:31,False,0,0,0,0,"[MLS, Apple]",Neutral,0.874647,anger,0.990544,1,10.209354,-2.163689,0.0
3,1593098049509031936,"Pixel7pro is big mistake?\nNot solved, indian ...",Pixel7pro is big mistake Not solved indian cus...,2022-11-17 04:25:47,False,0,0,0,0,"[teampixel, sunderpichai, googlepixel7pro, goo...",Negative,0.853460,joy,0.908345,1,11.204018,-0.685552,0.0
4,1593097989958291456,RT @Tian_A1: BrainKids Educative Game Now avai...,BrainKids Educative Game Now available Apple A...,2022-11-17 04:25:33,False,3,0,0,0,"[Apple, Google]",Neutral,0.697752,joy,0.932771,7,14.340724,0.735864,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1593088146547380225,#Apple (@Apple) Watch : #SteveWozniak (@stevew...,Apple Watch SteveWozniak est un fan,2022-11-17 03:46:26,False,0,0,0,0,"[Apple, SteveWozniak]",Neutral,0.747318,joy,0.613935,7,14.594671,0.967804,0.0
96,1593087913214238721,That’s how #Apple so wealthy. https://t.co/zlK...,That s how Apple so wealthy,2022-11-17 03:45:31,False,0,0,1,0,[Apple],Neutral,0.566213,joy,0.993431,3,14.830197,-3.055272,1.0
97,1593087783010082816,. @Apple rolls out #iPhone emergency SOS satel...,rolls out iPhone emergency SOS satellite alert...,2022-11-17 03:45:00,False,0,0,1,0,"[iPhone, Mobile, Technology]",Neutral,0.848590,joy,0.559165,5,6.908204,2.518192,1.0
98,1593087425072746497,RT @TechInRL: How to Find your Apple Watch! (U...,How to Find your Apple Watch Updated applewatc...,2022-11-17 03:43:34,False,2,0,0,0,"[applewatchseries7, AppleWatch, AppleWatchSeri...",Neutral,0.533736,joy,0.940285,6,12.538852,-4.064796,2.0


In [11]:
# Show the summarization pipeline's result for the top-rated tweets.
tweets_summary

[{'summary_text': ' Daily Giveaways MacBook IPhone Pro 500 PayPal gift card and Cash Enter Here Winne Daily Giveaway includes a MacBook Pro 500 gift card . USDC has announced an integration of the ApplePay payment gateway . Emergency SOS via satellite is available today on the iPhone 14 lineup in the US and Canada .'}]

In [12]:
# Store timestamps as plain strings (presumably so the records serialise
# cleanly to JSON - confirm against the consumer of json_data).
data['created_at'] = data['created_at'].astype(str)

# Bundle the per-tweet records, the topic clusters and the summary into one
# dict. Nothing is written to disk in this cell.
json_data = dict(
    tweets=data.to_dict(orient="records"),
    topics=topics,
    summary=tweets_summary,
)

json_data

{'tweets': [{'id': 1593098408189120514,
   'text': 'RT @groundzerofm: #NowPlaying: University of Texas at Austin - Earth Date - Listen on #Alexa #Android and #Apple https://t.co/Acdz3SEyqJ',
   'clean_text': 'NowPlaying University of Texas at Austin Earth Date Listen on Alexa Android and Apple',
   'created_at': '2022-11-17 04:27:13',
   'is_sensitive': False,
   'retweet_count': 1,
   'reply_count': 0,
   'like_count': 0,
   'quote_count': 0,
   'hashtags': ['NowPlaying', 'Alexa', 'Android', 'Apple'],
   'sentiment_label': 'Neutral',
   'sentiment_score': 0.9214152693748474,
   'emotion_label': 'joy',
   'emotion_score': 0.9114171862602234,
   'cluster_id': 7,
   'cluster_x': 14.209333419799805,
   'cluster_y': 0.8775196075439453,
   'rating': 1.0},
  {'id': 1593098273359007745,
   'text': 'RT @orfonline: 🚨 #COP27 PolicyPod: Is the world #climate disaster ready? 🚨\n\nWith @RichardJRand, Hugh Hilton Todd (@mfaguyana), @MohamedNash…',
   'clean_text': 'COP27 PolicyPod Is the world clima