In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import re, json

from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


## Load the Text Summarization Pipeline

In [2]:
summarization_pipeline = pipeline("summarization")

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


## Load the Dataset

In [3]:
RAW_DATA = pd.read_json("./test_data_with_sentiment_and_clusters.json")

## Append the 'rating' Column to the Dataset

In [4]:
RAW_DATA['rating'] = RAW_DATA[
    ['retweet_count', 'reply_count', 'like_count', 'quote_count']
].astype(float).sum(1)

RAW_DATA

Unnamed: 0,id,text,clean_text,created_at,is_sensitive,retweet_count,reply_count,like_count,quote_count,hashtags,sentiment_label,sentiment_score,emotion_label,emotion_score,cluster_id,cluster_x,cluster_y,rating
0,1593098408189120514,RT @groundzerofm: #NowPlaying: University of T...,NowPlaying University of Texas at Austin Earth...,2022-11-17 04:27:13,False,1,0,0,0,"[NowPlaying, Alexa, Android, Apple]",Neutral,0.921415,joy,0.911417,7,14.209333,0.877520,1.0
1,1593098273359007745,RT @orfonline: 🚨 #COP27 PolicyPod: Is the worl...,COP27 PolicyPod Is the world climate disaster ...,2022-11-17 04:26:41,False,25,0,0,0,"[COP27, climate]",Neutral,0.737647,joy,0.875336,5,7.291540,2.613396,25.0
2,1593098232405831681,"The controversy of #MLS and #Apple +, my opini...",The controversy of MLS and Apple my opinion via,2022-11-17 04:26:31,False,0,0,0,0,"[MLS, Apple]",Neutral,0.874647,anger,0.990544,1,10.209354,-2.163689,0.0
3,1593098049509031936,"Pixel7pro is big mistake?\nNot solved, indian ...",Pixel7pro is big mistake Not solved indian cus...,2022-11-17 04:25:47,False,0,0,0,0,"[teampixel, sunderpichai, googlepixel7pro, goo...",Negative,0.853460,joy,0.908345,1,11.204018,-0.685552,0.0
4,1593097989958291456,RT @Tian_A1: BrainKids Educative Game Now avai...,BrainKids Educative Game Now available Apple A...,2022-11-17 04:25:33,False,3,0,0,0,"[Apple, Google]",Neutral,0.697752,joy,0.932771,7,14.340724,0.735864,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1593088146547380225,#Apple (@Apple) Watch : #SteveWozniak (@stevew...,Apple Watch SteveWozniak est un fan,2022-11-17 03:46:26,False,0,0,0,0,"[Apple, SteveWozniak]",Neutral,0.747318,joy,0.613935,7,14.594671,0.967804,0.0
96,1593087913214238721,That’s how #Apple so wealthy. https://t.co/zlK...,That s how Apple so wealthy,2022-11-17 03:45:31,False,0,0,1,0,[Apple],Neutral,0.566213,joy,0.993431,3,14.830197,-3.055272,1.0
97,1593087783010082816,. @Apple rolls out #iPhone emergency SOS satel...,rolls out iPhone emergency SOS satellite alert...,2022-11-17 03:45:00,False,0,0,1,0,"[iPhone, Mobile, Technology]",Neutral,0.848590,joy,0.559165,5,6.908204,2.518192,1.0
98,1593087425072746497,RT @TechInRL: How to Find your Apple Watch! (U...,How to Find your Apple Watch Updated applewatc...,2022-11-17 03:43:34,False,2,0,0,0,"[applewatchseries7, AppleWatch, AppleWatchSeri...",Neutral,0.533736,joy,0.940285,6,12.538852,-4.064796,2.0


## Get the Top 16 Tweets Based on Rating

In [5]:
top_16 = RAW_DATA.nlargest(16, "rating")

top_16

Unnamed: 0,id,text,clean_text,created_at,is_sensitive,retweet_count,reply_count,like_count,quote_count,hashtags,sentiment_label,sentiment_score,emotion_label,emotion_score,cluster_id,cluster_x,cluster_y,rating
71,1593091439445299200,"RT @Long_Commute: Daily Giveaways: MacBook, IP...",Daily Giveaways MacBook IPhone Pro 500 PayPal ...,2022-11-17 03:59:32,False,681,0,0,0,[],Positive,0.644732,joy,0.967767,3,15.503637,-2.805233,681.0
75,1593091222734004226,"RT @Long_Commute: Daily Giveaways: MacBook, IP...",Daily Giveaways MacBook IPhone Pro 500 PayPal ...,2022-11-17 03:58:40,False,681,0,0,0,[],Positive,0.644732,joy,0.967767,3,15.486293,-2.7462,681.0
40,1593094832872120324,RT @Elsa247635011: Black apple🍎🖤#bts #TAEHYUN ...,Black apple bts TAEHYUN btsedits apple taekook...,2022-11-17 04:13:01,True,105,0,0,0,"[bts, TAEHYUN, btsedits, apple, taekookfanart]",Neutral,0.837005,anger,0.633115,7,13.592708,-1.049175,105.0
44,1593094622724902912,RT @jacobincambodia: Captured this lightning s...,Captured this lightning storm during a quick s...,2022-11-17 04:12:10,False,68,0,0,0,"[iphone, cambodia]",Neutral,0.810941,fear,0.792343,7,14.906666,1.554596,68.0
85,1593089932196970496,RT @JosephWalmsley2: Sea Isle City Sunrise Pho...,Sea Isle City Sunrise Phone Case Available Her...,2022-11-17 03:53:32,False,57,0,0,0,"[BuyIntoArt, FallForArt, ShopEarly]",Neutral,0.883269,joy,0.916981,7,15.784168,-0.705563,57.0
50,1593093804940140544,RT @timd_ca: WIP: New UI for zooming around a ...,WIP New UI for zooming around a 250 million at...,2022-11-17 04:08:56,False,55,0,0,0,[],Neutral,0.723719,joy,0.809654,7,13.631082,-0.451409,55.0
1,1593098273359007745,RT @orfonline: 🚨 #COP27 PolicyPod: Is the worl...,COP27 PolicyPod Is the world climate disaster ...,2022-11-17 04:26:41,False,25,0,0,0,"[COP27, climate]",Neutral,0.737647,joy,0.875336,5,7.29154,2.613396,25.0
26,1593095574659956738,📢 @Circle $USDC has announced an integration o...,USDC has announced an integration of the Apple...,2022-11-17 04:15:57,False,0,20,3,0,"[ApplePay, Apple, iPhone, iPad]",Positive,0.639421,joy,0.972249,7,13.375722,0.875492,23.0
60,1593092223817879553,RT @EdwardFerguson_: Emergency SOS via satelli...,Emergency SOS via satellite is available today...,2022-11-17 04:02:39,False,20,0,0,0,[],Neutral,0.601582,fear,0.448802,5,7.313075,2.409809,20.0
45,1593094455183446018,RT @TechTravie: Letv Y1 Pro+ With an iPhone 13...,Letv Y1 Pro With an iPhone 13 Styled Design ju...,2022-11-17 04:11:31,False,18,0,0,0,[iphone13],Positive,0.569156,joy,0.996612,2,7.057625,-0.572772,18.0


### Summarize the Top 16 Tweets

In [6]:
tweets = list(top_16["clean_text"])
tweets_text = " ".join(tweets)

tweets_summary = summarization_pipeline(tweets_text)

tweets_summary

[{'summary_text': ' Daily Giveaways MacBook IPhone Pro 500 PayPal gift card and Cash Enter Here Winne Daily Giveaway includes a MacBook Pro 500 gift card . USDC has announced an integration of the ApplePay payment gateway . Emergency SOS via satellite is available today on the iPhone 14 lineup in the US and Canada .'}]

## Save as a New Dataset (with the 'rating' column)

In [7]:
RAW_DATA.to_json("test_data_with_sentiment_and_clusters_and_rating.json", orient='records')

## Save New JSON File with Topic Clusters
This is an example of the data sent to the front-end by the NLP Engine

In [8]:
with open('sample_api_response.json', 'r') as openfile:
    json_object = json.load(openfile)

In [9]:
# Combine the DataFrame and the topic clusters into one JSON file.
# convert dates to strings
RAW_DATA['created_at'] = RAW_DATA['created_at'].astype(str)

# Combine the DataFrame and the topic clusters into one JSON file.
json_data = {
    "tweets": RAW_DATA.to_dict(orient="records"),
    "topics": json_object['topics'],
    "summary": tweets_summary[0]['summary_text']
}

# Writing to sample.json
with open("sample_api_response.json", "w") as outfile:
    outfile.write(json.dumps(json_data))