In [1]:
import pandas as pd
from tqdm import tqdm
import os
from src.sentiment_analysis import SentimentAnalysis
import time

In [2]:
file_path = "data/climate_videos.jsonl"
df = pd.read_json(file_path, lines=True)
data = df.copy()
data.set_index('display_id', inplace=True)
titles = data['title'].iloc[:3000]

In [3]:
file_name = 'data/video_title_sentiment_analysis.csv'

def save_new_title_sentiments(sentiment_res: pd.DataFrame):
    if not os.path.isfile(file_name):
        sentiment_res.to_csv(file_name)
    else:
        old_data = pd.read_csv(file_name, index_col=0)
        new_data = pd.concat([old_data, sentiment_res])
        new_data.to_csv(file_name)

# Perform sentiment analysis on titles

In [4]:
sa = SentimentAnalysis()
sentiment_res = pd.DataFrame(columns=sa.sentiment_names)

batch_size = 1000
last_iter = 0
for i in tqdm(range(last_iter * batch_size, len(titles), batch_size), desc='Sentiment Analysis'):
    batch = titles.iloc[i:i+batch_size]
    sentiment_res[sa.sentiment_names] = batch.apply(sa.get_scores).apply(pd.Series)
    save_new_title_sentiments(sentiment_res)
    sentiment_res = pd.DataFrame(columns=sa.sentiment_names)    

Sentiment Analysis: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 3/3 [02:04<00:00, 41.43s/it]


# Analyze sentiment results

In [5]:
sentiment_res = pd.read_csv(file_name, index_col=0)
sentiment_res.head()

Unnamed: 0_level_0,positive,neutral,negative
display_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
XPSYzLZ7xKU,0.722696,0.152849,0.124455
8riq9piAdiE,0.488949,0.181014,0.330037
yeVLjOTThEM,0.285087,0.257673,0.45724
-fNCUzVRcL4,0.416952,0.178746,0.404303
-3YhFNs1XNY,0.230002,0.170134,0.599864


# Sanity check

In [8]:
for label in SentimentAnalysis.sentiment_names:
    indexes = sentiment_res[label].sort_values(ascending=False).head(10)
    print(label)
    print(data.loc[indexes.index]['title'])
    print('\n')

positive
display_id
pes8_eAAmpw    It's The Best Time In History To Be Alive and ...
Udm2Uz64G9Y                                    Thank you, Emily!
FCmVP-ionLo                  Jordan Peterson: I LOVE Rock Music!
V4ZCSEhMgbk        ROBLOX SURVIVING THE BIGGEST TORNADO EVER!! ðŸŒª
Udm2Uz64G9Y                                    Thank you, Emily!
pes8_eAAmpw    It's The Best Time In History To Be Alive and ...
bmKM2_mK8qQ     Pope Francis receives jubilant welcome in Maputo
giUuitZd_Ww    Bon Appetit Resident Evil 2 Remake Trophy / Ac...
Mb_-rhZzX74                3 Amazing Things To Do After You Die!
jXkGAerJFfg    Gotcha! Resident Evil 2 Remake Trophy / Achiev...
Name: title, dtype: object


neutral
display_id
vaPRpbyY-xk    Video Interview: Max Entman Interviews Joseph ...
Sn16VcrGktg          Tropical Storm Pabuk Hits Northern Thailand
1y9e-ArgLHc     The Banyan Tree Leadership Forum with Wayne Swan
sepkwzt4Ay8    Decking/Sheeting for Patio Roof Construction  ...
lVfo4daWff4    Wind

In [7]:
sentiment_res['title'] = data['title']
sentiment_res

Unnamed: 0_level_0,positive,neutral,negative,title
display_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
XPSYzLZ7xKU,0.722696,0.152849,0.124455,Transforming Air Into Pure Drinking Water Is F...
8riq9piAdiE,0.488949,0.181014,0.330037,Could Injecting Carbon Into the Earth Save Our...
yeVLjOTThEM,0.285087,0.257673,0.457240,The Amazon Rainforest Doesnâ€™t Produce 20% of O...
-fNCUzVRcL4,0.416952,0.178746,0.404303,"Glaciers Are Collapsing, Here's Why We Need a ..."
-3YhFNs1XNY,0.230002,0.170134,0.599864,Why Scientists Are Exploring Earthâ€™s Dangerous...
...,...,...,...,...
TtjcELcNH1E,0.065040,0.760044,0.174916,Accelerating Sea-Level Rise with Increase in E...
vvJ412PxHR0,0.021138,0.374240,0.604621,Arctic-Sea Ice Collapse: Greenland Vulnerabili...
CexQmFFtoTw,0.042206,0.629392,0.328403,Greenland Vulnerability to BOE and Acceleratin...
s4AczmNQByY,0.033923,0.498165,0.467912,Arctic Wildfires and Peat Fires Darken Arctic ...
