In [13]:
import pandas as pd
from tqdm import tqdm
import os
from src.sentiment_analysis import SentimentAnalysis
import time

In [14]:
# Load the video metadata (JSON Lines: one record per line) and key it by video id.
file_path = "data/climate_videos.jsonl"
df = pd.read_json(file_path, lines=True)
# set_index without inplace returns a new frame, so `df` keeps its original integer index.
data = df.set_index('display_id')
# Append .iloc[:3000] here to experiment on a small sample of titles.
titles = data['title']

In [15]:
file_name = 'data/video_title_sentiment_analysis.csv'

def save_new_title_sentiments(sentiment_res: pd.DataFrame, path: "str | None" = None) -> None:
    """Persist a batch of sentiment scores to the results CSV.

    The first batch creates the file (header + rows). Later batches are
    appended in place instead of re-reading and re-writing the whole file,
    so the cost of a call depends on the batch size only, and rows already
    on disk are never passed through a text -> float -> text round-trip.

    Parameters
    ----------
    sentiment_res : pd.DataFrame
        Rows to store; the index is written as the first CSV column.
    path : str, optional
        Destination CSV. Defaults to the module-level ``file_name``,
        looked up at call time.
    """
    target = file_name if path is None else path

    # A missing or zero-byte file has no header yet: write a complete CSV.
    if not os.path.isfile(target) or os.path.getsize(target) == 0:
        sentiment_res.to_csv(target)
        return

    # Nothing new to store; leave the existing file untouched.
    if sentiment_res.empty:
        return

    # Read only the header row so the new rows can be aligned by column name.
    existing_columns = pd.read_csv(target, index_col=0, nrows=0).columns
    if set(existing_columns) == set(sentiment_res.columns):
        # Same columns (possibly in another order): reorder and append without a header.
        sentiment_res[list(existing_columns)].to_csv(target, mode='a', header=False)
    else:
        # Column sets differ: fall back to a full merge so no column is lost
        # and values stay aligned by name.
        old_data = pd.read_csv(target, index_col=0)
        pd.concat([old_data, sentiment_res]).to_csv(target)

# Perform sentiment analysis on titles

In [16]:
sa = SentimentAnalysis()

batch_size = 1000

# Resume from whatever is already on disk instead of a hand-edited batch
# counter: only titles whose id is not yet in the results file are scored.
# A fresh run therefore covers every title, an interrupted run continues
# where it stopped, and a re-run after completion does nothing (no
# duplicated rows in the CSV).
if os.path.isfile(file_name) and os.path.getsize(file_name) > 0:
    # Only the id column is needed. Reading it as plain strings keeps ids
    # that look like numbers or NA markers from being converted.
    processed_ids = pd.read_csv(
        file_name, usecols=[0], dtype=str, keep_default_na=False
    ).iloc[:, 0]
    pending_titles = titles[~titles.index.isin(processed_ids)]
else:
    pending_titles = titles

for i in tqdm(range(0, len(pending_titles), batch_size), desc='Sentiment Analysis'):
    batch = pending_titles.iloc[i:i+batch_size]
    # Start from an empty frame with the score columns; the assignment below
    # fills it positionally and takes its index (the video ids) from the batch.
    sentiment_res = pd.DataFrame(columns=sa.sentiment_names)
    sentiment_res[sa.sentiment_names] = batch.apply(sa.get_scores).apply(pd.Series)
    # Persist after every batch so an interruption loses at most one batch.
    save_new_title_sentiments(sentiment_res)

# Leave an empty frame behind, as before, so no stale batch lingers in the kernel.
sentiment_res = pd.DataFrame(columns=sa.sentiment_names)

Sentiment Analysis: 100%|██████████| 80/80 [1:01:34<00:00, 46.18s/it]


# Analyze sentiment results

In [17]:
# Reload every saved score from disk; the first CSV column becomes the index.
sentiment_res = pd.read_csv(filepath_or_buffer=file_name, index_col=0)
sentiment_res.head()

Unnamed: 0_level_0,positive,neutral,negative
display_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
XPSYzLZ7xKU,0.697066,0.298655,0.004279
8riq9piAdiE,0.093576,0.765058,0.141366
yeVLjOTThEM,0.014018,0.151829,0.834153
-fNCUzVRcL4,0.0392,0.488308,0.472492
-3YhFNs1XNY,0.038703,0.599564,0.361734


# Sanity check

In [18]:
# For each sentiment, show the ten titles with the highest score for it.
for label in SentimentAnalysis.sentiment_names:
    ranked_scores = sentiment_res[label].sort_values(ascending=False)
    indexes = ranked_scores.head(10)
    # One print call, newline-separated: label, the titles, then blank lines as a separator.
    print(label, data.loc[indexes.index, 'title'], '\n', sep='\n')

positive
display_id
mhK6AbNowLA                  Exciting updates! + bonus love read
mPmW04toHIs    I'm SO Proud of the Climate Change Walkout Kid...
cMj3INm3kCc    Happy Birthday is free at last! Song belongs t...
TSGAXRk0XnM    We love the Kelp Forest at the Monterey Bay Aq...
7K7JlxPAGi8       WELCOME TO SEPTEMBER!  THANK YOU FILM PATRONS!
JDGgCLr3wYE          This is the greatest year in world history.
X-pNSLaSl0I             Things I'm Currently Loving!  FAVORITES!
hVEkgF7BBMg             The most beautiful day on earth 8-28-17.
m7uy7QBMS1k    This is an awesome daddy daughter moment durin...
qtNUkON082A    I love LA.  Los Angles new  Feed In Tariff.  Wow.
Name: title, dtype: object


neutral
display_id
oj6OU-TRVYE    Extreme Heat to Hit Phoenix Arizona on Friday ...
en93y_-55cA    US Congress woman Tulsi Gabbard interacts at G...
MbulPJYeKOQ    Delhi to conduct second round of odd-even from...
4qsfahbWqxw    KHOU 11 News Top Headlines at 5 a.m. Friday, A...
4EFiZLicMQ8    Alex an

In [19]:
# Attach each video's title to its scores; rows are matched on the index.
sentiment_res = sentiment_res.assign(title=data['title'])
sentiment_res

Unnamed: 0_level_0,positive,neutral,negative,title
display_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
XPSYzLZ7xKU,0.697066,0.298655,0.004279,Transforming Air Into Pure Drinking Water Is F...
8riq9piAdiE,0.093576,0.765058,0.141366,Could Injecting Carbon Into the Earth Save Our...
yeVLjOTThEM,0.014018,0.151829,0.834153,The Amazon Rainforest Doesn’t Produce 20% of O...
-fNCUzVRcL4,0.039200,0.488308,0.472492,"Glaciers Are Collapsing, Here's Why We Need a ..."
-3YhFNs1XNY,0.038703,0.599564,0.361734,Why Scientists Are Exploring Earth’s Dangerous...
...,...,...,...,...
66ltl9zJ4PM,0.032409,0.872051,0.095540,Shri Prakash Javadekar's reply on the Situatio...
KTDp4-1BErQ,0.037866,0.816757,0.145378,Shri Prakash Javadekar moves the Situation ari...
zyOT2R9TIVw,0.081865,0.894380,0.023755,"Metro extended to 10 cities, Waterways operati..."
ZgFdG7CA2Bs,0.065559,0.657118,0.277322,India's contribution to global warming has bee...
