In [2]:
import pandas as pd
from tqdm import tqdm

from src.models.llm_call_helpers import *

# Load data

Load the videos of interest = videos around declines for each channel. Each video comes with its channel, upload week, title and description.

In [3]:
videos = pd.read_csv('data/bb_videos_title_around_declines1.csv', index_col=(0))

videos

Unnamed: 0_level_0,channel_id,week,title
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12471,UCzVw9odnihM5PgKSv5UnDPA,247,Most Funny Babies and Kids Playing in Water - ...
12472,UCzVw9odnihM5PgKSv5UnDPA,246,Kids and Babies Trying to do Exercises Funny...
12473,UCzVw9odnihM5PgKSv5UnDPA,246,Cute and Adorable Moments of kids meeting newb...
12474,UCzVw9odnihM5PgKSv5UnDPA,245,Cute Baby and His Daughter playing and Laughing
12475,UCzVw9odnihM5PgKSv5UnDPA,244,Kids and Babies Meeting Animals in Village and...
...,...,...,...
72787994,UCs06q9pyRn8d8xy0NgzKPrA,216,Tay K Allegedly Started a new gang in Jail cal...
72787995,UCs06q9pyRn8d8xy0NgzKPrA,216,YNW Melly being investigated for a THIRD Murde...
72787996,UCs06q9pyRn8d8xy0NgzKPrA,215,Face To Face: Lil Reese x DJ Akademiks: Talks ...
72901188,UCrwmu-gceGOmtZeuTsn7DlQ,193,The Black Eyed Peas - BIG LOVE


# Apply LLM functions

For each video, apply the LLM functions to extract the following features:
- whether the video is an apology video
- whether the video addresses the decline
- whether the video announces a comeback
- whether the video announces a break
- whether the video is a collaboration video
- whether the video is a clickbait video

In [3]:
# apply the LLM model to the videos
tqdm.pandas()

videos['featuring'] = videos['title'].progress_apply(lambda x: featuring_another_creator(x))

100%|██████████| 107617/107617 [8:55:53<00:00,  3.35it/s]  


In [4]:
videos

Unnamed: 0,channel_id,week,title,featuring
0,UCzVw9odnihM5PgKSv5UnDPA,247,Most Funny Babies and Kids Playing in Water - ...,False
1,UCzVw9odnihM5PgKSv5UnDPA,246,Kids and Babies Trying to do Exercises Funny...,False
2,UCzVw9odnihM5PgKSv5UnDPA,246,Cute and Adorable Moments of kids meeting newb...,False
3,UCzVw9odnihM5PgKSv5UnDPA,245,Cute Baby and His Daughter playing and Laughing,False
4,UCzVw9odnihM5PgKSv5UnDPA,244,Kids and Babies Meeting Animals in Village and...,False
...,...,...,...,...
107612,UCs06q9pyRn8d8xy0NgzKPrA,216,Tay K Allegedly Started a new gang in Jail cal...,False
107613,UCs06q9pyRn8d8xy0NgzKPrA,216,YNW Melly being investigated for a THIRD Murde...,False
107614,UCs06q9pyRn8d8xy0NgzKPrA,215,Face To Face: Lil Reese x DJ Akademiks: Talks ...,True
107615,UCrwmu-gceGOmtZeuTsn7DlQ,193,The Black Eyed Peas - BIG LOVE,False


In [5]:
videos['featuring'].value_counts()

featuring
False    103463
True       4150
I             1
I'm           1
Not           1
To            1
Name: count, dtype: int64

In [6]:
# print the index of the videos that are apologies
apologies = videos[videos['featuring'] == 'True']
apologies

Unnamed: 0,channel_id,week,title,featuring
132,UCzUV5283-l5c0oKRtyenj6Q,236,Jim Acosta Meets Mark Dice (Again),True
217,UCzT17-Lvc5L_gIT10JQsjSA,222,Buying YouTuber Merch and DIYing It With LaurD...,True
218,UCzT17-Lvc5L_gIT10JQsjSA,222,Doing Alisha Marie's Makeup for Coachella!,True
220,UCzT17-Lvc5L_gIT10JQsjSA,221,Adelaine x Tarte Reveal,True
230,UCzT17-Lvc5L_gIT10JQsjSA,162,I Copied CloeCouture's Instagram for a Week,True
...,...,...,...,...
107521,UCs11Yv5OnpY_b3Bff5ZJ9kw,191,BOYFRIEND TAG! FT Jordan Beau | Jordyn Jones,True
107526,UCs11Yv5OnpY_b3Bff5ZJ9kw,188,NEVER HAVE I EVER FT JORDAN BEAU | Jordyn Jones,True
107534,UCs11Yv5OnpY_b3Bff5ZJ9kw,178,Jordyn Jones l New by Daya l BEHIND THE SCENES,True
107537,UCs11Yv5OnpY_b3Bff5ZJ9kw,173,No Tears Left to Cry by Ariana Grande l Cover ...,True


In [7]:
# Basic stats on the featuring
print('Number of featuring:', len(apologies))
print('Number of videos:', len(videos))
print('Percentage of featuring:', len(apologies) / len(videos))

Number of featuring: 4150
Number of videos: 107617
Percentage of featuring: 0.03856268061737458


In [None]:
videos.to_csv('data/bb_videos_featuring.csv')