In [None]:
import requests as rq
from bs4 import BeautifulSoup as bs4
import re
import json
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
import csv

In [None]:
def get(url, timeout=30):
    """Download a page and return the JSON value assigned to ``ytInitialData``.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` gives up, so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    object or None
        The decoded JSON value found after ``var ytInitialData =`` in the
        first matching ``<script>`` tag, or ``None`` when the page has no
        such script or assignment.

    Raises
    ------
    json.JSONDecodeError
        If the text following the assignment is not valid JSON.
    """
    pg = rq.get(url, timeout=timeout).text
    pg_soup = bs4(pg, "lxml")

    script = pg_soup.find('script', string=re.compile(r'ytInitialData'))
    if script is None:
        return None

    assignment = re.search(r'var ytInitialData\s*=\s*', script.string)
    if assignment is None:
        return None

    # Let the JSON decoder find the end of the object itself. A regex such as
    # `(.*?);` stops at the first semicolon, which truncates the payload
    # whenever a string inside the JSON contains ';'.
    ytInitialData, _ = json.JSONDecoder().raw_decode(script.string, assignment.end())
    return ytInitialData

In [None]:
def find_video_ids(ytInitialData):
    """Collect the distinct ``videoId`` strings found anywhere in a nested structure.

    Parameters
    ----------
    ytInitialData : object
        Arbitrarily nested dicts and lists (any other type is treated as a
        leaf and contributes nothing, so ``None`` yields an empty list).

    Returns
    -------
    list of str
        Each distinct id once, in the order it is first encountered during
        a depth-first walk.
    """
    def recursive_search(data, key):
        # Depth-first walk yielding every value stored under `key`.
        if isinstance(data, dict):
            for k, v in data.items():
                if k == key:
                    yield v
                else:
                    yield from recursive_search(v, key)
        elif isinstance(data, list):
            for item in data:
                yield from recursive_search(item, key)

    # Keep only string ids: anything else cannot be concatenated into a watch
    # URL, and unhashable values (dict/list) would break de-duplication.
    string_ids = (
        value
        for value in recursive_search(ytInitialData, "videoId")
        if isinstance(value, str)
    )
    # dict.fromkeys removes duplicates while preserving first-seen order,
    # so the result is reproducible between runs (a set is not).
    video_ids = list(dict.fromkeys(string_ids))
    return video_ids

In [None]:
def fetch_metadata(url, itemprop_list, timeout=30):
    """Download a page and extract selected ``<meta itemprop=...>`` values.

    Parameters
    ----------
    url : str
        Page to download.
    itemprop_list : container of str
        ``itemprop`` names to keep.
    timeout : float, optional
        Seconds to wait for the server. Without it a single stalled request
        would block its worker thread forever.

    Returns
    -------
    dict
        Maps each requested ``itemprop`` that has a non-empty ``content``
        attribute to that content. If the same ``itemprop`` occurs more than
        once, the last occurrence wins. Any failure (network error, HTTP
        error status, parse error) is reported with ``print`` and results in
        an empty dict.
    """
    try:
        response = rq.get(url, timeout=timeout)
        # Treat HTTP error statuses as failures instead of parsing an error page.
        response.raise_for_status()
        pg_soup = bs4(response.text, "lxml")

        metadata = {}
        for tag in pg_soup.find_all('meta'):
            itemprop = tag.get('itemprop')
            content = tag.get('content')
            if itemprop in itemprop_list and content:
                metadata[itemprop] = content

        return metadata
    except Exception as e:
        # Deliberately best-effort: one bad video must not abort the batch.
        print(f"Failed to fetch metadata for {url}: {e}")
        return {}

In [None]:
def get_info(ids, itemprop_list):
    """Fetch metadata for every video id concurrently.

    Parameters
    ----------
    ids : iterable of str
        Video ids; each is appended to the YouTube watch URL prefix.
    itemprop_list : container of str
        Passed through unchanged to ``fetch_metadata`` for every video.

    Returns
    -------
    list
        One ``fetch_metadata`` result per id, in the same order as ``ids``.
    """
    watch_urls = ['https://www.youtube.com/watch?v=' + video_id for video_id in ids]

    def fetch_one(watch_url):
        # Bind the shared itemprop list so the pool can map over URLs alone.
        return fetch_metadata(watch_url, itemprop_list)

    with ThreadPoolExecutor() as pool:
        # Executor.map yields results in input order, not completion order.
        return list(pool.map(fetch_one, watch_urls))

In [None]:
# --- Configuration ---------------------------------------------------------
creator = '@#YOUTUBEUSERNAME'
itemprop_list = ['interactionCount', 'datePublished', 'name']
# Defined once so the file that is read and the file that is written can
# never drift apart.
output_file = '/content/drive/MyDrive/Tugas/ai/scrapper/youtube_video_metadata_male.xlsx'

# --- Scrape ----------------------------------------------------------------
result = get('https://www.youtube.com/'+creator+'/streams')
if result is None:
    print(f"No ytInitialData found for {creator}; no new videos will be added.")

# NOTE: despite its name, `urls` holds video ids; get_info builds the watch URLs.
urls = find_video_ids(result)
all_info = get_info(urls, itemprop_list)

# fetch_metadata returns {} for a video it could not fetch; skip those so they
# do not become blank rows in the workbook.
df = pd.DataFrame([info for info in all_info if info])
df['creator'] = creator

# --- Merge with previously saved data ----------------------------------------
try:
    old_df = pd.read_excel(output_file)
    df = pd.concat([old_df, df], ignore_index=True)
except FileNotFoundError:
    print("create new file...")

# --- Drop videos with zero views ---------------------------------------------
# The column is missing when nothing was scraped and no old file exists.
# Compare numerically so '0' (scraped text) and 0 (numeric cell) are both
# removed; rows whose count is missing are kept.
if 'interactionCount' in df.columns:
    view_counts = pd.to_numeric(df['interactionCount'], errors='coerce')
    df = df[view_counts != 0]

# Display the DataFrame
print(df)

# Save to Excel
df.to_excel(output_file, index=False)

print(f"Data has been saved to {output_file}")

create new file...
                                                 name interactionCount  \
0   【MINECRAFT】Build Basement + Exploring again! #...            42188   
1              【GAME NIGHT】MINGGU MALAM BARENG ZONA!!            65728   
2                                 【FREE TALK】OI OI OI            48670   
3                【FREE TALK】200K SUBSCRIBER THANKYOU💜            34372   
5                       【GTA V RP】TEORI BENTARAN #TNF            96352   
6    【MINECRAFT】ANOMALI SMP DIMULAI! #1 | #anomalismp           135443   
7            【WUTHERING WAVES】MARI KITA COBA WUWAAA!!            19921   
8    【MINECRAFT】Ngapain ya hari ini? #4 | #anomalismp            46822   
9        【MORNING TALK】BABLAS DIKIT GA NGARUH... hehe            23201   
10                     【VALORANT】PUSH RANK SAMPAI....            34493   
11        【MORTUARY ASSISTANT】KENAPA HARUS GAME INI..            66758   
12  【A DUSTY TRIP】SURVIVAL BARENG SOL.4CE DI TENGA...            58115   
13          【ROUTE 