In [1]:
!pip install --upgrade yt-dlp




[notice] A new release of pip is available: 23.0.1 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


# Libraaary

The objective here is to gather data about Mongolian youtube channels using the google api discovery package. The data im trying to gather is mostly numerical to make it easier for the app.

In [5]:
import googleapiclient.discovery
import isodate
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
api_service_name = 'youtube'
api_version = 'v3'
DEVELOPER_KEY = 'AIzaSyAiSIVYsl2XUoXseYxzqp6a0nl1UGO9WEE'

In [7]:
youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey = DEVELOPER_KEY)

# 1. Start 

The code below performs a search query for "mongolian content," retrieves the channel IDs from the search results

In [8]:
def get_channels(youtube):
   
    request = youtube.search().list(
        part='snippet', 
        q = 'mongolian content', 
        maxResults = 50,
        type = 'video',
        regionCode = 'MN')
    
    response = request.execute() 
    channel_ids = []
    
    for i in range(len(response['items'])):
        channel_ids.append(response['items'][i]['snippet']['channelId'])
        
    next_page_token = response.get('nextPageToken')
    more_pages = True
    
    while more_pages:
        if next_page_token is None:
            more_pages = False
        else:
            request = youtube.search().list(
                part='snippet', 
                q = 'mongolian content', 
                maxResults = 50,
                type = 'video',
                regionCode = 'MN',
                pageToken = next_page_token)
            response = request.execute()
    
            for i in range(len(response['items'])):
                channel_ids.append(response['items'][i]['snippet']['channelId'])
            
            next_page_token = response.get('nextPageToken')
    
    return channel_ids

In [9]:
channel_ids = get_channels(youtube)

In [10]:
len(channel_ids)

600

# 2. Getting Information

Fetches statistics and details for each channel ID

In [11]:
def get_channel_stats(youtube, channel_ids):
    
    all_data = []
    for i in range(0, len(channel_ids), 50):
        request = youtube.channels().list(
                    part='snippet,contentDetails,statistics',
                    id=','.join(channel_ids[i:i+50]))
        response = request.execute() 
    
        for i in range(len(response['items'])):
            data = dict(channel_name = response['items'][i]['snippet']['title'],
                        description = response['items'][i]['snippet']['description'],
                        subscribers = response['items'][i]['statistics']['subscriberCount'],
                        hidden_subs = response['items'][i]['statistics']['hiddenSubscriberCount'],
                        views = response['items'][i]['statistics']['viewCount'],
                        total_videos = response['items'][i]['statistics']['videoCount'],
                        playlist_id = response['items'][i]['contentDetails']['relatedPlaylists']['uploads']
                       )
            all_data.append(data)
    
    return all_data


In [12]:
channel_stats = get_channel_stats(youtube, channel_ids)

In [13]:
channel_data = pd.DataFrame(channel_stats)

In [14]:
channel_data.head()

Unnamed: 0,channel_name,description,subscribers,hidden_subs,views,total_videos,playlist_id
0,gokugym_sportsupplemets,Never give up!,3780,False,249900,80,UU3dDTSVkJvYXSAHr6ydBd5Q
1,Best Ever Food Review Show,I travel to unique parts of the world on the h...,10400000,False,2507745737,658,UUcAd5Np7fO8SeejB1FVKcYw
2,Yore History,Yore History covers historical documentaries i...,48000,False,5188163,83,UUyLWWkYE_7c6cDbzQWgaWYw
3,History Nerds,Who • What • When • Where • Why • Was • How\n\...,265,False,71221,29,UUJsigxjty89nYHWpvNfMwzw
4,Heart Sounds of Mongols,Hello there! We will introduce Mongolian Cultu...,174,False,37583,19,UUHOA9n3ET96qsKGVKQfg9NA


In [15]:
channel_data[['subscribers', 'views', 'total_videos']] = channel_data[['subscribers', 'views', 'total_videos']].apply(pd.to_numeric)

In [16]:
channel_data = channel_data.drop_duplicates()

In [17]:
channel_data  = channel_data.reset_index()

In [18]:
channel_data.to_csv('Chan.csv', index = False, encoding = 'utf-8')

# 3. Vids


The code below will get the video IDs from a specific playlist associated with a channel

In [19]:
def get_video_ids(youtube, playlist_id):
    
    request = youtube.playlistItems().list(
                part='contentDetails',
                playlistId = playlist_id,
                maxResults = 50)
    response = request.execute()
    
    video_ids = []
    
    for i in range(len(response['items'])):
        video_ids.append(response['items'][i]['contentDetails']['videoId'])
        
    next_page_token = response.get('nextPageToken')
    more_pages = True
    
    while more_pages:
        if next_page_token is None:
            more_pages = False
        else:
            request = youtube.playlistItems().list(
                        part='contentDetails',
                        playlistId = playlist_id,
                        maxResults = 50,
                        pageToken = next_page_token)
            response = request.execute()
    
            for i in range(len(response['items'])):
                video_ids.append(response['items'][i]['contentDetails']['videoId'])
            
            next_page_token = response.get('nextPageToken')
        
    return video_ids

# 4. Stats

Gets the statistics and information for each video. It specifically gets the video details like title, description, views, likes, comments from each gained API

In [20]:
def get_video_details(youtube, video_ids):
    
    all_video_stats = []
    
    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
                    part='snippet,id,contentDetails,statistics',
                    id=','.join(video_ids[i:i+50]))
        response = request.execute()
        
        for video in response['items']:
            video_stats = dict(channel_name = video['snippet']['channelTitle'],
                               title = video['snippet']['title'],
                               tags = video['snippet'].get('tags', 0),
                               description = video['snippet']['description'],
                               published_date = video['snippet']['publishedAt'],
                               id = video['id'],
                               duration = video['contentDetails']['duration'],
                               views = video['statistics'].get('viewCount'),
                               likes = video['statistics'].get('likeCount', 0),
                               dislike = video['statistics'].get('dislikeCount', 0),
                               favorites = video['statistics']['favoriteCount'], 
                               comments = video['statistics'].get('commentCount', 0),
                               definition = video['contentDetails']['definition'],
                               caption = video['contentDetails']['caption'],
                               )
            all_video_stats.append(video_stats)
    return all_video_stats

# 5.  Comments

This code gathers the top 10 comments from each vid. Didnt end up getting much use out of it.

In [21]:
def get_comment(youtube, video_ids):
    
    all_comments = []
    
    for video_id in video_ids:
        try:   
            request = youtube.commentThreads().list(
                part="snippet,replies",
                videoId=video_id
            )
            response = request.execute()
        
            comments_in_video = [comment['snippet']['topLevelComment']['snippet']['textOriginal'] for comment in response['items'][0:10]]
            comments_in_video_info = {'video_id': video_id, 'comments': comments_in_video}

            all_comments.append(comments_in_video_info)
            
        except: 
            print('Could not get comments for video ' + video_id)
        
        
    return all_comments  

# 6. Gather into data Frames

In [22]:
video_df = pd.DataFrame()
comments_df = pd.DataFrame()

for c in channel_data['channel_name'].unique():
    print("Getting video information from channel: " + c)
    playlist_id = channel_data.loc[channel_data['channel_name']== c, 'playlist_id'].iloc[0]
    video_ids = get_video_ids(youtube, playlist_id)
    video_data = get_video_details(youtube, video_ids)
    video_df = video_df.append(video_data, ignore_index=True)

Getting video information from channel: gokugym_sportsupplemets


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Best Ever Food Review Show


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Yore History


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: History Nerds


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Heart Sounds of Mongols


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: History of Everything Podcast


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: WORLDSTARHIPHOP


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: bayaraa video room


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Boomerang Mongolia


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: KOCOWA TV


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Pack a Bag


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Real Mongolia


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Zolboo TV


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Tugo OneSixteen


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: CrashCourse


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: The Life Guide


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: More Best Ever Food Review Show


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Tasting History with Max Miller


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: VADDA MAGAZINE


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Food Network


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Kings and Generals


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Great Big Story


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Mongolian channel


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: gus1thego


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Visual Data


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Odbayar Dēmos


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Francois Marc


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: 2B entertainment


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: JuLingo


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: K-contents Voyage


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Joy


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: National Geographic


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Marion's Kitchen


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Relaxation Film


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Hachapuri


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Toonot Records


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Luke Martin


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: VolumePlusTV


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: SLICE


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: ARTGER


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: MrBrynnorth


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Tumee


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Mongolian Funny videos


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Suibhne


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: FoodJunkies


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: kathe kuni


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Mongolian Nomad Family


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Fe Doro


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: GoPhrazy


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Gangster Gang


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: THEREALGSNEWS


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: iMRewth


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: School of Culture, History & Language ANU


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: HB Kennel


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Dilz Kitchen


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Jack, Bo & Dad


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Radio Free Asia


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Chronicles & Classics


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Dr.Kayforkids


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: CCTV Video News Agency


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Praba & Sujee's Kitchen


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Mongolian Language \ Nomiin Ger School


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Center. Yummy


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Mongol Hel


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Julia


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Sxcoal


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: MyIndie Productions


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Daily Motivate Clip


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: MNB WORLD


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: WiseSaying


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: China Icons


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: US Military Moments 4


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Canadian reviewer


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Premier Sports Network


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: NOMAAD TRAVEL


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Caters Video


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: JRE-Shorts


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Wisdom verse


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: CONTENT DELETED


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Matt Reynecke 


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: The Past Unboxed


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Metafood


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Cook With Leo


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Inertia Tv


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Wise Traditions


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: The Warrior's Chronicles


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Pfft Khaganate


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: JeromeTronic


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: think about it...


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: freedomsherald


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Travelling Welshman


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: FoodExpert


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Nomadic Mongolian


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Coffeehouse Crime


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Master Chef


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Digg


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: War Fighters


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Mr.Foodie


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: SANETER STUDIOS


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Armament Facts


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: The Content...!!!


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Wise Quotes


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Nomadic Seekers


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: StoryTrender


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Baagii Official


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Daily dose of clever


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Global Defense


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: ClipContent


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: SHU GAMEPLAY


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Ariunaa


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Dub Soldier


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: NoodlesandPasta


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: GOBI Cashmere


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Chocolate_ Emojii


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Ihsan Dincer


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Tamara Kaye


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: MongolPeace


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: MizzimaTV


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Misschic ASMR


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: BIG FIRE


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Meals with Moses


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Pilps


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Enkherdene D


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: MGL123


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Noba's Cast


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: BazukaRage


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Fact Thinker


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: The Ultimate HipHop Trap Music


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Thusonit HEALTH & FITNESS Guide


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Podtastic Entertainment


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: 5 Minute Eats


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Mythical Sounds


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: US Military Moments 6


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Quotes & Motivation


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: The Classical Mongolian Script


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: S N Films


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: 3 Musketeers


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: shawneydepp


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: GUTOM


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Alquatica


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Captain Max - inside aviation


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Momma Prime


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Danjori


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Kamerad Anthem


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: HTOFU Research and Production


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Success Documentary


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Amateur Hour With Chef Tyler


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Urban Dictionary


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: passion travel


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Marks Home Kitchen


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Made With Lau


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: grubwitus


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: iRaap OnPoint


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Nutrition in Food


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Simple Wild Living


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Zoulen Sanaa


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: The Strangerous


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: TAPA: Trinity Academy for the Performing Arts


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Uuganaa from mongolia


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: The US Quotes


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: ALL IN ONE ROSHAN MAGAR


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Quotesfy


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Rare Facts World


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Yo. Tuul


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Orgil B


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Dinner By Dennis


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Hawladar 5


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Dota Clips


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: BalkothWarcraft


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Munchies & More


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: BeastyqtSC2


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: HellsenStrat


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Success Quotation & Motivation


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: CJ Eats


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Video Game Content Archive


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Just Quotes


  video_df = video_df.append(video_data, ignore_index=True)


Getting video information from channel: Caters Clips


HttpError: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/playlistItems?part=contentDetails&playlistId=UUcR4cyQyAF-guvk7Wm5Lbqg&maxResults=50&pageToken=EAAaJVBUOkNOSWZJaEEyUVRNek9FSkNRamMzTWtKQ09USkdLQUZRQVE&key=AIzaSyCpe4xFpaHG9PAf1NgMuf25sixxMz9BL38&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">

In [23]:
video_df.to_csv('video_details1.csv', index = False, encoding = 'utf-8')

In [24]:
comments_df.to_csv('comments_data1.csv', index = False, encoding = 'utf-8')

In [25]:
len(video_df)

177556

In [26]:
video_df.head()

Unnamed: 0,channel_name,title,tags,description,published_date,id,duration,views,likes,dislike,favorites,comments,definition,caption
0,gokugym_sportsupplemets,#Part6 Мэргэжлээ буруу сонгосон Дунгаамаа багш...,0,,2022-11-12T05:12:40Z,W448Mui9I24,PT1M1S,397,15,0,0,0,hd,False
1,gokugym_sportsupplemets,Цээжний дээд хэсэг хөгжүүлдэг дасгал налуу дэ...,0,Та нарт иймэрхүү бичлэг таалагдаж байвал #Like...,2022-11-09T14:18:04Z,zDvJSqDi6f8,PT2M20S,3607,79,0,0,4,hd,False
2,gokugym_sportsupplemets,Gym-н андууд сэтгэлээсээ байдгымаа 😎😂 #gokugy...,0,,2022-11-05T13:37:56Z,Jqjf1EMBq0U,PT34S,458,23,0,0,2,hd,False
3,gokugym_sportsupplemets,#Part5 Мэргэжлээ буруу сонгосон Дунгаамаа багш...,0,,2022-11-04T12:10:57Z,CYJpv1Ak4pE,PT46S,549,21,0,0,0,hd,False
4,gokugym_sportsupplemets,3 listopada 2022,0,,2022-11-03T11:44:25Z,Y7W4m99SwEg,PT11S,505,9,0,0,0,hd,False


In [27]:
video_df['published_date'] = pd.to_datetime(video_df['published_date']).dt.tz_localize(None)

In [28]:
video_df[['views', 'likes', 'favorites', 'comments']] = video_df[['views', 'likes', 'favorites', 'comments']].apply(pd.to_numeric)

In [29]:
video_df['engagement'] = video_df['likes'] + video_df['comments']

In [30]:
video_df['month'] = pd.to_datetime(video_df['published_date']).dt.strftime('%b')

In [31]:
total_seconds = [] 
for i in video_df['duration']:
    dur = isodate.parse_duration(i)
    total_seconds.append(dur.total_seconds())

In [32]:
video_df['total_seconds'] = total_seconds

In [33]:
video_df['total_seconds'] = video_df['total_seconds'].astype('int64')

In [34]:
video_df.head()

Unnamed: 0,channel_name,title,tags,description,published_date,id,duration,views,likes,dislike,favorites,comments,definition,caption,engagement,month,total_seconds
0,gokugym_sportsupplemets,#Part6 Мэргэжлээ буруу сонгосон Дунгаамаа багш...,0,,2022-11-12 05:12:40,W448Mui9I24,PT1M1S,397.0,15,0,0,0,hd,False,15,Nov,61
1,gokugym_sportsupplemets,Цээжний дээд хэсэг хөгжүүлдэг дасгал налуу дэ...,0,Та нарт иймэрхүү бичлэг таалагдаж байвал #Like...,2022-11-09 14:18:04,zDvJSqDi6f8,PT2M20S,3607.0,79,0,0,4,hd,False,83,Nov,140
2,gokugym_sportsupplemets,Gym-н андууд сэтгэлээсээ байдгымаа 😎😂 #gokugy...,0,,2022-11-05 13:37:56,Jqjf1EMBq0U,PT34S,458.0,23,0,0,2,hd,False,25,Nov,34
3,gokugym_sportsupplemets,#Part5 Мэргэжлээ буруу сонгосон Дунгаамаа багш...,0,,2022-11-04 12:10:57,CYJpv1Ak4pE,PT46S,549.0,21,0,0,0,hd,False,21,Nov,46
4,gokugym_sportsupplemets,3 listopada 2022,0,,2022-11-03 11:44:25,Y7W4m99SwEg,PT11S,505.0,9,0,0,0,hd,False,9,Nov,11


In [35]:
video_df.to_csv('Vid Stat.csv', index = False, encoding = 'utf-8')

# * This was my initial attempt at gathering

Ended with my pc black screening.

In [3]:
@lru_cache(None)
def get_channel_info(channel_id: str) -> dict:

    channel_tab_url = f'https://www.youtube.com/{channel_id}/videos'

    ydl_opts = {}
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(channel_tab_url, download=False)
        data = ydl.sanitize_info(info)
    
    def get_total(key: str) -> int:
        return sum(
            x[key]
            for x in data["entries"]
            if key in x and x[key] is not None)
    
    def get_number_occurances(key: str) -> int:
        return sum(
            int(len(x[key]) > 0)
            for x in data["entries"]
            if key in x and x[key] is not None)
        
    return {
        "id": data["id"],
        "channel_url": data["channel_url"],
        "title": data["title"],
        "number_of_videos": len(data["entries"]),
        "total_views": get_total("view_count"),
        "total_likes": get_total("like_count"),
        "total_comments": get_total("comment_count"),
    }

In [4]:
channels = [
    "@TheHU",
    "@Tushig",
    "@ARTGER",
    "@RedBurger",
    "@Maamuu",
    "@EdutainmentTV",
    "@PositiveMongolians",
    "@YZEntertainment",
    "@TsustSuvag",
    "@AM-C",
    "@TheWasabies",
    "@Mendy&Melison",
    "@BoomerangMongolia",
    "@KinoZadlan",
    "@ERDENE",
    "@MANDUULERO",
    "@NTVOfficialChannel",
    "@MunhZul",
    "@RubysChannel",
    "@ApoxTV",
    "@Pacrap",
    "@HanuComedian",
    "@FunTimeKiller",
    "@KhulanNemekhbayar",
    "@YourAshe",
    "@TestwithSanjaa",
    "@TechNomadGaming",
    "@TV5Mongolia",
    "@REAL",
    "@Newsac",
    "@MOLBOYZofficial",
    "@BERXEE",
    "@2016ENTERTAINMENT",
    "@ЧИМЭГЛЭН",
    "@Unitel",
    "@Lumino",
    "@Goodmom",
    "@MongolianNationalBroadcaster",
    "@TsatsralDalaijargal",
    "@HYPCHANNEL",
    "@BUIKASHITV",
    "@COLIZEUMMongolia",
    "@TOMtolgoit",
    "@NAAGII",
    "@XMF-XyypMusicFestival",
    "@BeSTstudioofficial",
    "@Munhluu",
    "@GanaasChannel",
    "@BayarmagnaiMask",
    "@princensadecozinha",
]

In [5]:
info_list = []
for channel_id in channels:
    info_list.append(get_channel_info(channel_id))
info_df = pd.DataFrame(info_list)

[youtube:tab] Extracting URL: https://www.youtube.com/@TheHU/videos
[youtube:tab] @TheHU/videos: Downloading webpage
[download] Downloading playlist: Christopher Hu - Videos
[youtube:tab] Playlist Christopher Hu - Videos: Downloading 18 items of 18
[download] Downloading item 1 of 18
[youtube] Extracting URL: https://www.youtube.com/watch?v=KLEKCdP51PE
[youtube] KLEKCdP51PE: Downloading webpage
[youtube] KLEKCdP51PE: Downloading ios player API JSON
[youtube] KLEKCdP51PE: Downloading android player API JSON
[youtube] KLEKCdP51PE: Downloading m3u8 information
[download] Downloading item 2 of 18
[youtube] Extracting URL: https://www.youtube.com/watch?v=OQeh4Qvvd2U
[youtube] OQeh4Qvvd2U: Downloading webpage
[youtube] OQeh4Qvvd2U: Downloading ios player API JSON
[youtube] OQeh4Qvvd2U: Downloading android player API JSON
[youtube] OQeh4Qvvd2U: Downloading m3u8 information
[download] Downloading item 3 of 18
[youtube] Extracting URL: https://www.youtube.com/watch?v=nCqP8CN9X8s
[youtube] nCqP8

ERROR: [youtube:tab] @Tushig: This channel does not have a videos tab


DownloadError: ERROR: [youtube:tab] @Tushig: This channel does not have a videos tab

In [None]:
info_df["total_duration_hour"] = info_df.total_duration_sec / 3600
info_df["views_per_video"] = info_df.total_views / info_df.number_of_videos

info_df.title = info_df.title.str.replace("- Videos", "")

In [None]:
print("Total: ~{} Hours".format(int(info_df.total_duration_sec.sum() / 60 / 60)))
print("Today's date:", date.today())

_show_cols = ["title", "number_of_videos", "total_duration_hour", "total_views", "total_likes", "total_comments", "views_per_video"]

info_df.sort_values("views_per_video", ascending=False)[_show_cols].style.format({
    "total_duration_hour": "{:.1f}",
    "total_views": "{:,d}",
    "total_likes": "{:,d}",
    "total_comments": "{:,d}",
    "views_per_video": "{:,.1f}",
}).hide_index()

In [None]:
info_list = []
for channel_id in channels:
    info_list.append(get_channel_info(channel_id))

info_df = pd.DataFrame(info_list)
info_df.to_csv('youtube_channel_info.csv', index=False)