In [1]:
import datetime as dt
import json
import random
import re
from urllib.parse import parse_qs, unquote, urlsplit

import requests
from bs4 import BeautifulSoup

from headers import headers_list

## Channel info

In [2]:
def get_channel_info(channel_id, timeout=10):
    """Scrape a channel's "About" page and return its metadata as a dict.

    The page is requested with a randomly chosen entry of ``headers_list``.
    The JSON assigned to ``var ytInitialData`` is pulled out of the page's
    ``<script>`` tags and searched for a ``channelAboutFullMetadataRenderer``.

    Args:
        channel_id: Channel identifier placed in ``/channel/{channel_id}/about``.
        timeout: Seconds to wait for the HTTP response (default 10). Without a
            timeout a stalled connection would block the notebook indefinitely.

    Returns:
        A dict with the keys ``id``, ``title``, ``description``, ``country``,
        ``url``, ``join_date``, ``view_count`` and ``links``; or ``None`` when
        the response status is not 200, the embedded JSON cannot be located or
        decoded, or no tab carries the About metadata.

    Raises:
        requests.RequestException: network errors and timeouts raised by
            ``requests.get`` are not caught here.
    """
    base_url = f'https://www.youtube.com/channel/{channel_id}/about'
    page = requests.get(base_url, headers=random.choice(headers_list), timeout=timeout)
    if page.status_code != 200:
        print(page, page.reason)
        return None

    soup = BeautifulSoup(page.content, 'html.parser')
    scripts = str(soup.find_all('script'))
    marker = 'var ytInitialData = '
    if marker not in scripts:
        # Without this guard split()[-1] would hand the whole page to json.loads.
        print(f'ytInitialData not found in {base_url}')
        return None
    json_text = scripts.split(marker)[-1].split(';</script>')[0]
    try:
        res = json.loads(json_text)
    except json.JSONDecodeError as err:
        print(f'Could not decode ytInitialData from {base_url}: {err}')
        return None

    try:
        tabs = res['contents']['twoColumnBrowseResultsRenderer']['tabs']
    except (LookupError, TypeError):
        print(f'Unexpected page layout for {base_url}')
        return None

    # Keep the last tab whose content resolves to the About metadata; tabs
    # without that nested structure are skipped instead of raising.
    content = None
    for con in tabs:
        try:
            content = (
                con['tabRenderer']['content']['sectionListRenderer']['contents'][0]
                ['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']
            )
        except (LookupError, TypeError):
            continue
    if content is None:
        return None

    return {
        'id': content.get('channelId'),
        'title': get_simple_text(content, 'title'),
        'description': get_simple_text(content, 'description'),
        'country': get_simple_text(content, 'country'),
        'url': content.get('canonicalChannelUrl'),
        'join_date': get_join_date(content),
        'view_count': get_view_count(content),
        'links': get_links(content)
    }


def get_simple_text(content, info):
    """Return ``content[info]['simpleText']``, or ``None`` when it is absent.

    Args:
        content: Mapping decoded from the page JSON.
        info: Key of the field to read (e.g. ``'title'``).

    Returns:
        The value stored under ``simpleText``, or ``None`` if either key is
        missing or an intermediate value cannot be subscripted.
    """
    try:
        return content[info]['simpleText']
    except (LookupError, TypeError):
        # Only "field not present / wrong shape" is treated as missing data;
        # any other error is left to surface instead of being swallowed.
        return None

def get_view_count(content):
    """Parse the integer view count out of ``content['viewCountText']``.

    The text is expected to look like ``'179,100,918 views'`` (or
    ``'1 view'``); thousands separators are removed before conversion.

    NOTE: this notebook defines ``get_view_count`` in two cells, so whichever
    cell ran last is the one in effect. This body is written to give the same
    results in either position.

    Args:
        content: Mapping decoded from the page JSON.

    Returns:
        The view count as ``int``, or ``None`` when the field is missing or
        its text does not start with ``<digits> view(s)``.
    """
    try:
        text = content['viewCountText']['simpleText']
        match = re.match(r'\s*([\d,]+)\s+views?', text)
        if match is None:
            return None
        return int(match.group(1).replace(',', ''))
    except (LookupError, TypeError, ValueError):
        # Missing keys, non-string text, or a digit group that is only commas.
        return None

def get_join_date(content):
    """Return the channel's join date as ``'YYYY-MM-DD'``, or ``None``.

    The date is read from the last run of ``content['joinedDateText']`` and
    parsed with each known layout in turn.

    NOTE(review): every recorded example in this notebook shows
    ``join_date: None``, which suggests the served text did not match the
    single ``'%d %b %Y'`` layout originally accepted. ``'%b %d, %Y'`` is added
    on the assumption that the page may render dates like ``'Aug 13, 2020'``;
    confirm against a live response.

    Args:
        content: Mapping decoded from the page JSON.

    Returns:
        ISO-formatted date string, or ``None`` when the field is missing or
        matches none of the known layouts.
    """
    try:
        join_date = content['joinedDateText']['runs'][-1]['text']
    except (LookupError, TypeError):
        return None

    for layout in ('%d %b %Y', '%b %d, %Y'):
        try:
            return dt.datetime.strptime(join_date, layout).strftime('%Y-%m-%d')
        except (TypeError, ValueError):
            # Wrong layout (or non-string text): try the next one.
            continue
    return None

def get_links(content):
    """Collect the channel's primary links as a ``{title: url}`` dict.

    Each entry's URL is read from ``navigationEndpoint.urlEndpoint.url``. When
    that URL carries a ``q`` query parameter, the decoded value of ``q`` is
    used as the link target; otherwise the percent-decoded URL itself is used.
    Parsing the query string (rather than splitting the text on ``'q='``)
    keeps targets intact when they contain ``q=`` themselves or when other
    parameters follow ``q``.

    Args:
        content: Mapping decoded from the page JSON. ``primaryLinks`` may be
            missing or empty.

    Returns:
        Dict mapping link title to target URL; empty when the channel lists
        no links. Entries lacking a title or URL are skipped.
    """
    links = {}
    for con in content.get('primaryLinks') or []:
        try:
            title = con['title']['simpleText']
            url = con['navigationEndpoint']['urlEndpoint']['url']
            query = parse_qs(urlsplit(url).query)
            links[title] = query['q'][0] if 'q' in query else unquote(url)
        except (LookupError, TypeError, ValueError, AttributeError):
            # Malformed entry: skip it, keep the remaining links.
            continue
    return links

## Channel videos

In [12]:
def get_channel_videos(channel_id, timeout=10):
    """Scrape the video grid of a channel's "Videos" page.

    The page is requested with ``view=0&sort=p&flow=grid`` and a randomly
    chosen entry of ``headers_list``. The JSON assigned to
    ``var ytInitialData`` is extracted and the first tab exposing a
    ``gridRenderer`` is used, rather than relying on a fixed tab position.

    Args:
        channel_id: Channel identifier placed in ``/channel/{channel_id}/videos``.
        timeout: Seconds to wait for the HTTP response (default 10). Without a
            timeout a stalled connection would block the notebook indefinitely.

    Returns:
        A list of dicts with the keys ``video_id``, ``title``,
        ``published_time``, ``view_count``, ``url`` and ``length`` (one per
        ``gridVideoRenderer`` item). An empty list when the page has no video
        grid. ``None`` when the response status is not 200 or the embedded
        JSON cannot be located or decoded.

    Raises:
        requests.RequestException: network errors and timeouts raised by
            ``requests.get`` are not caught here.
    """
    base_url = f'https://www.youtube.com/channel/{channel_id}/videos?view=0&sort=p&flow=grid'
    page = requests.get(base_url, headers=random.choice(headers_list), timeout=timeout)
    if page.status_code != 200:
        print(page, page.reason)
        return None

    soup = BeautifulSoup(page.content, 'html.parser')
    scripts = str(soup.find_all('script'))
    marker = 'var ytInitialData = '
    if marker not in scripts:
        # Without this guard split()[-1] would hand the whole page to json.loads.
        print(f'ytInitialData not found in {base_url}')
        return None
    json_text = scripts.split(marker)[-1].split(';</script>')[0]
    try:
        res = json.loads(json_text)
    except json.JSONDecodeError as err:
        print(f'Could not decode ytInitialData from {base_url}: {err}')
        return None

    try:
        tabs = res['contents']['twoColumnBrowseResultsRenderer']['tabs']
    except (LookupError, TypeError):
        print(f'Unexpected page layout for {base_url}')
        return None

    # Locate the grid by structure instead of assuming it sits in tabs[1].
    items = None
    for tab in tabs:
        try:
            items = (
                tab['tabRenderer']['content']['sectionListRenderer']['contents'][0]
                ['itemSectionRenderer']['contents'][0]['gridRenderer']['items']
            )
        except (LookupError, TypeError):
            continue
        break
    if items is None:
        print(f'No video grid found in {base_url}')
        return []

    videos = []
    for item in items:
        # Non-video entries (anything without a gridVideoRenderer) are skipped.
        if 'gridVideoRenderer' not in item:
            continue
        renderer = item['gridVideoRenderer']
        videos.append({
            'video_id': get_video_id(renderer),
            'title': get_title(renderer),
            'published_time': get_published_time(renderer),
            'view_count': get_view_count(renderer),
            'url': get_url(renderer),
            'length': get_length(renderer),
        })
    return videos


def get_video_id(content):
    """Return ``content['videoId']``, or ``None`` when it is not available.

    Args:
        content: A ``gridVideoRenderer`` mapping decoded from the page JSON.

    Returns:
        The stored video id, or ``None`` if the key is missing or ``content``
        cannot be subscripted.
    """
    try:
        return content['videoId']
    except (LookupError, TypeError):
        # Absent field only; unrelated errors are left to surface.
        return None

def get_title(content):
    """Return the video title built from ``content['title']['runs']``.

    The text of every run is concatenated, so a title split across several
    runs is returned whole rather than cut off after its first segment.

    Args:
        content: A ``gridVideoRenderer`` mapping decoded from the page JSON.

    Returns:
        The title string, or ``None`` when the field is missing, has no runs,
        or a run lacks ``text``.
    """
    try:
        runs = content['title']['runs']
        if not runs:
            return None
        return ''.join(run['text'] for run in runs)
    except (LookupError, TypeError):
        # Missing keys or an unexpected shape; unrelated errors still surface.
        return None

def get_published_time(content):
    """Return ``content['publishedTimeText']['simpleText']``, or ``None``.

    The value is passed through unchanged (the recorded examples show relative
    strings such as ``'1 month ago'``).

    Args:
        content: A ``gridVideoRenderer`` mapping decoded from the page JSON.

    Returns:
        The published-time text, or ``None`` if either key is missing or an
        intermediate value cannot be subscripted.
    """
    try:
        return content['publishedTimeText']['simpleText']
    except (LookupError, TypeError):
        # Absent field only; unrelated errors are left to surface.
        return None
    
def get_view_count(content):
    """Parse the integer view count out of ``content['viewCountText']``.

    The text is expected to look like ``'4,772,309 views'``. The singular
    form ``'1 view'`` is accepted as well; splitting on ``' views'`` used to
    leave it unparsed and return ``None``.

    NOTE: this notebook defines ``get_view_count`` in two cells, so whichever
    cell ran last is the one in effect for both the channel-info and the
    channel-videos scrapers.

    Args:
        content: Mapping decoded from the page JSON.

    Returns:
        The view count as ``int``, or ``None`` when the field is missing or
        its text does not start with ``<digits> view(s)``.
    """
    try:
        text = content['viewCountText']['simpleText']
        match = re.match(r'\s*([\d,]+)\s+views?', text)
        if match is None:
            return None
        return int(match.group(1).replace(',', ''))
    except (LookupError, TypeError, ValueError):
        # Missing keys, non-string text, or a digit group that is only commas.
        return None
    
def get_url(content):
    """Build the absolute URL of a video from its navigation endpoint.

    The path stored under
    ``navigationEndpoint.commandMetadata.webCommandMetadata.url`` is appended
    to ``https://www.youtube.com``.

    Args:
        content: A ``gridVideoRenderer`` mapping decoded from the page JSON.

    Returns:
        The absolute URL, or ``None`` when the path is missing or is not a
        string.
    """
    try:
        url = content['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
        return 'https://www.youtube.com' + url
    except (LookupError, TypeError):
        # Missing keys or a non-string path; unrelated errors still surface.
        return None

def get_length(content):
    """Return the time-status label shown on the video thumbnail.

    ``content['thumbnailOverlays']`` is scanned for the first overlay that
    contains a ``thumbnailOverlayTimeStatusRenderer``, so the label is found
    even when that overlay is not the first entry. The recorded examples show
    values such as ``'26:03'`` and ``'SHORTS'``.

    Args:
        content: A ``gridVideoRenderer`` mapping decoded from the page JSON.

    Returns:
        The label text, or ``None`` when no such overlay exists or it has no
        ``text.simpleText``.
    """
    try:
        for overlay in content['thumbnailOverlays']:
            if 'thumbnailOverlayTimeStatusRenderer' in overlay:
                return overlay['thumbnailOverlayTimeStatusRenderer']['text']['simpleText']
    except (LookupError, TypeError):
        # Missing keys or an unexpected shape; fall through to None.
        pass
    return None

## Examples

In [6]:
# Example: About-page metadata for one channel (a dict, or None on failure).
# NOTE(review): the recorded output below has 'join_date': None.
get_channel_info('UCBwmMxybNva6P_5VmxjzwqA')

{'id': 'UCBwmMxybNva6P_5VmxjzwqA',
 'title': 'Apna College',
 'description': 'Hey guys, welcome to this new channel - Apna College. Here Shradha Didi and I(your Aman Bhaiya) will help you in finding your right college, career options, soft skills and will also help you learn to code. \nChalo Phodte hain!\nFeel free to contact Shradha Didi for Seminars, Hackathons & Collaborations at the given email id below.\n\n\n',
 'country': None,
 'url': 'http://www.youtube.com/c/ApnaCollegeOfficial',
 'join_date': None,
 'view_count': 179100918,
 'links': {'Facebook': 'https://www.facebook.com/amandhattarwal',
  'Twitter': 'https://www.twitter.com/amandhattarwal',
  'Instagram': 'https://www.instagram.com/dhattarwalaman'}}

In [13]:
# Example: video grid of the same channel, requested with sort=p; the recorded
# output below is in descending view-count order.
get_channel_videos('UCBwmMxybNva6P_5VmxjzwqA')

[{'video_id': 'VVvVxMOrh14',
  'title': 'Ab India seekhega Coding ❤️',
  'published_time': '1 month ago',
  'view_count': 4772309,
  'url': 'https://www.youtube.com/shorts/VVvVxMOrh14',
  'length': 'SHORTS'},
 {'video_id': 'z9bZufPHFLU',
  'title': '1. Introduction to C++ | Data Structures and Algorithms | College Placement Course | Lecture 1',
  'published_time': '1 year ago',
  'view_count': 4532608,
  'url': 'https://www.youtube.com/watch?v=z9bZufPHFLU',
  'length': '26:03'},
 {'video_id': 'hIiz8Km2tpo',
  'title': '5 Tricks of Google Search',
  'published_time': '2 months ago',
  'view_count': 4233174,
  'url': 'https://www.youtube.com/shorts/hIiz8Km2tpo',
  'length': 'SHORTS'},
 {'video_id': '_N6j8HXPt2U',
  'title': '4 Years of Coding in 4 Minutes - A Short Movie',
  'published_time': '4 months ago',
  'view_count': 3823814,
  'url': 'https://www.youtube.com/watch?v=_N6j8HXPt2U',
  'length': '3:49'},
 {'video_id': 'k2oGoqxRLaw',
  'title': 'Watch this before buying Laptop | Best 

In [8]:
# Example: a channel whose About page includes a country and a single link.
get_channel_info('UCLpovxJVLBZrXJGCymB6LYw')

{'id': 'UCLpovxJVLBZrXJGCymB6LYw',
 'title': 'Namanh Kapur',
 'description': "Hi! 👋 Welcome to my channel — I'm a 23 year old, recent graduate working at a high growth startup! Follow along for insights into the tech industry, all things startup, and what to expect as a recent-grad remote software engineer. I also dabble in sketch-comedy and investing. Thanks for being here!\n \nMilestones:\n[2022]\nJan 05 -- 0 subs (Video #1)\nFeb 07 -- 100 subs\nFeb 25 -- 500 subs\nMar 22 -- 1,000 subs\nMar 30 -- 5,000 subs\nApr 01 -- 10,000 subs\nApr 23 -- 25,000 subs\n",
 'country': 'United States',
 'url': 'http://www.youtube.com/c/NamanhKapur',
 'join_date': None,
 'view_count': 2323475,
 'links': {'namanhkapur.com': 'namanhkapur.com'}}

In [14]:
# Example: video grid for the second channel (one dict per gridVideoRenderer item).
get_channel_videos('UCLpovxJVLBZrXJGCymB6LYw')

[{'video_id': 'k9WqpQp8VSU',
  'title': 'How I Would Learn To Code (If I Could Start Over)',
  'published_time': '1 month ago',
  'view_count': 1032465,
  'url': 'https://www.youtube.com/watch?v=k9WqpQp8VSU',
  'length': '13:43'},
 {'video_id': 'dHN_tlBEt2c',
  'title': "I'm Not The Best Programmer",
  'published_time': '2 months ago',
  'view_count': 552271,
  'url': 'https://www.youtube.com/watch?v=dHN_tlBEt2c',
  'length': '8:52'},
 {'video_id': 'TURmIFBcgVY',
  'title': 'A Day in the Life of a Software Engineer in San Francisco',
  'published_time': '2 months ago',
  'view_count': 170153,
  'url': 'https://www.youtube.com/watch?v=TURmIFBcgVY',
  'length': '5:55'},
 {'video_id': 'bu1blwFrk70',
  'title': 'A Day in the Life of a Software Engineer in New York City',
  'published_time': '2 months ago',
  'view_count': 163640,
  'url': 'https://www.youtube.com/watch?v=bu1blwFrk70',
  'length': '7:52'},
 {'video_id': 'PrS2e1HSP2U',
  'title': "10 Years of Coding: Everything I've Ever Lea

In [10]:
# Example: a channel with a long description (the recorded output is truncated).
get_channel_info('UCsvqVGtbbyHaMoevxPAq9Fg')

{'id': 'UCsvqVGtbbyHaMoevxPAq9Fg',
 'title': 'Simplilearn',
 'description': 'Simplilearn is the world’s #1 online Bootcamp and one of the world’s leading certification training providers. Based in San Francisco, California, and Bangalore, India, we provide training in areas where technologies and best practices are changing rapidly, and the demand for qualified candidates significantly exceeds the supply. We have trained over 3,000,000 professionals, have over 2000 qualified trainers on board, and offer over 400 courses with 40 plus global accreditations. With live instructions from leading experts, interactive labs & projects, peer-to-peer collaboration, on-demand lessons, and 24/7 learning support, we provide learners with a comprehensive curriculum at a fraction of the cost of an on-campus program. \n\nFor more information, visit https://www.simplilearn.com/\nOur FREE courses via SkillUp by Simplilearn now come with Completion Certificates! :https://www.simplilearn.com/skillup-free-

In [15]:
# Example: video grid for the third channel (the recorded output is truncated).
get_channel_videos('UCsvqVGtbbyHaMoevxPAq9Fg')

[{'video_id': 'VWJMlG_5Pdk',
  'title': 'Ambition #CannotBeLockedDown \u200b| Nikhil Got His Dream Job | And You? | Upskill Now With Simplilearn',
  'published_time': '8 months ago',
  'view_count': 18202449,
  'url': 'https://www.youtube.com/watch?v=VWJMlG_5Pdk',
  'length': '0:45'},
 {'video_id': 'qkdQGxpiymQ',
  'title': '#JobGuaranteed | Simplilearn Job Guarantee Programs | Upskill. Get A Job. Guaranteed | SabkoPataaHai',
  'published_time': '4 months ago',
  'view_count': 12488159,
  'url': 'https://www.youtube.com/watch?v=qkdQGxpiymQ',
  'length': '0:31'},
 {'video_id': '5nz9xepgVJo',
  'title': '#JobGuaranteed | Simplilearn Job Guarantee Programs | Upskill. Get A Job. Guaranteed | SabkoPataaHai',
  'published_time': '4 months ago',
  'view_count': 11831890,
  'url': 'https://www.youtube.com/watch?v=5nz9xepgVJo',
  'length': '0:31'},
 {'video_id': 'MSKZxL2D4Uo',
  'title': 'Simplilearn Job Guarantee Programs - Your Job Is Surely Guaranteed! 😎 | #GuaranteeKaSeason',
  'published_t