#### **Importing Libraries**

In [1]:
import os
import pandas as pd
import seaborn as sns

from googleapiclient.discovery import build

#### **Setting up YouTube API.**

In [2]:
api_service_name = "youtube"
api_version = "v3"

yt_api_key = os.environ["YT_API_KEY"]

### **I. Scraping Channel Statistics.**

#### Getting Channel ID's.

In [3]:
channel_ids = [
    "UCX6OQ3DkcsbYNE6H8uQQuVA",
    "UC59ZRYCHev_IqjUhremZ8Tg",
    "UCvgfXK4nTYKudb0rFR6noLA",
    "UCc0YbtMkRdhcqwhu3Oad-lw",
]

#### Building YouTube API Service.

In [4]:
youtube = build(api_service_name, api_version, developerKey=yt_api_key)  

#### Function to obtain channel statistics.

In [5]:
def get_channel_statistics(youtube, channel_ids):
    data = []

    request = youtube.channels().list(
        part="snippet,contentDetails,statistics", id=",".join(channel_ids)
    )

    response = request.execute()

    for i in range(len(response["items"])):
        info = dict(
            channel_name=response["items"][i]["snippet"]["title"],
            subscribers=response["items"][i]["statistics"]["subscriberCount"],
            videos=response["items"][i]["statistics"]["videoCount"],
            views=response["items"][i]["statistics"]["viewCount"],
            # --------
            playlist_id=response["items"][i]["contentDetails"]["relatedPlaylists"]['uploads'],
        )

        data.append(info)

    return data

In [6]:
channel_statistics = get_channel_statistics(youtube, channel_ids)

#### Create a dataframe. 

In [7]:
channel_df = pd.DataFrame(channel_statistics)
channel_df

Unnamed: 0,channel_name,subscribers,videos,views,playlist_id
0,Breaking Bad & Better Call Saul,746000,773,593154876,UUc0YbtMkRdhcqwhu3Oad-lw
1,Harry Mack,2640000,498,231600882,UU59ZRYCHev_IqjUhremZ8Tg
2,UFC,17500000,15926,7784942599,UUvgfXK4nTYKudb0rFR6noLA
3,MrBeast,233000000,774,41694966373,UUX6OQ3DkcsbYNE6H8uQQuVA


#### Change datatype from object to integer.

In [8]:
channel_df.dtypes

channel_name    object
subscribers     object
videos          object
views           object
playlist_id     object
dtype: object

In [9]:
exclude_cols = ['channel_name']

cols_to_include = [col for col in channel_df.columns if col not in exclude_cols]

In [10]:
channel_df[cols_to_include] = channel_df[cols_to_include].applymap(pd.to_numeric, errors='coerce')

In [11]:
channel_df.dtypes

channel_name     object
subscribers       int64
videos            int64
views             int64
playlist_id     float64
dtype: object

### **II. Scraping Video Details.**

#### Function to get video ID's

In [16]:
def get_video_ids(youtube, playlist_id):
    video_ids = []

    request = youtube.playlistItems().list(
        part="contentDetails", playlistId=playlist_id, maxResults=50
    )

    response = request.execute()

    for i in range(len(response['items'])):
        video_ids.append(response['items'][i]['contentDetails']['videoId'])

    next_page_token = response.get("nextPageToken")

    while next_page_token is not None:
        request = youtube.playlistItems().list(
            part="contentDetails",
            playlistId=playlist_id,
            maxResults=50,
            pageToken=next_page_token,
        )

        response = request.execute()

        for i in range(len(response['items'])):
            video_ids.append(response['items'][i]['contentDetails']['videoId'])

        next_page_token = response.get("nextPageToken")

    return video_ids

In [22]:
# Breaking Bad & Better call Saul.
playlist_id = "UUc0YbtMkRdhcqwhu3Oad-lw"

In [24]:
video_ids = get_video_ids(youtube, playlist_id)

In [25]:
len(video_ids)

774