In [1]:
import pandas as pd
import numpy as np

In [2]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from IPython.display import JSON

##  Step 1: Channel Meta Data and Playlist Id

- snippet -> title
- contentDetails -> Related Playlist -> Uploads
- statistics -> viewCount, subscriberCount, videoCount

In [3]:
api_key = "AIzaSyDeM3TTNamojpsEiPZ-yNmBSgyp8sZJB4E"
channel_id = "UCBauXqj4Ksf1Rhz7v8i7hlQ  "

In [4]:
youtube = build(
    serviceName='youtube',
    version='v3',
    developerKey=api_key)

### 1.1 Getting the JSON response

In [5]:
def get_channel_details(youtube, channel_id):
    request = youtube.channels().list(
        part="snippet, contentDetails, statistics",
        id=channel_id)
    response = request.execute()
    return JSON(response)

In [6]:
get_channel_details(youtube, channel_id)

<IPython.core.display.JSON object>

### 1.2 Getting the Playlist Id

In [7]:
def get_channel_details(youtube, channel_id):
    
    request = youtube.channels().list(
        part="snippet, contentDetails, statistics",
        id=channel_id)
    response = request.execute()
    
    items = response.get('items', [])

    if not items:
        return "Channel Not Found. Please Check your Channel ID."
        
    channel = items[0] 
    snippet = channel.get('snippet', {})
    content_details = channel.get("contentDetails", {})
    statistics = channel.get('statistics', {})

    channel_details = {
        'channel_title': snippet.get('title', None),
        'playlist_id': content_details.get('relatedPlaylists', {}).get('uploads', None),
        'total_views': statistics.get('viewCount', 0),
        'total_subscribers': statistics.get('subscriberCount', 0),
        'total_videos': statistics.get('videoCount', 0),
        }
    return pd.DataFrame([channel_details])

In [10]:
get_channel_details(youtube, channel_id)

Unnamed: 0,channel_title,playlist_id,total_views,total_subscribers,total_videos
0,vamshi kumar,UUBauXqj4Ksf1Rhz7v8i7hlQ,0,0,0


### 1.3 Adding all the Exceptions

In [11]:
def get_channel_details(youtube, channel_id):
    try:
        request = youtube.channels().list(
            part="snippet, contentDetails, statistics",
            id=channel_id)
        response = request.execute()
        
        items = response.get('items', [])
    
        if not items:
            return "Channel Not Found. Please Check your Channel ID."
            
        channel = items[0] 
        snippet = channel.get('snippet', {})
        content_details = channel.get("contentDetails", {})
        statistics = channel.get('statistics', {})
    
        channel_details = {
            'channel_title': snippet.get('title', None),
            'playlist_id': content_details.get('relatedPlaylists', {}).get('uploads', None),
            'total_views': statistics.get('viewCount', 0),
            'total_subscribers': statistics.get('subscriberCount', 0),
            'total_videos': statistics.get('videoCount', 0),
            }
        return pd.DataFrame([channel_details])
    except HttpError as e:
        if e.resp.status == 400:
            return "API Key not Found"
        else:
            return (f"Error getting channel details: {e}")

In [12]:
get_channel_details(youtube, channel_id)

Unnamed: 0,channel_title,playlist_id,total_views,total_subscribers,total_videos
0,vamshi kumar,UUBauXqj4Ksf1Rhz7v8i7hlQ,0,0,0


## Step 2: Retrieving Video id's using Playlist id

- Items -> contentDetails -> VideoId

In [13]:
channel_df = get_channel_details(youtube, channel_id)
playlist = channel_df.loc[0, 'playlist_id']
total_videos = channel_df.loc[0, 'total_videos']

### 2.1 Getting JSON Response

In [47]:
def fetch_video_ids(youtube, playlist_id):
    try:
        request = youtube.playlistItems().list(
            part='contentDetails',
            playlistId = playlist_id)
    
        response = request.execute()

        return JSON(response)
    except HttpError as e:
        if e.resp.status == 404:
            return "No Videos Available"
        else:
            return (f"Error getting channel details: {e}")

In [48]:
fetch_video_ids(youtube, playlist)

'No Videos Available'

### 2.2 Getting One Video Id

In [51]:
def fetch_video_ids(youtube, playlist_id):
    try:
        all_video_ids = []
        request = youtube.playlistItems().list(
            part='contentDetails',
            playlistId = playlist_id)
    
        response = request.execute()
    
        for item in response.get('items', []):
            video_id = item.get("contentDetails", None).get("videoId", None)
            all_video_ids.append(video_id)
            
        return all_video_ids
    except HttpError as e:
        if e.resp.status == 404:
            return "No Videos Available"
        else:
            return (f"Error getting channel details: {e}")

In [52]:
fetch_video_ids(youtube, playlist)

'No Videos Available'

### 2.3 Getting Max Video Ids

In [53]:
def fetch_video_ids(youtube, playlist_id):
    try:
        all_video_ids = []
        request = youtube.playlistItems().list(
            part='contentDetails',
            playlistId = playlist_id,
            maxResults=50
        )
    
        response = request.execute()
    
        for item in response.get('items', []):
            video_id = item.get("contentDetails", None).get("videoId", None)
            all_video_ids.append(video_id)
        
        return all_video_ids
    except HttpError as e:
        if e.resp.status == 404:
            return "No Videos Available"
        else:
            return (f"Error getting channel details: {e}")

In [54]:
len(fetch_video_ids(youtube, playlist))

19

### 2.4 Adding Page Token and While Loop

In [55]:
def fetch_video_ids(youtube, playlist_id):
    all_video_ids = []
    page_token = None

    while True:
        request = youtube.playlistItems().list(
            part='contentDetails',
            playlistId = playlist_id,
            maxResults=50,
            pageToken = page_token
        )

        response = request.execute()
        # print(page_token)
        
        for item in response.get('items', []):
            video_id = item.get("contentDetails", None).get("videoId", None)
            all_video_ids.append(video_id)

        page_token = response.get('nextPageToken')
        if not page_token:
            # print(page_token)
            break
        
    return all_video_ids

### 2.5 Adding all the Exceptions

In [56]:
def fetch_video_ids(youtube, playlist_id):
    all_video_ids = []
    page_token = None

    try:
        while True:
            request = youtube.playlistItems().list(
                part='contentDetails',
                playlistId = playlist_id,
                maxResults=50,
                pageToken = page_token
            )
    
            response = request.execute()
            items = response.get('items', [])
            
            for item in items:
                video_id = item.get("contentDetails", None).get("videoId", None)
                all_video_ids.append(video_id)
    
            page_token = response.get('nextPageToken')
            if not page_token:
                break
            
        return all_video_ids
            
    except HttpError as e:
        if e.resp.status == 404:
            return "No Videos Available"
        else:
            return (f"Error getting channel details: {e}")

In [57]:
video_ids = fetch_video_ids(youtube, playlist)
video_ids

'No Videos Available'

## Step 3: Fetch the Video Meta Data 

- snippet -> title, publishedAt, description, channelTitle, tags
- contentDetails -> duration
- statistics -> viewCount, likeCount, favouriteCount, commentCount

### 3.1 Getting JSON Response

In [58]:
def get_video_meta_data(youtube, video_ids):

    request = youtube.videos().list(
        part="snippet, contentDetails, statistics",
        id = ",".join(video_ids)
    )
    response = request.execute()
    return JSON(response)

In [59]:
get_video_meta_data(youtube, video_ids[0:50])

<IPython.core.display.JSON object>

### 3.2 Getting Video meta data for 50 items

In [60]:
def get_video_meta_data(youtube, video_ids):
    all_video_details= []

    request = youtube.videos().list(
        part="snippet, contentDetails, statistics",
        id = ",".join(video_ids[0:50])
    )
    response = request.execute()

    items = response.get('items', [])

    for item in items:
        snippet = item.get("snippet", {})
        content_details = item.get('contentDetails', {})
        statistics = item.get('statistics', {})

        video_details = {
            'video_title': snippet.get('title', None),
            'description': snippet.get('description', None),
            'published_at': snippet.get('publishedAt', None),
            'tags': snippet.get('tags', []),
            'channel_name': snippet.get('channelTitle', None),
            'video_duration': content_details.get('duration', None),
            'views': statistics.get('viewCount', 0),
            'likes': statistics.get('likeCount', 0),
            'favorites': statistics.get('favoriteCount', 0),
            'comments': statistics.get('commentCount', 0)
        }
        all_video_details.append(video_details)
        
    return pd.DataFrame(all_video_details)

In [61]:
df = get_video_meta_data(youtube, video_ids)

In [62]:
df.head()

### 3.3 Converting tags list to string

In [63]:
def get_video_meta_data(youtube, video_ids):
    all_video_details= []

    request = youtube.videos().list(
        part="snippet, contentDetails, statistics",
        id = ",".join(video_ids[0:50])
    )
    response = request.execute()

    items = response.get('items', [])

    for item in items:
        snippet = item.get("snippet", {})
        content_details = item.get('contentDetails', {})
        statistics = item.get('statistics', {})

        tag_list = snippet.get('tags', [])
        tags = ",".join(tag_list) if tag_list else None

        video_details = {
            'video_title': snippet.get('title', None),
            'description': snippet.get('description', None),
            'published_at': snippet.get('publishedAt', None),
            'tags': tags,
            'channel_name': snippet.get('channelTitle', None),
            'video_duration': content_details.get('duration', None),
            'views': statistics.get('viewCount', 0),
            'likes': statistics.get('likeCount', 0),
            'favorites': statistics.get('favoriteCount', 0),
            'comments': statistics.get('commentCount', 0)
        }
        all_video_details.append(video_details)
        
    return pd.DataFrame(all_video_details)

In [64]:
df = get_video_meta_data(youtube, video_ids)
df.head()

### 3.4 Getting all the video meta data

In [65]:
def get_video_meta_data(youtube, video_ids):
    all_video_details= []

    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet, contentDetails, statistics",
            id = ",".join(video_ids[i:i+50])
        )
        response = request.execute()
    
        items = response.get('items', [])
    
        for item in items:
            snippet = item.get("snippet", {})
            content_details = item.get('contentDetails', {})
            statistics = item.get('statistics', {})
    
            tag_list = snippet.get('tags', [])
            tags = ",".join(tag_list) if tag_list else None
    
            video_details = {
                'video_title': snippet.get('title', None),
                'description': snippet.get('description', None),
                'published_at': snippet.get('publishedAt', None),
                'tags': tags,
                'channel_name': snippet.get('channelTitle', None),
                'video_duration': content_details.get('duration', None),
                'views': statistics.get('viewCount', 0),
                'likes': statistics.get('likeCount', 0),
                'favorites': statistics.get('favoriteCount', 0),
                'comments': statistics.get('commentCount', 0)
            }
            all_video_details.append(video_details)
        
    return pd.DataFrame(all_video_details)

In [66]:
df = get_video_meta_data(youtube, video_ids)
df.sample(5)

ValueError: a must be greater than 0 unless no samples are taken

### 3.5 Adding all the Exceptions

In [67]:
def get_video_meta_data(youtube, video_ids):
    all_video_details= []

    try:
        for i in range(0, len(video_ids), 50):
            request = youtube.videos().list(
                part="snippet, contentDetails, statistics",
                id = ",".join(video_ids[i:i+50])
            )
            response = request.execute()
        
            items = response.get('items', [])
        
            for item in items:
                snippet = item.get("snippet", {})
                content_details = item.get('contentDetails', {})
                statistics = item.get('statistics', {})
        
                tag_list = snippet.get('tags', [])
                tags = ",".join(tag_list) if tag_list else None
        
                video_details = {
                    'video_title': snippet.get('title', None),
                    'description': snippet.get('description', None),
                    'published_at': snippet.get('publishedAt', None),
                    'tags': tags,
                    'channel_name': snippet.get('channelTitle', None),
                    'video_duration': content_details.get('duration', None),
                    'views': statistics.get('viewCount', 0),
                    'likes': statistics.get('likeCount', 0),
                    'favorites': statistics.get('favoriteCount', 0),
                    'comments': statistics.get('commentCount', 0)
                }
                all_video_details.append(video_details)
            
        return pd.DataFrame(all_video_details)

    except HttpError as e:
        print(f'Error getting video Details:{e}')

In [68]:
df = get_video_meta_data(youtube, video_ids)

In [69]:
df.head()