In [1]:
import os

import pandas as pd
from googleapiclient.discovery import build
# from google_auth_oauthlib.flow import InstalledAppFlow

In [2]:
# Initialize the YouTube Data API client.
# The API key is read from the YOUTUBE_API_KEY environment variable so the
# secret never lives in the notebook or its version-control history.
# NOTE(review): a key that was previously committed in this cell should be
# treated as leaked and revoked/rotated in the Google Cloud console.
api_service_name = "youtube"
api_version = "v3"
API_KEY = os.environ.get("YOUTUBE_API_KEY")
if not API_KEY:
    # Fail early with an actionable message instead of an opaque API error later.
    raise RuntimeError(
        "YOUTUBE_API_KEY is not set; export your YouTube Data API key "
        "before running this notebook."
    )
youtube = build(api_service_name, api_version, developerKey=API_KEY)

In [3]:
# Channel whose data the rest of the notebook retrieves (Jessi's channel).
channel_id = "UCN2bQLTTvNPZWCWU5TYghKA"

## Get Basic Info about Jessi's Channel

In [4]:
def get_channel_basic_info(channel_id):
    """Fetch headline metadata and statistics for one YouTube channel.

    Calls ``channels.list`` (parts ``snippet`` and ``statistics``) through the
    module-level ``youtube`` client.

    Args:
        channel_id: ID of the channel to look up.

    Returns:
        A single-column ``pandas.DataFrame`` (column label ``0``) indexed by
        field name: "Channel ID", "Title", "Description", "Published At",
        "View Count", "Subscriber Count" and "Video Count". The three counts
        are converted to ``int``; a count missing from the response is
        reported as 0.

    Raises:
        ValueError: If the response holds no item for ``channel_id``.
    """
    # Call the channels.list method to retrieve information about the specified channel
    request = youtube.channels().list(
        part="snippet,statistics",
        id=channel_id
    )
    response = request.execute()

    # Guard against a response with a missing or empty "items" list so the
    # caller gets a descriptive error rather than a bare KeyError/IndexError.
    items = response.get("items")
    if not items:
        raise ValueError(f"No YouTube channel found for channel ID {channel_id!r}")

    # Extract relevant information from the API response
    channel_info = items[0]
    snippet = channel_info['snippet']
    statistics = channel_info['statistics']

    # Store the information in a dictionary
    channel_data = {
        "Channel ID": channel_id,
        "Title": snippet['title'],
        "Description": snippet['description'],
        "Published At": snippet['publishedAt'],
        "View Count": int(statistics.get('viewCount', 0)),
        "Subscriber Count": int(statistics.get('subscriberCount', 0)),
        "Video Count": int(statistics.get('videoCount', 0))
    }

    # One record -> one row; transpose so each field becomes a labelled row
    channel_df = pd.DataFrame([channel_data]).transpose()

    return channel_df

In [5]:
# Look up the configured channel and print the resulting summary table.
channel_info_df = get_channel_basic_info(channel_id=channel_id)
print(channel_info_df)

                                         0
Channel ID        UCN2bQLTTvNPZWCWU5TYghKA
Title                                Jessi
Description                               
Published At          2019-08-16T07:17:09Z
View Count                       883656367
Subscriber Count                   4110000
Video Count                             96


In [6]:
# Pull the subscriber total out of the summary table (row label, column 0).
subscriber_count = channel_info_df.at["Subscriber Count", 0]

## Get View and Like Counts for Each Video

In [7]:
# Function to fetch video statistics
def get_video_stats(video_id):
    """Look up title, publish date, view count and like count for one video.

    Calls ``videos.list`` (parts ``snippet`` and ``statistics``) through the
    module-level ``youtube`` client.

    Args:
        video_id: ID of the video to look up.

    Returns:
        A ``(title, publish_date, view_count, like_count)`` tuple. The two
        counts are ints, or ``None`` when the corresponding key is absent from
        the statistics. All four entries are ``None`` when the response has no
        items.
    """
    response = youtube.videos().list(
        part="snippet,statistics",
        id=video_id
    ).execute()

    # Guard clause: nothing came back for this ID.
    found = response.get("items")
    if not found:
        return None, None, None, None

    video = found[0]
    snippet = video["snippet"]
    statistics = video["statistics"]
    title = snippet["title"]
    publish_date = snippet["publishedAt"]

    # Counts default to None and are only parsed when the key is present.
    view_count = None
    if "viewCount" in statistics:
        view_count = int(statistics["viewCount"])

    like_count = None
    if "likeCount" in statistics:
        like_count = int(statistics["likeCount"])

    return title, publish_date, view_count, like_count

In [8]:
# Page through search.list for the channel and keep the ID of every result
# whose kind is a video; stop once a page carries no nextPageToken.
video_ids = []
next_page_token = None
more_pages = True
while more_pages:
    page = youtube.search().list(
        part="id",
        channelId=channel_id,
        maxResults=50,  # Maximum number of results per page
        pageToken=next_page_token
    ).execute()

    video_ids.extend(
        hit["id"]["videoId"]
        for hit in page["items"]
        if hit["id"]["kind"] == "youtube#video"
    )

    next_page_token = page.get("nextPageToken")
    more_pages = bool(next_page_token)


In [9]:
# Fetch video statistics and store one record (dict) per video
VIDEO_COLUMNS = ["Video ID", "Title", "Publish Date", "View Count", "Like Count"]
video_data = []
for video_id in video_ids:
    title, publish_date, view_count, like_count = get_video_stats(video_id)
    video_data.append({
        "Video ID": video_id,
        "Title": title,
        "Publish Date": publish_date,
        "View Count": view_count,
        "Like Count": like_count
    })

# Create DataFrame from the list of dictionaries. The column list is passed
# explicitly so the frame still has a "View Count" column when no videos were
# found; otherwise the sort below would raise KeyError on an empty frame.
df = pd.DataFrame(video_data, columns=VIDEO_COLUMNS)

# Sort the DataFrame by view count in descending order
df_sorted = df.sort_values(by="View Count", ascending=False)

# Display the sorted DataFrame
print(df_sorted)

       Video ID                                              Title  \
52  tJQaUW36pMw                 Jessi (제시) - '눈누난나 (NUNU NANA)' MV   
49  6j928wBZ_Bo                             Jessi (제시) - 'ZOOM' MV   
21  1JHOl9CSmXk  Jessi (제시) - Cold Blooded (with 스트릿 우먼 파이터 (SW...   
18  OEu1OWf8ezU             Jessi (제시) - '어떤X (What Type of X)' MV   
22  81Fp_94rEFg     Jessi (제시) - '눈누난나 (NUNU NANA)' Dance Practice   
..          ...                                                ...   
28  YBQ0uMUUTeU  Jessi (제시) - 'Gum' MV Teaser 2 ▶️ 25th October...   
63  fe8YDMklxI8               Jessi (제시) - 'Who Dat B' MV Teaser 1   
46  H_sJFjKuqWA               Jessi (제시) - 'Who Dat B' MV Teaser 4   
56  jty1Pb4dHvc               Jessi (제시) - 'Who Dat B' MV Teaser 3   
72  sPhhe4XQNbE  [Behind] Jessi 제시 방콕 라이브 - Digital Lover (제시 v...   

            Publish Date  View Count  Like Count  
52  2020-07-30T09:00:14Z   219158032   3675269.0  
49  2022-04-13T09:00:07Z   197933981   3628053.0  
21  20

## Find Other Channels With Similar Subscriber Counts

In [13]:
# Define a range for subscriber counts: the target channel's count plus or
# minus a relative margin (adjust the ratio as needed).
SUBSCRIBER_MARGIN_RATIO = 0.1

# Read the count straight from channel_info_df rather than the notebook-level
# `subscriber_count` variable: a later cell reassigns that name while looping
# over other channels, so re-running this cell afterwards would otherwise use
# the wrong channel's value (or fail on a string).
target_subscriber_count = int(channel_info_df.loc["Subscriber Count"].values[0])
margin = int(target_subscriber_count * SUBSCRIBER_MARGIN_RATIO)
lower_bound = target_subscriber_count - margin
upper_bound = target_subscriber_count + margin

In [27]:
# Search for candidate channels by keyword. search.list has no subscriber-count
# filter, so narrowing to the desired subscriber range has to be done on the
# statistics fetched for each result afterwards.
search_request = youtube.search().list(
    part='snippet',
    type='channel',
    q='kpop artist',  # Enter search query
    # 50 is the documented maximum for search.list (accepted range 0-50);
    # follow nextPageToken on the response to retrieve more results.
    maxResults=50
)

# Execute the search request
search_response = search_request.execute()

In [28]:
# Collect one record per candidate artist channel returned by the search
channel_info_list = []

# Loop variables carry a `candidate_` prefix on purpose: reusing the
# notebook-level names `channel_id` / `subscriber_count` here would overwrite
# the target channel's values and silently break earlier cells on re-run.
for item in search_response.get('items', []):
    candidate_id = item['id']['channelId']
    candidate_title = item['snippet']['title']

    # Skip titles containing 'kpop' or 'k-pop' (case-insensitive) - these are
    # usually not an artist's own channel
    lowered_title = candidate_title.lower()
    if 'kpop' in lowered_title or 'k-pop' in lowered_title:
        continue

    # Get channel statistics to retrieve subscriber count
    channel_response = youtube.channels().list(
        part='statistics',
        id=candidate_id
    ).execute()

    candidate_items = channel_response.get('items')
    if not candidate_items:
        # Report and move on rather than crash on a missing/empty item list
        print(f"Skipping channel {candidate_id}: no statistics returned")
        continue

    # The count arrives as a string and may be missing (presumably when the
    # channel hides it - TODO confirm); store an int, or None when absent,
    # so it can be compared numerically with the subscriber bounds.
    raw_count = candidate_items[0].get('statistics', {}).get('subscriberCount')
    candidate_subscriber_count = int(raw_count) if raw_count is not None else None

    # Append channel information to the list
    channel_info_list.append({
        'Channel ID': candidate_id,
        'Title': candidate_title,
        'Subscriber Count': candidate_subscriber_count
    })
    print(f"Channel ID: {candidate_id}, Title: {candidate_title}, Subscriber Count: {candidate_subscriber_count}")

Channel ID: UCgZlBRLRB1-0l-qL9BkecLQ, Title: ARTBEAT, Subscriber Count: 4410000
Channel ID: UCWWzmRMh6BPcEt46XlTyp1w, Title: AleXa • ZB Label, Subscriber Count: 838000
Channel ID: UCQuwhqPJTTvA4VEIevg3ARQ, Title: 준(JUNE), Subscriber Count: 10500
Channel ID: UCN2bQLTTvNPZWCWU5TYghKA, Title: Jessi, Subscriber Count: 4110000
Channel ID: UCbbuHjMbzBn4fVd1hV77Ddw, Title: HOLLAND, Subscriber Count: 849000
Channel ID: UCIYOI59PWwwOtsXmTnLOp_w, Title: ASIA ARTIST AWARDS, Subscriber Count: 107000
Channel ID: UCxXgIeE5hxWxHG6dz9Scg2w, Title: RAIN's Official Channel, Subscriber Count: 416000
Channel ID: UCNM5-NRrDxL0PWC5H52smaQ, Title: Edward Avila, Subscriber Count: 1550000
Channel ID: UCGUBiUnXwgrHdXuHPl65jyQ, Title: RIO, Subscriber Count: 52200
Channel ID: UCO61R65Z4TECnKI0iMXpOEQ, Title: GEMINI OFFICIAL, Subscriber Count: 70000
Channel ID: UCEU3spVQMmXrU0Nqhw00Gpg, Title: Crush, Subscriber Count: 740000
Channel ID: UCnxwObZegHSZABQOwhgTaLw, Title: アイドルが話す英語オタク動画, Subscriber Count: 12200
Chann