In [1]:
import os

import pandas as pd
from googleapiclient.discovery import build
# from google_auth_oauthlib.flow import InstalledAppFlow

In [2]:
# Read the YouTube Data API key from the YOUTUBE_API_KEY environment variable so
# the real key never has to be saved inside the notebook. When the variable is
# not set, the original placeholder is used (replace it manually in that case).
API_KEY = os.environ.get('YOUTUBE_API_KEY', 'INSERT API KEY HERE')

In [3]:
# Build the YouTube Data API (v3) client, authenticating with the developer key
api_service_name, api_version = "youtube", "v3"
youtube = build(
    api_service_name,
    api_version,
    developerKey=API_KEY,
)

In [20]:
# ID of the YouTube channel whose data is retrieved below (Jessi's channel)
channel_id = "UCN2bQLTTvNPZWCWU5TYghKA"

## Get Basic Info about Jessi's Channel

In [21]:
def get_channel_basic_info(channel_id):
    """Fetch a channel's basic metadata and statistics as a one-row DataFrame.

    Calls ``channels.list`` (parts ``snippet`` and ``statistics``) through the
    module-level ``youtube`` client.

    Parameters
    ----------
    channel_id : str
        ID of the YouTube channel to look up.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns ``Channel ID``, ``Title``,
        ``Description``, ``Published At``, ``View Count``,
        ``Subscriber Count`` and ``Video Count``. A count that is missing
        from the response's statistics is reported as 0.

    Raises
    ------
    ValueError
        If the response contains no channel for ``channel_id``.
    """
    # Call the channels.list method to retrieve information about the specified channel
    request = youtube.channels().list(
        part="snippet,statistics",
        id=channel_id
    )
    response = request.execute()

    # A response without any items means nothing matched the ID; fail with a
    # clear message instead of an opaque KeyError/IndexError on ['items'][0].
    items = response.get('items')
    if not items:
        raise ValueError(f"No channel found for channel ID {channel_id!r}")

    # Extract relevant information from the API response
    channel_info = items[0]
    snippet = channel_info['snippet']
    statistics = channel_info['statistics']

    # Store the information in a dictionary (counts arrive as strings -> int)
    channel_data = {
        "Channel ID": channel_id,
        "Title": snippet['title'],
        "Description": snippet['description'],
        "Published At": snippet['publishedAt'],
        "View Count": int(statistics.get('viewCount', 0)),
        "Subscriber Count": int(statistics.get('subscriberCount', 0)),
        "Video Count": int(statistics.get('videoCount', 0))
    }

    # Wrap the dict in a list so the DataFrame has exactly one row
    return pd.DataFrame([channel_data])

In [22]:
# Look up the channel selected via `channel_id` and show the resulting table
channel_info_df = get_channel_basic_info(channel_id=channel_id)
print(channel_info_df)

                 Channel ID  Title Description          Published At  \
0  UCN2bQLTTvNPZWCWU5TYghKA  Jessi              2019-08-16T07:17:09Z   

   View Count  Subscriber Count  Video Count  
0   883656367           4110000           96  


In [25]:
# Subscriber count of the target channel, read from the first row of its info table
target_subscriber_count = channel_info_df["Subscriber Count"].to_numpy()[0]

4110000

## Get View and Like Counts for Each Video

In [8]:
# Function to fetch video statistics
def get_video_stats(video_id):
    """Look up one video's title, publish date, view count and like count.

    Calls ``videos.list`` (parts ``snippet`` and ``statistics``) through the
    module-level ``youtube`` client.

    Returns a 4-tuple ``(title, publish_date, view_count, like_count)``.
    Each count is an ``int``, or ``None`` when its key is absent from the
    video's statistics. When the response carries no items, all four values
    are ``None``.
    """
    response = youtube.videos().list(
        part="snippet,statistics",
        id=video_id
    ).execute()

    # Guard clause: nothing came back for this ID
    items = response.get("items")
    if not items:
        return None, None, None, None

    video = items[0]
    details, counts = video["snippet"], video["statistics"]

    def _count(key):
        # Counts are delivered as strings; a missing key maps to None
        return int(counts[key]) if key in counts else None

    return details["title"], details["publishedAt"], _count("viewCount"), _count("likeCount")

In [9]:
# Collect the IDs of the channel's videos by paging through search.list.
# `type="video"` asks the API for videos only, so the (at most 50) result slots
# of each page are not spent on playlist/channel hits that would be discarded.
video_ids = []
next_page_token = None
while True:
    request = youtube.search().list(
        part="id",
        channelId=channel_id,
        type="video",
        maxResults=50,  # Maximum number of results per page
        pageToken=next_page_token
    )
    response = request.execute()
    # `.get` tolerates a page that carries no "items" key
    for item in response.get("items", []):
        # Defensive check: keep only results that are actually videos
        if item["id"]["kind"] == "youtube#video":
            video_ids.append(item["id"]["videoId"])
    # A missing nextPageToken means the last page has been reached
    next_page_token = response.get("nextPageToken")
    if not next_page_token:
        break


In [10]:
# Fetch video statistics and store in a list of dictionaries (one per video)
video_data = []
for video_id in video_ids:
    title, publish_date, view_count, like_count = get_video_stats(video_id)
    video_data.append({
        "Video ID": video_id,
        "Title": title,
        "Publish Date": publish_date,
        "View Count": view_count,
        "Like Count": like_count
    })

# Create DataFrame from the list of dictionaries.
# - The explicit column list guarantees the columns exist even when no videos
#   were collected, so the sort below cannot fail on a missing column.
# - get_video_stats returns None for an unavailable count; the nullable Int64
#   dtype keeps the counts as integers instead of upcasting them to float64.
df = pd.DataFrame(
    video_data,
    columns=["Video ID", "Title", "Publish Date", "View Count", "Like Count"],
).astype({"View Count": "Int64", "Like Count": "Int64"})

# Sort the DataFrame by view count in descending order (missing counts go last)
df_sorted = df.sort_values(by="View Count", ascending=False)

# Display the sorted DataFrame
print(df_sorted)

       Video ID                                              Title  \
52  tJQaUW36pMw                 Jessi (제시) - '눈누난나 (NUNU NANA)' MV   
49  6j928wBZ_Bo                             Jessi (제시) - 'ZOOM' MV   
22  1JHOl9CSmXk  Jessi (제시) - Cold Blooded (with 스트릿 우먼 파이터 (SW...   
19  OEu1OWf8ezU             Jessi (제시) - '어떤X (What Type of X)' MV   
23  81Fp_94rEFg     Jessi (제시) - '눈누난나 (NUNU NANA)' Dance Practice   
..          ...                                                ...   
28  YBQ0uMUUTeU  Jessi (제시) - 'Gum' MV Teaser 2 ▶️ 25th October...   
63  fe8YDMklxI8               Jessi (제시) - 'Who Dat B' MV Teaser 1   
46  H_sJFjKuqWA               Jessi (제시) - 'Who Dat B' MV Teaser 4   
56  jty1Pb4dHvc               Jessi (제시) - 'Who Dat B' MV Teaser 3   
72  sPhhe4XQNbE  [Behind] Jessi 제시 방콕 라이브 - Digital Lover (제시 v...   

            Publish Date  View Count  Like Count  
52  2020-07-30T09:00:14Z   219202536   3675476.0  
49  2022-04-13T09:00:07Z   198007308   3628519.0  
22  20

## Find Other Channels With Similar Subscriber Counts

In [11]:
# Accept channels whose subscriber count lies within +/-40% of the target's
# (change the 0.4 factor to widen or narrow the window)
margin = int(0.4 * target_subscriber_count)
lower_bound, upper_bound = (
    target_subscriber_count - margin,
    target_subscriber_count + margin,
)

In [12]:
# Search for candidate channels matching the query. The search itself does not
# filter by subscriber count; that filtering happens when the results are
# processed. Only this single page of results is fetched.
search_request = youtube.search().list(
    part='snippet',
    type='channel',
    q='kpop artist',  # Enter search query
    maxResults=50  # search.list accepts 0-50 per page; follow nextPageToken for more
)

# Execute the search request
search_response = search_request.execute()

In [14]:
# Initialize an empty list to store channel information
sim_channel_info_list = []

# Iterate over the search results.
# The loop uses `candidate_*` names on purpose: reusing `channel_id` here would
# overwrite the notebook-level target channel ID, and re-running earlier cells
# would then silently query whichever channel came last in the search results.
for item in search_response['items']:
    candidate_id = item['id']['channelId']
    candidate_title = item['snippet']['title']

    # Skip titles containing 'kpop' or 'k-pop' (case-insensitive) - these are
    # usually not an artist's channel
    lowered_title = candidate_title.lower()
    if 'kpop' in lowered_title or 'k-pop' in lowered_title:
        continue

    # Get channel statistics to retrieve subscriber count
    channel_request = youtube.channels().list(
        part='statistics',
        id=candidate_id
    )
    channel_response = channel_request.execute()

    # Skip candidates whose statistics cannot be read instead of crashing the
    # whole loop: no items returned, or no subscriberCount in the statistics
    # (e.g. presumably when a channel hides its subscriber count).
    channel_items = channel_response.get('items')
    if not channel_items:
        continue
    raw_subscriber_count = channel_items[0].get('statistics', {}).get('subscriberCount')
    if raw_subscriber_count is None:
        continue

    # The API delivers the count as a string; convert once so the stored value
    # is numeric, like "Subscriber Count" in the target channel's info table.
    subscriber_count = int(raw_subscriber_count)

    # Add channel to the list if within subscriber counts margin
    if lower_bound <= subscriber_count <= upper_bound:
        sim_channel_info_list.append({
            'Channel ID': candidate_id,
            'Title': candidate_title,
            'Subscriber Count': subscriber_count
        })

In [15]:
# Turn the collected channel records into a table (one row per channel) and show it
sim_channel_info_df = pd.DataFrame(data=sim_channel_info_list)
print(sim_channel_info_df)

                 Channel ID                  Title Subscriber Count
0  UCgZlBRLRB1-0l-qL9BkecLQ                ARTBEAT          4410000
1  UCN2bQLTTvNPZWCWU5TYghKA                  Jessi          4110000
2  UCEIi7zFR_wE23jFncVtd6-A  STUDIO CHOOM [스튜디오 춤]          5050000


## Export to Excel

In [26]:
# Save every result table to its own worksheet of a single Excel workbook
with pd.ExcelWriter('Youtube_data.xlsx', engine='xlsxwriter') as writer:
    for sheet_name, frame in (
        ('Video Statistics', df_sorted),
        ('Channel Info', channel_info_df),
        ('Similar Channel Info', sim_channel_info_df),
    ):
        # index=False leaves the DataFrame index out of the sheet
        frame.to_excel(writer, sheet_name=sheet_name, index=False)
