In [None]:
# Mount Google Drive at /content/drive; the CSV read and written later in this
# notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Uncomment on a fresh runtime where python-dotenv is not yet installed:
# %pip install -q python-dotenv



In [None]:
import os
import pandas as pd
from dotenv import load_dotenv
from googleapiclient.discovery import build
from google.colab import auth
from google.auth import default

In [None]:
# Load environment variables
# load_dotenv() is called so that a .env file can supply the key below.
load_dotenv()

# Read the API key from the environment (YOUTUBE_API_KEY) instead of keeping a
# real key in the notebook source. The original placeholder string is kept as
# the fallback, so nothing changes when the variable is not set.
API_KEY = os.getenv('YOUTUBE_API_KEY', 'YOUR_API_KEY')

In [None]:
# Build the client for the 'youtube' service, version 'v3', passing API_KEY as
# the developer key. Every request below goes through this object.
youtube = build(
    serviceName='youtube',
    version='v3',
    developerKey=API_KEY,
)

def get_channel_stats(youtube, channel_id):
    """Fetch the title and headline statistics of a single channel.

    Args:
        youtube: API client object; only
            ``youtube.channels().list(...).execute()`` is used.
        channel_id: Value passed as the ``id`` filter of ``channels().list``.

    Returns:
        A dict with the keys ``channel_name``, ``total_subscribers``,
        ``total_views`` and ``total_videos`` built from the first item of the
        response, or ``None`` when the response contains no items. Values are
        passed through exactly as the API returned them; a field that is
        absent from the response is reported as ``None`` instead of raising
        ``KeyError``.
    """
    request = youtube.channels().list(part='snippet,statistics', id=channel_id)
    response = request.execute()

    # Treat a missing or empty 'items' list as "nothing found".
    items = response.get('items') or []
    if not items:
        return None

    channel = items[0]
    snippet = channel.get('snippet') or {}
    stats = channel.get('statistics') or {}

    # .get() keeps one absent statistic (e.g. a subscriber count that is not
    # exposed) from aborting the whole lookup.
    return {
        'channel_name': snippet.get('title'),
        'total_subscribers': stats.get('subscriberCount'),
        'total_views': stats.get('viewCount'),
        'total_videos': stats.get('videoCount'),
    }

In [None]:
# Try the helper on one hard-coded channel before looping over the whole CSV.
# The commented-out line is an alternative channel ID to test with.
# channel_id = "UC_aEa8K-EOJ3D6gOs7HcyNg"
channel_id = "UCq-Fj5jknLsUf-MWSy4_brA"
get_channel_stats(youtube, channel_id)

{'channel_name': 'T-Series',
 'total_subscribers': '270000000',
 'total_views': '262374290185',
 'total_videos': '21442'}

In [None]:
# Load the source table from Drive. Judging by the preview further down, each
# NAME cell has the form "<channel title> @<channel id>".
df = pd.read_csv("/content/drive/MyDrive/dataset/youtubers/youtube_data_united-states.csv")

# Keep only the text after the last '@' of every NAME and drop repeated IDs.
channel_ids = pd.unique(df['NAME'].str.split('@').str[-1])

In [None]:
# Show the first five extracted channel IDs to check the NAME parsing.
print(channel_ids[:5])

['UCq-Fj5jknLsUf-MWSy4_brA' 'UCbCmjCuTUZos6Inko4u57UQ'
 'UCpEhnqL0y41EpW2TvWAHD7Q' 'UC-lHJZR3Gqxm24_Vd_AJ5Yw'
 'UCX6OQ3DkcsbYNE6H8uQQuVA']


In [None]:
# Fetch stats for each channel
# Exactly one record is appended per entry of channel_ids, even when the
# lookup returns nothing, so that record i always belongs to channel_ids[i].
# Dropping failed lookups would shift every later record up by one and attach
# stats to the wrong channel once the table is joined by row position.
channel_stats = []
for channel_id in channel_ids:
    stats = get_channel_stats(youtube, channel_id)
    if not stats:
        # Placeholder row: same keys as a real result, all values None.
        stats = dict.fromkeys(
            ('channel_name', 'total_subscribers', 'total_views', 'total_videos')
        )
    channel_stats.append(stats)

In [None]:
# Turn the collected per-channel dicts into a table (one row per dict, one
# column per key) and preview the first three rows.
stats_df = pd.DataFrame(data=channel_stats)
stats_df.head(n=3)

Unnamed: 0,channel_name,total_subscribers,total_views,total_videos
0,T-Series,270000000,262374290185,21442
1,Cocomelon - Nursery Rhymes,179000000,184762987508,1222
2,SET India,175000000,167574994374,142352


In [None]:
# Give both frames a fresh 0..n-1 index (the old index is discarded) so that
# they share the same row labels.
df = df.reset_index(drop=True)
stats_df = stats_df.reset_index(drop=True)

In [None]:
# Preview the first three rows of the source table.
df.head(3)

Unnamed: 0,#,NAME,FOLLOWERS,ER,COUNTRY,TOPIC OF INFLUENCE,POTENTIAL REACH
0,1,T-Series @UCq-Fj5jknLsUf-MWSy4_brA,234M,-,India,,70.2M
1,2,Cocomelon - Nursery Rhymes @UCbCmjCuTUZos6Inko...,144M,-,United States,,43.2M
2,3,SET India @UCpEhnqL0y41EpW2TvWAHD7Q,139M,-,India,,41.7M


In [None]:
# Concatenate the dataframes horizontally
# pd.concat(axis=1) lines rows up by index label, which for these two freshly
# re-indexed frames is simply the row position. If the frames differ in length
# the result is NaN-padded and the stats no longer sit next to the channel
# they belong to, so fail loudly instead of writing a misaligned table.
if len(df) != len(stats_df):
    raise ValueError(
        f"Row count mismatch: df has {len(df)} rows but stats_df has "
        f"{len(stats_df)}; a positional concat would misalign channels."
    )
combined_df = pd.concat([df, stats_df], axis=1)
combined_df.head(3)

Unnamed: 0,#,NAME,FOLLOWERS,ER,COUNTRY,TOPIC OF INFLUENCE,POTENTIAL REACH,channel_name,total_subscribers,total_views,total_videos
0,1,T-Series @UCq-Fj5jknLsUf-MWSy4_brA,234M,-,India,,70.2M,T-Series,270000000,262374290185,21442
1,2,Cocomelon - Nursery Rhymes @UCbCmjCuTUZos6Inko...,144M,-,United States,,43.2M,Cocomelon - Nursery Rhymes,179000000,184762987508,1222
2,3,SET India @UCpEhnqL0y41EpW2TvWAHD7Q,139M,-,India,,41.7M,SET India,175000000,167574994374,142352


In [None]:
# Write the combined table to Drive as CSV, leaving out the index column, and
# then preview its first five rows.
combined_df.to_csv(
    '/content/drive/MyDrive/dataset/youtubers/updated_youtube_data_US.csv',
    index=False,
)

combined_df.head()

Unnamed: 0,#,NAME,FOLLOWERS,ER,COUNTRY,TOPIC OF INFLUENCE,POTENTIAL REACH,channel_name,total_subscribers,total_views,total_videos
0,1,T-Series @UCq-Fj5jknLsUf-MWSy4_brA,234M,-,India,,70.2M,T-Series,270000000,262374290185,21442
1,2,Cocomelon - Nursery Rhymes @UCbCmjCuTUZos6Inko...,144M,-,United States,,43.2M,Cocomelon - Nursery Rhymes,179000000,184762987508,1222
2,3,SET India @UCpEhnqL0y41EpW2TvWAHD7Q,139M,-,India,,41.7M,SET India,175000000,167574994374,142352
3,4,PewDiePie @UC-lHJZR3Gqxm24_Vd_AJ5Yw,111M,0.1%,United States,,33.3M,PewDiePie,111000000,29364847950,4776
4,5,MrBeast @UCX6OQ3DkcsbYNE6H8uQQuVA,100M,2.4%,United States,Celebrity Pets,30M,MrBeast,307000000,55799669124,808
