In [15]:
import numpy as np
import pandas as pd
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build

In [33]:
# Load variables from a local .env file (if one exists) into the process environment
load_dotenv()

# Read the API key from the environment instead of hard-coding it in the
# notebook, so the credential is never saved or shared together with the file.
API_KEY = os.getenv('YOUTUBE_API_KEY')
if not API_KEY:
    raise RuntimeError(
        "YOUTUBE_API_KEY is not set; define it in the environment or in a .env file."
    )
API_VERSION = 'v3'

# Client object used by every API call in this notebook
youtube = build('youtube', API_VERSION, developerKey=API_KEY)

In [101]:
def get_channel_stats(youtube, channel_id):
    """Fetch the title and headline statistics for one YouTube channel.

    Issues a single ``channels().list`` request for the ``snippet`` and
    ``statistics`` parts of ``channel_id`` and flattens the first returned
    item into a plain dict.

    Args:
        youtube: API client exposing ``channels().list(...).execute()``
            (in this notebook, the object returned by ``build('youtube', ...)``).
        channel_id: ID of the channel to look up.

    Returns:
        A dict with the keys ``channel_name``, ``total_subscribers``,
        ``total_views`` and ``total_videos``, or ``None`` when the response
        contains no ``items``. Values are passed through exactly as the API
        returned them (no type conversion); a field that is missing from the
        response is reported as ``None`` instead of raising ``KeyError``.
    """
    request = youtube.channels().list(
        part='snippet,statistics',
        id=channel_id,
    )
    response = request.execute()

    # No match for this ID: the response has no 'items' key or an empty list
    items = response.get('items')
    if not items:
        return None

    channel = items[0]
    # Individual statistics may be absent from the response (e.g. a channel
    # that hides its subscriber count), so look them up defensively rather
    # than letting one channel abort the whole collection run.
    statistics = channel.get('statistics', {})

    return dict(
        channel_name=channel.get('snippet', {}).get('title'),
        total_subscribers=statistics.get('subscriberCount'),
        total_views=statistics.get('viewCount'),
        total_videos=statistics.get('videoCount'),
    )

In [63]:
# Load the source channel listing from the CSV file into a dataframe
df = pd.read_csv(filepath_or_buffer="youtube_data_poland.csv")

In [65]:
# Display the loaded dataframe to inspect its columns and rows
df

Unnamed: 0,#,NAME,FOLLOWERS,ER,COUNTRY,TOPIC OF INFLUENCE,POTENTIAL REACH
0,1,Bazylland - Tractors & Excavators @UCfI1cD9PdH...,6.2M,-,Poland,,1.9M
1,2,Blowek @UC-BRUJOtblqGrftY6oRcOZw,4.9M,0.2%,Poland,,1.5M
2,3,reZigiusz @UCLLNPuvRGKxSczJcxlOiMXg,4.5M,-,Poland,,1.3M
3,4,WB Kids International @UCwKa3PzQeReoqMqNkDgG-8w,4M,-,Poland,,1.2M
4,5,Step Records @UC0kLTLosqh6GH6L19I-3EgA,3.8M,-,Poland,,1.1M
...,...,...,...,...,...,...,...
95,96,Farell @UCc7Ys58zjPxeg8UQ3cyyJsQ,1.1M,0.3%,Poland,,339K
96,97,WarszawskiKoks @UC2AyohFiDUS3K98h5dJVfog,1.1M,2%,Poland,Fitness,336K
97,98,TVN Talent Show @UCWJNF5h3No52WmZuspDrG8A,1.1M,-,Poland,,336K
98,99,Polsat @UCkNOjcTcgLaNL0-XNoe4gtw,1.1M,-,Poland,,333K


In [111]:
# Extract the channel ID (the text after the last '@' in NAME) for every row,
# keeping the original row order.
row_channel_ids = df['NAME'].str.split('@').str[-1]

# Distinct channel IDs found in the file
channel_ids = row_channel_ids.unique()


# channel_stats gets exactly ONE entry per row of df, in the same order.
# The stats are later placed next to df purely by row position, so skipping a
# failed lookup or collapsing a duplicate ID would shift every following row's
# stats onto the wrong channel.
channel_stats = []

# Cache so that a channel ID appearing in several rows is only requested once
stats_by_channel_id = {}


# Loop over the rows' channel IDs and get stats for each
for channel_id in row_channel_ids:
    if channel_id not in stats_by_channel_id:
        stats = get_channel_stats(youtube, channel_id)
        # An empty dict becomes an all-NaN row in the stats dataframe, which
        # keeps the row in place when the lookup returned nothing.
        stats_by_channel_id[channel_id] = stats if stats is not None else {}
    channel_stats.append(stats_by_channel_id[channel_id])

In [119]:
# Build a dataframe from the collected per-channel stat dicts
stats_df = pd.DataFrame(data=channel_stats)


# Renumber both frames 0..n-1 in place, discarding the previous index
stats_df.reset_index(inplace=True, drop=True)
df.reset_index(inplace=True, drop=True)

In [None]:
# Concatenate the dataframes horizontally.
# pd.concat(axis=1) pairs rows by index label, so both frames must hold one row
# per channel in the same order. If the row counts differ, fail loudly instead
# of silently writing stats next to the wrong channel.
if len(df) != len(stats_df):
    raise ValueError(
        f"Cannot combine row-by-row: df has {len(df)} rows but stats_df has "
        f"{len(stats_df)}; some channels are missing stats or were de-duplicated."
    )
combined_df = pd.concat([df, stats_df], axis=1)


# Optional: drop the 'channel_name' column from combined_df (the channel name is already part of 'NAME')
# combined_df.drop('channel_name', axis=1, inplace=True)


In [203]:
# Write the combined dataframe to a new CSV file, leaving out the index column
combined_df.to_csv(path_or_buf='updated_youtube_data_poland.csv', index=False)


# Preview the first ten rows of the result
combined_df.head(n=10)

Unnamed: 0,#,NAME,FOLLOWERS,ER,COUNTRY,TOPIC OF INFLUENCE,POTENTIAL REACH,channel_name,total_subscribers,total_views,total_videos
0,1,Bazylland - Tractors & Excavators @UCfI1cD9PdH...,6.2M,-,Poland,,1.9M,Bazylland - Tractors & Excavators,7360000,3583434124,958
1,2,Blowek @UC-BRUJOtblqGrftY6oRcOZw,4.9M,0.2%,Poland,,1.5M,Blowek,5370000,1281727743,1833
2,3,reZigiusz @UCLLNPuvRGKxSczJcxlOiMXg,4.5M,-,Poland,,1.3M,reZigiusz,5170000,1260992839,1416
3,4,WB Kids International @UCwKa3PzQeReoqMqNkDgG-8w,4M,-,Poland,,1.2M,WB Kids International,6450000,2770188925,1723
4,5,Step Records @UC0kLTLosqh6GH6L19I-3EgA,3.8M,-,Poland,,1.1M,Step Records,3980000,4751474848,2857
5,6,AbstrachujeTV @UCTISYi9ABujrrI1Slg3ZDBA,3.4M,0.3%,Poland,,1M,AbstrachujeTV,3380000,1674719663,899
6,7,SBM Label @UCTIyVgnTO9gGZBFq6GRJvUQ,3.3M,0.2%,Poland,,975K,SBM,3420000,5017846546,1665
7,8,Planeta Faktów @UCAE3hyVGBraXyYZYSOS-GeQ,2.7M,0.4%,Poland,,819K,Planeta Faktów,2780000,1373624727,1590
8,9,IsAmU @UCGPKmSKumoR3TbMIHwsh5qQ,2.7M,-,Poland,,801K,IsAmU,2820000,567647418,1584
9,10,DISCO MAREK @UC4uocvXN4aPFQG6paBaMb1A,2.6M,-,Poland,,771K,DISCO MAREK,2490000,54409494,7
