In [50]:
import pandas as pd
import os
from googleapiclient.discovery import build
import isodate
from sqlalchemy import create_engine,text

### YouTube API credentials

In [51]:
# Read the YouTube Data API key from the environment (None when the variable is unset)
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")

In [52]:
# Name and version of the Google API the client should target
api_service_name, api_version = "youtube", "v3"

# Build the API client, passing the key loaded from the environment as developerKey
youtube = build(api_service_name, api_version, developerKey=YOUTUBE_API_KEY)

### Import channel IDs from CSV

In [53]:
# Location of the CSV file that lists the channel IDs to import
csv_file_path = '../channels/channel_ids.csv'

# Load the header-less CSV into a DataFrame with a single "channel_id" column
import_channel_df = pd.read_csv(
    csv_file_path,
    names=["channel_id"],
    header=None,
)

In [54]:
# Show the DataFrame of imported channel IDs
import_channel_df

Unnamed: 0,channel_id
0,UCtYLUTtgS3k1Fg4y5tAhLbw
1,UC4JX40jDee_tINbkjycV4Sg
2,UC8butISFwT-Wl7EV0hUK0BQ
3,UCFbNIlppjAuEX4znoulh0Cw
4,UC6AVa0vSrCpuskzGDDKz_EQ
...,...
96,UCH6gDteHtH4hg3o2343iObA
97,UC3iyLcDyYxk2nN7M5DsYaYQ
98,UC4xKdmAXFh4ACyhpiQ_3qBw
99,UCtC_WTVuo9k3Zol0ZB6u5mQ


In [55]:
def get_channel_stats(youtube, channel_ids):
    """Fetch snippet, content details and statistics for YouTube channels.

    The IDs are requested in batches of up to 50 per ``channels().list`` call.
    The function is best-effort: a batch whose request fails is reported with
    ``print`` and skipped, and the remaining batches are still processed.

    Args:
        youtube: API client exposing ``channels().list(part=..., id=...)``;
            the returned request object must provide ``execute()`` returning
            a dict-like response.
        channel_ids: Iterable of channel ID strings (for example a list or a
            pandas Series).

    Returns:
        pandas.DataFrame with one row per item returned by the API and the
        columns ``channel_name``, ``channel_id``, ``description``,
        ``subscriber_count``, ``view_count``, ``video_count``,
        ``playlist_id``, ``start_date`` and ``country``. Values are passed
        through exactly as the API returned them; a field missing from a
        response item is stored as ``None``. When nothing could be fetched
        the frame is empty but still carries these columns.
    """
    columns = [
        'channel_name',
        'channel_id',
        'description',
        'subscriber_count',
        'view_count',
        'video_count',
        'playlist_id',
        'start_date',
        'country',
    ]
    batch_size = 50

    # Materialise the input so list, tuple, Series and generator inputs are
    # all sliced positionally in the same way.
    ids = list(channel_ids)
    all_data = []

    for start in range(0, len(ids), batch_size):
        id_chunk = ids[start:start + batch_size]

        # Guard each batch separately: one failed request must not discard
        # the batches that come after it.
        try:
            response = youtube.channels().list(
                part='snippet,contentDetails,statistics',
                id=','.join(id_chunk)
            ).execute()
        except Exception as e:
            print(f"Error occurred: {e}")
            continue

        for item in response.get('items', []):
            # Use .get() throughout so a field absent from one item (e.g.
            # possibly subscriberCount when a channel hides it; see the API
            # reference) yields None instead of raising KeyError.
            snippet = item.get('snippet', {})
            statistics = item.get('statistics', {})
            related_playlists = item.get('contentDetails', {}).get('relatedPlaylists', {})

            all_data.append({
                'channel_name': snippet.get('title'),
                'channel_id': item.get('id'),
                'description': snippet.get('description'),
                'subscriber_count': statistics.get('subscriberCount'),
                'view_count': statistics.get('viewCount'),
                'video_count': statistics.get('videoCount'),
                'playlist_id': related_playlists.get('uploads'),
                'start_date': snippet.get('publishedAt'),
                'country': snippet.get('country', None),
            })

    return pd.DataFrame(all_data, columns=columns)

In [56]:
# Fetch metadata and statistics for every imported channel ID
channels_df = get_channel_stats(
    youtube=youtube,
    channel_ids=import_channel_df['channel_id'],
)

In [57]:
# Show the DataFrame returned by get_channel_stats
channels_df

Unnamed: 0,channel_name,channel_id,description,subscriber_count,view_count,video_count,playlist_id,start_date,country
0,Learn with Lukas,UCw_LFe2pS8x3NyipGNJgeEA,"Hey, I’m Lukas 👋 I learn about IT & careers an...",34500,1944838,74,UUw_LFe2pS8x3NyipGNJgeEA,2020-11-26T17:38:58.614177Z,US
1,Corey Schafer,UCCezIgC97PvUuR4_gbFUs5g,Welcome to my Channel. This channel is focused...,1280000,93126265,232,UUCezIgC97PvUuR4_gbFUs5g,2006-05-31T22:49:22Z,US
2,Thu Vu data analytics,UCJQJAI7IjbLcpsjWdSzYz0Q,Hi there! Thanks for checking out my channel. ...,213000,7076650,82,UUJQJAI7IjbLcpsjWdSzYz0Q,2021-04-30T15:22:58.33437Z,US
3,Boris Meinardus,UCBGxe5vTiVWv7e2gP6QB1Mw,👨🏻‍🎓 I do paper reviews and teach AI and Machi...,27300,966843,59,UUBGxe5vTiVWv7e2gP6QB1Mw,2013-11-02T16:07:44Z,DE
4,Mo Chen,UCDybamfye5An6p-j1t2YMsg,"👋 Hey there, my name is Mo Chen and I work as ...",85300,2852246,98,UUDybamfye5An6p-j1t2YMsg,2022-12-25T20:25:38.187653Z,GB
...,...,...,...,...,...,...,...,...,...
96,Rishabh Mishra,UC3rY5HOgbBvGmq7RnDfwF7A,"Education, Jobs, Career + College Tips 😉\n\nHo...",205000,12107381,85,UU3rY5HOgbBvGmq7RnDfwF7A,2022-02-12T14:28:29.641872Z,IN
97,Clear Code,UCznj32AM2r98hZfTxrRo9bQ,"I am making tutorials on coding, primarily in ...",198000,12096139,46,UUznj32AM2r98hZfTxrRo9bQ,2019-11-25T13:11:41.329218Z,
98,Hussein Nasser,UC_ML5xP23TOWKUcc-oAE_Eg,Join me as we discuss various software enginee...,382000,21946130,891,UU_ML5xP23TOWKUcc-oAE_Eg,2014-08-19T07:33:42Z,US
99,Darshil Parmar,UCChmJrVa8kDg05JfCmxpLRw,Freelance Data Engineer and Solution Architect...,116000,5136161,144,UUChmJrVa8kDg05JfCmxpLRw,2021-04-13T14:01:35.850568Z,IN


In [58]:
from datetime import datetime

# Stamp every row with today's date as a YYYY-MM-DD string
channels_df['etl_date'] = datetime.today().date().isoformat()

In [67]:
# Show channels_df after the etl_date column has been added
channels_df

Unnamed: 0,channel_id,channel_name,description,subscriber_count,view_count,video_count,playlist_id,start_date,country,etl_date
0,UC4bIrRImqGd4Ye_KqLpLPeQ,UVA School of Data Science,Official YouTube channel for the UVA School of...,1690,118894,197,UU4bIrRImqGd4Ye_KqLpLPeQ,2017-10-26T13:33:02Z,US,2024-02-28
1,UCteRPiisgIoHtMgqHegpWAQ,Sundas Khalid,Hey! I am Sundas and I help you succeed in the...,198000,10619897,131,UUteRPiisgIoHtMgqHegpWAQ,2014-04-19T02:27:06Z,US,2024-02-28
2,UC2UXDak6o7rBm23k3Vv5dww,Tina Huang,Hi! My name is Tina and I'm a ex-Meta data sci...,616000,28603820,197,UU2UXDak6o7rBm23k3Vv5dww,2013-08-28T18:05:17Z,US,2024-02-28
3,UC9Tl0-lzeDPH4y7LcRwRSQA,Egor Howell,Data Science Made Simple.\n,2830,73905,41,UU9Tl0-lzeDPH4y7LcRwRSQA,2023-08-16T18:02:06.851027Z,GB,2024-02-28
4,UCh1zLfuN6F_X4eoNKCsyICA,Yosh,Support me on Patreon: https://patreon.com/yos...,117000,23679633,9,UUh1zLfuN6F_X4eoNKCsyICA,2018-12-13T01:16:04Z,,2024-02-28
...,...,...,...,...,...,...,...,...,...,...
96,UCdq65x-0_G8sMhwWNgtmXaQ,Aditi Gupta,"Hey there!! I am Aditi Gupta, Founder, CEO of ...",37400,3349604,203,UUdq65x-0_G8sMhwWNgtmXaQ,2017-09-24T11:05:09Z,IN,2024-02-28
97,UClb90NQQcskPUGDIXsQEz5Q,developedbyed,"Hello my gorgeous friends on the internet, wel...",910000,53054792,245,UUlb90NQQcskPUGDIXsQEz5Q,2018-05-27T14:48:23Z,GB,2024-02-28
98,UCQiNyL7ik4FIlV2UCvojq0g,Rohan Adus,I help people up-skill themselves with data.\n,8160,421224,215,UUQiNyL7ik4FIlV2UCvojq0g,2020-03-23T18:26:32.339179Z,,2024-02-28
99,UCyU5wkjgQYGRB0hIHMwm2Sg,Syntax,Hosted by Wes Bos and Scott Tolinski since 201...,347000,28844028,836,UUyU5wkjgQYGRB0hIHMwm2Sg,2012-01-20T02:13:54Z,US,2024-02-28


In [80]:
from sqlalchemy import (
    Column,
    Integer,
    MetaData,
    String,
    Table,
    create_engine,
)

# SQLite engine for the project database; echo=True makes it log the SQL it emits
engine = create_engine('sqlite:///../../db/youtube.db', echo=True)

In [81]:
# Open a database connection from the engine
conn = engine.connect()

In [77]:
# Append the rows of channels_df to the channels_daily_v2 table (DataFrame index not written)
channels_df.to_sql(
    "channels_daily_v2",
    con=engine,
    index=False,
    if_exists='append',
)

2024-02-29 08:54:38,387 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-02-29 08:54:38,390 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("channels_daily_v2")
2024-02-29 08:54:38,391 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-02-29 08:54:38,394 INFO sqlalchemy.engine.Engine INSERT INTO channels_daily_v2 (channel_id, etl_date, channel_name, subscriber_count, view_count, video_count) VALUES (?, ?, ?, ?, ?, ?)
2024-02-29 08:54:38,395 INFO sqlalchemy.engine.Engine [generated in 0.00122s] [('UCw_LFe2pS8x3NyipGNJgeEA', '2024-02-29', 'Learn with Lukas', 34500, 1944838, 74), ('UCCezIgC97PvUuR4_gbFUs5g', '2024-02-29', 'Corey Schafer', 1280000, 93126265, 232), ('UCJQJAI7IjbLcpsjWdSzYz0Q', '2024-02-29', 'Thu Vu data analytics', 213000, 7076650, 82), ('UCBGxe5vTiVWv7e2gP6QB1Mw', '2024-02-29', 'Boris Meinardus', 27300, 966843, 59), ('UCDybamfye5An6p-j1t2YMsg', '2024-02-29', 'Mo Chen', 85300, 2852246, 98), ('UCsvqVGtbbyHaMoevxPAq9Fg', '2024-02-29', 'Simplilearn', 3770000, 337448546, 7

202

In [69]:
# Close the connection first, then dispose of the engine
conn.close()
engine.dispose()

2024-02-29 08:49:50,152 INFO sqlalchemy.engine.Engine ROLLBACK


In [85]:
# One-off migration: rename the channels_daily_v2 table to channels_daily.
# NOTE: this is not idempotent; it fails once channels_daily_v2 no longer exists.
query = text('ALTER TABLE channels_daily_v2 RENAME TO channels_daily;')

# Run the statement on a fresh connection inside a transaction block.
# When the notebook runs top to bottom, the `conn` opened earlier has already
# been closed by the preceding cell, so it cannot be reused here.
# engine.begin() commits on success, rolls back on error, and always releases
# the connection when the block exits.
with engine.begin() as connection:
    result = connection.execute(query)

2024-02-29 08:58:45,889 INFO sqlalchemy.engine.Engine ALTER TABLE channels_daily_v2 RENAME TO channels_daily;
2024-02-29 08:58:45,891 INFO sqlalchemy.engine.Engine [generated in 0.00210s] ()


In [79]:
# Show channels_df.
# NOTE(review): the output recorded for this cell has columns and a row count
# that no visible cell produces; presumably channels_df was reassigned in an
# out-of-order or deleted cell (this cell's execution count precedes those of
# the cells above it). Confirm with Restart & Run All.
channels_df

Unnamed: 0,channel_id,etl_date,channel_name,subscriber_count,view_count,video_count
0,UCw_LFe2pS8x3NyipGNJgeEA,2024-02-29,Learn with Lukas,34500,1944838,74
1,UCCezIgC97PvUuR4_gbFUs5g,2024-02-29,Corey Schafer,1280000,93126265,232
2,UCJQJAI7IjbLcpsjWdSzYz0Q,2024-02-29,Thu Vu data analytics,213000,7076650,82
3,UCBGxe5vTiVWv7e2gP6QB1Mw,2024-02-29,Boris Meinardus,27300,966843,59
4,UCDybamfye5An6p-j1t2YMsg,2024-02-29,Mo Chen,85300,2852246,98
...,...,...,...,...,...,...
197,UCdq65x-0_G8sMhwWNgtmXaQ,2024-02-28,Aditi Gupta,37400,3349604,203
198,UClb90NQQcskPUGDIXsQEz5Q,2024-02-28,developedbyed,910000,53054792,245
199,UCQiNyL7ik4FIlV2UCvojq0g,2024-02-28,Rohan Adus,8160,421224,215
200,UCyU5wkjgQYGRB0hIHMwm2Sg,2024-02-28,Syntax,347000,28844028,836
