# Data Collection for the Outdoor Boys YouTube Channel

In [16]:
import googleapiclient.discovery
from IPython.display import JSON
import os
from dotenv import load_dotenv
import pandas as pd

In [17]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail fast with a readable message instead of handing `None` to the API client
# when the key has not been configured.
api_key = os.environ.get('API_KEY')
if not api_key:
    raise RuntimeError("API_KEY is not set; define it in the environment or in a .env file")

# Channels to collect data for.
channel_ids = ['UCfpCQ89W9wjkHc8J_6eTbBg']
api_service_name = "youtube"
api_version = "v3"

# Create the API client, authenticating with the developer key.
youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey=api_key)

In [18]:
# Fetch name, statistics and the uploads-playlist id for every channel in `channel_ids`.
request = youtube.channels().list(
    part="snippet,contentDetails,statistics",
    id=','.join(channel_ids)
)
response = request.execute()

all_data = []
# `items` and the individual counters are read with .get so that a response that
# lacks them (e.g. no channel matched, or a counter is not exposed) yields
# missing values instead of a bare KeyError.
for item in response.get('items', []):
    statistics = item.get('statistics', {})
    data = {'channelName': item['snippet']['title'],
            'subscribers': statistics.get('subscriberCount'),
            'views': statistics.get('viewCount'),
            'totalVideos': statistics.get('videoCount'),
            # The "uploads" playlist is what the later cells page through.
            'playlistId': item['contentDetails']['relatedPlaylists']['uploads']
    }
    all_data.append(data)

# One row per channel; build the frame once from the collected records.
channel_stats = pd.DataFrame(all_data)

In [19]:
# Display the channel-level summary frame (one row per requested channel).
channel_stats

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,Outdoor Boys,13500000,2283815548,482,UUfpCQ89W9wjkHc8J_6eTbBg


In [20]:
# Collect the id of every video in the channel's uploads playlist.
video_ids = []
playlist_id = channel_stats['playlistId'][0]

# Only `contentDetails` is requested: it carries `videoId`, the sole field read below.
request = youtube.playlistItems().list(
    part="contentDetails",
    playlistId=playlist_id,
    maxResults=50
)

# A single loop handles the first page and every following one. `list_next`
# builds the request for the next page from the previous request/response pair
# and returns None once the response carries no further page token.
while request is not None:
    response = request.execute()
    video_ids += [item['contentDetails']['videoId'] for item in response['items']]
    request = youtube.playlistItems().list_next(request, response)

In [21]:
# Fields to copy out of each part of a video resource, in output-column order.
# Defined once, outside the loops, because it never changes.
features = {'snippet': ['channelTitle', 'title', 'tags', 'publishedAt'],
            'statistics': ['viewCount', 'likeCount', 'commentCount', 'favoriteCount'],
            'contentDetails': ['duration', 'definition', 'caption']}

all_video_info = []

# Query the video ids in slices of 50 per request.
for i in range(0, len(video_ids), 50):
    request = youtube.videos().list(
        part='snippet,contentDetails,statistics',
        id=','.join(video_ids[i:i+50])
    )
    response = request.execute()

    for video in response.get('items', []):
        video_info = {'video_id': video['id']}

        for key, fields in features.items():
            # A part or field that is absent becomes None rather than raising.
            part = video.get(key, {})
            for feature in fields:
                video_info[feature] = part.get(feature)

        all_video_info.append(video_info)

# One row per video; build the frame once from the collected records.
df = pd.DataFrame(all_video_info)
df.head()

Unnamed: 0,video_id,channelTitle,title,tags,publishedAt,viewCount,likeCount,commentCount,favoriteCount,duration,definition,caption
0,mR3Smq_yBiA,Outdoor Boys,Rebuilding Off-Grid Cabin & Digging Well at Al...,"[Homestead, cabin, off-grid, off grid cabin, o...",2025-02-08T12:01:05Z,5281675,209237,,0,PT41M26S,hd,False
1,C-PW7lDl8g8,Outdoor Boys,Exploring Every Island in Hawaii - 9 weeks fis...,"[Fishing, Hawaii, Kauai, Fishing Hawaii, Hawai...",2025-01-11T12:00:14Z,6099338,121271,,0,PT3H2M44S,hd,False
2,iys_pmJSp9M,Outdoor Boys,3 Days in Arctic Survival Shelter - Solo Bushc...,"[Camping, solo camping, survival camping, bush...",2024-12-28T12:01:03Z,19641619,505759,,0,PT25M46S,hd,False
3,p0Q0LexfUP0,Outdoor Boys,Searching Jungle for WWII Battlefields (6 Days...,"[Palau, Belau, Travel Palau, Palau travel vlog...",2024-12-14T12:01:04Z,6235560,174257,,0,PT56M8S,hd,False
4,p9yaDeStS7A,Outdoor Boys,"No tent, No sleeping bag - Extreme Winter Surv...","[camping, survival, winter survival, survival ...",2024-11-30T12:00:56Z,19207916,550056,,0,PT23M,hd,False


## Export

In [22]:
# Make sure the output folder exists; to_csv does not create missing directories.
os.makedirs('./data', exist_ok=True)

channel_stats.to_csv('./data/channel_stats.csv', index=False)

# Shuffle the rows before export. The fixed seed keeps the shuffled order
# reproducible across re-runs on the same data.
df.sample(frac=1, random_state=42).reset_index(drop=True).to_csv('./data/videos.csv', index=False)