In [37]:
import pandas as pd
import requests
import json
from googleapiclient.discovery import build
import googleapiclient.discovery
import googleapiclient.errors
from datetime import datetime

In [7]:
# Load the per-video metrics table. Later cells read its 'Video ID',
# 'Channel Name', 'Views', 'Likes' and 'Subscribers' columns.
video_info_df = pd.read_csv('youtube_videos.csv')
video_info_df.head()

Unnamed: 0,Video ID,Channel Name,Views,Likes,Subscribers,published_date
0,Vb1gZyckHX8,Markiplier,6955647,178424,37000000,2020-02-20 17:09:08
1,xzDyhUdsuCc,DanTDM,10485117,95312,29100000,2019-05-14 17:10:22
2,UNhGynJEp-M,Ctop,948614,10412,2190000,2023-01-20 18:15:01
3,xwj8mD5MnMg,Camodo Gaming,1856954,19241,3660000,2020-02-20 12:00:11
4,zZSbH9bUVaM,Sam Tabor Gaming,427076,7273,1330000,2021-01-13 20:00:26


In [8]:
# Plain Python list of every video ID in the table, in row order.
video_ids = video_info_df['Video ID'].to_list()

In [10]:
# Read the YouTube Data API key from a local text file. The context manager
# closes the handle, and strip() drops a trailing newline that would otherwise
# end up inside every request's `key` parameter.
with open('youtube_api_3.txt', 'r') as key_file:
    API_KEY = key_file.read().strip()

# Terms matched case-insensitively as substrings of a video's title,
# description, or tags by filter_video_ids.
# NOTE(review): 'gamplay' looks like a typo for 'gameplay'; it is left unchanged
# here so the filter's behaviour does not shift silently. Confirm and correct.
relevant_terms = [
    # Game titles
    r"Snakeybus",
    r"Biped",
    r"Project Winter - Blackout",
    r"Worms Rumble",
    r"Wobbly Life",
    r"Party Animals",
    r"Bloons TD Battles 2",
    r"PICO PARK",
    r"Perfect Heist 2",
    r"Goose Goose Duck",
    r"Stumble Guys",
    r"Bopl Battle",
    r"Crab Game",
    r"The Riflemen",
    r"Scribblenauts Unlimited",
    r"Pit People®",
    r"Duck Game",
    r"ShellShock Live",
    r"Worms W.M.D",
    r"Ratz Instagib",
    r"Cat Goes Fishing",
    r"The Mean Greens - Plastic Warfare",
    r"Tower Unite",
    r"Clustertruck",
    r"Garry's Mod",
    r"Who's Your Daddy?!",
    r"Human Fall Flat",
    r"Happy Room",
    r"Pikuniku",
    r"Stick Fight: The Game",
    r"Worms_Ultimate_Mayhem",
    r"Overcooked! 2",
    r"Project Winter",
    r"Super Animal Royale",
    r"Pummel Party",
    r"ibb & obb",
    r"Bloons TD 6",
    # Generic gaming vocabulary
    'gamplay',
    'walkthrough',
    'gaming',
    'tutorial',
    'game',
    'gamer',
    'multi-player',
    'Games',
]

In [41]:
# google-api-python-client handle for the YouTube Data API v3, authenticated
# with API_KEY; get_comments uses it for commentThreads().list(...) calls.
youtube = build('youtube', 'v3', developerKey=API_KEY)

In [11]:
def get_video_info(video_id, timeout=30):
    """Fetch the snippet metadata of a single YouTube video.

    Calls the YouTube Data API v3 ``videos`` endpoint with ``part=snippet``.

    Args:
        video_id: ID of the video to look up.
        timeout: Seconds to wait for the HTTP response before requests raises
            a timeout error (without it the call can block indefinitely).

    Returns:
        A ``(title, description, tags, category_id)`` tuple. ``tags`` is an
        empty list when the snippet has no ``tags`` key. If the response
        contains no ``items`` the tuple is ``(None, None, None, None)``.
    """
    response = requests.get(
        'https://www.googleapis.com/youtube/v3/videos',
        # Let requests build and percent-encode the query string.
        params={'part': 'snippet', 'id': video_id, 'key': API_KEY},
        timeout=timeout,
    )
    items = response.json().get('items') or []
    if not items:
        return None, None, None, None

    snippet = items[0]['snippet']
    return (
        snippet['title'],
        snippet['description'],
        snippet.get('tags', []),
        snippet['categoryId'],
    )


In [12]:
def filter_video_ids(video_ids, relevant_terms):
    """Keep the video IDs that look gaming-related.

    A video is kept when its category ID is ``'20'`` (gaming), or when any of
    ``relevant_terms`` occurs, case-insensitively and as a substring, in its
    title, its description, or one of its tags.

    Args:
        video_ids: Iterable of YouTube video IDs; each one triggers a
            ``get_video_info`` lookup.
        relevant_terms: Iterable of strings to search for.

    Returns:
        List of the kept video IDs, in input order.
    """
    gaming_category_id = '20'
    # Lowercase the terms once instead of once per video and per field.
    lowered_terms = [term.lower() for term in relevant_terms]
    filtered_video_ids = []

    for video_id in video_ids:
        title, description, tags, category_id = get_video_info(video_id)

        if category_id == gaming_category_id:
            filtered_video_ids.append(video_id)
            continue

        # get_video_info returns None for every field when the lookup yields
        # no item; treat missing fields as empty text so such videos are
        # skipped instead of raising AttributeError on None.lower().
        searchable = [title or '', description or '']
        searchable.extend(tags or [])
        searchable = [text.lower() for text in searchable]

        if any(term in text for text in searchable for term in lowered_terms):
            filtered_video_ids.append(video_id)

    return filtered_video_ids

In [25]:
# One API lookup per video ID, so this cell is slow.
filtered_ids = filter_video_ids(video_ids, relevant_terms)

# Persist the result so the later cell that loads 'filtered_video_ids.json'
# works on a fresh kernel without repeating the lookups by hand.
with open('filtered_video_ids.json', 'w') as f:
    json.dump(filtered_ids, f)

filtered_ids

['Vb1gZyckHX8',
 'xzDyhUdsuCc',
 'UNhGynJEp-M',
 'xwj8mD5MnMg',
 'zZSbH9bUVaM',
 'AsV7_nJHd-A',
 'BsjS8bmMFNU',
 'FJ5AG1zv0pY',
 'ZZx3S4GlcHQ',
 'A3_Xhge3qlE',
 'ATfxLkKqBYQ',
 'RaWn4YXZpP8',
 '3a2Ru1d5YCc',
 'E4vzdt4wTXc',
 'dedWvlqgeq4',
 'Whyndp_iyZM',
 '83tvhlrT2lc',
 'AuEAoD8pYRk',
 'NtzkdSPMfOc',
 'XxBtAU4p6GA',
 'RtC0eQJoSFM',
 'CoBanjYraik',
 'NVq487C9Me4',
 'Cws3_y7gXXM',
 '0sefoTPb20c',
 'hccR1OjsoAc',
 'F9eKeEV5jgU',
 'DDFviIAy0YU',
 'oNx9K26ylU4',
 'abJrWHWVX1M',
 '2MSyz5O1-ow',
 '9lhz2ntmb2A',
 'S1oZRK1jOnE',
 'oUlG00icCVE',
 '4oNHRAnXQ80',
 'w6_MtKpJC2c',
 'dHyI_6t28io',
 'aELWuDkDYuA',
 '-Lni83C9BLI',
 'm60QczK9cuQ',
 '9ihtoXtbn-8',
 'Qmr95FH-LdE',
 'E2_8J91PPyI',
 'OlFiflqr1Kw',
 'TRnuTlTUO9g',
 'c0hzMKTRYMI',
 'BMbdhFtupMw',
 'VAAm7GTxJXM',
 'eNZz6n6us0o',
 'BaPGZJKwxfo',
 'smlef-jkmuQ',
 'uXhfnGv_fEQ',
 'xUko31SmXxA',
 'Ta9ReKaZOGw',
 'YSlrAuFAnhc',
 'hV_nResY_nE',
 'wgR1tNgLV68',
 'Aj7tJ79EiQU',
 'cs3BOop15nU',
 'wrE8wHZQ1BI',
 'TjApDcGW9Io',
 'UpNid_rWDnI',
 '9Lypkw

In [16]:
# Reload the filtered ID list from disk instead of re-querying the API.
# Opens in read mode, so 'filtered_video_ids.json' must already exist.
with open('filtered_video_ids.json', 'r') as f:
    filtered_ids = json.load(f)


Analysis

In [17]:
# Restrict the metrics table to the videos whose ID is in filtered_ids.
filtered_video_info_df = video_info_df[video_info_df['Video ID'].isin(filtered_ids)]
filtered_video_info_df.head()

Unnamed: 0,Video ID,Channel Name,Views,Likes,Subscribers,published_date
0,Vb1gZyckHX8,Markiplier,6955647,178424,37000000,2020-02-20 17:09:08
1,xzDyhUdsuCc,DanTDM,10485117,95312,29100000,2019-05-14 17:10:22
2,UNhGynJEp-M,Ctop,948614,10412,2190000,2023-01-20 18:15:01
3,xwj8mD5MnMg,Camodo Gaming,1856954,19241,3660000,2020-02-20 12:00:11
4,zZSbH9bUVaM,Sam Tabor Gaming,427076,7273,1330000,2021-01-13 20:00:26


In [18]:
# One row per channel: total views and likes across its filtered videos, plus
# the subscriber count taken from the channel's first row. Sorted by views,
# then likes, descending.
# NOTE(review): a channel total of -1 likes appears further down, so presumably
# -1 is a placeholder for an unavailable like count — confirm, and exclude such
# rows before summing if so.
channel_group_df = filtered_video_info_df.groupby('Channel Name').agg({'Views': 'sum', 'Likes': 'sum', 'Subscribers': 'first'}).sort_values(by=['Views', 'Likes'], ascending=False).reset_index()
channel_group_df.head()

Unnamed: 0,Channel Name,Views,Likes,Subscribers
0,FGTeeV,196047966,1684806,23900000
1,SSundee,165655603,5557940,24200000
2,ItsFunneh,145581835,2041694,11400000
3,Aphmau,137376858,1492154,22000000
4,SMii7Y,87480467,2677223,7320000


In [29]:
# Keep only channels with fewer than one million subscribers; the row order
# (views, then likes, descending) carries over from channel_group_df.
suitable_channels = channel_group_df[channel_group_df['Subscribers'] < 1000000]
suitable_channels.head(7)

Unnamed: 0,Channel Name,Views,Likes,Subscribers
11,BiN TV,43866580,100705,386000
17,Blox4Fun,34293626,230183,952000
19,Coty,30454076,1384143,330000
20,SakawGaming,28257110,-1,149000
26,Mobile Arcade,22361831,123096,598000
27,Animals Home Animation,20973431,72702,444000
30,Johan Grönvall - Zapray Games,19491056,716169,419000


In [24]:
# Save the shortlisted channels, without the DataFrame index column.
suitable_channels.to_csv('suitable_channels.csv', index=False)

In [30]:
# Inspect every video of one shortlisted channel. This reads the unfiltered
# table (video_info_df), not filtered_video_info_df.
video_info_df[video_info_df['Channel Name'] == 'Johan Grönvall - Zapray Games']

Unnamed: 0,Video ID,Channel Name,Views,Likes,Subscribers,published_date
555,MXkXsC0hnN4,Johan Grönvall - Zapray Games,16214981,559750,419000,2023-11-27 11:55:56
566,JPekiqHWeuM,Johan Grönvall - Zapray Games,2395453,124343,419000,2024-08-13 16:47:47
569,wT1kFMMIP-A,Johan Grönvall - Zapray Games,880622,32076,419000,2023-11-16 09:07:29


In [32]:
def get_channel_id(channel_name, timeout=30):
    """Look up a channel ID by searching for the channel's name.

    Calls the YouTube Data API v3 ``search`` endpoint restricted to
    ``type=channel`` and takes the first hit.

    Args:
        channel_name: Free-text channel name used as the search query.
        timeout: Seconds to wait for the HTTP response before requests raises
            a timeout error.

    Returns:
        The ``channelId`` of the first search result, or ``None`` when the
        response contains no ``items``.

    NOTE(review): the first search hit is not guaranteed to be the intended
    channel when several channels share similar names — verify the result.
    """
    response = requests.get(
        'https://www.googleapis.com/youtube/v3/search',
        # Passing the name through `params` percent-encodes it, so names
        # containing '&', '#', '+' or spaces no longer corrupt the query string.
        params={
            'part': 'snippet',
            'type': 'channel',
            'q': channel_name,
            'key': API_KEY,
        },
        timeout=timeout,
    )
    items = response.json().get('items') or []
    return items[0]['id']['channelId'] if items else None

In [33]:
def get_uploads_playlist_id(channel_id, timeout=30):
    """Return the ID of a channel's "uploads" playlist.

    Calls the YouTube Data API v3 ``channels`` endpoint with
    ``part=contentDetails``.

    Args:
        channel_id: ID of the channel to look up.
        timeout: Seconds to wait for the HTTP response before requests raises
            a timeout error.

    Returns:
        The ``contentDetails.relatedPlaylists.uploads`` value of the first
        returned channel, or ``None`` when the response contains no ``items``.
    """
    response = requests.get(
        'https://www.googleapis.com/youtube/v3/channels',
        params={'part': 'contentDetails', 'id': channel_id, 'key': API_KEY},
        timeout=timeout,
    )
    items = response.json().get('items') or []
    if not items:
        return None
    return items[0]['contentDetails']['relatedPlaylists']['uploads']

In [34]:
def get_latest_videos(playlist_id, max_results=5, timeout=30):
    """Fetch the first page of items in a playlist with their dates.

    Calls the YouTube Data API v3 ``playlistItems`` endpoint with
    ``part=snippet`` and reads only the first page of results.

    Args:
        playlist_id: ID of the playlist to list.
        max_results: Value sent as ``maxResults`` (page size).
        timeout: Seconds to wait for the HTTP response before requests raises
            a timeout error.

    Returns:
        Dict mapping each item's video ID to its snippet ``publishedAt``
        string, in response order. Empty when the response has no ``items``.

    NOTE(review): presumably the API returns an uploads playlist newest-first,
    which is what makes these the "latest" videos — confirm against the API
    documentation.
    """
    response = requests.get(
        'https://www.googleapis.com/youtube/v3/playlistItems',
        params={
            'part': 'snippet',
            'playlistId': playlist_id,
            'maxResults': max_results,
            'key': API_KEY,
        },
        timeout=timeout,
    )
    items = response.json().get('items') or []
    return {
        item['snippet']['resourceId']['videoId']: item['snippet']['publishedAt']
        for item in items
    }

In [35]:
def get_comments(video_id):
    """Fetch up to 50 top-level comments of a video, ordered by relevance.

    Uses the module-level ``youtube`` client and requests a single page of
    ``commentThreads`` (no pagination).

    Args:
        video_id: ID of the video whose comment threads are requested.

    Returns:
        List of dicts with keys ``video_id``, ``text``, ``channel_id`` (the
        ``channelId`` field of the comment snippet), ``like_count``,
        ``channel_id_text`` (the comment author's channel ID, or ``None`` when
        the API omits ``authorChannelId``) and ``date`` (a ``datetime`` parsed
        from ``publishedAt``, or ``None`` when that field is missing).
        The list is empty when comments are disabled (HTTP 403 with
        ``commentsDisabled``) or the video is not found (HTTP 404); a message
        is printed in both cases.

    Raises:
        googleapiclient.errors.HttpError: For any other API error.
    """
    comments = []

    try:
        response = youtube.commentThreads().list(
            part='snippet',
            videoId=video_id,
            textFormat='plainText',
            maxResults=50,
            order='relevance'
        ).execute()
    except googleapiclient.errors.HttpError as e:
        if e.resp.status == 403 and "commentsDisabled" in str(e):
            print(f"Comments are disabled for video ID: {video_id}")
        elif e.resp.status == 404:
            print(f"Video not found for video ID: {video_id}")
        else:
            # Bare raise keeps the original traceback intact.
            raise
        return comments

    # A falsy response or one without 'items' simply yields no comments.
    for item in (response or {}).get('items', []):
        top_level = item['snippet']['topLevelComment']['snippet']

        # authorChannelId may be missing from a comment; don't let one such
        # comment abort the whole video.
        author_channel_id = (top_level.get('authorChannelId') or {}).get('value')

        published_at = top_level.get('publishedAt')
        if published_at:
            # Strip the trailing 'Z' (unsupported by fromisoformat before
            # Python 3.11), but only when it is actually present.
            if published_at.endswith('Z'):
                published_at = published_at[:-1]
            date = datetime.fromisoformat(published_at)
        else:
            date = None

        comments.append({
            'video_id': video_id,
            'text': top_level['textDisplay'],
            'channel_id': top_level['channelId'],
            'like_count': top_level['likeCount'],
            'channel_id_text': author_channel_id,
            'date': date
        })

    return comments

In [45]:
def get_comments_on_latest_videos(channel_name):
    """Collect comments from the latest uploads of a channel.

    Resolves the channel name to a channel ID, then to its uploads playlist,
    lists the latest videos in it and gathers each video's comments.

    Args:
        channel_name: Channel name passed to ``get_channel_id``.

    Returns:
        A DataFrame with one row per comment and the columns ``Video ID``,
        ``Published Date``, ``Comment``, ``Channel ID``, ``Like Count``,
        ``Commenter`` and ``Comment Date``. The frame is empty, but still has
        these columns, when the channel or its uploads playlist cannot be
        found or when no comments were retrieved, so callers can always call
        ``.to_csv`` or select ``['Commenter']`` on the result.
    """
    columns = [
        'Video ID',
        'Published Date',
        'Comment',
        'Channel ID',
        'Like Count',
        'Commenter',
        'Comment Date',
    ]

    channel_id = get_channel_id(channel_name)
    if not channel_id:
        print("Channel not found.")
        return pd.DataFrame(columns=columns)

    uploads_playlist_id = get_uploads_playlist_id(channel_id)
    if not uploads_playlist_id:
        print(f"Uploads playlist not found for channel: {channel_name}")
        return pd.DataFrame(columns=columns)

    latest_video_info = get_latest_videos(uploads_playlist_id)

    # Build all rows first and construct the DataFrame once at the end.
    all_comments_data = [
        {
            'Video ID': video_id,
            'Published Date': published_at,
            'Comment': comment['text'],
            'Channel ID': comment['channel_id'],
            'Like Count': comment['like_count'],
            'Commenter': comment['channel_id_text'],
            'Comment Date': comment['date'],
        }
        for video_id, published_at in latest_video_info.items()
        for comment in get_comments(video_id)
    ]

    # Explicit columns keep the schema stable even with zero rows.
    return pd.DataFrame(all_comments_data, columns=columns)

In [46]:
# Download comments on the latest uploads of each shortlisted channel and
# save them to '<channel name>_comments.csv'.
channel_names = ['Blox4Fun', 'Coty', 'SakawGaming', 'Mobile Arcade', 'Johan Grönvall - Zapray Games']
for channel_name in channel_names:
    channel_latest = get_comments_on_latest_videos(channel_name)
    # The helper yields no DataFrame when a channel cannot be resolved; skip
    # that channel instead of failing on `.to_csv` and losing the rest.
    if channel_latest is None:
        print(f"No comments retrieved for channel: {channel_name}")
        continue
    channel_latest.to_csv(f'{channel_name}_comments.csv', index=False)



Comments are disabled for video ID: pQEAqUpNMwU
Comments are disabled for video ID: dFpNgQnAbmM
Comments are disabled for video ID: -rJ2ynsCAWo
Comments are disabled for video ID: XF3eQnR82_A
Comments are disabled for video ID: Vjd05tz2YMU


In [48]:
# Reload the saved per-channel comment tables. 'Coty' is in channel_names but
# its CSV is not loaded here.
blox4fun_comments = pd.read_csv('Blox4Fun_comments.csv')
johan_comments = pd.read_csv('Johan Grönvall - Zapray Games_comments.csv')
sakaw_comments = pd.read_csv('SakawGaming_comments.csv')
mobile_arcade_comments = pd.read_csv('Mobile Arcade_comments.csv')

In [49]:
# Number of collected comments per commenter channel ID, most frequent first.
blox4fun_comments['Commenter'].value_counts()

Unnamed: 0_level_0,count
Commenter,Unnamed: 1_level_1
UCRkSs-a9SskfDU4T56-476A,6
UCB14tK-B_hsZin0zME55tpQ,4
UCQSoPy-DLGOLCF5ONoc2Low,4
UCvhndW-FKY6tgVovxITlu-Q,2
UCdnRAygxFDIEBSsxhoUSXvw,2
...,...
UCWJdhKakWqIGZ1I-bGiaIQw,1
UCrsA8LS3vdmKoRBn6yh3OWg,1
UCs_4TFpH1zmpPNT2kwmXzBA,1
UCac4TC3TGmHda7fM2-Qw96A,1


In [50]:
# Number of collected comments per commenter channel ID, most frequent first.
johan_comments['Commenter'].value_counts()

Unnamed: 0_level_0,count
Commenter,Unnamed: 1_level_1
UCpE2JVP-l2YFgf2t0dc3hVg,2
UCmHg723_Szla2nz05VJsUoA,2
UCECT--O9GCwoFtzwsM9spuw,2
UCMQ9PckRWS3uo3LNbqb_LwQ,2
UCalc80nzzbREWYmRkERy5JA,2
...,...
UCMNkQPK8XA6OEn6qfbHGJ4g,1
UCiDmTrxzO9hF6BW8Mf5RuYA,1
UCettAYqmRmFnCqBfQMMAQrw,1
UC_ZYc7RRRBxLR8uae3QQvPA,1


In [58]:
# Number of collected comments per commenter channel ID, most frequent first.
sakaw_comments['Commenter'].value_counts()

Unnamed: 0_level_0,count
Commenter,Unnamed: 1_level_1
UCWEViZCgQITYbeNDaed3rrg,3
UCTrpOid_g_ZOzmewA6UteSg,3
UCpIZEbcxWKNwedYo5DVM7DA,2
UCJyLgVX4FtLjaDLuU4iSQ6Q,2
UCdxrxJ9CRXd0CV7SxN6_iAA,2
...,...
UCvSP-wZVSvKAZ2Uy6Fk3hoA,1
UCP7EZu0Eiz8pZCFmeaeCn6A,1
UC0G_nG6GwlX7mmXK_osnbDQ,1
UCXrCrdiMu4bmYsEEuhr9VjA,1


In [61]:
# All collected comments written by one of the most frequent commenters.
sakaw_comments[sakaw_comments['Commenter'] == 'UCWEViZCgQITYbeNDaed3rrg']

Unnamed: 0,Video ID,Published Date,Comment,Channel ID,Like Count,Commenter,Comment Date
31,2ve9IOIpmmk,2024-01-05T13:00:02Z,Wait is it mod or not,UCImQ1eovTV7RTiCd3sku9QQ,0,UCWEViZCgQITYbeNDaed3rrg,2024-01-05 13:02:29
33,m880bDnuask,2023-11-26T02:00:29Z,Let's gooooo sakaw gaming upload new video,UCImQ1eovTV7RTiCd3sku9QQ,8,UCWEViZCgQITYbeNDaed3rrg,2023-11-26 02:50:44
66,yiNOeR3Ewdg,2023-07-19T10:00:48Z,Sakaw Kembali akhirnya 🤯,UCImQ1eovTV7RTiCd3sku9QQ,2,UCWEViZCgQITYbeNDaed3rrg,2023-07-19 11:50:30


In [62]:
# Number of collected comments per commenter channel ID, most frequent first.
mobile_arcade_comments['Commenter'].value_counts()

Unnamed: 0_level_0,count
Commenter,Unnamed: 1_level_1
UC3vyPyt-dbpmhn_KSMiaIRg,5
UCwxrX1moXAZE0KdAhf_gA5g,5
UC8dNa5hL1hv-EXFwuY-D73A,3
UCjnMDlVY-rjohq0AX56QqDA,2
UCVP3Q9Z_JIIkkkqF0Ro706g,2
UCcqE7po3tPYagbE1Ld8HtOw,2
UCbJrjXkGKX66U0ObgNLi5MA,1
UC2jdyvytsonG1o34siiNVTg,1
UCiaIlgsETdq0Kv3PEcPLPBw,1
UCuebilnjyB54dEl6fDolbHQ,1


In [63]:
# All collected comments written by one of the most frequent commenters.
mobile_arcade_comments[mobile_arcade_comments['Commenter'] == 'UC3vyPyt-dbpmhn_KSMiaIRg']

Unnamed: 0,Video ID,Published Date,Comment,Channel ID,Like Count,Commenter,Comment Date
5,dt0tG_23qVw,2024-10-10T14:17:53Z,🤣🤣🤣🤣🤣🤣🤣,UChHZWnj7E6EzQPhlp62U2Gw,2,UC3vyPyt-dbpmhn_KSMiaIRg,2024-10-10 21:06:44
17,x2zJMFY8fEU,2024-10-04T14:00:57Z,🤣,UChHZWnj7E6EzQPhlp62U2Gw,0,UC3vyPyt-dbpmhn_KSMiaIRg,2024-10-04 19:33:15
20,XVAQrfTF5QM,2024-09-29T13:00:29Z,🤣,UChHZWnj7E6EzQPhlp62U2Gw,1,UC3vyPyt-dbpmhn_KSMiaIRg,2024-09-29 16:06:13
28,e3GutAsXMS0,2024-09-24T13:30:20Z,🤣🤣🤣,UChHZWnj7E6EzQPhlp62U2Gw,2,UC3vyPyt-dbpmhn_KSMiaIRg,2024-09-24 21:58:28
33,9Zg9CWUTWM8,2024-09-21T14:59:59Z,🤣,UChHZWnj7E6EzQPhlp62U2Gw,1,UC3vyPyt-dbpmhn_KSMiaIRg,2024-09-21 16:34:30


### Sam Tabor Gaming

In [53]:
# Fetch comments on this channel's latest uploads and save them to CSV.
# NOTE(review): get_comments_on_latest_videos can return None when the channel
# is not found, in which case the .to_csv call below raises AttributeError.
channel_name = 'Sam Tabor Gaming'
sam_tabor_comments = get_comments_on_latest_videos(channel_name)
sam_tabor_comments.to_csv(f'{channel_name}_comments.csv', index=False)

In [55]:
# Number of collected comments per commenter channel ID, most frequent first.
sam_tabor_comments['Commenter'].value_counts()

Unnamed: 0_level_0,count
Commenter,Unnamed: 1_level_1
UC4iWMYh64GvXMo_JLAC3rGg,7
UCjjwl--Xau6vkg-cLv46zQA,4
UCu3EUnqwYD9DPBq2bG9U1aA,4
UCjSWc6LxTQPpMUylsL7kHDw,3
UCrT_GLjo5oCYRr0JOXEAbtg,3
...,...
UCg6tIsBAXdvJAkYaORyZnBg,1
UCA2FDoOaH3WHQjDyW0uSP_A,1
UCnPUk53KURS9AAbmWUa4qiw,1
UCkztnY5R7Njj4fAFivQz8xA,1


In [56]:
# All collected comments written by the most frequent commenter.
sam_tabor_comments[sam_tabor_comments['Commenter'] == 'UC4iWMYh64GvXMo_JLAC3rGg']

Unnamed: 0,Video ID,Published Date,Comment,Channel ID,Like Count,Commenter,Comment Date
20,T48vHm1xIdU,2024-10-18T19:00:14Z,Amazing VIDEO,UCWp3_e0cQvBHVpb_pTge5FQ,0,UC4iWMYh64GvXMo_JLAC3rGg,2024-10-18 19:28:37
47,nFIu6F_U5i8,2024-10-17T19:00:33Z,This is an amazing video so far. This is goin...,UCWp3_e0cQvBHVpb_pTge5FQ,0,UC4iWMYh64GvXMo_JLAC3rGg,2024-10-17 19:02:48
84,nFIu6F_U5i8,2024-10-17T19:00:33Z,Second?,UCWp3_e0cQvBHVpb_pTge5FQ,0,UC4iWMYh64GvXMo_JLAC3rGg,2024-10-17 19:01:16
97,5vMGppsBZ-4,2024-10-16T19:00:02Z,Literal superman. Love the videos,UCWp3_e0cQvBHVpb_pTge5FQ,0,UC4iWMYh64GvXMo_JLAC3rGg,2024-10-16 19:20:07
110,5vMGppsBZ-4,2024-10-16T19:00:02Z,Amazing video,UCWp3_e0cQvBHVpb_pTge5FQ,0,UC4iWMYh64GvXMo_JLAC3rGg,2024-10-16 19:20:21
147,OgG2tj92_EA,2024-10-14T19:00:22Z,I know how much you like skateboarding and tha...,UCWp3_e0cQvBHVpb_pTge5FQ,1,UC4iWMYh64GvXMo_JLAC3rGg,2024-10-14 19:24:35
161,OgG2tj92_EA,2024-10-14T19:00:22Z,Love the videos,UCWp3_e0cQvBHVpb_pTge5FQ,0,UC4iWMYh64GvXMo_JLAC3rGg,2024-10-14 19:24:46


### ChilledChaosGAME

In [64]:
# Fetch comments on this channel's latest uploads and save them to CSV.
# NOTE(review): get_comments_on_latest_videos can return None when the channel
# is not found, in which case the .to_csv call below raises AttributeError.
channel_name = 'ChilledChaosGAME'
chilled_chaos_comments = get_comments_on_latest_videos(channel_name)
chilled_chaos_comments.to_csv(f'{channel_name}_comments.csv', index=False)

In [66]:
# Number of collected comments per commenter channel ID, most frequent first.
chilled_chaos_comments['Commenter'].value_counts()

Unnamed: 0_level_0,count
Commenter,Unnamed: 1_level_1
UCpWGxYCGeE-bJ8oHdcD7hpQ,4
UCS3Orb8JKMcgOZVZ8y58Wyg,3
UCDC38ojkKtkZOpCvcnFDf7w,3
UCmy1iqodoUGdXAH9A1VEqpA,3
UCo_BQE9TdAVETdloUb1z6xg,3
...,...
UCvv27ZZ6xi_Xs5pTncKVC5g,1
UCSO-g4rD_7Qn56T4q9wekBw,1
UC0NyL_rjcncklWOeBQBukrg,1
UCbCYZspz8gKGEGm8uQlSQDQ,1


In [68]:
# All collected comments written by the most frequent commenter.
chilled_chaos_comments[chilled_chaos_comments['Commenter'] == 'UCpWGxYCGeE-bJ8oHdcD7hpQ']

Unnamed: 0,Video ID,Published Date,Comment,Channel ID,Like Count,Commenter,Comment Date
50,9bq1eEhsVSc,2024-10-18T00:03:44Z,"""I woke up early so I wouldn't start drinking ...",UCpmu4uEZ8XcPjHdHh7_zFOg,3,UCpWGxYCGeE-bJ8oHdcD7hpQ,2024-10-18 15:13:30
165,6vKPdxnUojA,2024-10-15T00:26:30Z,In liar's dice there's roughly an 80% chance t...,UCpmu4uEZ8XcPjHdHh7_zFOg,1,UCpWGxYCGeE-bJ8oHdcD7hpQ,2024-10-15 17:41:26
178,6vKPdxnUojA,2024-10-15T00:26:30Z,27:29 is when Liars bar starts for anyone else...,UCpmu4uEZ8XcPjHdHh7_zFOg,1,UCpWGxYCGeE-bJ8oHdcD7hpQ,2024-10-15 17:10:44
190,4N-8lMO7Jas,2024-10-14T01:05:19Z,The game has good bones. Just needs copious am...,UCpmu4uEZ8XcPjHdHh7_zFOg,20,UCpWGxYCGeE-bJ8oHdcD7hpQ,2024-10-14 14:36:15
