#### Channel Data

In [None]:
import pandas as pd

file_path = '__mini___raw_df_channels_100k.tsv'

df_channel = pd.read_csv(file_path, sep='\t')

df_education_channel = df_channel[df_channel['category_cc'] == 'Education']

print(df_education_channel.head())

   category_cc   join_date                   channel                  name_cc  \
20   Education  2015-06-29  UCltVEZ6GecWntoZ19FvaWhQ                   Ekeeda   
54   Education  2015-01-06  UCtJ9PC1Llj4f2DWPEz8utTA  Manchester Twp Schoo...   
60   Education  2010-04-20  UC9NTBQja_r2NI5e56PvuSAA           Daniel Kreuter   
78   Education  2015-01-30  UC4db8x07Kr0LQODjjen6w7A            Taylor Allard   
95   Education  2011-11-01  UCQFUt_0q3piEMaxJtOsz0fQ           Taylor Fuentes   

    subscribers_cc  videos_cc  subscriber_rank_sb  
20          513000       6240             32492.0  
54           10300        102            920473.0  
60           56700        250            253163.0  
78           14600         88            744777.0  
95           48500        343                 NaN  


#### YouTube Data API to get the Country info of a channel

In [None]:
# pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client

### Test the API

In [None]:
from googleapiclient.discovery import build

# API key
api_key = '##TODO: Replace your API KEY'
youtube = build('youtube', 'v3', developerKey=api_key)

# Channel ID
channel_id = 'UCBJuEqXfXTdcPSbGO9qqn1g'  

# The info we want
request = youtube.channels().list(
    part='snippet,contentDetails,statistics,brandingSettings',
    id=channel_id
)
response = request.execute()

# Channel info
for item in response['items']:
    print('Channel:', item['snippet']['title'])
    print('Subscribes:', item['statistics']['subscriberCount'])
    print('Views:', item['statistics']['viewCount'])
    if 'country' in item['brandingSettings']['channel']:
        print('Country:', item['brandingSettings']['channel']['country'])
    else:
        print('No Country Info in this channel')

Channel: Stop War In Ukraine
Subscribes: 66500
Views: 20727962
Country: UA


### Now add country info to our data

In [None]:
import pandas as pd
from googleapiclient.discovery import build
import time

# API key
api_key = '##TODO: Replace your API KEY'
youtube = build('youtube', 'v3', developerKey=api_key)
# Batch processing: The YouTube we can process 50 rows of data for each request
batch_size = 50

output_file = 'df_education_channel_with_country.csv'

# If we already have a result, read it
try:
    df_result = pd.read_csv(output_file)
    processed_ids = set(df_result['channel'])
except FileNotFoundError:
    df_result = pd.DataFrame()
    processed_ids = set()

# Process the channel without country data 
for i in range(0, len(df_education_channel), batch_size):
    batch = df_education_channel.iloc[i:i+batch_size]
    channel_ids = batch['channel']
    
    # Filter out the data that has already been processed 
    channel_ids = [ch_id for ch_id in channel_ids if ch_id not in processed_ids]
    
    if not channel_ids:
        continue
    
    # API requset
    request = youtube.channels().list(
        part='brandingSettings',
        id=','.join(channel_ids)
    )
    response = request.execute()
    
    # Process the result
    country_data = []
    for item in response['items']:
        country = item.get('brandingSettings', {}).get('channel', {}).get('country', None)
        channel_id = item['id']
        country_data.append({'channel': channel_id, 'country': country})
    
    # Combine the result to DataFrame
    df_country = pd.DataFrame(country_data)
    batch = batch.merge(df_country, on='channel', how='left')
    df_result = pd.concat([df_result, batch])
    
    # We should save the file in the loop
    df_result.to_csv(output_file, index=False)
    processed_ids.update(channel_ids)
    time.sleep(0.3)  

print("All country data are finished")

### Channels other than education categories

In [None]:
import pandas as pd
from googleapiclient.discovery import build
import time

# API key
api_key = '##TODO: Replace your API KEY'
youtube = build('youtube', 'v3', developerKey=api_key)
# Batch processing: The YouTube we can process 50 rows of data for each request
batch_size = 50

output_file = 'df_other_channel_with_country.csv'

# If we already have a result, read it
try:
    df_result = pd.read_csv(output_file)
    processed_ids = set(df_result['channel'])
except FileNotFoundError:
    df_result = pd.DataFrame()
    processed_ids = set()

# Process the channel without country data 
for i in range(0, len(df_other_channel), batch_size):
    batch = df_other_channel.iloc[i:i+batch_size]
    channel_ids = batch['channel']
    
    # Filter out the data that has already been processed 
    channel_ids = [ch_id for ch_id in channel_ids if ch_id not in processed_ids]
    
    if not channel_ids:
        continue
    
    # API requset
    request = youtube.channels().list(
        part='brandingSettings',
        id=','.join(channel_ids)
    )
    response = request.execute()
    
    # Process the result
    country_data = []
    for item in response['items']:
        country = item.get('brandingSettings', {}).get('channel', {}).get('country', None)
        channel_id = item['id']
        country_data.append({'channel': channel_id, 'country': country})
    
    # Merge the result to DataFrame
    df_country = pd.DataFrame(country_data)
    batch = batch.merge(df_country, on='channel', how='left')
    df_result = pd.concat([df_result, batch])
    
    # We should save the file in the loop
    df_result.to_csv(output_file, index=False)
    processed_ids.update(channel_ids)

print("Finished all the country data")

Finished all the country data


### Test on the Comment Data

In [17]:
from googleapiclient.discovery import build

api_key = 'AIzaSyCvuOGIvyZshbSCwsDpZF0WQhuGKOixHhI'
youtube = build('youtube', 'v3', developerKey=api_key)

video_id = 'YrHlHbtiSM0'
request = youtube.commentThreads().list(
    part='snippet',
    videoId=video_id,
    maxResults=100,
    order='relevance'
)

comments = []

# Get all comment data (pages)
while request:
    response = request.execute()
    
    # Get Comment Data
    for item in response['items']:
        comment = item['snippet']['topLevelComment']['snippet']['textDisplay']
        author = item['snippet']['topLevelComment']['snippet']['authorDisplayName']
        like_count = item['snippet']['topLevelComment']['snippet']['likeCount']
        published_at = item['snippet']['topLevelComment']['snippet']['publishedAt']
        comments.append({
            'author': author,
            'comment': comment,
            'like_count': like_count,
            'published_at': published_at
        })
    
    # Next Page or not
    request = youtube.commentThreads().list_next(request, response)

for comment in comments:
    print(f"Author: {comment['author']}")
    print(f"Comment: {comment['comment']}")
    print(f"Likes: {comment['like_count']}")
    print(f"Published_Date: {comment['published_at']}")
    print("\n" + "-"*50 + "\n")

Author: @mitocw
Comment: For links to Professors Strang’s related courses on OCW, visit the Related Resources page on the full resource site: <a href="https://ocw.mit.edu/2020-vision">https://ocw.mit.edu/2020-vision</a>.
Likes: 167
Published_Date: 2020-05-06T14:58:56Z

--------------------------------------------------

Author: @Jason-ke4jf
Comment: I emailed him when I saw a small mistake in the homework in 18.06. He was so gracious and responded almost immediately. A living legend!
Likes: 2168
Published_Date: 2020-05-05T13:14:52Z

--------------------------------------------------

Author: @Soapluvva
Comment: I attended MIT from September 1971 to May 1975, lived in A-Entry in MacGregor House, and double majored in Math and Management.  Prof. Strang was my professor for Linear Algebra.  I was already familiar with matrices and determinants from Intermediate Algebra and Modern Math in high school.  I can tell you that Prof. Strang was a phenomenal prof.  I always paid attention in his 

In [18]:
comments

[{'author': '@mitocw',
  'comment': 'For links to Professors Strang’s related courses on OCW, visit the Related Resources page on the full resource site: <a href="https://ocw.mit.edu/2020-vision">https://ocw.mit.edu/2020-vision</a>.',
  'like_count': 167,
  'published_at': '2020-05-06T14:58:56Z'},
 {'author': '@Jason-ke4jf',
  'comment': 'I emailed him when I saw a small mistake in the homework in 18.06. He was so gracious and responded almost immediately. A living legend!',
  'like_count': 2168,
  'published_at': '2020-05-05T13:14:52Z'},
 {'author': '@Soapluvva',
  'comment': 'I attended MIT from September 1971 to May 1975, lived in A-Entry in MacGregor House, and double majored in Math and Management.  Prof. Strang was my professor for Linear Algebra.  I was already familiar with matrices and determinants from Intermediate Algebra and Modern Math in high school.  I can tell you that Prof. Strang was a phenomenal prof.  I always paid attention in his class and received an &quot;A.&quo

In [20]:
from googleapiclient.discovery import build

api_key = 'AIzaSyCvuOGIvyZshbSCwsDpZF0WQhuGKOixHhI'
youtube = build('youtube', 'v3', developerKey=api_key)

video_id = 'PFDu9oVAE-g'

request = youtube.videos().list(
    part='snippet',
    id=video_id
)
response = request.execute()

for item in response['items']:
    title = item['snippet']['title']
    tags = item['snippet'].get('tags', [])
    print(f"title: {title}")
    print("tag:", tags)

title: Intro: A New Way to Start Linear Algebra
tag: []


In [23]:
from googleapiclient.discovery import build

api_key = 'AIzaSyCvuOGIvyZshbSCwsDpZF0WQhuGKOixHhI'
youtube = build('youtube', 'v3', developerKey=api_key)

video_id = '9MCjyQSRmR8'

request = youtube.videos().list(
    part='snippet',
    id=video_id
)
response = request.execute()

for item in response['items']:
    title = item['snippet']['title']
    tags = item['snippet'].get('tags', [])
    print(f"title: {title}")
    print("tag:", tags)

title: Intro - Linear Algebra
tag: []


In [25]:
import pandas as pd
df_video = pd.read_csv('video_with_channelcountry.csv')



  df_video = pd.read_csv('video_with_channelcountry.csv')


In [55]:
import random
subset = df_video.loc[random.sample((range(len(df_video))),100)]

In [56]:
a = subset['title']
a.to_csv('a.csv',index=False)