<a href="https://colab.research.google.com/github/bramyeon/youtube-scraper/blob/main/youtube_scraper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# (Daewoong AI) 09-medical-trend: <b>YouTube Scraper</b>

Developed by [Bryan Nathanael Wijaya](mailto:bramyeon@gmail.com)  
Contact me for inquiries or bug reports 🙌

In [None]:
#@title Setting up prerequisites

from google.colab import files
from googleapiclient.discovery import build
from datetime import datetime, timedelta
from tqdm import tqdm
import pandas as pd

# API key handed to the client below as `developerKey`; replace the placeholder with your own key.
# The trailing `#@param` marker is a Colab form annotation, so keep it on the assignment line.
API_KEY = "insert your API key here" #@param {type:"string"}
# Module-level YouTube Data API v3 client; `get_videos` issues its requests through this object.
youtube = build('youtube', 'v3', developerKey=API_KEY)

def get_videos(keyword, weeks=2, min_view_count=100000):
    """Search YouTube for recent videos matching a keyword and keep the popular ones.

    Pages through the search results for `keyword`, looks up snippet and
    statistics for each page of hits, and keeps the videos whose view count
    is at least `min_view_count`.

    Args:
        keyword: Search query passed to the API as `q`.
        weeks: Only videos published within this many weeks before now are searched.
        min_view_count: Minimum view count a video needs to be included.

    Returns:
        A list of dicts, one per kept video, with the keys `url`, `title`,
        `author`, `view_count`, `like_count`, `comment_count`, `description`,
        `upload_date` and `video_id`. If a request fails partway through, the
        videos collected so far are returned and the error is printed.
    """
    # Function-scope import: the file's top-level import only brings in
    # `datetime` and `timedelta`.
    from datetime import timezone

    # The trailing "Z" declares the timestamp as UTC, so the cutoff has to be
    # computed in UTC too; a naive local time would shift the window by the
    # machine's UTC offset.
    cutoff = datetime.now(timezone.utc) - timedelta(weeks=weeks)
    publishedAfter = cutoff.strftime("%Y-%m-%dT%H:%M:%SZ")

    videos = []
    next_page_token = None

    print(f"Checking all videos with keyword `{keyword}` published within {weeks} weeks, regardless of view counts...")
    while True:
        try:
            search_response = youtube.search().list(
                q=keyword,
                part='snippet',
                type='video',
                publishedAfter=publishedAfter,
                maxResults=50,
                pageToken=next_page_token
            ).execute()

            video_ids = [item['id']['videoId'] for item in search_response.get('items', [])]

            # Skip the details lookup for an empty page instead of sending a
            # request with an empty `id` filter.
            if video_ids:
                video_response = youtube.videos().list(
                    part='snippet,statistics',
                    id=','.join(video_ids)
                ).execute()

                for video in tqdm(video_response.get('items', [])):
                    stats = video.get('statistics', {})
                    # Counters may be absent from the statistics; treat them as 0.
                    view_count = int(stats.get('viewCount', 0))
                    if view_count >= min_view_count:
                        videos.append({
                            'url': f"https://www.youtube.com/watch?v={video['id']}",
                            'title': video['snippet']['title'],
                            'author': video['snippet']['channelTitle'],
                            'view_count': view_count,
                            'like_count': int(stats.get('likeCount', 0)),
                            'comment_count': int(stats.get('commentCount', 0)),
                            'description': video['snippet']['description'],
                            'upload_date': video['snippet']['publishedAt'],
                            'video_id': video['id']
                        })
        except Exception as exc:
            # Best effort: keep what was collected so far, but report the
            # failure (bad key, quota, network, ...) so it is not mistaken
            # for "no matching videos". KeyboardInterrupt is deliberately
            # not caught here.
            print(f"Stopping early after collecting {len(videos)} video(s): {type(exc).__name__}: {exc}")
            break

        next_page_token = search_response.get('nextPageToken')
        if not next_page_token:
            break

    return videos

In [None]:
#@title YouTube Scraping

keyword = "diabetes" #@param {type:"string"}
published_within_in_weeks = 2 #@param {type:"integer"}
minimum_view_count = 100000 #@param {type:"integer"}
save_name = "scraping.csv" #@param {type:"string"}

# Collect the matching videos and tabulate them, one row per video.
videos = get_videos(keyword, published_within_in_weeks, minimum_view_count)
df = pd.DataFrame(videos)

# Add the extension only when it is missing. Checking the suffix leaves the
# rest of the name untouched, unlike a global `.csv.csv` replacement, which
# would also rewrite a `.csv.csv` occurring in the middle of the name.
if not save_name.endswith('.csv'):
    save_name += '.csv'
print(f"\n\nScraping completed!\nSaving YouTube scraping results as `{save_name}` and downloading...")
# Write the table without the index column, then pass the file to `files.download`.
df.to_csv(save_name, index=False)
files.download(save_name)