# YouTube API

### Prerequisites

In [None]:
from googleapiclient.discovery import build
import pandas as pd

### Set up API

In [None]:
# Build the YouTube Data API v3 client.
# The API key is read from the YOUTUBE_API_KEY environment variable when it
# is set, so a real key never has to be saved inside the notebook. If the
# variable is not set, the placeholder below is used and must be replaced
# by hand before running the cells that follow.
import os

youtube = build(
    'youtube',
    'v3',
    developerKey=os.environ.get('YOUTUBE_API_KEY', 'ENTER YOUR KEY HERE'),
)

### Search videos

In [None]:
query = 'is having a boyfriend embarrassing now'
max_videos = 100  # total number of video IDs to collect across all pages

# search.list accepts a maxResults of at most 50 per request, so asking for
# 100 in a single call is out of range. Page through the results instead,
# stopping once max_videos IDs are collected or no further page exists.
video_ids = []
request = youtube.search().list(
    q=query,
    part='id,snippet',
    maxResults=min(max_videos, 50),
    type='video'
)
while request is not None and len(video_ids) < max_videos:
    search_response = request.execute()
    video_ids.extend(
        item['id']['videoId'] for item in search_response.get('items', [])
    )
    # list_next returns None when the response carries no next-page token
    request = youtube.search().list_next(request, search_response)

# The last page may overshoot the requested total; trim the surplus
video_ids = video_ids[:max_videos]

### Collect comments for each video

In [None]:
import json

from googleapiclient.errors import HttpError

# API error reasons that concern a single video only: comments switched off,
# or the video no longer available. These videos are skipped; every other
# API error (e.g. an exhausted quota) is re-raised so it is not hidden.
SKIPPABLE_REASONS = {'commentsDisabled', 'videoNotFound'}


def _http_error_reason(error):
    """Return the first 'reason' code found in an HttpError body, or None."""
    try:
        payload = json.loads(error.content)
        return payload['error']['errors'][0]['reason']
    except (ValueError, KeyError, IndexError, TypeError):
        # Body is not the expected JSON error structure
        return None


# One dict per top-level comment; the video's metadata is repeated on each row
data = []

for vid in video_ids:
    # Video metadata
    video_response = youtube.videos().list(
        part='snippet,statistics',
        id=vid
    ).execute()

    # An empty 'items' list means no metadata came back for this ID;
    # skip the video instead of failing on items[0].
    items = video_response.get('items', [])
    if not items:
        print(f"Skipping {vid}: no video metadata returned")
        continue

    video_snippet = items[0]['snippet']
    video_title = video_snippet['title']
    video_url = f"https://www.youtube.com/watch?v={vid}"
    video_description = video_snippet['description']
    video_date = video_snippet['publishedAt']

    # Top-level comments (no replies)
    request = youtube.commentThreads().list(
        part='snippet',
        videoId=vid,
        maxResults=100,  # max allowed per page
        textFormat='plainText'
    )

    try:
        # list_next returns None after the last page, which ends the loop
        while request is not None:
            response = request.execute()
            for item in response.get('items', []):
                comment_snippet = item['snippet']['topLevelComment']['snippet']
                data.append({
                    'video_id': vid,
                    'video_title': video_title,
                    'video_url': video_url,
                    'video_description': video_description,
                    'video_date': video_date,
                    'comment': comment_snippet['textDisplay'],
                    'comment_likes': comment_snippet.get('likeCount', 0),
                    'comment_date': comment_snippet['publishedAt']
                })
            request = youtube.commentThreads().list_next(request, response)
    except HttpError as error:
        reason = _http_error_reason(error)
        if reason not in SKIPPABLE_REASONS:
            raise
        # Rows already appended for this video are kept; move to the next one
        print(f"Skipping comments for {vid}: {reason}")

### Save to CSV

In [None]:
# Turn the collected comment rows into a table and write it to disk,
# leaving the DataFrame's row index out of the file.
df = pd.DataFrame(data=data)
df.to_csv(
    path_or_buf="youtube_comments.csv",
    index=False,
)

# Scrape the Vogue article
Article: https://www.vogue.com/article/is-having-a-boyfriend-embarrassing-now

### Prerequisites

In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

### Set up the scraper

`BeautifulSoup` documentation: https://beautiful-soup-4.readthedocs.io/en/latest/

In [None]:
# The three steps below must run in this order: the download needs `url`,
# and the parser needs the downloaded response `resp`.

# Define the URL
url = "https://www.vogue.com/article/is-having-a-boyfriend-embarrassing-now"

# Download the Vogue webpage.
# The timeout keeps the cell from hanging indefinitely if the server never
# answers; raise_for_status() raises on a 4xx/5xx response so an error page
# is never parsed as if it were the article.
resp = requests.get(url, timeout=30)
resp.raise_for_status()

# Parse the html
soup = BeautifulSoup(resp.text, "html.parser")

### Extract metadata

In [None]:
# Each field below falls back to None (or an empty body) when the expected
# tag or attribute is missing, so a change in the page markup does not stop
# the cell with an AttributeError/KeyError.

# Title
title_tag = soup.find("h1")
title = title_tag.get_text(strip=True) if title_tag is not None else None

# Date
time_tag = soup.find("time")  # sometimes publication date is in <time> tag
# .get() returns None when the <time> tag has no datetime attribute
pub_date = time_tag.get("datetime") if time_tag is not None else None

# Author
author_tag = soup.find("meta", {"name": "author"})
author = author_tag.get("content") if author_tag is not None else None

# Article body
body_div = soup.find("div", {"data-analytics-component": "article-body"})
# fallback to generic <article>
if body_div is None:
    body_div = soup.find("article")

if body_div is None:
    # Neither container exists: keep going with an empty body and say so
    print("Warning: article body not found; body text will be empty")
    paragraphs = []
else:
    paragraphs = [p.get_text(strip=True) for p in body_div.find_all("p")]

# Paragraphs are separated by a blank line in the stored text
body_text = "\n\n".join(paragraphs)

print("Title:", title)
print("Date:", pub_date)
print("Author:", author)
print("Body:", body_text[:500], "...")  # print first 500 chars

### Save to CSV

In [None]:
# Gather the scraped article fields into a single record; the resulting
# table has exactly one row.
article_record = {
    "platform": "Vogue",
    "article_url": url,
    "title": title,
    "pub_date": pub_date,
    "author": author,
    "text": body_text,
}
df = pd.DataFrame([article_record])

# Write the one-row table to disk without the DataFrame's row index
df.to_csv("vogue_article.csv", index=False)