# Fetch Raw Data From YouTube

In [2]:
# Helper libraries
import warnings

# Data handling and API client libraries
import pandas as pd
import googleapiclient.errors

# Load the autoreload extension and set it to mode 2, so imported modules are
# re-imported before each cell executes and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Ask the inline plotting backend to render figures in 'retina' format.
%config InlineBackend.figure_format = 'retina'

# Notebook-wide settings: silence all warnings, then tune how pandas renders frames.
warnings.filterwarnings("ignore")

# Applied in the order listed; a value of None removes the column-count cap.
for option_name, option_value in {
    "display.max_rows": 120,
    "display.max_colwidth": 40,
    "display.precision": 4,
    "display.max_columns": None,
}.items():
    pd.set_option(option_name, option_value)

In [3]:
# Let's define a simple ETL process

from youtube_analysis.data import fetch_youtube_comments
import youtube_analysis.config as config


def fetch_batch_data_from_video(id: str, limit: int = 3000) -> pd.DataFrame:
    """Fetch comments for one YouTube video and return them as a DataFrame.

    The request is delegated to ``fetch_youtube_comments`` using the
    ``youtube`` / ``v3`` service and the API key from ``config.YOUTUBE_API_KEY``.

    Parameters
    ----------
    id : str
        Identifier of the video whose comments are fetched. The name shadows
        the ``id`` builtin but is kept because callers pass it by keyword.
    limit : int, default 3000
        Forwarded unchanged to ``fetch_youtube_comments``; presumably the
        maximum number of comments to retrieve (confirm against that helper).

    Returns
    -------
    pd.DataFrame
        One row per fetched record with the columns ``author``,
        ``published_at``, ``updated_at``, ``likes`` and ``text``. If the API
        call fails with ``HttpError``, the error is printed and an empty frame
        with the same columns is returned, so callers should check ``.empty``
        before persisting the result.
    """
    api_service_name = "youtube"
    api_version = "v3"
    api_key = config.YOUTUBE_API_KEY
    columns = ["author", "published_at", "updated_at", "likes", "text"]

    try:
        records = fetch_youtube_comments(
            "snippet", id, limit, api_service_name, api_version, api_key
        )
    except googleapiclient.errors.HttpError as e:
        print(f"Error {e.resp.status} occurred while fetching data:\n{e.content}")
        # Previously execution fell through to `return ytb_df` with the name
        # unbound, raising UnboundLocalError and hiding the real API failure.
        return pd.DataFrame(columns=columns)

    return pd.DataFrame(records, columns=columns)

In [4]:
# Fetch the comments for the target video; the function's default limit (3000) applies.
lex_video_id = "NXU_M4030nE"
lex_comments = fetch_batch_data_from_video(id=lex_video_id)

In [5]:
# Preview the first five rows to sanity-check the fetched columns.
lex_comments.head(n=5)

Unnamed: 0,author,published_at,updated_at,likes,text
0,Lex Fridman,2022-12-29T17:34:04Z,2022-12-29T17:34:04Z,194,Here are the timestamps. Please chec...
1,Steve C,2023-11-10T22:16:58Z,2023-11-10T22:16:58Z,0,😳The Plants are Fake…?
2,Brigid Mary Prain,2023-11-09T09:51:33Z,2023-11-09T09:51:33Z,0,Responding to your pushback on the p...
3,Jack Reacher,2023-11-04T09:30:38Z,2023-11-04T09:30:38Z,0,"<a href=""https://www.youtube.com/wat..."
4,Oskar Gudnason,2023-11-02T21:47:54Z,2023-11-03T12:18:47Z,0,Don´t you forget that the ape comes ...


In [6]:
from youtube_analysis.paths import RAW_DATA_DIR

# Persist the fetched comments as a pickle inside RAW_DATA_DIR.
raw_comments_path = RAW_DATA_DIR / "lex_comments.pkl"
lex_comments.to_pickle(raw_comments_path)