# üß± 1Ô∏è‚É£ Configuraci√≥n

In [2]:
import os
from datetime import datetime, timezone
import pandas as pd
from google.cloud import bigquery
from googleapiclient.discovery import build
from dotenv import load_dotenv

load_dotenv()

YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
CHANNEL_ID = os.getenv("YOUTUBE_CHANNEL_ID")
PROJECT_ID = os.getenv("GCP_PROJECT")
DATASET_ID = "angelgarciadatablog"
TABLE_ID = "latest_videos_current"

FULL_TABLE_ID = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"

youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
client = bigquery.Client(project=PROJECT_ID)


# üì° 2Ô∏è‚É£ Extract ‚Äî traer √∫ltimos 5 videos

In [3]:
search_response = youtube.search().list(
    part="snippet",
    channelId=CHANNEL_ID,
    order="date",
    maxResults=5,
    type="video"
).execute()


In [7]:
video_ids = [
    item["id"]["videoId"]
    for item in search_response["items"]
]

len(video_ids)


5

In [8]:
len(search_response["items"])

5

# üìä 3Ô∏è‚É£ M√©tricas en una sola llamada

In [9]:
videos_response = youtube.videos().list(
    part="snippet,statistics",
    id=",".join(video_ids)
).execute()

# üîÑ 4Ô∏è‚É£ Transform a DataFrame

In [10]:
rows = []

for item in videos_response["items"]:
    snippet = item["snippet"]
    stats = item["statistics"]

    rows.append({
        "video_id": item["id"],
        "title": snippet["title"],
        "published_at": snippet["publishedAt"],
        "thumbnail_url": snippet["thumbnails"]["high"]["url"],
        "video_url": f"https://www.youtube.com/watch?v={item['id']}",
        "view_count": int(stats.get("viewCount", 0)),
        "like_count": int(stats.get("likeCount", 0)),
        "comment_count": int(stats.get("commentCount", 0)),
        "extracted_at": datetime.now(timezone.utc)
    })

df_latest = pd.DataFrame(rows)

df_latest["published_at"] = pd.to_datetime(df_latest["published_at"], utc=True)

df_latest


Unnamed: 0,video_id,title,published_at,thumbnail_url,video_url,view_count,like_count,comment_count,extracted_at
0,xB4ecIksJSY,4. Git Push - Actualizar cambios de un reposit...,2026-01-24 12:04:21+00:00,https://i.ytimg.com/vi/xB4ecIksJSY/hqdefault.jpg,https://www.youtube.com/watch?v=xB4ecIksJSY,37,1,0,2026-02-15 09:26:15.561338+00:00
1,7bwkNrRpgw0,Modo oscuro - Big query - Google Cloud,2026-01-23 06:52:23+00:00,https://i.ytimg.com/vi/7bwkNrRpgw0/hqdefault.jpg,https://www.youtube.com/watch?v=7bwkNrRpgw0,19,2,0,2026-02-15 09:26:15.561350+00:00
2,HDyKUodeuNw,Como seleccionar valores iguales en SQL,2026-01-23 06:43:39+00:00,https://i.ytimg.com/vi/HDyKUodeuNw/hqdefault.jpg,https://www.youtube.com/watch?v=HDyKUodeuNw,11,1,0,2026-02-15 09:26:15.561354+00:00
3,Zj6uiqMvFOU,3. C√≥mo funciona el bucle For en python - Repa...,2026-01-17 20:07:55+00:00,https://i.ytimg.com/vi/Zj6uiqMvFOU/hqdefault.jpg,https://www.youtube.com/watch?v=Zj6uiqMvFOU,19,1,0,2026-02-15 09:26:15.561358+00:00
4,RiYjYfMTGvw,2. Qu√© es el PATH en la instalaci√≥n de python,2026-01-11 23:25:57+00:00,https://i.ytimg.com/vi/RiYjYfMTGvw/hqdefault.jpg,https://www.youtube.com/watch?v=RiYjYfMTGvw,16,0,0,2026-02-15 09:26:15.561362+00:00


In [11]:
df_latest.dtypes

video_id                         str
title                            str
published_at     datetime64[us, UTC]
thumbnail_url                    str
video_url                        str
view_count                     int64
like_count                     int64
comment_count                  int64
extracted_at     datetime64[us, UTC]
dtype: object

In [12]:
df_latest.shape

(5, 9)

# üîÑ 5 Load to BIg query

In [13]:
PROJECT_ID = os.getenv("GCP_PROJECT")
DATASET_ID = "angelgarciadatablog"
TABLE_ID = "latest_videos_current"

FULL_TABLE_ID = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"

client = bigquery.Client(project=PROJECT_ID)

## Definir esquema expl√≠cito 

In [14]:
schema = [
    bigquery.SchemaField("video_id", "STRING"),
    bigquery.SchemaField("title", "STRING"),
    bigquery.SchemaField("published_at", "TIMESTAMP"),
    bigquery.SchemaField("thumbnail_url", "STRING"),
    bigquery.SchemaField("video_url", "STRING"),
    bigquery.SchemaField("view_count", "INT64"),
    bigquery.SchemaField("like_count", "INT64"),
    bigquery.SchemaField("comment_count", "INT64"),
    bigquery.SchemaField("extracted_at", "TIMESTAMP"),
]


## Crear tabla (si no existe)

In [15]:
table = bigquery.Table(FULL_TABLE_ID, schema=schema)

client.create_table(table, exists_ok=True)

print("Tabla latest_videos_current lista.")

Tabla latest_videos_current lista.


In [16]:
df_latest["published_at"] = pd.to_datetime(df_latest["published_at"], utc=True)
df_latest["extracted_at"] = pd.to_datetime(df_latest["extracted_at"], utc=True)

df_latest.dtypes

video_id                         str
title                            str
published_at     datetime64[us, UTC]
thumbnail_url                    str
video_url                        str
view_count                     int64
like_count                     int64
comment_count                  int64
extracted_at     datetime64[us, UTC]
dtype: object

## Cargar con WRITE_TRUNCATE

In [17]:
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE"
)

job = client.load_table_from_dataframe(
    df_latest,
    FULL_TABLE_ID,
    job_config=job_config
)

job.result()

print("latest_videos_current actualizado correctamente.")




latest_videos_current actualizado correctamente.
