# 🧱 1️⃣ Configuración

In [None]:
import os
from datetime import datetime, timezone
import pandas as pd
from google.cloud import bigquery
from googleapiclient.discovery import build
from dotenv import load_dotenv

# Pull settings from a .env file (when one is found) into the environment.
load_dotenv()

# Credentials / identifiers come from the environment; dataset and table
# names are fixed for this notebook.
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
CHANNEL_ID = os.getenv("YOUTUBE_CHANNEL_ID")
PROJECT_ID = os.getenv("GCP_PROJECT")
DATASET_ID = "angelgarciadatablog"
TABLE_ID = "latest_videos_current"

# Fail fast on missing configuration. Without this check an unset
# GCP_PROJECT silently produces a table id starting with "None." and an
# unset API key only surfaces later as an HTTP error from the API.
_missing_env = [
    env_name
    for env_name, env_value in (
        ("YOUTUBE_API_KEY", YOUTUBE_API_KEY),
        ("YOUTUBE_CHANNEL_ID", CHANNEL_ID),
        ("GCP_PROJECT", PROJECT_ID),
    )
    if not env_value
]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(_missing_env)
    )

# Fully qualified BigQuery table id: project.dataset.table
FULL_TABLE_ID = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"

# API clients shared by the cells below.
youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
client = bigquery.Client(project=PROJECT_ID)


# 📡 2️⃣ Extract — traer últimos 5 videos

In [None]:
# Query the channel's videos, ordered by date, capped at five results.
search_params = {
    "part": "snippet",
    "channelId": CHANNEL_ID,
    "order": "date",
    "maxResults": 5,
    "type": "video",
}
search_request = youtube.search().list(**search_params)
search_response = search_request.execute()


In [None]:
# Pull the video id out of every search hit, keeping the result order.
search_items = search_response["items"]
video_ids = [hit["id"]["videoId"] for hit in search_items]

len(video_ids)


In [None]:
# Sanity check: number of items returned by the search call.
len(search_response["items"])

# 📊 3️⃣ Métricas en una sola llamada

In [None]:
# A single videos.list request covers every id: the ids are sent as one
# comma-separated string and both snippet and statistics parts are requested.
ids_param = ",".join(video_ids)
videos_request = youtube.videos().list(
    part="snippet,statistics",
    id=ids_param,
)
videos_response = videos_request.execute()

# 🔄 4️⃣ Transform a DataFrame

In [None]:
# Output columns, in order. Passing them explicitly to the DataFrame keeps
# the frame well-formed (all columns present) even when the API response
# contains no items, so the timestamp conversion below does not KeyError.
LATEST_COLUMNS = [
    "video_id",
    "title",
    "published_at",
    "thumbnail_url",
    "video_url",
    "view_count",
    "like_count",
    "comment_count",
    "extracted_at",
]

# One UTC timestamp for the whole batch, so every row of this snapshot
# carries the same extracted_at value.
extracted_at = datetime.now(timezone.utc)

rows = []

for item in videos_response["items"]:
    snippet = item["snippet"]
    stats = item["statistics"]

    rows.append({
        "video_id": item["id"],
        "title": snippet["title"],
        "published_at": snippet["publishedAt"],
        "thumbnail_url": snippet["thumbnails"]["high"]["url"],
        "video_url": f"https://www.youtube.com/watch?v={item['id']}",
        # Counters missing from the statistics payload default to 0.
        "view_count": int(stats.get("viewCount", 0)),
        "like_count": int(stats.get("likeCount", 0)),
        "comment_count": int(stats.get("commentCount", 0)),
        "extracted_at": extracted_at,
    })

df_latest = pd.DataFrame(rows, columns=LATEST_COLUMNS)

# Parse the publishedAt strings into timezone-aware (UTC) timestamps.
df_latest["published_at"] = pd.to_datetime(df_latest["published_at"], utc=True)

df_latest


In [None]:
# Inspect the column dtypes of the transformed frame.
df_latest.dtypes

In [None]:
# (rows, columns) of the transformed frame.
df_latest.shape

# 🔄 5️⃣ Load to BigQuery

In [None]:
# BigQuery destination for the load step. These are the same values already
# assigned in the earlier configuration cell, repeated here.
PROJECT_ID = os.getenv("GCP_PROJECT")
DATASET_ID = "angelgarciadatablog"
TABLE_ID = "latest_videos_current"

# Fully qualified table id in the form project.dataset.table.
FULL_TABLE_ID = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"

# BigQuery client bound to the configured project.
client = bigquery.Client(project=PROJECT_ID)

## Definir esquema explícito

In [None]:
# Explicit (column name, BigQuery type) pairs for the destination table,
# listed in column order.
schema_spec = [
    ("video_id", "STRING"),
    ("title", "STRING"),
    ("published_at", "TIMESTAMP"),
    ("thumbnail_url", "STRING"),
    ("video_url", "STRING"),
    ("view_count", "INT64"),
    ("like_count", "INT64"),
    ("comment_count", "INT64"),
    ("extracted_at", "TIMESTAMP"),
]

schema = [
    bigquery.SchemaField(column_name, column_type)
    for column_name, column_type in schema_spec
]


## Crear tabla (si no existe)

In [None]:
# Describe the destination table using the explicit schema.
table = bigquery.Table(FULL_TABLE_ID, schema=schema)

# exists_ok=True: do not raise if the table is already there.
client.create_table(table, exists_ok=True)

print("Tabla latest_videos_current lista.")

In [None]:
# Make sure both timestamp columns are timezone-aware UTC datetimes
# before the frame is handed to BigQuery.
for ts_column in ("published_at", "extracted_at"):
    df_latest[ts_column] = pd.to_datetime(df_latest[ts_column], utc=True)

df_latest.dtypes

## Cargar con WRITE_TRUNCATE

In [None]:
# WRITE_TRUNCATE: the load replaces whatever rows the table currently holds.
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Start the load of the DataFrame into the destination table.
job = client.load_table_from_dataframe(df_latest, FULL_TABLE_ID, job_config=job_config)

# Wait for the load job to complete before reporting success.
job.result()

print("latest_videos_current actualizado correctamente.")
