In [None]:
# 1. Import libraries - setup starts here
# NOTE(review): `np`, `plt` and `datetime` are not referenced in the cells
# visible here; presumably later cells use them — confirm.
import requests, pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns, time
from datetime import datetime
# Apply seaborn's 'whitegrid' style to subsequent plots.
sns.set_style('whitegrid')
# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline
print("Todo listo")

In [None]:
# 2. Search for hundreds of shows using pagination
def buscar_series(paginas=50, consulta="show", pausa=0.2, timeout=10):
    """Collect show summaries from the Episodate search endpoint, page by page.

    Args:
        paginas: Maximum number of result pages to request (pages 1..paginas).
        consulta: Search term sent as the ``q`` query parameter.
        pausa: Seconds to wait after every request attempt.
        timeout: Seconds after which a single HTTP request is abandoned.

    Returns:
        list: The concatenated ``tv_shows`` entries of every page fetched.
        Pagination stops at the first page that has no ``tv_shows`` entries;
        a page whose request fails or whose body is not JSON is skipped.
    """
    series = []
    for p in range(1, paginas+1):
        print(f"Página {p}", end="\r")
        try:
            respuesta = requests.get(
                "https://www.episodate.com/api/search",
                params={"q": consulta, "page": p},
                timeout=timeout,  # without it a stalled connection blocks forever
            )
            contenido = respuesta.json()
        except (requests.RequestException, ValueError) as error:
            # Best effort: report the failed page and move on. Only network
            # and JSON-decoding errors are handled, so Ctrl-C still stops
            # the crawl.
            print(f"\nPágina {p} omitida: {error}")
            time.sleep(pausa)
            continue
        data = contenido.get('tv_shows') if isinstance(contenido, dict) else None
        if not data: break
        series.extend(data)
        time.sleep(pausa)
    print(f"\nSeries encontradas: {len(series)}")
    return series

# Runs the search with the default page limit (50); executing this cell
# performs live HTTP requests.
series_lista = buscar_series()

In [None]:
# 3. Fetch the full details of one show
def detalles_serie(show_id, pausa=0.35, timeout=10):
    """Fetch the detail record of a single show from the Episodate API.

    Args:
        show_id: Value sent as the ``q`` parameter of ``show-details``.
        pausa: Seconds to wait after the request attempt.
        timeout: Seconds after which the HTTP request is abandoned.

    Returns:
        dict: The ``tvShow`` object of the response. An empty dict is
        returned when the request fails, the body is not JSON, or the
        response carries no dict-valued ``tvShow``.
    """
    url = f"https://www.episodate.com/api/show-details?q={show_id}"
    try:
        r = requests.get(url, timeout=timeout)
        contenido = r.json()
    except (requests.RequestException, ValueError):
        # Only network and JSON-decoding failures are treated as "no data";
        # KeyboardInterrupt and programming errors propagate.
        contenido = None
    # Throttle after failures too, so a failing server is not hit
    # back-to-back.
    time.sleep(pausa)
    info = contenido.get('tvShow') if isinstance(contenido, dict) else None
    # Always hand callers a dict so `.get(...)` on the result is safe.
    return info if isinstance(info, dict) else {}

In [None]:
# 4. Process every show: one summary row per show plus all of its episodes
shows_info = []
todos_episodios = []
ids_vistos = set()  # show ids already processed in this run

for i, s in enumerate(series_lista):
    if i % 20 == 0:
        print(f"{i}/{len(series_lista)} → {len(todos_episodios)} episodios")

    # A show that appears on more than one search page would otherwise be
    # fetched twice and add duplicate rows to both lists.
    if s['id'] in ids_vistos: continue
    ids_vistos.add(s['id'])

    info = detalles_serie(s['id'])
    if not info: continue

    # `dict.get(key, default)` only falls back when the key is missing; a key
    # that is present with a null value yields None, so normalise with `or []`.
    generos = info.get('genres') or []
    episodios = info.get('episodes') or []

    shows_info.append({
        'id': info.get('id'),
        'name': info.get('name'),
        'status': info.get('status'),
        'network': info.get('network'),
        'country': info.get('country'),
        'rating': info.get('rating'),
        'genres': ', '.join(generos),
        'total_episodes': len(episodios)
    })

    # Tag each episode with its parent show (id and name come from the
    # search result `s`) so the flat episode list can be joined back later.
    for ep in episodios:
        ep['show_id'] = s['id']
        ep['show_name'] = s['name']
    todos_episodios.extend(episodios)

print(f"\n¡Terminado! {len(shows_info)} series | {len(todos_episodios)} episodios")

In [None]:
# 5. Save CSV 1: one row per show, without the DataFrame index
df_shows = pd.DataFrame(data=shows_info)
df_shows.to_csv(path_or_buf="01_shows_master.csv", index=False)
print("Guardado 01_shows_master.csv")

In [None]:
# 6. Save CSV 2: every collected episode, without the DataFrame index
df_eps = pd.DataFrame(data=todos_episodios)
df_eps.to_csv(path_or_buf="02_episodios_raw.csv", index=False)
print("Guardado 02_episodios_raw.csv")

In [None]:
# 7. Load both CSVs and merge show attributes onto each episode
shows = pd.read_csv("01_shows_master.csv")
eps = pd.read_csv("02_episodios_raw.csv")

# Unparseable air dates become NaT, and those episodes are then discarded.
fechas = pd.to_datetime(eps['air_date'], errors='coerce')
eps = eps.assign(air_date=fechas).dropna(subset=['air_date'])

# Left join keeps every remaining episode; the show-side key column `id`
# duplicates `show_id`, so it is dropped after the join.
columnas_show = ['id','network','country','status','rating','genres']
df = (
    eps.merge(shows[columnas_show], left_on='show_id', right_on='id', how='left')
       .drop(columns='id')
)

df['year'] = df['air_date'].dt.year
print(f"Dataset final: {df.shape[0]} episodios enriquecidos")