In [17]:
# google_trends_scrape_save.py
from pytrends.request import TrendReq
import pandas as pd
import numpy as np
import os, time, re
import warnings
warnings.filterwarnings("ignore")

# ==== Grug settings ====
# Search terms to fetch; the main loop issues one request per term and writes
# one CSV per term (file name derived from the term via safe_name()).
TERMS = [
    "Cards Against Humanity",
    "Baby Einstein Toy",
    "Exploding Kittens",
    "baby stacking cups",
    "dyson vacuum toy",
    "what do you meme",
    "stuffed elephant",
    "jenga",
    "automatic card shuffler",
    "princess castle tent",
    "magnet tiles",
    "pokemon cards",
    "five crowns",
    "gumby toy",
    "ticket to ride",
    "sequence game",
    "left right center",
    "connect 4",
    "drone toy",
    "monopoly deal",
    "little tikes basketball",
    "suspend game",
    "Ultimate kitchen playset",
    "Play kitchen",
    "Kid roller coaster",
    "HD drone",
    "4k drone",
    "Polar express toy train",
    "BB-8 Toy",
    "Mini John Deere",
    "Magna Tiles",
    "Remote control monster truck",
    "GPS drone",
    "Camera drone",
    "Vector robot",
    "Mini bounce house",
    "Jumbo doll house",
    "Mini play kitchen"
]

# "<start> <end>" date range passed to pytrends.build_payload().
# The sampling cadence is chosen by Google Trends from the length of the range
# (short ranges such as the last 90 days are expected to come back daily --
# TODO confirm). NOTE: the run recorded with this file returned 263 rows per
# term for this range, i.e. one point per month (2004-01 .. 2025-11), so this
# long range is monthly rather than weekly.
TIMEFRAME = "2004-01-01 2025-11-01"
OUT_DIR = "trends_out"                # output folder; created if it does not exist
SLEEP_SEC = 1.5                       # pause before each request and base of the retry wait (SLEEP_SEC * attempt); bump to 3-5 if rate-limited
RETRIES = 3                           # attempts per term in fetch_one() before the last error is re-raised

# ==== small helpers ====
def safe_name(s: str) -> str:
    """Convert a search term into a lowercase, underscore-separated file stem.

    Surrounding whitespace is removed, "&" and "+" are spelled out as "and" /
    "plus", and every run of characters outside ``a-z0-9`` becomes a single
    underscore. The result never starts or ends with an underscore and may be
    empty when the term contains no ASCII letters or digits.
    """
    lowered = s.strip().lower()
    # Spell out the symbols that carry meaning before all remaining
    # punctuation is treated as a separator.
    for symbol, word in (("&", "and"), ("+", "plus")):
        lowered = lowered.replace(symbol, word)
    # Splitting on separator runs yields empty pieces only at the two ends;
    # dropping them is what keeps underscores off the edges.
    pieces = re.split(r"[^a-z0-9]+", lowered)
    return "_".join(piece for piece in pieces if piece)

def infer_windows(index: pd.DatetimeIndex):
    """Guess the sampling cadence of *index* and return rolling-window sizes.

    The cadence is taken from the median gap between consecutive timestamps:

    * up to 2 days   -> daily data   (30 / 182 / 365 rows)
    * up to 14 days  -> weekly data  (4 / 24 / 52 rows)
    * anything wider -> monthly data (1 / 6 / 12 rows)

    Multi-year Google Trends ranges come back monthly; treating those rows as
    weeks would stretch the "year" window to 52 months.

    Args:
        index: Timestamps of the series, in chronological order.

    Returns:
        A new dict with keys ``"month"``, ``"sixmo"`` and ``"year"`` giving
        the number of rows that span roughly one month, six months and one
        year. With fewer than 3 timestamps the weekly sizes are returned.
    """
    if len(index) < 3:
        # Too few points to measure a cadence; keep the weekly default.
        return {"month": 4, "sixmo": 24, "year": 52}

    gaps = pd.Series(index).diff().dropna()
    step_days = gaps.median().days

    if step_days <= 2:
        return {"month": 30, "sixmo": 182, "year": 365}  # ~calendar days
    if step_days <= 14:
        return {"month": 4, "sixmo": 24, "year": 52}     # ~weeks
    # Monthly cadence: one row is already a month, so a "month" window of 1
    # makes the monthly ratio trivially 1.0 for non-zero values.
    return {"month": 1, "sixmo": 6, "year": 12}          # ~months

def add_ratios(df: pd.DataFrame, value_col: str) -> pd.DataFrame:
    """Return a copy of *df* with value-to-rolling-mean ratio columns added.

    The copy's index is converted with ``pd.to_datetime`` and the window sizes
    come from ``infer_windows``. Three columns are added -- ``ratio_month``,
    ``ratio_6mo`` and ``ratio_year`` -- each holding ``df[value_col]`` divided
    by its trailing rolling mean over the matching window. Rows without a
    full window are NaN because ``min_periods`` equals the window size.

    Args:
        df: Frame indexed by date-like values; not modified.
        value_col: Name of the column holding the values.

    Returns:
        The augmented copy.
    """
    result = df.copy()
    result.index = pd.to_datetime(result.index)
    windows = infer_windows(result.index)

    # Compute every rolling mean up front, before any ratio column is written.
    values = result[value_col]
    rolling_means = {
        key: values.rolling(windows[key], min_periods=windows[key]).mean()
        for key in ("month", "sixmo", "year")
    }

    for column, key in (
        ("ratio_month", "month"),
        ("ratio_6mo", "sixmo"),
        ("ratio_year", "year"),
    ):
        result[column] = result[value_col] / rolling_means[key]
    return result

def fetch_one(pytrends: "TrendReq", term: str, timeframe: str, *,
              retries: "int | None" = None,
              sleep_sec: "float | None" = None) -> pd.DataFrame:
    """Fetch interest-over-time for one term, retrying on any error.

    Each attempt calls ``build_payload([term], timeframe=...)`` followed by
    ``interest_over_time()``. After a failed attempt the function waits
    ``sleep_sec * attempt`` seconds (linear back-off) and tries again; no wait
    is spent after the final attempt.

    Args:
        pytrends: Client exposing ``build_payload`` and ``interest_over_time``.
        term: Single search term to query.
        timeframe: Timeframe string forwarded to ``build_payload``.
        retries: Maximum number of attempts; defaults to the module-level
            ``RETRIES``. Values below 1 are treated as 1 so the request is
            always tried at least once.
        sleep_sec: Base wait in seconds; defaults to the module-level
            ``SLEEP_SEC``.

    Returns:
        The DataFrame returned by ``interest_over_time()``.

    Raises:
        Exception: Whatever the final attempt raised, with its traceback.
    """
    attempts = max(1, RETRIES if retries is None else retries)
    base_wait = SLEEP_SEC if sleep_sec is None else sleep_sec

    for attempt in range(1, attempts + 1):
        try:
            pytrends.build_payload([term], timeframe=timeframe)
            return pytrends.interest_over_time()
        except Exception:
            # Broad on purpose: rate limits (429) and transient network errors
            # surface as different exception types.
            if attempt == attempts:
                raise
            time.sleep(base_wait * attempt)


# ==== main run: fetch every term, save one CSV per term plus a stacked CSV ====
os.makedirs(OUT_DIR, exist_ok=True)
# NOTE(review): tz is presumably the timezone offset in minutes (360 = US
# Central) -- confirm against the pytrends documentation.
pytrends = TrendReq(hl="en-US", tz=360)

all_rows = []      # per-term frames for the big long table
failed_terms = []  # terms whose fetch still failed after all retries

for i, term in enumerate(TERMS, 1):
    print(f"[{i}/{len(TERMS)}] fetching: {term!r}")
    time.sleep(SLEEP_SEC)  # gentle pacing

    # One term that keeps failing must not throw away everything fetched so
    # far: report it, remember it, and move on to the next term.
    try:
        raw = fetch_one(pytrends, term, TIMEFRAME)
    except Exception as err:
        print(f"   -> failed for {term}: {err!r}; skip")
        failed_terms.append(term)
        continue

    if raw.empty:
        print(f"   -> empty for {term}; skip")
        continue

    # Normalize & clean
    term_col = term  # pytrends uses the exact term as column name
    if "isPartial" in raw.columns:
        raw = raw[~raw["isPartial"].astype(bool)].drop(columns=["isPartial"])  # drop partial last row
    raw.index = pd.to_datetime(raw.index)

    # Add ratios
    out = add_ratios(raw, term_col)
    out = out.rename(columns={term_col: "value"})
    out["term"] = term

    # Save per-term CSV
    fn = os.path.join(OUT_DIR, f"{safe_name(term)}.csv")
    out.to_csv(fn, index_label="date")
    print(f"   -> saved {fn} ({len(out):,} rows)")

    # Add to big long table. Name the index explicitly so reset_index() always
    # produces the "date" column selected below, whatever the index was called.
    all_rows.append(out.rename_axis("date").reset_index())

# Save big CSV (all terms stacked)
if all_rows:
    big = pd.concat(all_rows, ignore_index=True)
    big = big[["date", "term", "value", "ratio_month", "ratio_6mo", "ratio_year"]]
    big.sort_values(["term", "date"], inplace=True)
    big_fn = os.path.join(OUT_DIR, "all_terms_long.csv")
    big.to_csv(big_fn, index=False)
    print(f"\nALL DONE. Big file saved: {big_fn}  (rows: {len(big):,})")
else:
    print("No data fetched. Check TERMS or timeframe.")

if failed_terms:
    print(f"Failed terms ({len(failed_terms)}): {', '.join(failed_terms)}")

[1/38] fetching: 'Cards Against Humanity'
   -> saved trends_out/cards_against_humanity.csv (263 rows)
[2/38] fetching: 'Baby Einstein Toy'
   -> saved trends_out/baby_einstein_toy.csv (263 rows)
[3/38] fetching: 'Exploding Kittens'
   -> saved trends_out/exploding_kittens.csv (263 rows)
[4/38] fetching: 'baby stacking cups'
   -> saved trends_out/baby_stacking_cups.csv (263 rows)
[5/38] fetching: 'dyson vacuum toy'
   -> saved trends_out/dyson_vacuum_toy.csv (263 rows)
[6/38] fetching: 'what do you meme'
   -> saved trends_out/what_do_you_meme.csv (263 rows)
[7/38] fetching: 'stuffed elephant'
   -> saved trends_out/stuffed_elephant.csv (263 rows)
[8/38] fetching: 'jenga'
   -> saved trends_out/jenga.csv (263 rows)
[9/38] fetching: 'automatic card shuffler'
   -> saved trends_out/automatic_card_shuffler.csv (263 rows)
[10/38] fetching: 'princess castle tent'
   -> saved trends_out/princess_castle_tent.csv (263 rows)
[11/38] fetching: 'magnet tiles'
   -> saved trends_out/magnet_tiles.