In [1]:
pip install pytrends pandas numpy tqdm

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install "urllib3<2.0"


Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
import numpy as np
import time
import random
from datetime import timedelta
from tqdm import tqdm
from pytrends.request import TrendReq


In [4]:
# Google Trends client shared by the request cells below.
trend_client_options = {
    "hl": "en-US",
    "tz": 360,
    "retries": 2,              # low retries, avoid hammering
    "backoff_factor": 0.1,
}
pytrends = TrendReq(**trend_client_options)


In [5]:
# Resolve the search phrase to a Google Trends topic id ("mid") so later
# requests query the topic rather than a literal search string.
SEARCH_PHRASE = "Israeli-Palestinian conflict"
suggestions = pytrends.suggestions(SEARCH_PHRASE)

# Take the first suggestion typed "Topic"; fail with a readable message
# (instead of a bare "list index out of range") when none is returned.
topic_mid = next((s["mid"] for s in suggestions if s["type"] == "Topic"), None)
if topic_mid is None:
    raise LookupError(
        f"Google Trends returned no 'Topic' suggestion for {SEARCH_PHRASE!r}"
    )
topic_mid


'/m/0cj5y'

In [6]:
def polite_sleep(low=8, high=15):
    """Pause for a random number of seconds drawn uniformly from [low, high].

    Used before each Google Trends request to space the calls out.
    """
    pause_seconds = random.uniform(low, high)
    time.sleep(pause_seconds)

def _fetch_interest(start, end, column, sleep_range):
    """Fetch US interest-over-time for ``topic_mid`` as a single named column.

    Parameters
    ----------
    start, end : str
        Date strings joined as ``"{start} {end}"`` to form the timeframe.
    column : str
        Name given to the interest column in the returned frame.
    sleep_range : tuple
        ``(low, high)`` seconds passed to ``polite_sleep`` before the request.

    Returns
    -------
    DataFrame
        Whatever ``pytrends.interest_over_time()`` returned, without the
        ``isPartial`` column and with the topic column renamed to ``column``.
    """
    polite_sleep(*sleep_range)

    pytrends.build_payload(
        kw_list=[topic_mid],
        geo="US",
        timeframe=f"{start} {end}"
    )

    df = pytrends.interest_over_time()
    # A response without an "isPartial" column (e.g. an empty frame) must not
    # raise KeyError here; let it through so callers can test `.empty`.
    df = df.drop(columns=["isPartial"], errors="ignore")
    return df.rename(columns={topic_mid: column})


def get_weekly_anchor(start, end):
    """Return the interest series for ``start``..``end`` in a column named "weekly".

    Sleeps 10-20 s before the request. The sampling frequency is decided by
    Google from the length of the timeframe, not by this function.
    """
    return _fetch_interest(start, end, column="weekly", sleep_range=(10, 20))


def get_daily_window(start, end):
    """Return the interest series for ``start``..``end`` in a column named "daily".

    Sleeps 8-15 s before the request. The sampling frequency is decided by
    Google from the length of the timeframe, not by this function.
    """
    return _fetch_interest(start, end, column="daily", sleep_range=(8, 15))


In [None]:
# Weekly anchor series, downloaded in consecutive multi-year blocks.
# The blocks must cover the whole daily range (the daily windows run through
# 2026-01-01); any daily window without an overlapping anchor week is dropped
# later, so stopping the anchor early silently truncates the final series.
ANCHOR_FIRST_YEAR = 2004
ANCHOR_LAST_START_YEAR = 2025   # last block starts here (2025-2027)
ANCHOR_BLOCK_YEARS = 3

# NOTE(review): each block is requested separately, so Google presumably
# normalises each one to its own 0-100 scale (the 2004-2006 check cell peaks at
# 100). Levels may therefore not be comparable across blocks — confirm whether
# cross-block rescaling is needed.
weekly_parts = []

for y in range(ANCHOR_FIRST_YEAR, ANCHOR_LAST_START_YEAR + 1, ANCHOR_BLOCK_YEARS):
    block_end_year = y + ANCHOR_BLOCK_YEARS - 1
    try:
        print(f"Weekly anchor: {y}-{block_end_year}")
        part = get_weekly_anchor(f"{y}-01-01", f"{block_end_year}-12-31")
    except Exception as e:
        # Best effort: report the failed block and keep going with the rest.
        print(f"Skipped weekly block {y}: {e}")
        continue
    weekly_parts.append(part)

if not weekly_parts:
    # pd.concat([]) would raise a generic "No objects to concatenate".
    raise ValueError("No weekly anchor block could be downloaded; cannot build `weekly`.")

weekly = pd.concat(weekly_parts).sort_index()


Weekly anchor: 2004-2006
Weekly anchor: 2007-2009
Weekly anchor: 2010-2012
Weekly anchor: 2013-2015
Weekly anchor: 2016-2018
Weekly anchor: 2019-2021
Weekly anchor: 2022-2024
Weekly anchor: 2025-2027


  df = df.fillna(False)


In [8]:
# Split [start_date, end_date] into consecutive, non-overlapping windows of at
# most 90 calendar days (both ends inclusive); the last window is clipped to
# end_date.
start_date = pd.to_datetime("2004-01-01")
end_date   = pd.to_datetime("2026-01-01")

total_days = (end_date - start_date).days

windows = [
    (
        start_date + timedelta(days=offset),
        min(start_date + timedelta(days=offset + 89), end_date),
    )
    for offset in range(0, total_days, 90)
]


In [9]:
# Rescale every daily window onto the weekly anchor's scale.
# For each window: download daily values, average them per week, compare those
# weekly means with the anchor for the same weeks, and multiply the daily
# values by the mean anchor/daily ratio.
daily_scaled = []

for start, end in tqdm(windows):

    try:
        daily = get_daily_window(
            start.strftime("%Y-%m-%d"),
            end.strftime("%Y-%m-%d")
        )
    except Exception as e:
        # Best effort: report the failed window and continue with the next.
        print(f"Skipped {start}–{end}: {e}")
        continue

    if daily.empty:
        continue

    # Convert daily → weekly.
    # Google Trends labels a weekly point with the Sunday that STARTS the
    # Sun-Sat week, so bins must be [Sunday, next Sunday) labelled by their
    # left edge. The resample default for "W-SUN" labels a Mon-Sun bin with the
    # Sunday that ENDS it, which would pair each daily mean with the following
    # week's anchor value.
    daily_weekly = daily.resample("W-SUN", label="left", closed="left").mean()

    # Merge with anchor (only weeks present in both series).
    merged = daily_weekly.join(weekly, how="inner")

    # Weeks whose daily mean is 0 (or NaN) give an inf/NaN ratio that would
    # poison the scale factor and the later max-normalisation; leave them out.
    usable = merged[merged["daily"] > 0]

    if usable.empty:
        print(f"Skipped {start}–{end}: no anchor week with non-zero daily interest")
        continue

    # Scaling factor: mean of the per-week anchor/daily ratios.
    scale = (usable["weekly"] / usable["daily"]).mean()

    daily_scaled.append((daily["daily"] * scale).to_frame("scaled"))


100%|██████████| 90/90 [20:00<00:00, 13.34s/it]


In [10]:
# Stitch the rescaled windows into one daily series.
if not daily_scaled:
    # pd.concat([]) would raise a generic "No objects to concatenate".
    raise ValueError("No daily window was rescaled; nothing to assemble.")

# Concatenate once and sort with a stable algorithm so that, for a repeated
# date, the entry from the earlier window stays first.
daily_all = pd.concat(daily_scaled).sort_index(kind="mergesort")

# Build the duplicate mask from the SAME (sorted) frame it is applied to; a
# mask taken from a separate, unsorted concat is positional and would only
# line up if the input happened to be sorted already.
daily_final = (
    daily_all
    .loc[~daily_all.index.duplicated(keep="first")]
    .rename(columns={"scaled": "google_search_index"})
)


In [11]:
# Re-express the stitched series relative to its own maximum, so the peak is 100.
index_peak = daily_final["google_search_index"].max()
daily_final["google_search_index"] = (
    daily_final["google_search_index"].rmul(100).div(index_peak)
)


In [12]:
# Write the final daily index (date index + google_search_index column) to disk.
output_csv = "google_trends_israel_palestine_daily_US.csv"
daily_final.to_csv(output_csv)


In [13]:
# Spot check: raw response for one early three-month window, requested
# directly (no sleep, no column clean-up, no rescaling).
test_start, test_end = "2004-09-01", "2004-11-30"
test_timeframe = f"{test_start} {test_end}"

pytrends.build_payload(kw_list=[topic_mid], geo="US", timeframe=test_timeframe)

test_daily = pytrends.interest_over_time()
test_daily


Unnamed: 0_level_0,/m/0cj5y,isPartial
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2004-09-01,0,False
2004-09-02,0,False
2004-09-03,0,False
2004-09-04,0,False
2004-09-05,0,False
...,...,...
2004-11-26,0,False
2004-11-27,0,False
2004-11-28,0,False
2004-11-29,31,False


In [14]:
# Spot check: summary statistics of the raw response for the 2004-2006 range.
weekly_check_timeframe = "2004-01-01 2006-12-31"

pytrends.build_payload(kw_list=[topic_mid], geo="US", timeframe=weekly_check_timeframe)

test_weekly = pytrends.interest_over_time()
test_weekly.describe()


Unnamed: 0,/m/0cj5y
count,158.0
mean,28.348101
std,14.357549
min,0.0
25%,17.25
50%,27.0
75%,36.0
max,100.0
