In [5]:
from pytrends.request import TrendReq
import pandas as pd

pytrends = TrendReq(hl='en-US', tz=360)

pytrends.build_payload(
    kw_list=["NBA"],
    geo="PH",
    timeframe="2018-01-01 2025-01-01"
)

df_nba = pytrends.interest_over_time().reset_index()
df_nba.head()


Unnamed: 0,date,NBA,isPartial
0,2018-01-01,48,False
1,2018-02-01,47,False
2,2018-03-01,49,False
3,2018-04-01,70,False
4,2018-05-01,78,False


In [6]:
df_nba = df_nba.drop(columns=["isPartial"])
df_nba.head()


Unnamed: 0,date,NBA
0,2018-01-01,48
1,2018-02-01,47
2,2018-03-01,49
3,2018-04-01,70
4,2018-05-01,78


In [11]:
import pandas as pd

# Create seasonal bands for NBA playoffs (April–June)
years = range(2018, 2025)

seasonal_bands = pd.DataFrame([
    {"start": f"{y}-04-01", "end": f"{y}-06-30"}
    for y in years
])

seasonal_bands["start"] = pd.to_datetime(seasonal_bands["start"])
seasonal_bands["end"] = pd.to_datetime(seasonal_bands["end"])


In [12]:
import altair as alt

shade = alt.Chart(seasonal_bands).mark_rect(
    fill="#CE1141",   # Bulls red, very subtle
    opacity=0.06
).encode(
    x="start:T",
    x2="end:T"
)


In [15]:
line = alt.Chart(df_nba).mark_line(
    color="black",
    strokeWidth=2
).encode(
    x=alt.X(
        "date:T",
        title="Year"
    ),
    y=alt.Y(
        "NBA:Q",
        title="Relative Search Interest (Seasonally Indexed)"
    ),
    tooltip=[
        alt.Tooltip("date:T", title="Date"),
        alt.Tooltip("NBA:Q", title="Search Interest Index")
    ]
)


In [25]:
footnotes = pd.DataFrame({
    "text": [
        "• Google Trends is a relative index. A value of 100 marks the single month with the highest NBA search interest in the Philippines between 2018 and 2024.",
        "• Lower values mostly reflect the NBA off-season. They do not mean that interest disappears.", 
        "• The same spikes show up every year during the playoffs (marked with a red background), which suggests fan engagement is predictable and tied to the league calendar."
    ],
    "row": [0, 1, 2]
})

notes = alt.Chart(footnotes).mark_text(
    align="left",
    fontSize=11
).encode(
    x=alt.value(0),
    y=alt.Y("row:O", axis=None),
    text="text:N"
).properties(
    width=830,
    height=80   # <-- this is what fixes the squeeze
)


final_chart = alt.vconcat(
    (shade + line).properties(
        width=830,
        height=400,
        title="Google Search Interest for the NBA in the Philippines (2018–2024)"
    ),
    notes,
    spacing=8
)

final_chart



In [26]:
from pytrends.request import TrendReq
import pandas as pd

pytrends = TrendReq(hl='en-US', tz=360)

countries = {
    "Philippines": "PH",
    "United Kingdom": "GB",
    "France": "FR",
    "Germany": "DE",
    "Spain": "ES",
    "Australia": "AU"   # optional control
}

results = []

for country, geo_code in countries.items():
    pytrends.build_payload(
        kw_list=["NBA"],
        geo=geo_code,
        timeframe="2018-01-01 2025-01-01"
    )
    
    df = pytrends.interest_over_time()
    df = df.drop(columns=["isPartial"])
    
    avg_interest = df["NBA"].mean()
    
    results.append({
        "country": country,
        "avg_interest": avg_interest
    })

df_country = pd.DataFrame(results)
df_country


Unnamed: 0,country,avg_interest
0,Philippines,39.247059
1,United Kingdom,44.611765
2,France,52.988235
3,Germany,41.917647
4,Spain,42.858824
5,Australia,42.647059


In [27]:
df_country = df_country.sort_values("avg_interest", ascending=True)
df_country


Unnamed: 0,country,avg_interest
0,Philippines,39.247059
3,Germany,41.917647
5,Australia,42.647059
4,Spain,42.858824
1,United Kingdom,44.611765
2,France,52.988235


In [28]:
import altair as alt

alt.Chart(df_country).mark_point(
    filled=True,
    size=120
).encode(
    x=alt.X(
        "avg_interest:Q",
        title="Average NBA Search Interest (Relative Index)"
    ),
    y=alt.Y(
        "country:N",
        sort=df_country["country"].tolist(),
        title=""
    ),
    color=alt.condition(
        alt.datum.country == "Philippines",
        alt.value("#CE1141"),   # Bulls red
        alt.value("#7f7f7f")
    ),
    tooltip=[
        alt.Tooltip("country:N", title="Country"),
        alt.Tooltip("avg_interest:Q", title="Average Interest", format=".1f")
    ]
).properties(
    width=650,
    height=300,
    title="Average NBA Search Interest by Country (2018–2024)"
)


In [29]:
import pandas as pd

# Load World Bank GDP per capita data
gdp_raw = pd.read_csv(
    "data/gdp_per_capita.csv",
    skiprows=4
)

gdp_raw.head()


Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2016,2017,2018,2019,2020,2021,2022,2023,2024,Unnamed: 69
0,Aruba,ABW,GDP per capita (current US$),NY.GDP.PCAP.CD,,,,,,,...,27441.550214,28440.041688,30082.158423,30645.890602,22759.807175,26749.329609,30975.998912,35718.753119,39498.594129,
1,Africa Eastern and Southern,AFE,GDP per capita (current US$),NY.GDP.PCAP.CD,186.089204,186.909053,197.367547,225.400079,208.962717,226.836135,...,1334.4705,1528.104224,1552.073722,1507.0856,1351.591669,1562.416175,1679.327622,1571.449189,1615.396356,
2,Afghanistan,AFG,GDP per capita (current US$),NY.GDP.PCAP.CD,,,,,,,...,522.082216,525.469771,491.337221,496.602504,510.787063,356.496214,357.261153,413.757895,,
3,Africa Western and Central,AFW,GDP per capita (current US$),NY.GDP.PCAP.CD,121.936832,127.45104,133.823783,139.00498,148.545883,155.561897,...,1630.039439,1574.230564,1720.140092,2216.385055,2030.861659,2112.794076,2138.473153,1841.855064,1411.337029,
4,Angola,AGO,GDP per capita (current US$),NY.GDP.PCAP.CD,,,,,,,...,2051.814621,2790.718869,2860.093648,2493.678844,1759.356199,2303.908127,3682.113151,2916.136633,2665.874448,


In [30]:
countries = [
    "Philippines",
    "Japan",
    "Korea, Rep.",
    "Indonesia",
    "Thailand",
    "United Kingdom",
    "France",
    "Germany",
    "Spain",
    "Italy",
    "Australia"
]

gdp = gdp_raw[gdp_raw["Country Name"].isin(countries)].copy()


In [31]:
def latest_gdp(row):
    if not pd.isna(row["2024"]):
        return row["2024"], 2024
    else:
        return row["2023"], 2023

gdp[["gdp_per_capita_usd", "gdp_year"]] = gdp.apply(
    lambda row: pd.Series(latest_gdp(row)),
    axis=1
)


In [32]:
gdp_clean = (
    gdp[["Country Name", "gdp_per_capita_usd", "gdp_year"]]
    .rename(columns={"Country Name": "country"})
    .sort_values("gdp_per_capita_usd", ascending=False)
    .reset_index(drop=True)
)

gdp_clean


Unnamed: 0,country,gdp_per_capita_usd,gdp_year
0,Australia,64603.985631,2024.0
1,Germany,56103.732318,2024.0
2,United Kingdom,53246.367615,2024.0
3,France,46103.084086,2024.0
4,Italy,40385.341396,2024.0
5,"Korea, Rep.",36238.639908,2024.0
6,Spain,35326.768307,2024.0
7,Japan,32487.077805,2024.0
8,Thailand,7346.620221,2024.0
9,Indonesia,4925.430488,2024.0


In [48]:
countries = {
    "Philippines": "PH",
    "Indonesia": "ID",
    "Taiwan": "TW",
    "Hong Kong SAR, China": "HK",
    "Mexico": "MX",
    "Argentina": "AR",
    "Brazil": "BR",
    "Puerto Rico": "PR",
    "Colombia": "CO",
    "Peru": "PE", 
    "China": "CN",
    "Ecuador": "EC",
    "Brazil": "BR",
    "Chile": "CL",
    "Saudi Arabia": "SA",
    "Oman": "OM",
    "Egypt": "EG",
    "Kenya": "KE",
    "Ukraine": "UA",
    "Hungary": "HU",
    "Thailand": "TH",
    "Cambodia": "KH",
    "Laos": "LA",
    "Myanmar": "MM",
    "Nepal": "NP",
    "Tanzania": "TZ",
    "Uganda": "UG",
    "Algeria": "DZ",
    "Kazakhstan": "KZ",
    "Uzbekistan": "UZ",
    "Bolivia": "BO",
    "Paraguay": "PY",
    "Uruguay": "UY",
}


In [49]:
import time
from pytrends.request import TrendReq
import pandas as pd
import numpy as np

results = []

for i, (country, geo) in enumerate(countries.items()):
    try:
        # Recreate session every few requests
        if i % 5 == 0:
            pytrends = TrendReq(hl="en-US", tz=360)

        pytrends.build_payload(
            kw_list=["NBA"],
            timeframe="2018-01-01 2024-12-31",
            geo=geo
        )

        data = pytrends.interest_over_time()

        if data.empty or "NBA" not in data.columns:
            continue

        series = data["NBA"]
        top10_avg = series.nlargest(max(1, int(len(series) * 0.10))).mean()

        results.append({
            "country": country,
            "nba_top10_avg": top10_avg
        })

        # IMPORTANT: sleep between requests
        # time.sleep(15)

    except Exception as e:
        print(f"Skipping {country}: {e}")
        # time.sleep(30)  # back off harder on failure

nba_intensity = pd.DataFrame(results)
nba_intensity.sort_values("nba_top10_avg", ascending=False)


Skipping Philippines: The request failed: Google returned a response with code 429
Skipping Indonesia: The request failed: Google returned a response with code 429
Skipping Taiwan: The request failed: Google returned a response with code 429
Skipping Hong Kong SAR, China: The request failed: Google returned a response with code 429
Skipping Mexico: The request failed: Google returned a response with code 429
Skipping Argentina: The request failed: Google returned a response with code 429
Skipping Brazil: The request failed: Google returned a response with code 429


KeyboardInterrupt: 