# 🎶 UK City Bands – Google Trends Analysis (2019–2024)
This notebook explores search interest in key UK bands by city of origin, using Google Trends data from the past 5 years. Each band's popularity is normalized using **The Beatles** as a reference anchor.

Cities included: London, Birmingham, Manchester, Glasgow, Liverpool, Leeds, Sheffield, Bradford, Bristol, Nottingham.

In [1]:
from pytrends.request import TrendReq
import pandas as pd
import time
import matplotlib.pyplot as plt

In [2]:
# 🔁 Extract band list from city dictionary
bands_by_city = {
    "London": ["The Rolling Stones", "The Clash", "Blur", "Coldplay", "Florence and the Machine"],
    "Birmingham": ["Black Sabbath", "ELO", "Duran Duran", "UB40", "Editors"],
    "Manchester": ["Oasis", "The Smiths", "New Order", "The Stone Roses", "The 1975"],
    "Glasgow": ["Simple Minds", "Primal Scream", "Belle and Sebastian", "Franz Ferdinand", "CHVRCHES"],
    "Liverpool": ["The Beatles", "Echo & the Bunnymen", "Frankie Goes to Hollywood", "The La's", "The Wombats"],
    "Leeds": ["Kaiser Chiefs", "Soft Cell", "Alt-J", "Sisters of Mercy", "The Wedding Present"],
    "Sheffield": ["Def Leppard", "The Human League", "Pulp", "Arctic Monkeys", "Bring Me The Horizon"],
    "Bradford": ["Smokie", "Tasmin Archer", "Kiki Dee", "Ivyrise", "The Cult"],
    "Bristol": ["Massive Attack", "Portishead", "Tricky", "Idles", "Kosheen"],
    "Nottingham": ["Jake Bugg", "Dog Is Dead", "London Grammar", "Ten Years After", "Sleaford Mods"]
}
all_bands = sorted(set(b for bands in bands_by_city.values() for b in bands))

## 🎯 Using Google Trends Topics Instead of Search Terms
To improve the accuracy of our comparisons, we now fetch each band using its **Google Trends Topic ID (mid)**. This avoids ambiguity with band names that may have other meanings (like *Pulp* or *Editors*).

We'll use `pytrends.suggestions()` to look up the canonical topic for each band before fetching trend data.

In [None]:
# ✅ Skip regeneration if CSV already exists
import os
if os.path.exists("google_trends_band_interest.csv"):
    print("📁 Using existing google_trends_band_interest.csv")
    normalized_df = pd.read_csv("google_trends_band_interest.csv", index_col=0)
else:
    from pytrends.request import TrendReq
    import pandas as pd
    import time
    import warnings
    warnings.filterwarnings('ignore')

    pytrends = TrendReq(hl='en-GB', tz=0)
    pytrends = TrendReq(hl='en-US', tz=0)
    anchor = "The Beatles"
    batch_size = 4

    # 🔍 Lookup band topics via pytrends.suggestions
    band_topics = {}
    for band in all_bands:
        suggestions = pytrends.suggestions(band)
        match = next((s for s in suggestions if s['type'].lower() == 'band'), None)
        time.sleep(0.5)
        if match:
            print(f"Topic found for {band}: {match['mid']}")
            band_topics[band] = match['mid']
        else:
            print(f"⚠️ No topic found for {band}")

    # 🧠 Add anchor topic
    anchor_topic = band_topics.get(anchor)
    if not anchor_topic:
        raise ValueError("Anchor band topic not found")

    batched_data = []
    for i in range(0, len(all_bands), batch_size):
        batch = all_bands[i:i + batch_size]
        mids = [band_topics.get(band) for band in batch if band_topics.get(band)]
        if anchor_topic not in mids:
            mids = [anchor_topic] + mids
        pytrends.build_payload(mids, timeframe="today 5-y", geo="GB")
        df = pytrends.interest_over_time()
        if not df.empty:
            df = df.drop(columns=["isPartial"])
            df.columns.name = None
            batched_data.append(df)
        time.sleep(1)

    # 🧬 Normalize using anchor
    merged_df = pd.concat(batched_data, axis=1)
    merged_df = merged_df.loc[:, ~merged_df.columns.duplicated()]
    normalized_df = merged_df.div(merged_df[anchor].max()).multiply(100)
    normalized_df.to_csv("google_trends_band_interest.csv")

⚠️ No topic found for Alt-J
⚠️ No topic found for Arctic Monkeys
⚠️ No topic found for Belle and Sebastian
⚠️ No topic found for Black Sabbath


KeyboardInterrupt: 

In [None]:
# 🧬 Normalize and save
merged_df = pd.concat(batched_data, axis=1)
merged_df = merged_df.loc[:, ~merged_df.columns.duplicated()]
anchor_max = merged_df[anchor].max()
normalized_df = merged_df.div(merged_df[anchor], axis=0).multiply(anchor_max)
normalized_df.to_csv("google_trends_band_interest.csv")

In [None]:
# 📊 Top 20 bands
top_20 = normalized_df.mean().sort_values(ascending=False).head(20)
plt.figure(figsize=(10, 6))
plt.barh(top_20.index[::-1], top_20.values[::-1], color="gray", edgecolor="none")
ax = plt.gca()
for spine in ax.spines.values():
    spine.set_visible(False)
ax.tick_params(left=False, bottom=False)
plt.xlabel("Avg. Google Search Interest (Normalized)")
plt.title("Top 20 UK Bands by Google Search (5-Year Avg, UK)")
plt.tight_layout()
plt.show()

## 🌍 Global Interest Chart
In addition to UK-only interest, this chart shows **global average search interest** over the past 5 years.

In [None]:
# ✅ Skip global trends regeneration if CSV already exists
if os.path.exists("google_trends_global_band_interest.csv"):
    print("📁 Using existing google_trends_global_band_interest.csv")
    normalized_global = pd.read_csv("google_trends_global_band_interest.csv", index_col=0)
else:
    global_batched_data = []
    for i in range(0, len(all_bands), batch_size):
        batch = all_bands[i:i + batch_size]
        if anchor not in batch:
            batch = [anchor] + batch
        else:
            batch = batch[:5]

        pytrends.build_payload(batch, timeframe="today 5-y", geo="")  # No geo = worldwide
        df = pytrends.interest_over_time()
        if not df.empty:
            df = df.drop(columns="isPartial")
            global_batched_data.append(df)
        time.sleep(1)

    global_df = pd.concat(global_batched_data, axis=1)
    global_df = global_df.loc[:, ~global_df.columns.duplicated()]
    anchor_max = global_df[anchor].max()
    normalized_global = global_df.div(global_df[anchor], axis=0).multiply(anchor_max)
    normalized_global.to_csv("google_trends_global_band_interest.csv")

In [None]:
# Plot top 20 globally
top_20_global = normalized_global.mean().sort_values(ascending=False).head(20)
plt.figure(figsize=(10, 6))
plt.barh(top_20_global.index[::-1], top_20_global.values[::-1], color="gray", edgecolor="none")
ax = plt.gca()
for spine in ax.spines.values():
    spine.set_visible(False)
ax.tick_params(left=False, bottom=False)
plt.xlabel("Avg. Global Google Search Interest (Normalized)")
plt.title("Top 20 UK Bands by Global Search (5-Year Avg)")
plt.tight_layout()
plt.show()

## 🏙️ Cultural Impact by City (Google Search Interest)
We've been looking at bands individually, but the original question was: **which UK cities have the biggest cultural impact through music?**

To answer that, we now aggregate normalized search interest by city, using the band–city mapping. This gives us a total cultural 'footprint' based on the popularity of each city's artists across the UK.

In [None]:
# Aggregate city-level cultural impact
band_city_lookup = {band: city for city, bands in bands_by_city.items() for band in bands}
band_averages = normalized_df.mean()
city_scores = {}
for band, score in band_averages.items():
    city = band_city_lookup.get(band)
    if city:
        city_scores[city] = city_scores.get(city, 0) + score

# Convert to DataFrame
city_df = pd.DataFrame.from_dict(city_scores, orient='index', columns=['TotalSearchImpact'])
city_df = city_df.sort_values('TotalSearchImpact', ascending=True)

In [None]:
# Plot city impact
plt.figure(figsize=(10, 6))
plt.barh(city_df.index, city_df['TotalSearchImpact'], color="gray", edgecolor="none")
ax = plt.gca()
for spine in ax.spines.values():
    spine.set_visible(False)
ax.tick_params(left=False, bottom=False)
plt.xlabel("Aggregate Google Search Interest (Normalized)")
plt.title("Cultural Impact by City (Based on Band Popularity)")
plt.tight_layout()
plt.show()

## 🧠 Better Metrics for City Cultural Impact
Because our search interest scores are normalized (not absolute), simple aggregation can misrepresent cultural impact.

Instead, we use two smarter metrics:
1. **Average Search Interest per City** – how strong a city's artists are on average
2. **Top Band per City** – the single most searched band for each city

In [None]:
# 🎯 Metric 1: Average search interest per city
city_scores_avg = {}
for band, score in band_averages.items():
    city = band_city_lookup.get(band)
    if city:
        city_scores_avg.setdefault(city, []).append(score)

city_avg_df = pd.DataFrame({
    city: sum(scores) / len(scores)
    for city, scores in city_scores_avg.items()
}, index=["AvgSearchImpact"]).T.sort_values("AvgSearchImpact", ascending=True)

In [None]:
# 📊 Plot average per city
plt.figure(figsize=(10, 6))
plt.barh(city_avg_df.index, city_avg_df['AvgSearchImpact'], color="gray", edgecolor="none")
ax = plt.gca()
for spine in ax.spines.values():
    spine.set_visible(False)
ax.tick_params(left=False, bottom=False)
plt.xlabel("Avg. Google Search Interest (Normalized)")
plt.title("Average Cultural Impact per City (Google Search)")
plt.tight_layout()
plt.show()

In [None]:
# 🎖️ Metric 2: Top band per city
top_band_per_city = {}
for band, city in band_city_lookup.items():
    score = band_averages.get(band)
    if score is not None:
        current_top = top_band_per_city.get(city, (None, -1))
        if score > current_top[1]:
            top_band_per_city[city] = (band, score)

top_band_df = pd.DataFrame(
    [(city, band, score) for city, (band, score) in top_band_per_city.items()],
    columns=["City", "Band", "SearchImpact"]
).sort_values("SearchImpact", ascending=True)

In [None]:
# 📊 Plot top band per city
plt.figure(figsize=(10, 6))
bars = plt.barh(top_band_df['City'], top_band_df['SearchImpact'], color="gray", edgecolor="none")
for bar, band in zip(bars, top_band_df['Band']):
    width = bar.get_width()
    plt.text(width + 0.3, bar.get_y() + bar.get_height()/2, f"{band} ({width:.1f})", va='center')
ax = plt.gca()
for spine in ax.spines.values():
    spine.set_visible(False)
ax.tick_params(left=False, bottom=False)
plt.xlabel("Search Interest (Normalized)")
plt.title("Top Band per City by Google Search Impact")
plt.tight_layout()
plt.show()

## 📈 City Cultural Impact vs Population
Does city size correlate with cultural influence through music?

We compare each city's **average search interest** (from earlier) with its **population** using a scatterplot.

> 🔍 Note: Normalized search interest means each band's scores are scaled relative to The Beatles' max UK interest. Still, since we're averaging across time, **bands with consistent long-term interest** may end up ranked higher than bands with one very high but brief spike — like The Beatles.

In [None]:
# 📊 Scatterplot: Population vs Cultural Impact
city_pop = {
    "London": 9.65,
    "Birmingham": 2.64,
    "Manchester": 2.87,
    "Glasgow": 1.65,
    "Liverpool": 1.53,
    "Leeds": 1.88,
    "Sheffield": 1.57,
    "Bradford": 1.40,
    "Bristol": 0.72,
    "Nottingham": 1.00
}

# Join population and cultural impact
pop_series = pd.Series(city_pop)
scatter_df = city_avg_df.join(pop_series.rename("Population"), how="inner")

# Plot
plt.figure(figsize=(8, 6))
plt.scatter(scatter_df["Population"], scatter_df["AvgSearchImpact"], color="gray", edgecolors="none")

# Add city labels
for city, row in scatter_df.iterrows():
    plt.text(row["Population"] + 0.1, row["AvgSearchImpact"], city, fontsize=9)

# Style
ax = plt.gca()
for spine in ax.spines.values():
    spine.set_visible(False)
ax.tick_params(left=False, bottom=False)
plt.xlabel("City Population (millions)")
plt.ylabel("Average Google Search Interest (Normalized)")
plt.title("Population vs Cultural Impact by City")
plt.tight_layout()
plt.show()