### Plotly

In [7]:
import plotly.graph_objects as go
import plotly.io as pio

# Build the HorizonAnalytics template: navy background, bold white Montserrat
# text in large sizes, and a thicker default line for scatter traces.
HorizonAnalytics = go.layout.Template(
    layout=go.Layout(
        paper_bgcolor='#0d1b2a',  # Figure (page) background
        plot_bgcolor='#0d1b2a',  # Plotting-area background, same colour as the page
        height=800,
        width=800 * 1.618,  # 1.618 is approximately the golden ratio
        xaxis={
            'anchor': 'y',
            'showgrid': True,
            'gridcolor': 'rgba(255, 255, 255, 0.2)',  # White at 20% opacity
            'tickfont': {
                'size': 36,
                'family': 'Montserrat, sans-serif',
                'color': '#ffffff',
                'weight': "bold",
            },
            'title': {
                'text': '',  # Empty by default; figures supply their own axis title
                'font': {
                    'size': 48,
                    'family': 'Montserrat, sans-serif',
                    'color': '#ffffff',
                    'weight': "bold",
                },
            },
            'linecolor': '#ffffff',  # White axis line against the dark background
            'linewidth': 2,
        },
        yaxis={
            'anchor': 'x',
            'showgrid': True,
            'gridcolor': 'rgba(255, 255, 255, 0.2)',  # Same grid styling as the x-axis
            'tickfont': {
                'size': 36,
                'family': 'Montserrat, sans-serif',
                'color': '#ffffff',
                'weight': "bold",
            },
            'title': {
                'text': '',
                'font': {
                    'size': 48,
                    'family': 'Montserrat, sans-serif',
                    'color': '#ffffff',
                    'weight': "bold",
                },
            },
            'linecolor': '#ffffff',
            'linewidth': 2,
        },
        # Global font: applies wherever a more specific font is not set.
        font={
            'color': '#ffffff',
            'size': 36,
            'family': 'Montserrat, sans-serif',
            'weight': "bold",
        },
        # Default trace colours, used in this order.
        colorway=[
            "#FFFF00", "#33D7FF", "#A463FF", "#FFD700",
            "#ff4081", "#ffc107", "#00c4a0", "#a0aec0",
        ],
        title={
            'text': '',
            'font': {
                'size': 64,  # Largest text element in the template
                'color': '#ffffff',
                'family': 'Montserrat, sans-serif',
                'weight': "bold",
            },
            'x': 0.5,  # Horizontally centred
            'y': 0.97,  # Close to the top edge
        },
    ),
    # Trace-level defaults: scatter lines are drawn 5 px wide.
    data={
        'scatter': [
            go.Scatter(line={'width': 5}),
        ],
    },
)

# Register the template under its name and make it the default for new figures.
pio.templates['HorizonAnalytics'] = HorizonAnalytics
pio.templates.default = 'HorizonAnalytics'

## Search Terms

In [8]:
import pandas as pd

def expand_pokemon_data(input_file="d_pokemon_raw.csv", output_file="d_pokemon.csv", min_year=2000, max_year=2025):
    """
    Repeat every row of the input CSV once per year and save the result.

    Args:
    - input_file (str): Path to the source CSV (must contain columns 'id' and 'name').
    - output_file (str): Path the expanded CSV is written to.
    - min_year (int): First year to generate (inclusive).
    - max_year (int): Last year to generate (inclusive).

    Returns:
    - str: The value of `output_file`.

    The output holds only the columns 'year', 'id', 'name' (in that order), with
    all rows for the first year followed by all rows for the next, and so on.
    """
    source = pd.read_csv(input_file)

    # One full copy of the source rows per year, tagged with that year.
    yearly_copies = []
    for year in range(min_year, max_year + 1):
        yearly_copies.append(source.assign(year=year))

    stacked = pd.concat(yearly_copies, ignore_index=True)

    # Keep just the three output columns, in their final order, and write them out.
    stacked[['year', 'id', 'name']].to_csv(output_file, index=False)

    return output_file

# Example usage: read the default input ("d_pokemon_raw.csv") and write one copy of
# every row for 2024 and for 2025 to the default output ("d_pokemon.csv").
# The returned output path is what the cell displays.
expand_pokemon_data(min_year=2024, max_year=2025)

'd_pokemon.csv'

## Weekly Interest

In [2]:
import pandas as pd
import time
import os
from pytrends.request import TrendReq

def weekly_interest(input_file, output_file):
    """
    Fetches weekly search interest from Google Trends for each 'name' in the input file.

    Every (name, year) row of the input is queried on its own, for geo 'US' and the
    timeframe `{year}-01-01` to `{year}-12-31`. Results are stored in long format
    with the columns 'date', 'name', 'interest', 'year' and 'id'. After each
    successful fetch the complete result set (new rows plus any rows already in
    `output_file`) is rewritten to `output_file`, so an interrupted run can be
    resumed: name-year pairs already present in the output are not fetched again.

    Args:
    - input_file (str): Path to input CSV (must contain columns 'id', 'name', 'year').
    - output_file (str): Path to save the processed weekly interest data.

    Returns:
    - None. Progress and failures are reported with print().

    Notes:
    - A term that fails `max_retries` times is queued again for the next pass; there
      is no upper bound on the number of passes, so a permanently failing term
      keeps the function running.
    - Terms for which Google Trends returns no rows are skipped and never written,
      so they are queried again on the next run.
    """
    # Load processed data if it exists so completed name-year pairs can be skipped
    if os.path.exists(output_file):
        df_existing = pd.read_csv(output_file)

        # Track completed name-year combinations (year as str to match the input side)
        completed_terms = set(zip(df_existing["name"], df_existing["year"].astype(str)))

        print(f"✅ Found existing data: {len(completed_terms)} name-year combinations already processed.")
    else:
        df_existing = pd.DataFrame()
        completed_terms = set()

    # Load input data
    df_data = pd.read_csv(input_file)

    # Initialize Pytrends
    pytrends = TrendReq(hl='en-US', tz=360, retries=3)

    # Newly fetched frames; df_existing is appended to them on every save
    all_trends = []

    # Extract entities that still need to be processed (checked on BOTH name and year)
    pending_entities = [
        (row["id"], row["name"], str(row["year"])) for _, row in df_data.iterrows()
        if (row["name"], str(row["year"])) not in completed_terms
    ]

    max_retries = 3  # Maximum attempts per entity within one pass

    while pending_entities:
        new_pending = []  # Entities that exhausted their attempts in this pass

        for entity_id, name, year in pending_entities:
            attempt = 0
            success = False

            while attempt < max_retries and not success:
                try:
                    # Build payload for a single name
                    pytrends.build_payload([name], timeframe=f"{year}-01-01 {year}-12-31", geo='US')

                    # Get interest over time
                    df_trends = pytrends.interest_over_time()

                    # Check if the response is empty before processing
                    if df_trends.empty:
                        print(f"⚠️ No data found for {name} ({year}) - Skipping.")
                        success = True  # Ends the retry loop; nothing is stored for this entity
                        continue

                    # Remove 'isPartial' column if present
                    if 'isPartial' in df_trends.columns:
                        df_trends = df_trends.drop(columns=['isPartial'])

                    # Convert data to long format (Tidy Data)
                    df_trends = df_trends.reset_index().melt(id_vars=["date"], var_name="name", value_name="interest")

                    # Store dates as 'YYYY-MM-DD' text. Rows reloaded from output_file hold
                    # their dates as strings; mixing those with Timestamp objects makes
                    # to_csv write the new rows as 'YYYY-MM-DD HH:MM:SS', leaving two date
                    # formats in one file.
                    df_trends["date"] = pd.to_datetime(df_trends["date"]).dt.strftime("%Y-%m-%d")

                    # Add metadata columns
                    df_trends["year"] = year
                    df_trends["id"] = entity_id  # Include entity ID

                    # Append results
                    all_trends.append(df_trends)

                    print(f"✅ Extracted: {name} ({year})")
                    # Marked before saving on purpose: if the save below raises, the entity
                    # is not fetched and appended a second time.
                    success = True

                    # Merge with existing data
                    final_df = pd.concat(all_trends + [df_existing], ignore_index=True)

                    # Explicit dtypes ('year' arrives as str for the new rows)
                    final_df = final_df.astype({
                        "year": "int64",
                        "id": "int64",
                        "name": "string",
                        "interest": "float64"
                    })

                    # No `downcast` argument: it is deprecated in recent pandas, and an
                    # error raised here would be swallowed by the except below.
                    final_df = final_df.fillna(value={"interest": 0.0})

                    # Save progress
                    final_df.to_csv(output_file, index=False)

                except Exception as e:
                    attempt += 1
                    print(f"❌ Attempt {attempt}/{max_retries} failed for {name} ({year}): {e}")
                    time.sleep(5 * attempt)  # Linear backoff (5s, 10s, 15s)

            if not success:  # If all retries failed, add to new_pending list for retrying
                new_pending.append((entity_id, name, year))

        # Update pending_entities for the next loop (only failed ones)
        pending_entities = new_pending

        if pending_entities:
            print(f"🔄 Retrying {len(pending_entities)} failed terms...")

    print(f"✅ All terms successfully processed! Check {output_file}")

# Example usage: fetch weekly interest for every name-year row of d_pokemon.csv,
# skipping pairs already present in f_pokemon_weekly_interest.csv if that file exists.
weekly_interest("d_pokemon.csv", "f_pokemon_weekly_interest.csv")

✅ Found existing data: 301 name-year combinations already processed.
⚠️ No data found for Nidoran♂ (2024) - Skipping.
✅ All terms successfully processed! Check f_pokemon_weekly_interest.csv


In [12]:
import pandas as pd

def reorder_weekly_interest(file_path, sort_by=["id", "year"]):
    """
    Sort a weekly interest CSV and rewrite it with a fixed column layout.

    Args:
    - file_path (str): CSV to read; the sorted result overwrites this same file.
    - sort_by (list): Columns to sort by (default: ["id", "year"]).

    Returns:
    - None. The file is rewritten in place and a confirmation is printed.

    Raises:
    - ValueError: if any of 'year', 'date', 'id', 'name', 'interest' is missing.

    Only the five required columns are kept, in the order listed above with
    'year' first; 'year' and 'id' are cast to integers before sorting.
    """
    required_columns = ["year", "date", "id", "name", "interest"]

    raw = pd.read_csv(file_path)

    # Fail early, naming the absent columns, rather than with a KeyError later.
    missing_columns = set(required_columns) - set(raw.columns)
    if missing_columns:
        raise ValueError(f"❌ Missing required columns: {missing_columns}")

    # Select and order the columns, force integer keys, then sort.
    ordered = (
        raw.loc[:, required_columns]
        .astype({"year": int, "id": int})
        .sort_values(by=sort_by)
    )

    # Overwrite the source file with the reordered rows.
    ordered.to_csv(file_path, index=False)

    print(f"✅ Data successfully reordered and saved to {file_path}")

# Example usage: rewrite the weekly-interest file in place, ordered by year and then id.
reorder_weekly_interest("f_pokemon_weekly_interest.csv", sort_by=["year", "id"])
# A different file can be ordered by other columns, e.g.:
# reorder_weekly_interest("some_other_dataset.csv", sort_by=["name", "year"])

✅ Data successfully reordered and saved to f_pokemon_weekly_interest.csv


## State Interest

In [13]:
from pytrends.request import TrendReq
import pandas as pd
import time
import os

def state_interest(input_file, output_file):
    """
    Collect Google Trends interest by US region for every name-year row of a CSV.

    Args:
    - input_file (str): Path to CSV with columns 'name' and 'year'.
    - output_file (str): Path to output CSV with columns 'name', 'year', 'state', 'search_interest'.

    Returns:
    - None. Rows are appended to `output_file` as each term is fetched.

    Name-year pairs already present in `output_file` are skipped. Each remaining
    pair gets up to three attempts per pass; pairs that still fail are queued for
    another pass, and passes repeat until nothing is left.
    """
    # Work out which name-year pairs an earlier run has already written.
    completed_terms = set()
    if os.path.exists(output_file):
        previous = pd.read_csv(output_file)
        completed_terms = set(zip(previous["name"], previous["year"].astype(str)))
        print(f"✅ Found existing data: {len(completed_terms)} name-year combinations already processed.")

    # Load input dataset
    df_terms = pd.read_csv(input_file)

    # Initialize Pytrends
    pytrends = TrendReq(hl='en-US', tz=360, retries=3)

    # Queue every pair that is not done yet (year compared as text on both sides).
    pending_terms = []
    for _, row in df_terms.iterrows():
        key = (row["name"], str(row["year"]))
        if key not in completed_terms:
            pending_terms.append(key)

    max_retries = 3

    while pending_terms:
        still_failing = []

        for name, year in pending_terms:
            for attempt in range(1, max_retries + 1):
                try:
                    # One query per name, limited to the US and that calendar year.
                    pytrends.build_payload([name], timeframe=f"{year}-01-01 {year}-12-31", geo='US')
                    by_region = pytrends.interest_by_region(resolution='REGION')

                    if by_region.empty:
                        print(f"⚠️ No state-level data found for {name} ({year}) - Skipping.")
                        break

                    # Reshape to the output schema: name, year, state, search_interest.
                    tidy = (
                        by_region.reset_index()
                        .rename(columns={"geoName": "state"})
                        .loc[:, ["state", name]]
                        .rename(columns={name: "search_interest"})
                        .assign(name=name, year=year)
                        .loc[:, ["name", "year", "state", "search_interest"]]
                    )

                    # Append; the header is written only when the file is first created.
                    tidy.to_csv(output_file, mode='a', header=not os.path.exists(output_file), index=False)

                    print(f"✅ Extracted state-level data for: {name} ({year})")
                    break

                except Exception as e:
                    print(f"❌ Attempt {attempt}/{max_retries} failed for {name} ({year}): {e}")
                    time.sleep(5 * attempt)  # Wait longer after each failed attempt
            else:
                # Every attempt raised: try this pair again in the next pass.
                still_failing.append((name, year))

        pending_terms = still_failing

        if pending_terms:
            print(f"🔄 Retrying {len(pending_terms)} failed terms...")

    print(f"✅ All state-level data successfully processed! Check {output_file}")

# Example usage:
# state_interest("d_pokemon.csv", "d_pokemon_state_interest.csv")

## rbt_pokemon

In [168]:
import pandas as pd
from IPython.display import display, HTML
import us  # For state name-to-code conversion

def create_reporting_table(
    weekly_file,         # e.g., 'f_pokemon_weekly_interest.csv'
    state_file,          # e.g., 'd_pokemon_state_interest.csv'
    colour_file,         # e.g., 'd_pokemon_colour.csv'
    output_file,         # e.g., 'rbt_pokemon.csv'
    join_on="name",      # e.g., 'name' (can be 'search_term' or 'topic')
    show_table=True      # Whether to display output in notebook
):
    """
    Build and save a table naming the top item for every US state and week.

    For each (date, year, state) the winner is the item with the largest
    `interest * search_interest`, i.e. its weekly interest weighted by its
    state-level interest for that year. Colour information is attached to each
    winner from the colour file.

    Args:
    - weekly_file (str): Path to CSV with weekly interest data (must have 'date', 'year', 'interest').
    - state_file (str): Path to CSV with state-level interest data (must have 'state', 'year',
      'search_interest' and the join column).
    - colour_file (str): Path to CSV with colours (must have a 'name' column).
    - output_file (str): Output path for the resulting reporting table.
    - join_on (str): Column to join on (e.g., 'name' or 'search_term').
    - show_table (bool): Whether to display the result as a scrollable table (for notebooks).

    Returns:
    - None. The table is written to `output_file`; columns are 'date', 'year',
      'state' (two-letter code), 'top_item', plus the colour file's columns
      other than 'name'.
    """

    def _to_state_code(state_name):
        # The District of Columbia is mapped by hand; any name the `us` lookup
        # does not recognise becomes None.
        if state_name == "District of Columbia":
            return "DC"
        match = us.states.lookup(state_name)
        return match.abbr if match else None

    def _show_scrollable(df):
        # Render the frame as HTML inside a scrollable block.
        html = df.to_html(classes='scrollable', escape=False)
        style = """
            <style>
                .scrollable {
                    display: block;
                    overflow-x: auto;
                    white-space: nowrap;
                    max-height: 400px;
                    overflow-y: auto;
                }
            </style>
            """
        display(HTML(style + html))

    # Load input files
    df_weekly = pd.read_csv(weekly_file, parse_dates=["date"])
    df_state = pd.read_csv(state_file)
    df_colour = pd.read_csv(colour_file)

    # Attach state codes and drop the rows whose name could not be converted.
    df_state = (
        df_state
        .assign(state_code=df_state["state"].map(_to_state_code))
        .dropna(subset=["state_code"])
    )

    # Pair every weekly row with every state row of the same item and year.
    merged_df = df_weekly.merge(df_state, on=[join_on, "year"], how="inner")
    merged_df = merged_df.assign(
        weighted_interest=merged_df["interest"] * merged_df["search_interest"]
    )

    # Row label of the highest weighted interest within each week/state group.
    top_rows = merged_df.groupby(["date", "year", "state_code"])["weighted_interest"].idxmax()

    result = (
        merged_df.loc[top_rows, ["date", "year", "state_code", join_on]]
        .rename(columns={join_on: "top_item", "state_code": "state"})
        # Left join so a winner without a colour entry is still kept.
        .merge(df_colour, left_on="top_item", right_on="name", how="left")
        # 'name' from the colour file duplicates 'top_item'.
        .drop(columns=["name"])
    )

    # Save to CSV
    result.to_csv(output_file, index=False)
    print(f"✅ Reporting table saved to {output_file}")

    # Display in notebook
    if show_table:
        _show_scrollable(result)

# Example usage: combine the weekly and state-level interest files with the colour
# file, joining on 'name', and write the result to rbt_pokemon.csv without
# rendering the table in the notebook.
create_reporting_table(
    weekly_file="f_pokemon_weekly_interest.csv",
    state_file="d_pokemon_state_interest.csv",
    colour_file="d_pokemon_colour.csv",
    output_file="rbt_pokemon.csv",
    join_on="name",
    show_table=False
)

✅ Reporting table saved to rbt_pokemon.csv


## Visualisation

### Pictures

In [14]:
import os
import requests

# Define the target directory
# NOTE(review): absolute, user-specific path; adjust before running on another machine.
base_dir = "/Users/arya/Documents/Adobe/Premiere Pro/Horizon Analytics/2025-03_pokemon"
save_dir = os.path.join(base_dir, "pokemon_pictures")

# Ensure the folder exists
os.makedirs(save_dir, exist_ok=True)

# Base URL for fetching Pokémon data
pokeapi_url = "https://pokeapi.co/api/v2/pokemon/"

# Seconds to wait for a response. requests has no default timeout, so without
# this a stalled connection would block the loop indefinitely.
request_timeout = 30

# Download the official artwork for Pokémon 1 to 151 (range end is exclusive)
for pokemon_id in range(1, 152):
    try:
        # Fetch Pokémon data
        response = requests.get(f"{pokeapi_url}{pokemon_id}/", timeout=request_timeout)
        response.raise_for_status()
        data = response.json()

        # Get Pokémon name and image URL
        pokemon_name = data["name"].capitalize()  # Capitalize first letter
        image_url = data["sprites"]["other"]["official-artwork"]["front_default"]

        if not image_url:
            # Report the gap instead of skipping it silently
            print(f"No official artwork for {pokemon_name} (ID: {pokemon_id}) - skipped")
            continue

        # Download the image
        img_response = requests.get(image_url, timeout=request_timeout)
        img_response.raise_for_status()

        # Save image as {id}.png
        file_path = os.path.join(save_dir, f"{pokemon_id}.png")
        with open(file_path, "wb") as file:
            file.write(img_response.content)

        print(f"Downloaded: {pokemon_name} (ID: {pokemon_id})")

    except Exception as e:
        # Best effort: report the failure and carry on with the next ID
        print(f"Failed to download ID {pokemon_id}: {e}")

print("Download completed!")

Downloaded: Mewtwo (ID: 150)
Downloaded: Mew (ID: 151)
Download completed!


## Frames

In [175]:
# Per-state label offsets as (longitude shift, latitude shift), keyed by two-letter
# state code. States not listed get no offset.
# NOTE: a later cell defines offset_states again (with a different VA value); the
# definition executed last is the one in effect.
# "AL" replaces the former key "ML", which is not a state code and so never matched.
offset_states = {
    "TX": (-1.25, 0), "NM": (0.2, -0.9), "OK": (-0.2, -0.6), "NV": (0.4, 0.9),
    "UT": (0.2, -1.2), "CO": (0, -0.5), "KS": (-1.7, -0.5), "MO": (-0.2, -0.3),
    "TN": (0, -0.4), "MS": (0, -0.3), "AL": (0, -0.3), "GA": (0, -0.3),
    "LA": (-0.7, 0), "SC": (0, -0.3), "NC": (0.5, -0.5), "VA": (0, -1.2),
    "KY": (0, -0.5), "OR": (1.5, -1.2), "WA": (1.2, -0.8), "ID": (0, -1),
    "MT": (0.8, -0.4), "ND": (-0.5, -0.5), "SD": (-0.7, -0.4), "NE": (-1.6, 0),
    "AR": (0, -0.5), "IA": (-0.3, -0.3), "MN": (-0.5, 0), "WI": (-0.5, 0),
    "IN": (0, -0.5), "IL": (-0.3, 0), "WV": (0, -0.5), "OH": (0, -0.5),
    "PA": (-0.7, -0.2), "MI": (0, -0.3), "NY": (-0.5, 0.3), "AK": (0.3, 1.8),
    "HI": (0, -0.5)
}

In [214]:
import pandas as pd
import plotly.graph_objects as go

# External dictionary: offsets for better label placement, as
# (longitude shift, latitude shift) keyed by two-letter state code.
# States not listed get no offset.
# "AL" replaces the former key "ML", which is not a state code and so never matched.
offset_states = {
    "TX": (-1.25, 0), "NM": (0.2, -0.9), "OK": (-0.2, -0.6), "NV": (0.4, 0.9),
    "UT": (0.2, -1.2), "CO": (0, -0.5), "KS": (-1.7, -0.5), "MO": (-0.2, -0.3),
    "TN": (0, -0.4), "MS": (0, -0.3), "AL": (0, -0.3), "GA": (0, -0.3),
    "LA": (-0.7, 0), "SC": (0, -0.3), "NC": (0.5, -0.5), "VA": (0, -0.8),
    "KY": (0, -0.5), "OR": (1.5, -1.2), "WA": (1.2, -0.8), "ID": (0, -1),
    "MT": (0.8, -0.4), "ND": (-0.5, -0.5), "SD": (-0.7, -0.4), "NE": (-1.6, 0),
    "AR": (0, -0.5), "IA": (-0.3, -0.3), "MN": (-0.5, 0), "WI": (-0.5, 0),
    "IN": (0, -0.5), "IL": (-0.3, 0), "WV": (0, -0.5), "OH": (0, -0.5),
    "PA": (-0.7, -0.2), "MI": (0, -0.3), "NY": (-0.5, 0.3), "AK": (0.3, 1.8),
    "HI": (0, -0.5)
}

def plot_weekly_choropleth(
    reporting_file="rbt_pokemon.csv",
    date_str="2023-12-31",
    width=1000,
    height=600,
    font_size=10,
    font_family='Arial',
    lat_longs_file="lat_longs.csv",
    missing_color="#cccccc"
):
    """
    Plots a choropleth map of the U.S. showing the top Pokémon per state for a given week,
    with transparent background and white-outlined labels.

    Args:
    - reporting_file (str): CSV with columns 'date', 'state', 'top_item' and 'hex_code'.
    - date_str (str): Week to plot; rows whose 'date' equals this date are used.
    - width (int): Figure width in pixels.
    - height (int): Figure height in pixels.
    - font_size (int): Label font size.
    - font_family (str): Label font family.
    - lat_longs_file (str): CSV with columns 'state', 'lat' and 'lon' giving each
      state's label position.
    - missing_color (str): Fill colour for a top item that has no usable 'hex_code'.

    Returns:
    - None. The figure is shown with fig.show(); if no rows match `date_str`, a
      message is printed and nothing is drawn.

    Label positions are shifted by the module-level `offset_states` dictionary.
    """

    # Load weekly Pokémon data
    df = pd.read_csv(reporting_file, parse_dates=["date"])
    df_week = df[df["date"] == pd.to_datetime(date_str)]

    if df_week.empty:
        print(f"❌ No data found for {date_str}")
        return

    # Load lat/lon
    lat_longs = pd.read_csv(lat_longs_file)
    df_plot = df_week.merge(lat_longs, on="state", how="left")

    # Exclude small states from label text
    exclude_text_states = {"CT", "RI", "DE", "MD", "DC", "MA", "NJ", "NH", "VT"}
    df_labels = df_plot[~df_plot["state"].isin(exclude_text_states)].copy()

    # Apply manual label offsets (states without an entry are not moved)
    df_labels["lon"] += df_labels["state"].map(lambda x: offset_states.get(x, (0, 0))[0])
    df_labels["lat"] += df_labels["state"].map(lambda x: offset_states.get(x, (0, 0))[1])

    # Assign numeric values to Pokémon: the choropleth colours by number, so each
    # distinct top item gets an index 0..n-1
    unique_pokemon = df_plot["top_item"].unique()
    pokemon_mapping = {pokemon: idx for idx, pokemon in enumerate(unique_pokemon)}
    df_plot["color_value"] = df_plot["top_item"].map(pokemon_mapping)

    # One colour per top item, in index order; fall back when hex_code is missing,
    # because a NaN entry is not a valid colourscale colour
    hex_by_item = df_plot.drop_duplicates("top_item").set_index("top_item")["hex_code"]
    colors = [
        code if isinstance(code, str) and code else missing_color
        for code in hex_by_item.reindex(unique_pokemon)
    ]

    # Build colorscale from hex codes. With a single item there is no range to
    # spread over (n - 1 == 0), so both ends of the scale get that one colour
    if len(colors) == 1:
        colorscale = [[0.0, colors[0]], [1.0, colors[0]]]
    else:
        colorscale = [
            [i / (len(colors) - 1), hex_code]
            for i, hex_code in enumerate(colors)
        ]

    # Base choropleth layer; zmin/zmax pin index i to colorscale position i / (n - 1)
    choropleth = go.Choropleth(
        locations=df_plot["state"],
        locationmode="USA-states",
        z=df_plot["color_value"],
        zmin=0,
        zmax=max(len(colors) - 1, 1),
        colorscale=colorscale,
        showscale=False,
        marker_line_color="white"
    )

    # Outline text helper: four white copies of the labels, each nudged by `offset`
    # degrees left, right, down and up, drawn underneath the black text
    def create_text_outline_traces(df, font_size, font_family, offset=0.03):
        outlines = []
        directions = [(-offset, 0), (offset, 0), (0, -offset), (0, offset)]
        for dx, dy in directions:
            outlines.append(go.Scattergeo(
                locationmode="USA-states",
                lon=df["lon"] + dx,
                lat=df["lat"] + dy,
                text=df["top_item"],
                mode="text",
                textposition="top center",
                textfont=dict(size=font_size, color="white", family=font_family),
                showlegend=False,
                hoverinfo="skip"
            ))
        return outlines

    # Main label layer
    main_text = go.Scattergeo(
        locationmode="USA-states",
        lon=df_labels["lon"],
        lat=df_labels["lat"],
        text=df_labels["top_item"],
        mode="text",
        textposition="top center",
        textfont=dict(size=font_size, color="black", family=font_family),
        showlegend=False
    )

    # Combine and render: map first, then outlines, then the black labels on top
    fig = go.Figure(data=[choropleth] + create_text_outline_traces(df_labels, font_size, font_family) + [main_text])

    fig.update_layout(
        title=None,
        geo=dict(
            scope="usa",
            showlakes=False,  # Hides the Great Lakes
            lakecolor="rgba(0,0,0,0)",  # Ensures transparency if lakes show
            showcoastlines=False,  # Removes unwanted coastlines
            bgcolor="rgba(0,0,0,0)"  # Ensures the whole map is transparent
        ),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        width=width,
        height=height,
        margin=dict(l=20, r=20, t=20, b=20)
    )

    fig.show()

In [215]:
# Render the map for the week dated 2024-01-21 from rbt_pokemon.csv as a
# 1920x1080 figure with 26-point Impact labels.
plot_weekly_choropleth(
    reporting_file="rbt_pokemon.csv",
    date_str="2024-01-21",
    width=1920,
    height=1080,
    font_size=26,
    font_family="Impact"
)