### Plotly

In [3]:
import plotly.graph_objects as go
import plotly.io as pio

# Define the updated HorizonAnalytics template
HorizonAnalytics = go.layout.Template(
    layout=go.Layout(
        paper_bgcolor='#0d1b2a',  # Background color
        plot_bgcolor='#0d1b2a',  # Background color
        height=800,
        width=800 * 1.618,
        xaxis=dict(
            anchor='y',
            showgrid=True,
            gridcolor='rgba(255, 255, 255, 0.2)',  # Softer grid lines for contrast
            tickfont=dict(
                size=36,  # Consistent with other elements
                family='Montserrat, sans-serif',
                color='#ffffff',
                weight="bold"
            ),
            title=dict(
                text='',
                font=dict(
                    size=48,  # Increase to match other elements
                    family='Montserrat, sans-serif',
                    color='#ffffff',
                    weight="bold"
                )
            ),
            linecolor='#ffffff',  # White axis lines for contrast
            linewidth=2
        ),
        yaxis=dict(
            anchor='x',
            showgrid=True,
            gridcolor='rgba(255, 255, 255, 0.2)',  # Softer grid lines
            tickfont=dict(
                size=36,  # Consistent with x-axis
                family='Montserrat, sans-serif',
                color='#ffffff',
                weight="bold"
            ),
            title=dict(
                text='',
                font=dict(
                    size=48,  # Increase to match x-axis
                    family='Montserrat, sans-serif',
                    color='#ffffff',
                    weight="bold"
                )
            ),
            linecolor='#ffffff',  # White axis lines
            linewidth=2
        ),
        font=dict(
            color='#ffffff',  # White font for all text
            size=36,  # Uniform font size
            family='Montserrat, sans-serif',
            weight="bold"
        ),
        # Refined colorway for better visibility and differentiation
        colorway=["#FFFF00", "#33D7FF", "#A463FF", "#FFD700", 
                  "#ff4081", "#ffc107", "#00c4a0", "#a0aec0"],
        title=dict(
            text='',
            font=dict(
                size=64,  # **Big Boost in Title Size**
                color='#ffffff',
                family='Montserrat, sans-serif',
                weight="bold"
            ),
            x=0.5,  # Center title
            y=0.97  # Push title higher
        )
    ),
    data=dict(
        scatter=[
            go.Scatter(
                line=dict(width=5)  # Increased line width for better visibility
            )
        ]
    )
)

# Register the updated HorizonAnalytics template
pio.templates['HorizonAnalytics'] = HorizonAnalytics
pio.templates.default = 'HorizonAnalytics'

## Search Terms

In [4]:
import pandas as pd

def expand_pokemon_data(input_file="d_pokemon_raw.csv", output_file="d_pokemon.csv", min_year=2000, max_year=2025):
    # Load the Pokémon data
    df = pd.read_csv(input_file)
    
    # Create a list of years
    years = list(range(min_year, max_year + 1))
    
    # Expand the data by duplicating it for each year
    expanded_df = pd.concat([df.assign(year=year) for year in years], ignore_index=True)
    
    # Reorder columns
    expanded_df = expanded_df[['year', 'id', 'name']]
    
    # Save to the output file
    expanded_df.to_csv(output_file, index=False)

    return output_file

# Example usage:
expand_pokemon_data(min_year=2024, max_year=2025)

'd_pokemon.csv'

## Weekly Interest

In [6]:
from pytrends.request import TrendReq
import pandas as pd
import time
import os

def weekly_interest(input_file, output_file):
    """
    Fetches weekly search interest from Google Trends for each 'name' in the input file.
    
    Args:
    - input_file (str): Path to input CSV (must contain columns 'id', 'name', 'year').
    - output_file (str): Path to save the processed weekly interest data.
    """
    # Load processed data if it exists
    if os.path.exists(output_file):
        df_existing = pd.read_csv(output_file)

        # Track completed name-year combinations
        completed_terms = set(zip(df_existing["name"], df_existing["year"].astype(str)))

        print(f"✅ Found existing data: {len(completed_terms)} name-year combinations already processed.")
    else:
        df_existing = pd.DataFrame()
        completed_terms = set()

    # Load input data
    df_data = pd.read_csv(input_file)

    # Ask user whether to start from the top or bottom
    # start_from = input("📌 Enter 'top' to start from the beginning or 'bottom' to start from the end: ").strip().lower()

    # if start_from == "bottom":
        # df_data = df_data[::-1]  # Reverse the order

    # Initialize Pytrends
    pytrends = TrendReq(hl='en-US', tz=360, retries=3)

    # Prepare storage for results
    all_trends = []

    # Extract entities that still need to be processed
    pending_entities = [
        (row["id"], row["name"], str(row["year"])) for _, row in df_data.iterrows()
        if (row["name"], str(row["year"])) not in completed_terms  # Ensure it checks BOTH name & year
    ]

    max_retries = 3  # Maximum retries per entity

    while pending_entities:
        new_pending = []  # Reset pending terms for the next loop

        for entity_id, name, year in pending_entities:
            attempt = 0
            success = False

            while attempt < max_retries and not success:
                try:
                    # Build payload for a single name
                    pytrends.build_payload([name], timeframe=f"{year}-01-01 {year}-12-31", geo='US')

                    # Get interest over time
                    df_trends = pytrends.interest_over_time()

                    # Check if the response is empty before processing
                    if df_trends.empty:
                        print(f"⚠️ No data found for {name} ({year}) - Skipping.")
                        success = True  # Mark as success so it doesn't keep retrying
                        continue  # Skip to next entity

                    # Remove 'isPartial' column if present
                    if 'isPartial' in df_trends.columns:
                        df_trends = df_trends.drop(columns=['isPartial'])

                    # Convert data to long format (Tidy Data)
                    df_trends = df_trends.reset_index().melt(id_vars=["date"], var_name="name", value_name="interest")

                    # Add metadata columns
                    df_trends["year"] = year
                    df_trends["id"] = entity_id  # Include entity ID

                    # Append results
                    all_trends.append(df_trends)

                    print(f"✅ Extracted: {name} ({year})")
                    success = True  # Mark as success

                    # Save progress after every entity
                    pd.concat(all_trends + [df_existing], ignore_index=True).to_csv(output_file, index=False)

                except Exception as e:
                    attempt += 1
                    print(f"❌ Attempt {attempt}/{max_retries} failed for {name} ({year}): {e}")
                    time.sleep(5 * attempt)  # Exponential backoff (5s, 10s, 15s)

            if not success:  # If all retries failed, add to new_pending list for retrying
                new_pending.append((entity_id, name, year))

        # Update pending_entities for the next loop (only failed ones)
        pending_entities = new_pending

        if pending_entities:
            print(f"🔄 Retrying {len(pending_entities)} failed terms...")

    print(f"✅ All terms successfully processed! Check {output_file}")

# Example usage:
weekly_interest("d_pokemon.csv", "f_pokemon_weekly_interest.csv")

✅ Found existing data: 11 name-year combinations already processed.
❌ Attempt 1/3 failed for Butterfree (2024): HTTPSConnectionPool(host='trends.google.com', port=443): Max retries exceeded with url: /trends/api/widgetdata/multiline?req=%7B%22time%22%3A+%222024-01-01+2024-12-31%22%2C+%22resolution%22%3A+%22WEEK%22%2C+%22locale%22%3A+%22en-US%22%2C+%22comparisonItem%22%3A+%5B%7B%22geo%22%3A+%7B%22country%22%3A+%22US%22%7D%2C+%22complexKeywordsRestriction%22%3A+%7B%22keyword%22%3A+%5B%7B%22type%22%3A+%22BROAD%22%2C+%22value%22%3A+%22Butterfree%22%7D%5D%7D%7D%5D%2C+%22requestOptions%22%3A+%7B%22property%22%3A+%22%22%2C+%22backend%22%3A+%22IZG%22%2C+%22category%22%3A+0%7D%2C+%22userConfig%22%3A+%7B%22userType%22%3A+%22USER_TYPE_SCRAPER%22%7D%7D&token=APP6_UEAAAAAZ9w53d7psxAqRtHbTiPvlzaAPC6_FzN8&tz=360 (Caused by ResponseError('too many 429 error responses'))


KeyboardInterrupt: 

In [4]:
import pandas as pd

def reorder_weekly_interest(file_path, sort_by=["id", "year"]):
    """
    Reorders a weekly interest dataset by sorting it and ensuring correct column order.

    Args:
    - file_path (str): Path to the input CSV file.
    - sort_by (list): Columns to sort by (default: ["id", "year"]).

    Returns:
    - Saves the reordered dataset back to the same file.
    """
    # Load the data
    df = pd.read_csv(file_path)

    # Define the correct column order
    required_columns = ["year", "date", "id", "name", "interest"]

    # Check if required columns exist
    missing_columns = set(required_columns) - set(df.columns)
    if missing_columns:
        raise ValueError(f"❌ Missing required columns: {missing_columns}")

    # Reorder columns
    df = df[required_columns]

    # Convert 'year' and 'id' to integers for proper sorting
    df["year"] = df["year"].astype(int)
    df["id"] = df["id"].astype(int)

    # Sort by the specified columns
    df = df.sort_values(by=sort_by)

    # Overwrite the file with sorted data
    df.to_csv(file_path, index=False)

    print(f"✅ Data successfully reordered and saved to {file_path}")

# Example usage:
reorder_weekly_interest("f_pokemon_weekly_interest.csv")
# reorder_weekly_interest("some_other_dataset.csv", sort_by=["name", "year"])

✅ Data successfully reordered and saved to f_pokemon_weekly_interest.csv


## State Score (Above this needs testing, below this is old code)

In [3]:
from pytrends.request import TrendReq
import pandas as pd
import time
import os

# Load processed data if it exists
output_file = "d_state_interest.csv"
failed_file = "d_state_interest_failed.csv"

# Load existing processed data
if os.path.exists(output_file):
    df_existing = pd.read_csv(output_file)
    completed_terms = set(zip(df_existing["search_term"], df_existing["year"].astype(str)))
    print(f"✅ Found existing data: {len(completed_terms)} search term-year combinations already processed.")
else:
    df_existing = pd.DataFrame(columns=["search_term", "year", "state", "search_interest"])
    completed_terms = set()

# Load input dataset
input_file = "d_search_terms.csv"
df_clean_trends = pd.read_csv(input_file)

# Initialize Pytrends
pytrends = TrendReq(hl='en-US', tz=360, retries=3)

# Prepare storage for failed terms
failed_terms = []

# Extract search terms that still need to be processed
pending_terms = [
    (row["search_term"], str(row["year"])) for _, row in df_clean_trends.iterrows()
    if (row["search_term"], str(row["year"])) not in completed_terms
]

max_retries = 3  # Maximum retries per search term

while pending_terms:
    new_failed_terms = []  # Reset failed terms list in each loop

    for search_term, year in pending_terms:
        attempt = 0
        success = False

        while attempt < max_retries and not success:
            try:
                # Build payload for search term only
                pytrends.build_payload([search_term], timeframe=f"{year}-01-01 {year}-12-31", geo='US')

                # Extract Regional Interest
                df_regions = pytrends.interest_by_region(resolution='REGION')

                # Check if the response is empty before processing
                if df_regions.empty:
                    print(f"⚠️ No regional data found for {search_term} ({year}) - Skipping.")
                    failed_terms.append({"search_term": search_term, "year": year, "reason": "No Regional Data"})
                    success = True  # Mark as success so it doesn't keep retrying
                    continue  # Skip to next search term

                # Reset index to get state names
                df_regions.reset_index(inplace=True)
                df_regions.rename(columns={"geoName": "state"}, inplace=True)

                # Keep only relevant columns
                df_regions = df_regions[["state", search_term]]

                # Rename search interest column
                df_regions.rename(columns={search_term: "search_interest"}, inplace=True)

                # Add metadata columns
                df_regions["search_term"] = search_term
                df_regions["year"] = year

                # Reorder columns for clarity
                df_regions = df_regions[["search_term", "year", "state", "search_interest"]]

                # Save results **immediately** to prevent duplication
                df_regions.to_csv(output_file, mode='a', header=not os.path.exists(output_file), index=False)

                print(f"✅ Extracted regional data for: {search_term} ({year})")
                success = True  # Mark as success

            except Exception as e:
                attempt += 1
                print(f"❌ Attempt {attempt}/{max_retries} failed for {search_term} ({year}): {e}")
                time.sleep(5 * attempt)  # Exponential backoff (5s, 10s, 15s)

        if not success:  # If all retries failed, add to failed list
            new_failed_terms.append((search_term, year))

    # Update pending terms for the next loop (only failed ones)
    pending_terms = new_failed_terms
 
    if pending_terms:
        print(f"🔄 Retrying {len(pending_terms)} failed terms...")

# Save failed terms to CSV for review
if failed_terms:
    df_failed = pd.DataFrame(failed_terms)
    df_failed.to_csv(failed_file, index=False)
    print(f"❌ Failed terms saved to {failed_file}")
else:
    print("✅ No failed terms!")

print("✅ All regional data successfully processed! Check d_state_interest.csv")

✅ Found existing data: 1752 search term-year combinations already processed.
✅ Extracted regional data for: Allison Mack (2018)
✅ Extracted regional data for: Fashion Nova (2018)
✅ Extracted regional data for: Louis Vuitton (2018)
✅ Extracted regional data for: Versace (2018)
✅ Extracted regional data for: Givenchy (2018)
✅ Extracted regional data for: Gucci (2018)
✅ Extracted regional data for: Alexander McQueen (2018)
✅ Extracted regional data for: Dolce & Gabbana (2018)
✅ Extracted regional data for: Fashionphile (2018)
✅ Extracted regional data for: Dior (2018)
✅ Extracted regional data for: Moschino (2018)
✅ Extracted regional data for: Stacey Abrams (2018)
✅ Extracted regional data for: Beto O'Rourke (2018)
✅ Extracted regional data for: Andrew Gillum (2018)
✅ Extracted regional data for: Alexandria Ocasio-Cortez (2018)
✅ Extracted regional data for: Nikki Haley (2018)
✅ Extracted regional data for: Lindsey Graham (2018)
✅ Extracted regional data for: Kyrsten Sinema (2018)
✅ Extr

## Processing

In [6]:
https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_by_National_Pok%C3%A9dex_number#Generation_I

SyntaxError: invalid syntax (3799621341.py, line 1)

Filter things like: "Chicken Repipes"

## Visualisation

### Pictures

In [14]:
import os
import requests

# Define the target directory
base_dir = "/Users/arya/Documents/Adobe/Premiere Pro/Horizon Analytics/2025-03_pokemon"
save_dir = os.path.join(base_dir, "pokemon_pictures")

# Ensure the folder exists
os.makedirs(save_dir, exist_ok=True)

# Base URL for fetching Pokémon data
pokeapi_url = "https://pokeapi.co/api/v2/pokemon/"

# Download the first 150 Pokémon images
for pokemon_id in range(1, 152):  # Pokémon 1 to 151
    try:
        # Fetch Pokémon data
        response = requests.get(f"{pokeapi_url}{pokemon_id}/")
        response.raise_for_status()
        data = response.json()

        # Get Pokémon name and image URL
        pokemon_name = data["name"].capitalize()  # Capitalize first letter
        image_url = data["sprites"]["other"]["official-artwork"]["front_default"]

        if image_url:
            # Download the image
            img_response = requests.get(image_url)
            img_response.raise_for_status()

            # Save image as {id}.png
            file_path = os.path.join(save_dir, f"{pokemon_id}.png")
            with open(file_path, "wb") as file:
                file.write(img_response.content)

            print(f"Downloaded: {pokemon_name} (ID: {pokemon_id})")

    except Exception as e:
        print(f"Failed to download ID {pokemon_id}: {e}")

print("Download completed!")

Downloaded: Mewtwo (ID: 150)
Downloaded: Mew (ID: 151)
Download completed!


## Frames