### Plotly

In [None]:
import plotly.graph_objects as go
import plotly.io as pio

# HorizonAnalytics: dark navy theme with bold white Montserrat text.
# The shared styling tokens live here so every element draws from one place.
_HA_BACKGROUND = '#0d1b2a'                # Paper and plot background
_HA_FOREGROUND = '#ffffff'                # Text and axis lines
_HA_GRID = 'rgba(255, 255, 255, 0.2)'     # Softer grid lines for contrast
_HA_FONT_FAMILY = 'Montserrat, sans-serif'
_HA_HEIGHT = 800


def _ha_font(size):
    """Return the theme's bold white font spec at the requested size."""
    return dict(
        size=size,
        family=_HA_FONT_FAMILY,
        color=_HA_FOREGROUND,
        weight="bold",
    )


def _ha_axis(anchor):
    """Return the axis spec shared by x and y; only the anchor differs."""
    return dict(
        anchor=anchor,
        showgrid=True,
        gridcolor=_HA_GRID,
        tickfont=_ha_font(36),                       # Tick labels
        title=dict(text='', font=_ha_font(48)),      # Axis title, larger than ticks
        linecolor=_HA_FOREGROUND,
        linewidth=2,
    )


_ha_layout = go.Layout(
    paper_bgcolor=_HA_BACKGROUND,
    plot_bgcolor=_HA_BACKGROUND,
    height=_HA_HEIGHT,
    width=_HA_HEIGHT * 1.618,                        # Width derived from height
    xaxis=_ha_axis('y'),
    yaxis=_ha_axis('x'),
    font=_ha_font(36),                               # Default for all text
    # Colorway applied to traces in order
    colorway=[
        "#FFFF00", "#33D7FF", "#A463FF", "#FFD700",
        "#ff4081", "#ffc107", "#00c4a0", "#a0aec0",
    ],
    title=dict(
        text='',
        font=_ha_font(64),                           # Largest text on the figure
        x=0.5,                                       # Center title
        y=0.97,                                      # Push title higher
    ),
)

# Trace defaults: thicker scatter lines for better visibility
_ha_trace_defaults = dict(scatter=[go.Scatter(line=dict(width=5))])

HorizonAnalytics = go.layout.Template(layout=_ha_layout, data=_ha_trace_defaults)

# Register the template and make it the default for every new figure
pio.templates['HorizonAnalytics'] = HorizonAnalytics
pio.templates.default = 'HorizonAnalytics'

## Search Terms

In [None]:
# d_search_terms

# search_term,year
# Donald Trump,2015
# Bernie Sanders,2015
# Ben Carson,2015

## Weekly Interest

In [None]:
# f_search_interest

# date,search_term,interest,year
# 2014-12-28 00:00:00,Donald Trump,2,2015
# 2015-01-04 00:00:00,Donald Trump,4,2015
# 2015-01-11 00:00:00,Donald Trump,3,2015

## State Score

In [5]:
from pytrends.request import TrendReq
import pandas as pd
import time
import os

# State-level Google Trends extraction with resume support.
#
# Reads (search_term, year) pairs from d_search_terms.csv, asks pytrends for the
# regional interest of each pair within the US, and appends the result to
# d_state_interest.csv as soon as it is available. Pairs already present in the
# output file are skipped, so the cell can be re-run after an interruption.
# Pairs that cannot be extracted are written to d_state_interest_failed.csv
# (that file is rewritten on every run).

# Output locations
output_file = "d_state_interest.csv"
failed_file = "d_state_interest_failed.csv"

# Load existing processed data; years are kept as strings so they compare
# equal to the string years built from the input file below
if os.path.exists(output_file):
    df_existing = pd.read_csv(output_file)
    completed_terms = set(zip(df_existing["search_term"], df_existing["year"].astype(str)))
    print(f"✅ Found existing data: {len(completed_terms)} search term-year combinations already processed.")
else:
    df_existing = pd.DataFrame(columns=["search_term", "year", "state", "search_interest"])
    completed_terms = set()

# Load input dataset
input_file = "d_search_terms.csv"
df_clean_trends = pd.read_csv(input_file)

# Initialize Pytrends
pytrends = TrendReq(hl='en-US', tz=360, retries=3)

# Records of terms that could not be extracted: {"search_term", "year", "reason"}
failed_terms = []

# Extract search terms that still need to be processed.
# dict.fromkeys drops duplicate input rows (which would otherwise be fetched
# and appended twice) while preserving the input order.
pending_terms = list(dict.fromkeys(
    (search_term, year)
    for search_term, year in zip(df_clean_trends["search_term"], df_clean_trends["year"].astype(str))
    if (search_term, year) not in completed_terms
))

max_retries = 3  # Maximum retries per search term within one pass
max_passes = 3   # Maximum passes over still-failing terms; bounds total runtime

passes_done = 0

try:
    # Without the pass limit, a permanently failing term (e.g. sustained HTTP 429
    # rate limiting) would keep this loop running forever.
    while pending_terms and passes_done < max_passes:
        passes_done += 1
        new_failed_terms = []  # Terms that exhausted their retries in this pass

        for search_term, year in pending_terms:
            attempt = 0
            success = False

            while attempt < max_retries and not success:
                try:
                    # Build payload for search term only
                    pytrends.build_payload([search_term], timeframe=f"{year}-01-01 {year}-12-31", geo='US')

                    # Extract Regional Interest
                    df_regions = pytrends.interest_by_region(resolution='REGION')

                    # An empty response is a final answer, not a transient error:
                    # record it and stop retrying this term
                    if df_regions.empty:
                        print(f"⚠️ No regional data found for {search_term} ({year}) - Skipping.")
                        failed_terms.append({"search_term": search_term, "year": year, "reason": "No Regional Data"})
                        success = True
                        break

                    # Build the output frame with non-inplace operations so no
                    # write ever targets a slice of another frame
                    # (avoids pandas' SettingWithCopyWarning)
                    df_regions = (
                        df_regions.reset_index()
                        .rename(columns={"geoName": "state"})[["state", search_term]]
                        .rename(columns={search_term: "search_interest"})
                        .assign(search_term=search_term, year=year)
                    )

                    # Reorder columns for clarity
                    df_regions = df_regions[["search_term", "year", "state", "search_interest"]]

                    # Save results immediately so a later crash or interrupt
                    # does not lose them; write the header only for a new file
                    df_regions.to_csv(output_file, mode='a', header=not os.path.exists(output_file), index=False)

                    print(f"✅ Extracted regional data for: {search_term} ({year})")
                    success = True

                except Exception as e:
                    attempt += 1
                    print(f"❌ Attempt {attempt}/{max_retries} failed for {search_term} ({year}): {e}")
                    time.sleep(5 * attempt)  # Linear backoff (5s, 10s, 15s)

            if not success:  # All retries failed in this pass
                new_failed_terms.append((search_term, year))

        # Only the terms that failed in this pass are candidates for the next one
        pending_terms = new_failed_terms

        if pending_terms and passes_done < max_passes:
            print(f"🔄 Retrying {len(pending_terms)} failed terms...")

    # Whatever is still pending has used up every pass: report it as failed
    # instead of dropping it silently
    for search_term, year in pending_terms:
        failed_terms.append({"search_term": search_term, "year": year, "reason": "Request failed after retries"})

finally:
    # Runs on interruption too (e.g. KeyboardInterrupt), so failures recorded
    # so far are not lost
    if failed_terms:
        df_failed = pd.DataFrame(failed_terms)
        df_failed.to_csv(failed_file, index=False)
        print(f"❌ Failed terms saved to {failed_file}")

# Only claim full success when nothing failed
if failed_terms:
    print(f"⚠️ Finished with {len(failed_terms)} unresolved terms. Check {output_file} and {failed_file}")
else:
    print("✅ No failed terms!")
    print("✅ All regional data successfully processed! Check d_state_interest.csv")

✅ Found existing data: 1609 search term-year combinations already processed.
❌ Attempt 1/3 failed for Call of Duty: Black Ops 4 (2018): HTTPSConnectionPool(host='trends.google.com', port=443): Max retries exceeded with url: /trends/api/explore?hl=en-US&tz=360&req=%7B%22comparisonItem%22%3A+%5B%7B%22keyword%22%3A+%22Call+of+Duty%3A+Black+Ops+4%22%2C+%22time%22%3A+%222018-01-01+2018-12-31%22%2C+%22geo%22%3A+%22US%22%7D%5D%2C+%22category%22%3A+0%2C+%22property%22%3A+%22%22%7D (Caused by ResponseError('too many 429 error responses'))


KeyboardInterrupt: 

## Processing

## Visualisation

## Frames