### Bets vs Underdog Wins - WTA

In [4]:
@app.callback(
    Output("underdog-plot", "figure"),
    Input("year-input", "value"),
    Input("surface-dropdown", "value"),
    Input("player-dropdown", "value")
)
def update_plot(year, surface, player_input):
    """Build the scatter of underdog rank vs. return on a $1 bet.

    Only matches between a Top-10 player and an opponent ranked 11-100 are
    considered; the lower-ranked (numerically larger) player is the underdog.

    Args:
        year: Value of the "year-input" component; matches are restricted to
            this calendar year.
        surface: Value of the "surface-dropdown" component, compared
            case-insensitively (surrounding whitespace ignored) with the
            "Surface" column.
        player_input: Optional value of the "player-dropdown" component. When
            set, only matches this player lost to the underdog are kept.

    Returns:
        A Plotly figure. A titled, empty scatter is returned when an input is
        missing or no match survives the filters.
    """
    if not year or not surface:
        return px.scatter(title="Please select both year and surface.")

    surface_key = surface.strip().lower()
    winner_rank, loser_rank = df_all["WRank"], df_all["LRank"]

    # Top-10 vs. rank 11-100 pairings, whichever side won.
    top10_won = (winner_rank <= 10) & (loser_rank > 10) & (loser_rank <= 100)
    top10_lost = (loser_rank <= 10) & (winner_rank > 10) & (winner_rank <= 100)
    keep = (
        (df_all["Date"].dt.year == int(year))
        & (df_all["Surface"].str.lower() == surface_key)
        & (top10_won | top10_lost)
    )
    df = df_all[keep].dropna(subset=["B365W", "B365L"]).copy()

    # Bail out before deriving columns: nothing to plot for this selection.
    if df.empty:
        return px.scatter(title="No matches found.")

    # The underdog is the player with the numerically larger rank.
    winner_is_underdog = df["WRank"] > df["LRank"]
    df["Underdog"] = df["Winner"].where(winner_is_underdog, df["Loser"])
    df["UnderdogOdds"] = df["B365W"].where(winner_is_underdog, df["B365L"])
    df["UnderdogRank"] = df["WRank"].where(winner_is_underdog, df["LRank"])
    df["UnderdogWon"] = df["Winner"] == df["Underdog"]
    # A winning $1 bet pays the decimal odds; a losing one pays nothing.
    df["Return"] = df["UnderdogOdds"].where(df["UnderdogWon"], 0)
    df["Label"] = "Underdog: " + df["Underdog"].astype(str)

    if player_input:
        df = df[(df["Loser"] == player_input) & df["UnderdogWon"]]

    if df.empty:
        return px.scatter(title="No matches found.")

    fig = px.scatter(
        df,
        x="UnderdogRank",  # rank on the x-axis instead of odds
        y="Return",
        color="UnderdogWon",
        hover_data=["Winner", "Loser", "UnderdogRank", "Return"],
        text="Label",
        title=f"Underdog Wins vs Top 10 on {surface.strip().capitalize()} ({year})"
    )

    fig.update_traces(
        mode="markers+text",
        textposition="top center",
        textfont=dict(size=11, color="black"),
        marker=dict(size=10, opacity=0.9)
    )

    # Only attach a court texture when one exists for the typed surface.
    background = surface_backgrounds.get(surface_key)
    images = [{
        "source": background,
        "xref": "paper",
        "yref": "paper",
        "x": 0,
        "y": 1,
        "sizex": 1,
        "sizey": 1,
        "xanchor": "left",
        "yanchor": "top",
        "layer": "below",
        "sizing": "stretch",
        "opacity": 0.5
    }] if background else []

    fig.update_layout(
        xaxis_title="Underdog Rank",
        yaxis_title="Return on $1 Bet",
        legend_title="UnderdogWon",
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        images=images
    )

    return fig


### Bets vs Underdog Wins - ATP

In [2]:
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output
import os
import base64

# Path to your data directory
DATA_DIR = "/Users/ellendagher/Desktop/DataViz/SmashData/csv_data/atp_mens_tour"

# Load all CSV files (sorted so the row order does not depend on the OS listing order)
all_files = sorted(os.path.join(DATA_DIR, f) for f in os.listdir(DATA_DIR) if f.endswith('.csv'))
df_all = pd.concat([pd.read_csv(f, low_memory=False) for f in all_files], ignore_index=True)

# Preprocess
df_all["Date"] = pd.to_datetime(df_all["Date"], errors="coerce")
# Odds and ranks are compared and plotted as numbers; turn any non-numeric
# entry into NaN so the dropna below removes it.
for numeric_col in ["B365W", "B365L", "WRank", "LRank"]:
    df_all[numeric_col] = pd.to_numeric(df_all[numeric_col], errors="coerce")
df_all = df_all.dropna(subset=["B365W", "B365L", "WRank", "LRank"])

# Load and encode background images
# Court texture files, keyed by surface name.
surface_images = dict(
    Hard="/Users/ellendagher/Desktop/DataViz/SmashData/hard.jpeg",
    Clay="/Users/ellendagher/Desktop/DataViz/SmashData/clay.jpg",
    Grass="/Users/ellendagher/Desktop/DataViz/SmashData/grass.jpg",
)

# Base64 data URIs of the textures, keyed by lower-cased surface name.
surface_backgrounds = {}
for court_name, image_path in surface_images.items():
    with open(image_path, "rb") as image_file:
        payload = base64.b64encode(image_file.read()).decode()
    surface_backgrounds[court_name.lower()] = "data:image/jpeg;base64," + payload

# Initialize Dash app
app = Dash(__name__)
app.title = "Underdog Wins vs Top 10"

# Layout: three label/control/line-break groups stacked above the graph.
year_group = [
    html.Label("Select Year:"),
    dcc.Input(id="year-input", type="number", placeholder="e.g. 2018", debounce=True),
    html.Br(),
]
surface_group = [
    html.Label("Select Surface:"),
    dcc.Input(id="surface-dropdown", type="text", placeholder="Hard/Clay/Grass", debounce=True),
    html.Br(),
]
player_group = [
    html.Label("Select Top 10 Player:"),
    dcc.Dropdown(id="player-dropdown", placeholder="Select a player", multi=False),
    html.Br(),
]
app.layout = html.Div(
    year_group + surface_group + player_group + [dcc.Graph(id="underdog-plot")]
)

# Populate player dropdown based on selected year
@app.callback(
    Output("player-dropdown", "options"),
    Input("year-input", "value")
)
def update_player_dropdown(year):
    """Return dropdown options for players ranked 10 or better in *year*.

    Winner and loser appearances with a rank <= 10 are pooled, ordered by
    rank, de-duplicated by name and cut to the first ten names. An empty
    list is returned while no year is entered.
    """
    if not year:
        return []

    season = df_all[df_all["Date"].dt.year == int(year)]
    as_winner = season.loc[season["WRank"] <= 10, ["Winner", "WRank"]]
    as_loser = season.loc[season["LRank"] <= 10, ["Loser", "LRank"]]
    # Give both sides the same column names so they can be stacked.
    as_loser = as_loser.rename(columns={"Loser": "Winner", "LRank": "WRank"})

    ranked = pd.concat([as_winner, as_loser]).sort_values("WRank")
    best_ten = ranked.drop_duplicates("Winner").head(10)

    options = []
    for name in best_ten["Winner"].unique():
        options.append({"label": name, "value": name})
    return options

# Plot logic
@app.callback(
    Output("underdog-plot", "figure"),
    Input("year-input", "value"),
    Input("surface-dropdown", "value"),
    Input("player-dropdown", "value")
)
def update_plot(year, surface, player_input):
    """Build the scatter of underdog odds vs. return on a $1 bet.

    Only matches between a Top-10 player and an opponent ranked 11-100 are
    considered; the lower-ranked (numerically larger) player is the underdog.

    Args:
        year: Value of the "year-input" component; matches are restricted to
            this calendar year.
        surface: Value of the "surface-dropdown" component, compared
            case-insensitively (surrounding whitespace ignored) with the
            "Surface" column.
        player_input: Optional value of the "player-dropdown" component. When
            set, only matches this player lost to the underdog are kept.

    Returns:
        A Plotly figure. A titled, empty scatter is returned when an input is
        missing or no match survives the filters.
    """
    if not year or not surface:
        return px.scatter(title="Please select both year and surface.")

    surface_key = surface.strip().lower()
    winner_rank, loser_rank = df_all["WRank"], df_all["LRank"]

    # Only top 10 vs 11–100 matches, whichever side won.
    top10_won = (winner_rank <= 10) & (loser_rank > 10) & (loser_rank <= 100)
    top10_lost = (loser_rank <= 10) & (winner_rank > 10) & (winner_rank <= 100)
    keep = (
        (df_all["Date"].dt.year == int(year))
        & (df_all["Surface"].str.lower() == surface_key)
        & (top10_won | top10_lost)
    )
    df = df_all[keep].dropna(subset=["B365W", "B365L"]).copy()

    # Bail out before deriving columns: nothing to plot for this selection.
    if df.empty:
        return px.scatter(title="No matches found.")

    # The underdog is the player with the numerically larger rank.
    winner_is_underdog = df["WRank"] > df["LRank"]
    df["Underdog"] = df["Winner"].where(winner_is_underdog, df["Loser"])
    df["UnderdogOdds"] = df["B365W"].where(winner_is_underdog, df["B365L"])
    df["UnderdogWon"] = df["Winner"] == df["Underdog"]
    # A winning $1 bet pays the decimal odds; a losing one pays nothing.
    df["Return"] = df["UnderdogOdds"].where(df["UnderdogWon"], 0)
    df["Label"] = "Underdog: " + df["Underdog"].astype(str)

    if player_input:
        df = df[(df["Loser"] == player_input) & df["UnderdogWon"]]

    if df.empty:
        return px.scatter(title="No matches found.")

    fig = px.scatter(
        df,
        x="UnderdogOdds",
        y="Return",
        color="UnderdogWon",
        hover_data=["Winner", "Loser", "UnderdogOdds", "Return"],
        text="Label",
        title=f"Underdog Wins vs Top 10 on {surface.strip().capitalize()} ({year})"
    )

    fig.update_traces(
        mode="markers+text",
        textposition="top center",
        textfont=dict(size=11, color="black"),
        marker=dict(size=10, opacity=0.9)
    )

    # Add background texture, but only when one exists for the typed surface.
    background = surface_backgrounds.get(surface_key)
    images = [{
        "source": background,
        "xref": "paper",
        "yref": "paper",
        "x": 0,
        "y": 1,
        "sizex": 1,
        "sizey": 1,
        "xanchor": "left",
        "yanchor": "top",
        "layer": "below",
        "sizing": "stretch",
        "opacity": 0.5  # adjust for transparency here
    }] if background else []

    fig.update_layout(
        xaxis_title="Underdog Odds",
        yaxis_title="Return on $1 Bet",
        legend_title="UnderdogWon",
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        images=images
    )

    return fig

# Run server
if __name__ == "__main__":
    # Start the Dash development server with debug tooling enabled.
    app.run(
        debug=True,
    )


In [45]:
import os, re, base64, pandas as pd, plotly.express as px

# ─────────────────────  Dash / JupyterDash  ──────────────────────
try:
    from jupyter_dash import JupyterDash as Dash
except ImportError:
    from dash import Dash
from dash import dcc, html, Input, Output

# ─────────────────────  Load match data  ────────────────────────
DATA_DIR = "/Users/ellendagher/Desktop/DataViz/SmashData/csv_data/wta_womens_tour"

# Read every CSV in the data directory and stack them into one frame.
csv_frames = []
for file_name in os.listdir(DATA_DIR):
    if file_name.endswith(".csv"):
        csv_frames.append(
            pd.read_csv(os.path.join(DATA_DIR, file_name), low_memory=False)
        )
df_all = pd.concat(csv_frames, ignore_index=True)

# Unparseable dates become NaT and non-numeric odds become NaN.
df_all["Date"] = pd.to_datetime(df_all["Date"], errors="coerce")
odds_columns = ["B365W", "B365L"]
df_all[odds_columns] = df_all[odds_columns].apply(pd.to_numeric, errors="coerce")

# Keep only matches that have both Bet365 odds and both ranks.
df_all = df_all.dropna(subset=odds_columns + ["WRank", "LRank"])

# ─────────────────────  Event-tier mapper  ──────────────────────
# Modern 'WTA###' codes found in the Tier column, e.g. "WTA250", "wta 1000".
_WTA_CODE_RE = re.compile(r"^wta\s*(\d+)$", flags=re.I)

# Exact Tier-column texts (lower-cased, single-spaced) and the tier they denote.
_TIER_TEXT_MAP = {
    "grand slam":         (6, "Grand Slam"),
    "tour championships": (5, "WTA Finals"),
    "wta finals":         (5, "WTA Finals"),
    "premier mandatory":  (4, "WTA 1000"),
    "premier 5":          (4, "WTA 1000"),
    "tier i":             (4, "WTA 1000"),
    "tier 1":             (4, "WTA 1000"),
    "premier":            (3, "WTA 500"),
    "tier ii":            (3, "WTA 500"),
    "tier 2":             (3, "WTA 500"),
    "international":      (2, "WTA 250"),
    "tier iii":           (2, "WTA 250"),
    "tier 3":             (2, "WTA 250"),
    "tier iv":            (1, "125 / ITF"),
    "tier 4":             (1, "125 / ITF"),
}

# Bare numeric tiers ("1" .. "4"); any other number is treated as 125 / ITF.
_NUMERIC_TIER_MAP = {1: (4, "WTA 1000"),
                     2: (3, "WTA 500"),
                     3: (2, "WTA 250"),
                     4: (1, "125 / ITF")}


def parse_event_tier(row):
    """
    Return (TierSize 1-6, TierCat str) for one match row.

      6  Grand Slam
      5  WTA Finals / Tour Championships
      4  WTA 1000  (Premier Mandatory / Premier-5 / Tier I / Tier 1)
      3  WTA 500   (Premier / Tier II / Tier 2)
      2  WTA 250   (International / Tier III / Tier 3  +  ANY  WTA###  where ### < 500)
      1  125 / ITF (Tier IV / Tier 4 and below, and anything unrecognised)

    `row` only needs a dict-like ``.get``; the columns consulted are
    "Tournament", "Tier", "Series" and "tourney_level", each optional.

    The checks run in this order: tournament name, 'WTA###' tier code,
    Series text, one-letter level code, Tier text, bare numeric tier.
    Tier texts such as "Tier 1", "Premier", "International", "Grand Slam"
    and "Tour Championships" are recognised instead of dropping to the
    fallback, and a tier like "--2" no longer raises.
    """

    # --- explicit Grand-Slam / Finals names ----------------------------------
    tname = str(row.get("Tournament", "")).lower()
    if any(s in tname for s in
           ["australian open", "roland garros", "wimbledon", "us open"]):
        return 6, "Grand Slam"
    if "wta finals" in tname or "tour championships" in tname \
       or row.get("tourney_level") == "F":
        return 5, "WTA Finals"

    # --- modern 'WTA###' codes in the Tier column ----------------------------
    tier_raw = str(row.get("Tier", "")).strip()
    m = _WTA_CODE_RE.match(tier_raw)
    if m:
        n = int(m.group(1))
        if n >= 1000:
            return 4, "WTA 1000"
        if n >= 500:
            return 3, "WTA 500"
        return 2, "WTA 250"          # every WTA### below 500 counts as 250

    # --- Series text ---------------------------------------------------------
    series = str(row.get("Series", "")).lower()
    if "1000" in series or "mandatory" in series or "premier 5" in series:
        return 4, "WTA 1000"
    if "500" in series or series == "premier":
        return 3, "WTA 500"
    if "250" in series or "international" in series:
        return 2, "WTA 250"
    if "125" in series:
        return 1, "125 / ITF"

    # --- Sackmann one-letter codes ------------------------------------------
    code = str(row.get("tourney_level", "")).upper()
    if code == "M":             # Premier Mandatory / Premier-5
        return 4, "WTA 1000"
    if code == "P":             # regular Premier
        return 3, "WTA 500"
    if code in {"I", "B"}:      # International or below
        return 2, "WTA 250"

    # --- Tier text ('Tier I … IV', 'Tier 1 … 4', 'Premier', …) ---------------
    tier_key = " ".join(tier_raw.lower().split())
    if tier_key in _TIER_TEXT_MAP:
        return _TIER_TEXT_MAP[tier_key]

    # --- bare numeric tier, sign ignored -------------------------------------
    digits = tier_raw.lstrip("-")
    if digits.isdigit():
        return _NUMERIC_TIER_MAP.get(int(digits), (1, "125 / ITF"))

    # fallback
    return 1, "125 / ITF"

# Attach the (TierSize, TierCat) pair computed for every match row.
df_all[["TierSize", "TierCat"]] = df_all.apply(parse_event_tier,
                                              axis=1, result_type="expand")

# Court textures: file path per surface, then a base64 data URI per
# lower-cased surface name.
surface_images = {
    "Hard":  "/Users/ellendagher/Desktop/DataViz/SmashData/hard.jpeg",
    "Clay":  "/Users/ellendagher/Desktop/DataViz/SmashData/clay.jpg",
    "Grass": "/Users/ellendagher/Desktop/DataViz/SmashData/grass.jpg",
}
surface_bg = {}
for _surface_name, _image_path in surface_images.items():
    # `with` closes each file handle as soon as it has been read.
    with open(_image_path, "rb") as _image_file:
        surface_bg[_surface_name.lower()] = (
            "data:image/jpeg;base64,"
            + base64.b64encode(_image_file.read()).decode()
        )

# ─────────────────────  Dash app  ───────────────────────────────
app = Dash(__name__)
app.title = "Underdog vs Top-10 (WTA)"

# Controls are stacked top to bottom, each followed by a line break;
# the graph comes last.
year_box = dcc.Input(
    id="year-input", type="number", placeholder="e.g. 2024", debounce=True
)
surface_box = dcc.Input(
    id="surface-dropdown", type="text",
    placeholder="Hard / Clay / Grass", debounce=True,
)
opponent_picker = dcc.Dropdown(
    id="player-dropdown", placeholder="Select a player", clearable=True
)

page_children = [
    html.Label("Year:"), year_box, html.Br(),
    html.Label("Surface:"), surface_box, html.Br(),
    html.Label("Top-10 Opponent (optional):"), opponent_picker, html.Br(),
    dcc.Graph(id="underdog-plot"),
]
app.layout = html.Div(
    page_children,
    style={"width": "900px", "margin": "0 auto"},
)

# ─────────  Populate Top-10 list  ─────────
@app.callback(
    Output("player-dropdown", "options"),
    Input("year-input", "value"),
)
def populate_top10(year):
    """Return dropdown options for players who held a rank <= 10 in *year*.

    Winner and loser appearances are pooled and ordered by rank; the first
    ten distinct, non-missing names become the options. No year gives an
    empty list.
    """
    if not year:
        return []

    season = df_all[df_all["Date"].dt.year == int(year)]
    winners = season.loc[season["WRank"] <= 10, ["Winner", "WRank"]]
    losers = season.loc[season["LRank"] <= 10, ["Loser", "LRank"]]
    # Same column names on both sides so the two frames stack cleanly.
    losers = losers.rename(columns={"Loser": "Winner", "LRank": "WRank"})

    ranked = pd.concat([winners, losers]).sort_values("WRank")
    names = ranked["Winner"].dropna().drop_duplicates().head(10)

    options = []
    for player in names:
        options.append({"label": player, "value": player})
    return options

# ─────────  Bubble-chart callback  ─────────
@app.callback(
    Output("underdog-plot", "figure"),
    Input("year-input", "value"),
    Input("surface-dropdown", "value"),
    Input("player-dropdown", "value"),
)
def make_chart(year, surface, opponent):
    """Build the per-underdog bubble chart (average rank vs. average return).

    Args:
        year: Value of "year-input"; matches are limited to this calendar year.
        surface: Value of "surface-dropdown", compared case-insensitively
            with the "Surface" column.
        opponent: Optional value of "player-dropdown"; when set, only matches
            lost by this player are kept.

    Returns:
        A Plotly figure with one bubble per underdog. Bubble size and colour
        both describe the most important event among that underdog's matches.
        A titled, empty scatter is returned when inputs are missing or nothing
        matches.
    """
    if not year or not surface:
        return px.scatter(title="Select year & surface")

    surface_key = surface.lower()
    df = df_all[
        (df_all["Date"].dt.year == int(year))
        & (df_all["Surface"].str.lower() == surface_key)
        & (((df_all["WRank"] <= 10) & (df_all["LRank"].between(11, 100))) |
           ((df_all["LRank"] <= 10) & (df_all["WRank"].between(11, 100))))
    ].copy()
    if df.empty:
        return px.scatter(title="No matches found")

    # The underdog is the player with the numerically larger rank.
    winner_is_underdog = df["WRank"] > df["LRank"]
    df["Underdog"] = df["Winner"].where(winner_is_underdog, df["Loser"])
    df["Odds"] = df["B365W"].where(winner_is_underdog, df["B365L"])
    df["Won"] = df["Winner"] == df["Underdog"]
    # A winning $1 stake pays the decimal odds; a losing one pays nothing.
    df["Return"] = df["Odds"].where(df["Won"], 0)
    df["UDRank"] = df["WRank"].where(df["Won"], df["LRank"])

    if opponent:
        df = df[df["Loser"] == opponent]
    if df.empty:
        return px.scatter(title="No matches with that filter")

    # Sort by TierSize so the last row of each group is the biggest event.
    # TierCat is then taken from that row; a string "max" would pick the
    # alphabetically last category instead.
    agg = (df.sort_values("TierSize")
             .groupby("Underdog", as_index=False)
             .agg(AvgRank=("UDRank", "mean"),
                  AvgReturn=("Return", "mean"),
                  TierSize=("TierSize", "max"),
                  TierCat=("TierCat", "last")))

    fig = px.scatter(
        agg,
        x="AvgRank",
        y="AvgReturn",
        size="TierSize",            # bubble ⇢ event importance
        color="TierCat",
        symbol="TierCat",
        symbol_map={"Grand Slam": "star",
                    "WTA Finals": "diamond"},
        color_discrete_map={
            "Grand Slam": "#FFD700",    # gold
            "WTA Finals": "#A020F0",    # purple
            "WTA 1000":  "#1f77b4",
            "WTA 500":   "#2ca02c",
            "WTA 250":   "#ff7f0e",
            "125 / ITF": "#8c564b"},
        size_max=40,
        hover_name="Underdog",
        title=f"Underdog vs Top-10 on {surface.capitalize()} • {year}",
    )

    # Court texture behind the plot, only when one exists for this surface.
    background = surface_bg.get(surface_key)
    fig.update_layout(
        xaxis_title="Average Rank (1 = best)",
        yaxis_title="Average Return on $1 Stake (Bet365)",
        xaxis_autorange="reversed",
        legend_title="Event Category",
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        images=[{
            "source": background,
            "xref": "paper", "yref": "paper",
            "x": 0, "y": 1, "sizex": 1, "sizey": 1,
            "xanchor": "left", "yanchor": "top",
            "layer": "below", "sizing": "stretch",
            "opacity": 0.5,
        }] if background else [],
    )
    return fig

# ─────────  Run the server  ─────────
if __name__ == "__main__":
    # Debug mode on, auto-reloader off.
    app.run(
        debug=True,
        use_reloader=False,
    )



JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.



In [13]:
# If you’ve already built df_all from all your CSV files
# Distinct Tier strings: NaNs dropped, values cast to text, outer spaces removed.
tier_text = df_all["Tier"].dropna().astype(str).str.strip()
unique_tiers = tier_text.unique()

print(f"{len(unique_tiers)} unique Tier values:")
# Shortest strings first, alphabetical within the same length.
ordered_tiers = sorted(unique_tiers, key=lambda text: (len(text), text))
for tier_value in ordered_tiers:
    print("  •", tier_value)


37 unique Tier values:
  • Tier 1
  • Tier 2
  • Tier 3
  • Tier 4
  • WTA250
  • WTA251
  • WTA252
  • WTA253
  • WTA254
  • WTA255
  • WTA256
  • WTA257
  • WTA258
  • WTA259
  • WTA260
  • WTA261
  • WTA262
  • WTA263
  • WTA264
  • WTA265
  • WTA266
  • WTA267
  • WTA268
  • WTA269
  • WTA270
  • WTA271
  • WTA272
  • WTA273
  • WTA274
  • WTA275
  • WTA276
  • WTA500
  • Premier
  • WTA1000
  • Grand Slam
  • International
  • Tour Championships


### Finals reached vs Losses - WTA

In [66]:
import os, re, base64, pandas as pd, plotly.express as px
from jupyter_dash import JupyterDash
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc

# ───────────────────── Load Data ──────────────────────
DATA_DIR = "/Users/ellendagher/Desktop/DataViz/SmashData/csv_data/wta_womens_tour"

# One frame per CSV file in the directory, concatenated with a fresh index.
csv_paths = [os.path.join(DATA_DIR, name)
             for name in os.listdir(DATA_DIR) if name.endswith(".csv")]
df_all = pd.concat(
    (pd.read_csv(path, low_memory=False) for path in csv_paths),
    ignore_index=True,
)

# Bad dates become NaT, non-numeric odds become NaN.
df_all["Date"] = pd.to_datetime(df_all["Date"], errors="coerce")
for odds_col in ("B365W", "B365L"):
    df_all[odds_col] = pd.to_numeric(df_all[odds_col], errors="coerce")

# Matches without both odds and both ranks cannot be analysed.
required_columns = ["B365W", "B365L", "WRank", "LRank"]
df_all = df_all.dropna(subset=required_columns)

# ───────────────────── Tier Parser ─────────────────────
# Modern 'WTA###' codes found in the Tier column, e.g. "WTA250", "wta 1000".
_WTA_CODE_RE = re.compile(r"^wta\s*(\d+)$", flags=re.I)

# Exact Tier-column texts (lower-cased, single-spaced) and the tier they denote.
_TIER_TEXT_MAP = {
    "grand slam":         (6, "Grand Slam"),
    "tour championships": (5, "WTA Finals"),
    "wta finals":         (5, "WTA Finals"),
    "premier mandatory":  (4, "WTA 1000"),
    "premier 5":          (4, "WTA 1000"),
    "tier i":             (4, "WTA 1000"),
    "tier 1":             (4, "WTA 1000"),
    "premier":            (3, "WTA 500"),
    "tier ii":            (3, "WTA 500"),
    "tier 2":             (3, "WTA 500"),
    "international":      (2, "WTA 250"),
    "tier iii":           (2, "WTA 250"),
    "tier 3":             (2, "WTA 250"),
    "tier iv":            (1, "125 / ITF"),
    "tier 4":             (1, "125 / ITF"),
}

# Bare numeric tiers ("1" .. "4"); any other number is treated as 125 / ITF.
_NUMERIC_TIER_MAP = {1: (4, "WTA 1000"),
                     2: (3, "WTA 500"),
                     3: (2, "WTA 250"),
                     4: (1, "125 / ITF")}


def parse_event_tier(row):
    """Return ``(TierSize, TierCat)`` for one match row.

    TierSize runs from 6 (Grand Slam) through 5 (WTA Finals), 4 (WTA 1000),
    3 (WTA 500) and 2 (WTA 250) down to 1 (125 / ITF, also the fallback).

    `row` only needs a dict-like ``.get``; the columns consulted are
    "Tournament", "Tier", "Series" and "tourney_level", each optional.
    The checks run in this order: tournament name, 'WTA###' tier code,
    Series text, one-letter level code, Tier text, bare numeric tier.
    Tier texts such as "Tier 1", "Premier", "International", "Grand Slam"
    and "Tour Championships" are recognised instead of dropping to the
    fallback, and a tier like "--2" no longer raises.
    """
    # Explicit Grand-Slam / season-finals names.
    tname = str(row.get("Tournament", "")).lower()
    if any(s in tname for s in ["australian open", "roland garros", "wimbledon", "us open"]):
        return 6, "Grand Slam"
    if "wta finals" in tname or "tour championships" in tname or row.get("tourney_level") == "F":
        return 5, "WTA Finals"

    # Modern 'WTA###' codes; everything below 500 counts as a 250 event.
    tier_raw = str(row.get("Tier", "")).strip()
    m = _WTA_CODE_RE.match(tier_raw)
    if m:
        n = int(m.group(1))
        if n >= 1000:
            return 4, "WTA 1000"
        if n >= 500:
            return 3, "WTA 500"
        return 2, "WTA 250"

    # Free-text Series column.
    series = str(row.get("Series", "")).lower()
    if "1000" in series or "mandatory" in series or "premier 5" in series:
        return 4, "WTA 1000"
    if "500" in series or series == "premier":
        return 3, "WTA 500"
    if "250" in series or "international" in series:
        return 2, "WTA 250"
    if "125" in series:
        return 1, "125 / ITF"

    # One-letter tourney_level codes.
    code = str(row.get("tourney_level", "")).upper()
    if code == "M":
        return 4, "WTA 1000"
    if code == "P":
        return 3, "WTA 500"
    if code in {"I", "B"}:
        return 2, "WTA 250"

    # Tier text ('Tier I … IV', 'Tier 1 … 4', 'Premier', 'Grand Slam', …).
    tier_key = " ".join(tier_raw.lower().split())
    if tier_key in _TIER_TEXT_MAP:
        return _TIER_TEXT_MAP[tier_key]

    # Bare numeric tier, sign ignored.
    digits = tier_raw.lstrip("-")
    if digits.isdigit():
        return _NUMERIC_TIER_MAP.get(int(digits), (1, "125 / ITF"))

    return 1, "125 / ITF"

# Compute (TierSize, TierCat) per row, then attach both as new columns.
tier_frame = df_all.apply(parse_event_tier, axis=1, result_type="expand")
df_all[["TierSize", "TierCat"]] = tier_frame

# ───────────────────── Background Images ─────────────────────
def encode_image(file_path):
    """Return the file at *file_path* as a base64 ``data:`` URI.

    The MIME type is guessed from the file extension (".png" gives
    ``image/png``, ".jpg"/".jpeg" give ``image/jpeg``). When the extension is
    unknown or not an image type, ``image/png`` is used.

    Args:
        file_path: Path of the image file to embed.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import mimetypes  # local import: only this helper needs it

    mime_type, _ = mimetypes.guess_type(str(file_path))
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/png"
    with open(file_path, "rb") as f:
        payload = base64.b64encode(f.read()).decode()
    return f"data:{mime_type};base64,{payload}"

# Court texture files, keyed by surface name.
surface_images = {
    "Hard":  "/Users/ellendagher/Desktop/DataViz/SmashData/hard.jpeg",
    "Clay":  "/Users/ellendagher/Desktop/DataViz/SmashData/clay.jpg",
    "Grass": "/Users/ellendagher/Desktop/DataViz/SmashData/grass.jpg",
}

# Base64 JPEG data URIs of the textures, keyed by lower-cased surface name.
surface_bg = {}
for _surface_name, _image_path in surface_images.items():
    # `with` closes each file handle as soon as it has been read.
    with open(_image_path, "rb") as _image_file:
        surface_bg[_surface_name.lower()] = (
            "data:image/jpeg;base64,"
            + base64.b64encode(_image_file.read()).decode()
        )

# Logo file per event category. Grand Slams map a lower-case fragment of the
# tournament name to that tournament's own logo.
TIER_LOGOS = {
    "Grand Slam": {
        "australian": "ao.png",
        "roland": "rg.png",
        "wimbledon": "wimbledon.png",
        "us open": "usopen.png"
    },
    "WTA 1000": "logo-wta1000-black-1.png",
    "WTA 500": "wta500.png",
    "WTA 250": "wta250.png",
    "WTA Finals": "wtafinals.png",
    "125 / ITF": "wta125.png"
}

# ───────────────────── Dash App ─────────────────────
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.CYBORG])
app.title = "WTA Underdogs vs Top-10"

# Slider bounds come from the data; tick labels and the initial value are fixed.
match_years = df_all["Date"].dt.year
year_slider = dcc.Slider(
    id="year-input",
    min=match_years.min(),
    max=match_years.max(),
    step=1,
    value=2024,
    marks={y: str(y) for y in range(2005, 2025)},
    tooltip={"placement": "bottom"}
)

surface_options = [{"label": s, "value": s} for s in ["Hard", "Clay", "Grass"]]
surface_picker = dcc.Dropdown(
    id="surface-dropdown",
    options=surface_options,
    placeholder="Select Surface",
)

opponent_picker = dcc.Dropdown(
    id="player-dropdown", placeholder="Select a player", clearable=True
)

# Page order: title, year, surface, opponent, summary line, chart.
page_children = [
    html.H2("🎾 Underdog Wins vs Top-10 (WTA)", style={"textAlign": "center", "marginTop": 20}),
    html.Label("Year:"),
    year_slider,
    html.Br(),
    html.Label("Surface:"),
    surface_picker,
    html.Br(),
    html.Label("Top-10 Opponent (optional):"),
    opponent_picker,
    html.Div(id="summary-text", style={"color": "teal", "marginTop": 20, "fontWeight": "bold"}),
    dcc.Graph(id="underdog-plot", config={"displayModeBar": False}),
]
app.layout = html.Div(page_children, style={"width": "85%", "margin": "0 auto"})

# ───────────────────── Callbacks ─────────────────────

@app.callback(
    Output("player-dropdown", "options"),
    Input("year-input", "value"),
)
def populate_top10(year):
    """List up to ten players who appear with a rank <= 10 in *year*.

    Appearances as winner and as loser are pooled and ordered by rank; the
    first ten distinct, non-missing names become the dropdown options. A
    falsy year gives an empty list.
    """
    if not year:
        return []

    in_year = df_all[df_all["Date"].dt.year == int(year)]
    top_winners = in_year.loc[in_year["WRank"] <= 10, ["Winner", "WRank"]]
    top_losers = (
        in_year.loc[in_year["LRank"] <= 10, ["Loser", "LRank"]]
        .rename(columns={"Loser": "Winner", "LRank": "WRank"})
    )

    pooled = pd.concat([top_winners, top_losers])
    by_rank = pooled.sort_values("WRank")["Winner"]
    names = by_rank.dropna().drop_duplicates().head(10)
    return [dict(label=player, value=player) for player in names]

@app.callback(
    Output("summary-text", "children"),
    Input("year-input", "value"),
    Input("surface-dropdown", "value"),
    Input("player-dropdown", "value"),
)
def update_summary_text(year, surface, opponent):
    """Describe the current selection in the summary line above the chart.

    Args:
        year: Value of the "year-input" component.
        surface: Value of the "surface-dropdown" component.
        opponent: Optional value of the "player-dropdown" component.

    Returns:
        A plain prompt string while year or surface is missing; otherwise a
        ``dcc.Markdown`` component so the ``**…**`` markers render as bold.
    """
    if not year or not surface:
        return "Please select a year and a surface."
    msg = f"\U0001F3BE Showing underdog wins vs Top-10 players on **{surface}** courts in **{year}**."
    if opponent:
        msg += f" Filtered by matches against **{opponent}**."
    # A bare string inside html.Div is shown verbatim, asterisks included,
    # so the text is wrapped in a Markdown component.
    return dcc.Markdown(msg)

@app.callback(
    Output("underdog-plot", "figure"),
    Input("year-input", "value"),
    Input("surface-dropdown", "value"),
    Input("player-dropdown", "value"),
)
def make_chart(year, surface, opponent):
    """Build the per-underdog chart with an event logo drawn at each point.

    Args:
        year: Value of "year-input"; matches are limited to this calendar year.
        surface: Value of "surface-dropdown", compared case-insensitively
            with the "Surface" column.
        opponent: Optional value of "player-dropdown"; when set, only matches
            lost by this player are kept.

    Returns:
        A Plotly figure with one invisible marker per underdog (kept for
        hover) and, where the logo file exists, the logo of the most
        important event among that underdog's matches. A titled, empty
        scatter is returned when inputs are missing or nothing matches.
    """
    if not year or not surface:
        return px.scatter(title="Select year and surface")

    df = df_all[
        (df_all["Date"].dt.year == int(year)) &
        (df_all["Surface"].str.lower() == surface.lower()) &
        (((df_all["WRank"] <= 10) & (df_all["LRank"].between(11, 100))) |
         ((df_all["LRank"] <= 10) & (df_all["WRank"].between(11, 100))))
    ].copy()

    if df.empty:
        return px.scatter(title="No matches found")

    # The underdog is the player with the numerically larger rank.
    winner_is_underdog = df["WRank"] > df["LRank"]
    df["Underdog"] = df["Winner"].where(winner_is_underdog, df["Loser"])
    df["Odds"] = df["B365W"].where(winner_is_underdog, df["B365L"])
    df["Won"] = df["Winner"] == df["Underdog"]
    # A winning $1 stake pays the decimal odds; a losing one pays nothing.
    df["Return"] = df["Odds"].where(df["Won"], 0)
    df["UDRank"] = df["WRank"].where(df["Won"], df["LRank"])

    if opponent:
        df = df[df["Loser"] == opponent]
    if df.empty:
        return px.scatter(title="No matches with that filter")

    # Sort by TierSize so the last row of each group is the biggest event.
    # TierCat is then taken from that row; a string "max" would pick the
    # alphabetically last category instead.
    agg = df.sort_values("TierSize").groupby("Underdog", as_index=False).agg(
        AvgRank=("UDRank", "mean"),
        AvgReturn=("Return", "mean"),
        TierSize=("TierSize", "max"),
        TierCat=("TierCat", "last")
    )

    fig = px.scatter(
        agg, x="AvgRank", y="AvgReturn",
        size=[30]*len(agg),  # uniform placeholder for size
        hover_name="Underdog",
        title=f"Underdog vs Top-10 on {surface} • {year}"
    )

    # Markers stay in the figure for hover but are hidden behind the logos.
    fig.update_traces(marker=dict(opacity=0))

    slam_logos = TIER_LOGOS["Grand Slam"]
    encoded_logos = {}  # path -> data URI, so each file is read once per call

    for row in agg.itertuples(index=False):
        tier = row.TierCat
        logo_path = None

        if tier == "Grand Slam":
            # Choose the slam from this underdog's matches in the current
            # selection, not from their whole career.
            played = df.loc[
                (df["Underdog"] == row.Underdog) & (df["TierCat"] == "Grand Slam"),
                "Tournament",
            ].dropna().unique()
            for tournament in played:
                logo_key = next(
                    (k for k in slam_logos if k in str(tournament).lower()), None)
                if logo_key:
                    logo_path = slam_logos[logo_key]
                    break
        else:
            logo_path = TIER_LOGOS.get(tier)

        if logo_path and os.path.exists(logo_path):
            if logo_path not in encoded_logos:
                encoded_logos[logo_path] = encode_image(logo_path)
            fig.add_layout_image(dict(
                source=encoded_logos[logo_path],
                xref="x", yref="y",
                x=row.AvgRank, y=row.AvgReturn,
                sizex=3, sizey=0.6,
                xanchor="center", yanchor="middle",
                layer="above"
            ))
        else:
            print(f"⚠️ Logo not found for: {row.Underdog} • Tier: {tier} • Path: {logo_path}")

    return fig

# ───────────────────── Run App ─────────────────────
# Render the app inline in the notebook output, with debug tooling on.
app.run(
    mode="inline",
    debug=True,
)



JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.



---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~/anaconda3/envs/ada/lib/python3.9/site-packages/dash/dash.py:1326, in dispatch(
    self=<jupyter_dash.jupyter_app.JupyterDash object>
)
   1323 changed_props = body.get("changedPropIds", [])
   1324 g.triggered_inputs = [  # pylint: disable=assigning-non-slot
   1325     {"prop_id": x, "value": input_values.get(x)} for x in changed_props
-> 1326 ]
        g = {'inputs_list': [{'id': 'min-slider', 'property': 'value', 'value': 8}, {'id': 'search-box', 'property': 'value', 'value': ''}], 'states_list': [], 'outputs_list': {'id': 'pairs-table', 'property': 'data'}, 'input_values': {'min-slider.value': 8, 'search-box.value': ''}, 'state_values': {}, 'triggered_inputs': [{'prop_id': 'search-box.value', 'value': ''}], 'dash_response': <Response 0 bytes [200 OK]>}
        input_values = {'min-slider.value': 8, 'search-box.value': ''}
  

## Photos

In [36]:
# WTA RIVALRIES EXPLORER – NOTEBOOK EDITION
# ----------------------------------------
# • Slider: minimum meetings to list
# • Search box: filter by player name
# • Click a rivalry row: bubble-timeline of *all* matches
#   – green bubble  = first-listed player wins
#   – red   bubble  = second player wins
#   – bubble size   = event prestige (Slam > 1000 > 500 > 250 > 125/ITF)
#   – hover         = date, tournament, round, surface, score, odds

import os, re, pandas as pd, plotly.graph_objects as go
from jupyter_dash import JupyterDash as Dash      # ⭐ notebook-friendly Dash
from dash import dcc, html, Input, Output, dash_table

# ─────  LOAD & PREP DATA  ──────────────────────────────────────────────────
DATA_DIR = "/Users/ellendagher/Desktop/DataViz/SmashData/csv_data/wta_womens_tour"

# Stack every CSV found in the data directory into one frame.
match_frames = []
for csv_name in os.listdir(DATA_DIR):
    if csv_name.endswith(".csv"):
        match_frames.append(
            pd.read_csv(os.path.join(DATA_DIR, csv_name), low_memory=False)
        )
df = pd.concat(match_frames, ignore_index=True)

# A rivalry row needs both player names and a parseable date.
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
df = df.dropna(subset=["Winner", "Loser", "Date"])

# Odds columns: numeric when present, created empty when absent.
for col in ("B365W", "B365L"):
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    else:
        df[col] = None

def prestige(row):
    """Score an event from 5 (Grand Slam) down to 1 (everything else).

    The score comes from keywords in the lower-cased "Series" text or, for
    the top three levels, from the one-letter "tourney_level" code. `row`
    only needs a dict-like ``.get``; both fields are optional.

    NOTE(review): only "Series" and "tourney_level" are read here; a "Tier"
    column is not consulted. Confirm the loaded data carries "Series".
    """
    series_text = str(row.get("Series", "")).lower()
    level_code = row.get("tourney_level")

    # (score, Series keywords, tourney_level code or None), highest first.
    rules = (
        (5, ("grand slam",), "G"),
        (4, ("1000",), "M"),
        (3, ("500",), "P"),
        (2, ("250", "international"), None),
    )
    for score, keywords, code in rules:
        if any(word in series_text for word in keywords):
            return score
        if code is not None and level_code == code:
            return score
    return 1
# Event prestige per match; it later drives the bubble size.
df["TierSize"] = df.apply(prestige, axis=1)


def _pair_key(match_row):
    """Return the two player names as an alphabetically ordered tuple."""
    return tuple(sorted((match_row["Winner"], match_row["Loser"])))


# The same key for A-beats-B and B-beats-A, so both count as one rivalry.
df["PairKey"] = df.apply(_pair_key, axis=1)

# Meetings per rivalry, most frequent first.
meetings = df.groupby("PairKey").size().reset_index(name="Matches")
pair_counts = meetings.sort_values("Matches", ascending=False)

# Split the key tuple back into one column per player.
player_columns = pd.DataFrame(pair_counts["PairKey"].tolist(),
                              index=pair_counts.index)
pair_counts[["PlayerA", "PlayerB"]] = player_columns

# ─────  BUILD APP  ──────────────────────────────────────────────────────────
external_fonts = "https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600&display=swap"
app = Dash(__name__, external_stylesheets=[external_fonts])
app.title = "WTA Rivalries Explorer"

# Page title.
page_title = html.H2(
    "WTA Rivalries Explorer",
    style={"textAlign": "center", "fontFamily": "Inter", "marginBottom": "14px"},
)

# Left control: minimum number of meetings.
slider_panel = html.Div(
    [
        html.Label("Minimum Meetings:", style={"fontWeight": "600"}),
        dcc.Slider(id="min-slider", min=3, max=30, step=1, value=8,
                   marks={3: "3", 10: "10", 20: "20", 30: "30"},
                   tooltip={"placement": "bottom"}),
    ],
    style={"flex": "2", "marginRight": "26px"},
)

# Right control: free-text player search.
search_panel = html.Div(
    [
        html.Label("Search Player:", style={"fontWeight": "600"}),
        dcc.Input(id="search-box", type="text", placeholder="Type a name…",
                  debounce=True, style={"width": "100%", "padding": "6px"}),
    ],
    style={"flex": "1"},
)

controls_row = html.Div(
    [slider_panel, search_panel],
    style={"display": "flex", "gap": "10px", "marginBottom": "20px"},
)

# Rivalry table; one row can be selected at a time.
rivalry_table = dash_table.DataTable(
    id="pairs-table",
    columns=[
        {"name": "Player 1", "id": "PlayerA"},
        {"name": "Player 2", "id": "PlayerB"},
        {"name": "Matches", "id": "Matches"},
    ],
    row_selectable="single",
    style_cell={"textAlign": "center", "fontFamily": "Inter", "padding": "6px"},
    style_header={"fontWeight": "700", "backgroundColor": "#f0f4fa"},
    style_data_conditional=[
        {"if": {"row_index": "odd"}, "backgroundColor": "#f9fbfd"},
    ],
    page_size=12,
    style_table={"overflowX": "auto", "border": "1px solid #d9dfe8"},
)

app.layout = html.Div(
    [
        page_title,
        controls_row,
        rivalry_table,
        html.Br(),
        dcc.Graph(id="pair-timeline", style={"height": "480px"}),
    ],
    style={"width": "960px", "margin": "0 auto"},
)

# ─────  CALLBACKS  ───────────────────────────────────────────────────────────
@app.callback(
    Output("pairs-table", "data"),
    [Input("min-slider", "value"), Input("search-box", "value")]
)
def filter_table(min_meetings, search):
    """Return the rivalry rows to show in the table.

    Args:
        min_meetings: Value of "min-slider"; rivalries with fewer meetings
            are left out.
        search: Value of "search-box"; when non-empty, only rivalries whose
            PlayerA or PlayerB contains it (case-insensitive, matched
            literally) are kept.

    Returns:
        A list of dicts with the keys "PlayerA", "PlayerB" and "Matches".
    """
    frequent = pair_counts.loc[pair_counts["Matches"] >= min_meetings].copy()

    if search:
        # Escaped so that characters in the typed text are not read as regex.
        needle = re.escape(search.strip().lower())
        in_first = frequent["PlayerA"].str.lower().str.contains(needle)
        in_second = frequent["PlayerB"].str.lower().str.contains(needle)
        frequent = frequent[in_first | in_second]

    visible = frequent[["PlayerA", "PlayerB", "Matches"]]
    return visible.to_dict("records")

@app.callback(
    Output("pair-timeline", "figure"),
    [Input("pairs-table", "derived_virtual_selected_rows"),
     Input("pairs-table", "data")]
)
def build_timeline(selected, table_data):
    """Draw every meeting of the selected rivalry as a bubble timeline.

    Args:
        selected: "derived_virtual_selected_rows" of the table; the first
            entry is used as the index of the chosen row.
        table_data: Rows currently held by the table ("data" property).

    Returns:
        A Plotly figure with one bubble per match: x is the date, y is 0
        when the first-listed player (A) won and 1 when the second (B) won,
        colour follows the winner and size follows the event's TierSize.
        A placeholder figure is returned when nothing is selected or the
        selected index no longer exists in `table_data`.
    """
    # A selection made before the table was filtered can point past the end
    # of the shorter row list, so the index is range-checked as well.
    if (not selected or selected[0] is None or not table_data
            or selected[0] >= len(table_data)):
        return go.Figure().update_layout(
            title="Choose a rivalry from the table.",
            plot_bgcolor="#ffffff")
    row = table_data[selected[0]]
    A, B = row["PlayerA"], row["PlayerB"]

    sub = df[(df["Winner"].isin([A, B])) & (df["Loser"].isin([A, B]))].copy()
    sub = sub.sort_values("Date")

    a_won = sub["Winner"] == A
    sub["WinnerFlag"] = (~a_won).astype(int)      # 0 = A wins, 1 = B wins
    sub["BubbleCol"] = sub["WinnerFlag"].map({0: "seagreen", 1: "indianred"})
    sub["BubbleSize"] = sub["TierSize"].map({5: 40, 4: 32, 3: 26, 2: 20, 1: 14})

    # Always player A's odds: the winner's price when A won, else the loser's.
    sub["Odds"] = sub["B365W"].where(a_won, sub["B365L"])

    # Hover fields that are absent from the data are shown as blanks.
    for c in ["Tournament", "Round", "Surface", "Score"]:
        if c not in sub.columns:
            sub[c] = ""

    fig = go.Figure(go.Scatter(
        x=sub["Date"], y=sub["WinnerFlag"], mode="markers",
        marker=dict(size=sub["BubbleSize"], color=sub["BubbleCol"],
                    opacity=0.85, line=dict(width=1, color="black")),
        customdata=sub[["Tournament", "Round", "Surface", "Score", "Odds"]],
        # The odds line names player A, because the value is A's price in
        # every match, not the winner's.
        hovertemplate=("<b>%{x|%Y-%m-%d}</b><br>%{customdata[0]} – %{customdata[1]}"
                       "<br>Surface: %{customdata[2]}<br>Score: %{customdata[3]}"
                       "<br>Odds " + A + ": %{customdata[4]:.2f}<extra></extra>"),
        showlegend=False))

    fig.update_yaxes(
        tickvals=[0, 1], ticktext=[A + " wins", B + " wins"],
        range=[-0.5, 1.5], showgrid=False, zeroline=False)
    fig.update_layout(
        title=f"{A} vs {B} — {len(sub)} Matches",
        xaxis_title="Year",
        plot_bgcolor="#ffffff",
        margin=dict(l=50, r=50, t=70, b=50),
        font=dict(family="Inter"))
    return fig

# ─────  RUN IN NOTEBOOK  ─────────────────────────────────────────────────────
# Start the app with mode="inline" on port 8050, with debug disabled.
# NOTE(review): the recorded cell output warns "JupyterDash is deprecated,
# use Dash instead" — consider migrating per https://dash.plotly.com/dash-in-jupyter.
app.run(mode="inline", port=8050, debug=False)



JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.



In [42]:
import os, re, base64, pandas as pd, plotly.express as px
from jupyter_dash import JupyterDash
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc

# ───────────────────── Load Data ──────────────────────
# Directory holding the per-season WTA CSV files (machine-specific path).
DATA_DIR = "/Users/ellendagher/Desktop/DataViz/SmashData/csv_data/wta_womens_tour"

# Read every CSV in DATA_DIR into one frame. The listing is sorted because
# os.listdir returns entries in arbitrary order, which would otherwise make
# the row order differ between runs.
df_all = pd.concat(
    [pd.read_csv(os.path.join(DATA_DIR, f), low_memory=False)
     for f in sorted(os.listdir(DATA_DIR)) if f.endswith(".csv")],
    ignore_index=True,
)
df_all["Date"] = pd.to_datetime(df_all["Date"], errors="coerce")

# Odds and ranks are compared numerically further down, so coerce all four
# columns; unparseable entries become NaN and are dropped just below.
# (Presumably the rank columns can hold non-numeric placeholders — the coercion
# is a no-op when they are already numeric.)
for col in ["B365W", "B365L", "WRank", "LRank"]:
    df_all[col] = pd.to_numeric(df_all[col], errors="coerce")
df_all = df_all.dropna(subset=["B365W", "B365L", "WRank", "LRank"])

# ───────────────────── Tier Parser ─────────────────────
def parse_event_tier(row):
    """Classify a match row into a ``(tier_size, tier_label)`` pair.

    *row* only needs a ``.get(key, default)`` method (a pandas row or a plain
    dict both work). The checks run in this order, first hit wins:

    1. "Tournament" name contains one of the four Grand Slam names -> 6.
    2. "Tournament" names the tour finals, or "tourney_level" is "F" -> 5.
    3. "Tier" looks like ``WTA<number>`` (e.g. "WTA500").
    4. Keywords in "Series".
    5. One-letter "tourney_level" code.
    6. "Tier" spelled as "Tier I" .. "Tier IV", or as a bare integer.

    Anything unrecognised falls back to ``(1, "125 / ITF")``.
    """
    tname = str(row.get("Tournament", "")).lower()
    if any(s in tname for s in ["australian open", "roland garros", "wimbledon", "us open"]):
        return 6, "Grand Slam"
    if "wta finals" in tname or "tour championships" in tname or row.get("tourney_level") == "F":
        return 5, "WTA Finals"

    tier_raw = str(row.get("Tier", "")).strip()
    m = re.match(r"^wta\s*(\d+)$", tier_raw, flags=re.I)
    if m:
        n = int(m.group(1))
        if n >= 1000: return 4, "WTA 1000"
        if n >= 500: return 3, "WTA 500"
        if n >= 250: return 2, "WTA 250"
        # Below 250 (e.g. "WTA125") belongs with the lowest bucket, matching
        # how a "125" Series value is classified further down.
        return 1, "125 / ITF"

    series = str(row.get("Series", "")).lower()
    if "1000" in series or "mandatory" in series or "premier 5" in series:
        return 4, "WTA 1000"
    if "500" in series or series == "premier":
        return 3, "WTA 500"
    if "250" in series or "international" in series:
        return 2, "WTA 250"
    if "125" in series:
        return 1, "125 / ITF"

    code = str(row.get("tourney_level", "")).upper()
    if code == "M": return 4, "WTA 1000"
    if code == "P": return 3, "WTA 500"
    if code in {"I", "B"}: return 2, "WTA 250"

    tier_map_txt = {"tier i":  (4, "WTA 1000"),
                    "tier ii": (3, "WTA 500"),
                    "tier iii":(2, "WTA 250"),
                    "tier iv": (1, "125 / ITF")}
    if tier_map_txt.get(tier_raw.lower()):
        return tier_map_txt[tier_raw.lower()]
    # fullmatch guarantees int() can parse the text; the previous
    # lstrip("-").isdigit() test let strings such as "--3" through to int().
    if re.fullmatch(r"-?\d+", tier_raw):
        n = abs(int(tier_raw))
        return {1: (4, "WTA 1000"),
                2: (3, "WTA 500"),
                3: (2, "WTA 250"),
                4: (1, "125 / ITF")}.get(n, (1, "125 / ITF"))

    return 1, "125 / ITF"

# Run the tier parser on every row and store its two return values as the
# "TierSize" and "TierCat" columns.
df_all[["TierSize", "TierCat"]] = df_all.apply(parse_event_tier, axis=1, result_type="expand")

# ───────────────────── Background Images ─────────────────────
def encode_image(file_path):
    """Return the image at *file_path* as a base64 ``data:`` URI string.

    The MIME type is guessed from the file extension so that JPEG and other
    image files are labelled correctly; when the extension is unknown or not
    an image type, it falls back to ``image/png`` (the previous fixed value).

    Raises whatever ``open`` raises if the file cannot be read.
    """
    import mimetypes  # local import so this block does not depend on the cell's import line

    mime, _ = mimetypes.guess_type(str(file_path))
    if not mime or not mime.startswith("image/"):
        mime = "image/png"
    with open(file_path, "rb") as f:
        payload = base64.b64encode(f.read()).decode()
    return f"data:{mime};base64,{payload}"

# Court photo per surface (machine-specific absolute paths).
surface_images = {
    "Hard":  "/Users/ellendagher/Desktop/DataViz/SmashData/hard.jpeg",
    "Clay":  "/Users/ellendagher/Desktop/DataViz/SmashData/clay.jpg",
    "Grass": "/Users/ellendagher/Desktop/DataViz/SmashData/grass.jpg",
}


def _jpeg_data_uri(path):
    """Read *path* and return it as a base64 JPEG data URI, closing the file."""
    with open(path, "rb") as fh:
        return "data:image/jpeg;base64," + base64.b64encode(fh.read()).decode()


# Lower-cased surface name -> data URI, encoded once when the cell runs.
surface_bg = {s.lower(): _jpeg_data_uri(p) for s, p in surface_images.items()}

# Logo file per tier label. Grand Slams map tournament-name keywords to a
# logo; a value of None means no logo is drawn for that tier.
TIER_LOGOS = {
    "Grand Slam": {
        "australian": "ao.png",
        "roland": "rg.png",
        "wimbledon": "wimbledon.png",
        "us open": "usopen.png"
    },
    "WTA 1000": "logo-wta1000-black-1.png",
    "WTA 500": "wta500.png",
    "WTA 250": "wta250.png",
    "125 / ITF": None
}

# ───────────────────── Dash App ─────────────────────
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.CYBORG])
app.title = "WTA Underdogs vs Top-10"

# Take the slider range from the loaded data so that bounds, tick marks and
# the default value always agree. Rows whose date failed to parse are ignored.
_years = df_all["Date"].dt.year.dropna()
if _years.empty:
    _year_min, _year_max = 2005, 2024  # previous hard-coded mark range
else:
    _year_min, _year_max = int(_years.min()), int(_years.max())
# Keep 2024 as the default whenever it lies inside the available range.
_year_default = min(max(2024, _year_min), _year_max)

app.layout = html.Div([
    html.H2("🎾 Underdog Wins vs Top-10 (WTA)", style={"textAlign": "center", "marginTop": 20}),

    html.Label("Year:"),
    dcc.Slider(
        id="year-input",
        min=_year_min,
        max=_year_max,
        step=1,
        value=_year_default,
        marks={y: str(y) for y in range(_year_min, _year_max + 1)},
        tooltip={"placement": "bottom"}
    ),

    html.Br(),
    html.Label("Surface:"),
    dcc.Dropdown(
        id="surface-dropdown",
        options=[{"label": s, "value": s} for s in ["Hard", "Clay", "Grass"]],
        placeholder="Select Surface",
    ),

    html.Br(),
    html.Label("Top-10 Opponent (optional):"),
    # Options are filled in by the populate_top10 callback.
    dcc.Dropdown(id="player-dropdown", placeholder="Select a player", clearable=True),

    html.Div(id="summary-text", style={"color": "teal", "marginTop": 20, "fontWeight": "bold"}),

    dcc.Graph(id="underdog-plot", config={"displayModeBar": False}),
], style={"width": "85%", "margin": "0 auto"})

# ───────────────────── Callbacks ─────────────────────

@app.callback(
    Output("player-dropdown", "options"),
    Input("year-input", "value"),
)
def populate_top10(year):
    """Build the opponent dropdown options for the chosen *year*.

    Collects every player who appears with a rank of 10 or better in that
    year (as winner or as loser), orders them by rank, removes repeats and
    returns at most ten ``{"label", "value"}`` dicts. Returns an empty list
    when no year is set.
    """
    if not year:
        return []

    season = df_all[df_all["Date"].dt.year == int(year)]
    as_winner = season.loc[season["WRank"] <= 10, ["Winner", "WRank"]]
    # Rename the loser columns so both halves stack into the same two columns.
    as_loser = season.loc[season["LRank"] <= 10, ["Loser", "LRank"]].rename(
        columns={"Loser": "Winner", "LRank": "WRank"})

    ranked = pd.concat([as_winner, as_loser]).sort_values("WRank")
    top_names = ranked["Winner"].dropna().drop_duplicates().head(10)
    return [{"label": name, "value": name} for name in top_names]

@app.callback(
    Output("summary-text", "children"),
    Input("year-input", "value"),
    Input("surface-dropdown", "value"),
    Input("player-dropdown", "value"),
)
def update_summary_text(year, surface, opponent):
    """Describe the current filter selection above the chart.

    Returns a plain prompt string while year or surface is missing; otherwise
    returns the summary sentence wrapped in ``dcc.Markdown``. The sentence uses
    ``**bold**`` markup, which a plain string child would display as literal
    asterisks.
    """
    if not year or not surface:
        return "Please select a year and a surface."
    msg = f"\U0001F3BE Showing underdog wins vs Top-10 players on **{surface}** courts in **{year}**."
    if opponent:
        msg += f" Filtered by matches against **{opponent}**."
    return dcc.Markdown(msg)

@app.callback(
    Output("underdog-plot", "figure"),
    Input("year-input", "value"),
    Input("surface-dropdown", "value"),
    Input("player-dropdown", "value"),
)
def make_chart(year, surface, opponent):
    """Plot each underdog's average rank against average return on a $1 bet.

    Parameters
    ----------
    year : int | None
        Season to show (slider value).
    surface : str | None
        Surface name; compared case-insensitively with the "Surface" column.
    opponent : str | None
        When set, only matches lost by this player are kept.

    Returns
    -------
    plotly figure
        A titled empty scatter when inputs are missing or nothing matches,
        otherwise one invisible marker per underdog with a tier logo drawn on
        top of it and the surface photo as the plot background.
    """
    if not year or not surface:
        return px.scatter(title="Select year and surface")

    # Matches between a top-10 player and a player ranked 11-100.
    df = df_all[
        (df_all["Date"].dt.year == int(year)) &
        (df_all["Surface"].str.lower() == surface.lower()) &
        (((df_all["WRank"] <= 10) & (df_all["LRank"].between(11, 100))) |
         ((df_all["LRank"] <= 10) & (df_all["WRank"].between(11, 100))))
    ].copy()

    if df.empty:
        return px.scatter(title="No matches found")

    # The underdog is whichever player has the numerically larger rank.
    df["Underdog"], df["Odds"] = zip(*df.apply(
        lambda r: (r["Winner"], r["B365W"]) if r["WRank"] > r["LRank"]
        else (r["Loser"],  r["B365L"]), axis=1))
    df["Won"]    = df["Winner"] == df["Underdog"]
    df["Return"] = df["Odds"].where(df["Won"], 0)
    df["UDRank"] = df.apply(lambda r: r["WRank"] if r["Won"] else r["LRank"], axis=1)

    if opponent:
        df = df[df["Loser"] == opponent]
    if df.empty:
        return px.scatter(title="No matches with that filter")

    agg = df.groupby("Underdog", as_index=False).agg(
        AvgRank=("UDRank", "mean"),
        AvgReturn=("Return", "mean"),
        TierSize=("TierSize", "max"),
    )

    # Take the tier label (and tournament) from the underdog's highest-tier
    # match. A string "max" over TierCat would sort labels alphabetically and
    # could disagree with TierSize.
    top_rows = (df.sort_values("TierSize", ascending=False, kind="stable")
                  .drop_duplicates("Underdog")
                  .set_index("Underdog"))
    agg["TierCat"] = agg["Underdog"].map(top_rows["TierCat"])
    agg["TopEvent"] = (agg["Underdog"].map(top_rows["Tournament"])
                       if "Tournament" in top_rows.columns else "")

    fig = px.scatter(
        agg, x="AvgRank", y="AvgReturn",
        size=[30]*len(agg),  # uniform placeholder for size
        hover_name="Underdog",
        title=f"Underdog vs Top-10 on {surface} • {year}"
    )

    # Markers stay invisible; they only provide hover targets under the logos.
    fig.update_traces(marker=dict(opacity=0))

    # Add the background through add_layout_image as well. Passing
    # images=[...] to update_layout merges element-wise into the images
    # already present, which would overwrite the first logo.
    background = surface_bg.get(surface.lower())
    if background:
        fig.add_layout_image(dict(
            source=background,
            xref="paper", yref="paper",
            x=0, y=1, sizex=1, sizey=1,
            xanchor="left", yanchor="top",
            layer="below", sizing="stretch",
            opacity=0.5,
        ))

    slam_logos = TIER_LOGOS["Grand Slam"]
    encoded = {}  # logo path -> data URI, so each file is read only once
    for _, row in agg.iterrows():
        tier = row["TierCat"]

        if tier == "Grand Slam":
            # Choose the slam logo from the tournament of the match itself.
            event = str(row["TopEvent"]).lower()
            logo_key = next((k for k in slam_logos if k in event), None)
            logo_path = slam_logos[logo_key] if logo_key else None
        else:
            logo_path = TIER_LOGOS.get(tier)

        # Skip tiers without a logo and logo files that are not on disk.
        if not logo_path or not os.path.exists(logo_path):
            continue
        if logo_path not in encoded:
            encoded[logo_path] = encode_image(logo_path)

        fig.add_layout_image(dict(
            source=encoded[logo_path],
            xref="x", yref="y",
            x=row["AvgRank"], y=row["AvgReturn"],
            sizex=3, sizey=0.6,
            xanchor="center", yanchor="middle",
            layer="above"
        ))

    fig.update_layout(
        xaxis_title="Average Rank (1 = best)",
        yaxis_title="Average Return on $1 Stake (Bet365)",
        xaxis_autorange="reversed",
        legend_title="Event Category",
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
    )

    return fig

# ───────────────────── Run App ─────────────────────
# Start the app with mode="inline" and debug enabled.
# NOTE(review): the recorded cell output warns "JupyterDash is deprecated,
# use Dash instead" — consider migrating per https://dash.plotly.com/dash-in-jupyter.
app.run(mode="inline", debug=True)




JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.



In [65]:
import os, re, base64, pandas as pd, plotly.express as px
from jupyter_dash import JupyterDash
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc

# ───────────────────── Load Data ──────────────────────
# Directory holding the per-season WTA CSV files (machine-specific path).
DATA_DIR = "/Users/ellendagher/Desktop/DataViz/SmashData/csv_data/wta_womens_tour"

# Read every CSV in DATA_DIR into one frame. The listing is sorted because
# os.listdir returns entries in arbitrary order, which would otherwise make
# the row order differ between runs.
df_all = pd.concat(
    [pd.read_csv(os.path.join(DATA_DIR, f), low_memory=False)
     for f in sorted(os.listdir(DATA_DIR)) if f.endswith(".csv")],
    ignore_index=True,
)
df_all["Date"] = pd.to_datetime(df_all["Date"], errors="coerce")

# Odds and ranks are compared numerically further down, so coerce all four
# columns; unparseable entries become NaN and are dropped just below.
# (Presumably the rank columns can hold non-numeric placeholders — the coercion
# is a no-op when they are already numeric.)
for col in ["B365W", "B365L", "WRank", "LRank"]:
    df_all[col] = pd.to_numeric(df_all[col], errors="coerce")
df_all = df_all.dropna(subset=["B365W", "B365L", "WRank", "LRank"])

# ───────────────────── Tier Parser ─────────────────────
def parse_event_tier(row):
    """Classify a match row into a ``(tier_size, tier_label)`` pair.

    *row* only needs a ``.get(key, default)`` method (a pandas row or a plain
    dict both work). The checks run in this order, first hit wins:

    1. "Tournament" name contains one of the four Grand Slam names -> 6.
    2. "Tournament" names the tour finals, or "tourney_level" is "F" -> 5.
    3. "Tier" looks like ``WTA<number>`` (e.g. "WTA500").
    4. Keywords in "Series".
    5. One-letter "tourney_level" code.
    6. "Tier" spelled as "Tier I" .. "Tier IV", or as a bare integer.

    Anything unrecognised falls back to ``(1, "125 / ITF")``.
    """
    tname = str(row.get("Tournament", "")).lower()
    if any(s in tname for s in ["australian open", "roland garros", "wimbledon", "us open"]):
        return 6, "Grand Slam"
    if "wta finals" in tname or "tour championships" in tname or row.get("tourney_level") == "F":
        return 5, "WTA Finals"

    tier_raw = str(row.get("Tier", "")).strip()
    m = re.match(r"^wta\s*(\d+)$", tier_raw, flags=re.I)
    if m:
        n = int(m.group(1))
        if n >= 1000: return 4, "WTA 1000"
        if n >= 500: return 3, "WTA 500"
        if n >= 250: return 2, "WTA 250"
        # Below 250 (e.g. "WTA125") belongs with the lowest bucket, matching
        # how a "125" Series value is classified further down.
        return 1, "125 / ITF"

    series = str(row.get("Series", "")).lower()
    if "1000" in series or "mandatory" in series or "premier 5" in series:
        return 4, "WTA 1000"
    if "500" in series or series == "premier":
        return 3, "WTA 500"
    if "250" in series or "international" in series:
        return 2, "WTA 250"
    if "125" in series:
        return 1, "125 / ITF"

    code = str(row.get("tourney_level", "")).upper()
    if code == "M": return 4, "WTA 1000"
    if code == "P": return 3, "WTA 500"
    if code in {"I", "B"}: return 2, "WTA 250"

    tier_map_txt = {"tier i":  (4, "WTA 1000"),
                    "tier ii": (3, "WTA 500"),
                    "tier iii":(2, "WTA 250"),
                    "tier iv": (1, "125 / ITF")}
    if tier_map_txt.get(tier_raw.lower()):
        return tier_map_txt[tier_raw.lower()]
    # fullmatch guarantees int() can parse the text; the previous
    # lstrip("-").isdigit() test let strings such as "--3" through to int().
    if re.fullmatch(r"-?\d+", tier_raw):
        n = abs(int(tier_raw))
        return {1: (4, "WTA 1000"),
                2: (3, "WTA 500"),
                3: (2, "WTA 250"),
                4: (1, "125 / ITF")}.get(n, (1, "125 / ITF"))

    return 1, "125 / ITF"

# Run the tier parser on every row and store its two return values as the
# "TierSize" and "TierCat" columns.
df_all[["TierSize", "TierCat"]] = df_all.apply(parse_event_tier, axis=1, result_type="expand")

# ───────────────────── Background Images ─────────────────────
def encode_image(file_path):
    """Return the image at *file_path* as a base64 ``data:`` URI string.

    The MIME type is guessed from the file extension so that JPEG and other
    image files are labelled correctly; when the extension is unknown or not
    an image type, it falls back to ``image/png`` (the previous fixed value).

    Raises whatever ``open`` raises if the file cannot be read.
    """
    import mimetypes  # local import so this block does not depend on the cell's import line

    mime, _ = mimetypes.guess_type(str(file_path))
    if not mime or not mime.startswith("image/"):
        mime = "image/png"
    with open(file_path, "rb") as f:
        payload = base64.b64encode(f.read()).decode()
    return f"data:{mime};base64,{payload}"

# Court photo per surface (machine-specific absolute paths).
surface_images = {
    "Hard":  "/Users/ellendagher/Desktop/DataViz/SmashData/hard.jpeg",
    "Clay":  "/Users/ellendagher/Desktop/DataViz/SmashData/clay.jpg",
    "Grass": "/Users/ellendagher/Desktop/DataViz/SmashData/grass.jpg",
}


def _jpeg_data_uri(path):
    """Read *path* and return it as a base64 JPEG data URI, closing the file."""
    with open(path, "rb") as fh:
        return "data:image/jpeg;base64," + base64.b64encode(fh.read()).decode()


# Lower-cased surface name -> data URI, encoded once when the cell runs.
surface_bg = {s.lower(): _jpeg_data_uri(p) for s, p in surface_images.items()}

# Logo file per tier label. Grand Slams map tournament-name keywords to a logo.
TIER_LOGOS = {
    "Grand Slam": {
        "australian": "ao.png",
        "roland": "rg.png",
        "wimbledon": "wimbledon.png",
        "us open": "usopen.png"
    },
    "WTA 1000": "logo-wta1000-black-1.png",
    "WTA 500": "wta500.png",
    "WTA 250": "wta250.png",
    "WTA Finals": "wtafinals.png",
    "125 / ITF": "wta125.png"
}

# ───────────────────── Dash App ─────────────────────
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.CYBORG])
app.title = "WTA Underdogs vs Top-10"

# Take the slider range from the loaded data so that bounds, tick marks and
# the default value always agree. Rows whose date failed to parse are ignored.
_years = df_all["Date"].dt.year.dropna()
if _years.empty:
    _year_min, _year_max = 2005, 2024  # previous hard-coded mark range
else:
    _year_min, _year_max = int(_years.min()), int(_years.max())
# Keep 2024 as the default whenever it lies inside the available range.
_year_default = min(max(2024, _year_min), _year_max)

app.layout = html.Div([
    html.H2("🎾 Underdog Wins vs Top-10 (WTA)", style={"textAlign": "center", "marginTop": 20}),

    html.Label("Year:"),
    dcc.Slider(
        id="year-input",
        min=_year_min,
        max=_year_max,
        step=1,
        value=_year_default,
        marks={y: str(y) for y in range(_year_min, _year_max + 1)},
        tooltip={"placement": "bottom"}
    ),

    html.Br(),
    html.Label("Surface:"),
    dcc.Dropdown(
        id="surface-dropdown",
        options=[{"label": s, "value": s} for s in ["Hard", "Clay", "Grass"]],
        placeholder="Select Surface",
    ),

    html.Br(),
    html.Label("Top-10 Opponent (optional):"),
    # Options are filled in by the populate_top10 callback.
    dcc.Dropdown(id="player-dropdown", placeholder="Select a player", clearable=True),

    html.Div(id="summary-text", style={"color": "teal", "marginTop": 20, "fontWeight": "bold"}),

    dcc.Graph(id="underdog-plot", config={"displayModeBar": False}),
], style={"width": "85%", "margin": "0 auto"})

# ───────────────────── Callbacks ─────────────────────

@app.callback(
    Output("player-dropdown", "options"),
    Input("year-input", "value"),
)
def populate_top10(year):
    """List up to ten top-10-ranked players of *year* as dropdown options.

    Players are gathered from both the winner and the loser side of matches
    played that year, sorted by rank and de-duplicated. An unset year yields
    an empty option list.
    """
    if not year:
        return []

    in_year = df_all["Date"].dt.year == int(year)
    season = df_all[in_year]

    winners = season.loc[season["WRank"] <= 10, ["Winner", "WRank"]]
    losers = season.loc[season["LRank"] <= 10, ["Loser", "LRank"]]
    # Give the loser half the winner column names so the two halves stack.
    losers = losers.rename(columns={"Loser": "Winner", "LRank": "WRank"})

    by_rank = pd.concat([winners, losers]).sort_values("WRank")
    options = []
    for player in by_rank["Winner"].dropna().drop_duplicates().head(10):
        options.append({"label": player, "value": player})
    return options

@app.callback(
    Output("summary-text", "children"),
    Input("year-input", "value"),
    Input("surface-dropdown", "value"),
    Input("player-dropdown", "value"),
)
def update_summary_text(year, surface, opponent):
    """Describe the current filter selection above the chart.

    Returns a plain prompt string while year or surface is missing; otherwise
    returns the summary sentence wrapped in ``dcc.Markdown``. The sentence uses
    ``**bold**`` markup, which a plain string child would display as literal
    asterisks.
    """
    if not year or not surface:
        return "Please select a year and a surface."
    msg = f"\U0001F3BE Showing underdog wins vs Top-10 players on **{surface}** courts in **{year}**."
    if opponent:
        msg += f" Filtered by matches against **{opponent}**."
    return dcc.Markdown(msg)

@app.callback(
    Output("underdog-plot", "figure"),
    Input("year-input", "value"),
    Input("surface-dropdown", "value"),
    Input("player-dropdown", "value"),
)
def make_chart(year, surface, opponent):
    """Plot each underdog's average rank against average return on a $1 bet.

    Parameters
    ----------
    year : int | None
        Season to show (slider value).
    surface : str | None
        Surface name; compared case-insensitively with the "Surface" column.
    opponent : str | None
        When set, only matches lost by this player are kept.

    Returns
    -------
    plotly figure
        A titled empty scatter when inputs are missing or nothing matches,
        otherwise one invisible marker per underdog with a tier logo drawn on
        top of it and the surface photo as the plot background. A warning is
        printed for every underdog whose logo cannot be shown.
    """
    if not year or not surface:
        return px.scatter(title="Select year and surface")

    # Matches between a top-10 player and a player ranked 11-100.
    df = df_all[
        (df_all["Date"].dt.year == int(year)) &
        (df_all["Surface"].str.lower() == surface.lower()) &
        (((df_all["WRank"] <= 10) & (df_all["LRank"].between(11, 100))) |
         ((df_all["LRank"] <= 10) & (df_all["WRank"].between(11, 100))))
    ].copy()

    if df.empty:
        return px.scatter(title="No matches found")

    # The underdog is whichever player has the numerically larger rank.
    df["Underdog"], df["Odds"] = zip(*df.apply(
        lambda r: (r["Winner"], r["B365W"]) if r["WRank"] > r["LRank"]
        else (r["Loser"],  r["B365L"]), axis=1))
    df["Won"]    = df["Winner"] == df["Underdog"]
    df["Return"] = df["Odds"].where(df["Won"], 0)
    df["UDRank"] = df.apply(lambda r: r["WRank"] if r["Won"] else r["LRank"], axis=1)

    if opponent:
        df = df[df["Loser"] == opponent]
    if df.empty:
        return px.scatter(title="No matches with that filter")

    agg = df.groupby("Underdog", as_index=False).agg(
        AvgRank=("UDRank", "mean"),
        AvgReturn=("Return", "mean"),
        TierSize=("TierSize", "max"),
    )

    # Take the tier label (and tournament) from the underdog's highest-tier
    # match. A string "max" over TierCat would sort labels alphabetically and
    # could disagree with TierSize.
    top_rows = (df.sort_values("TierSize", ascending=False, kind="stable")
                  .drop_duplicates("Underdog")
                  .set_index("Underdog"))
    agg["TierCat"] = agg["Underdog"].map(top_rows["TierCat"])
    agg["TopEvent"] = (agg["Underdog"].map(top_rows["Tournament"])
                       if "Tournament" in top_rows.columns else "")

    fig = px.scatter(
        agg, x="AvgRank", y="AvgReturn",
        size=[30]*len(agg),  # uniform placeholder for size
        hover_name="Underdog",
        title=f"Underdog vs Top-10 on {surface} • {year}"
    )

    # Markers stay invisible; they only provide hover targets under the logos.
    fig.update_traces(marker=dict(opacity=0))

    # Add the background through add_layout_image as well. Passing
    # images=[...] to update_layout merges element-wise into the images
    # already present, which would overwrite the first logo.
    background = surface_bg.get(surface.strip().lower())
    if background:
        fig.add_layout_image(dict(
            source=background,
            xref="paper", yref="paper",
            x=0, y=1, sizex=1, sizey=1,
            xanchor="left", yanchor="top",
            layer="below", sizing="stretch",
            opacity=0.5,
        ))

    slam_logos = TIER_LOGOS["Grand Slam"]
    encoded = {}  # logo path -> data URI, so each file is read only once
    for _, row in agg.iterrows():
        tier = row["TierCat"]
        logo_path = None

        if tier == "Grand Slam":
            # Choose the slam logo from the tournament of the match itself.
            event = str(row["TopEvent"]).lower()
            logo_key = next((k for k in slam_logos if k in event), None)
            if logo_key:
                logo_path = slam_logos[logo_key]
        else:
            logo_path = TIER_LOGOS.get(tier)

        if logo_path and os.path.exists(logo_path):
            if logo_path not in encoded:
                encoded[logo_path] = encode_image(logo_path)
            fig.add_layout_image(dict(
                source=encoded[logo_path],
                xref="x", yref="y",
                x=row["AvgRank"], y=row["AvgReturn"],
                sizex=3, sizey=0.6,
                xanchor="center", yanchor="middle",
                layer="above"
            ))
        else:
            print(f"⚠️ Logo not found for: {row['Underdog']} • Tier: {tier} • Path: {logo_path}")

    fig.update_layout(
        xaxis_title="Average Rank (1 = best)",
        yaxis_title="Average Return on $1 Stake (Bet365)",
        xaxis_autorange="reversed",
        legend_title="Event Category",
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
    )

    return fig

# ───────────────────── Run App ─────────────────────
# Start the app with mode="inline" and debug enabled.
# NOTE(review): the recorded cell output warns "JupyterDash is deprecated,
# use Dash instead" — consider migrating per https://dash.plotly.com/dash-in-jupyter.
app.run(mode="inline", debug=True)



JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.

