# EDA: MLB Pitch Strike Prediction

This notebook explores the processed pitch data to understand:
- Basic dataset statistics
- Distribution of strikes vs balls
- Strike rate by various factors (count, pitch type, batter stance)
- Visualizations of pitch characteristics



In [None]:
# Standard-library imports needed to locate the project before anything else.
import sys
from pathlib import Path

# Add project root to path
# Path() is the current working directory, so this takes the *parent* of
# wherever the kernel was started.
# NOTE(review): assumes the notebook is run from a direct subfolder of the
# project root (e.g. notebooks/) - confirm, otherwise `src` will not import.
project_root = Path().resolve().parent
sys.path.insert(0, str(project_root))

# Third-party analysis / plotting stack.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Project-local modules; these only resolve after the sys.path tweak above.
from src import config
from src.plots import plot_strike_probability_heatmap, plot_release_speed_distribution

# Set style
plt.style.use('default')
%matplotlib inline

# Echo the resolved locations so a wrong working directory is obvious immediately.
print(f"Project root: {project_root}")
print(f"Loading data from: {config.PROCESSED_DATA_FILE}")



## 1. Load Data


In [None]:
# Read the processed pitch table from the location configured for the project
df = pd.read_csv(config.PROCESSED_DATA_FILE)

# Quick orientation: overall size, then the column names in one list
column_names = df.columns.tolist()
print("Dataset shape:", df.shape)
print(f"\nColumns ({len(column_names)}):")
print(column_names)

# The last expression is rendered as a rich table by the notebook
print("\nFirst few rows:")
df.head()



## 2. Basic Statistics


In [None]:
# Overall size of the dataset
n_pitches, n_columns = df.shape
print("Dataset Info:")
print(f"  Total pitches: {n_pitches:,}")
print(f"  Columns: {n_columns}")

# Null counts per column, showing only the columns that have any
print("\nMissing values:")
missing = df.isna().sum()
print(missing.loc[missing > 0])

# Class balance of the target column
strike_flags = df['is_strike']
print("\nTarget distribution:")
print(strike_flags.value_counts())
print(f"\nStrike rate: {strike_flags.mean():.2%}")



## 3. Strike Rate by Count

The count (balls and strikes) is a critical factor in pitch outcome. Let's see how strike rate varies.


In [None]:
# Group the target once by (balls, strikes); both the table and the heatmap use it
strikes_by_count = df.groupby(['balls', 'strikes'])['is_strike']

# Table: number of pitches, number of strikes and strike rate for every count
count_summary = (
    strikes_by_count
    .agg(Total='count', Strikes='sum', Strike_Rate='mean')
    .round(3)
    .sort_index()
)

print("Strike Rate by Count:")
print(count_summary)

# Heatmap: balls on the rows, strikes on the columns
count_matrix = strikes_by_count.mean().unstack(fill_value=0)
ball_labels = count_matrix.index
strike_labels = count_matrix.columns

fig, ax = plt.subplots(figsize=(10, 6))
im = ax.imshow(count_matrix.values, cmap='RdYlGn', aspect='auto')

# One tick per cell, labelled with the actual ball / strike value
ax.set_xticks(np.arange(len(strike_labels)))
ax.set_xticklabels(strike_labels)
ax.set_yticks(np.arange(len(ball_labels)))
ax.set_yticklabels(ball_labels)

ax.set_title('Strike Rate by Count', fontsize=14, fontweight='bold')
ax.set_xlabel('Strikes', fontsize=12)
ax.set_ylabel('Balls', fontsize=12)

fig.colorbar(im, ax=ax, label='Strike Rate')
fig.tight_layout()
plt.show()



## 4. Strike Rate by Pitch Type


In [None]:
# Pitch volume and strike rate per pitch type, highest strike rate first
pitch_type_summary = (
    df.groupby('pitch_type')['is_strike']
    .agg(Count='count', Strike_Rate='mean')
    .round(3)
    .sort_values('Strike_Rate', ascending=False)
)

print("Strike Rate by Pitch Type:")
print(pitch_type_summary)

# Horizontal bars keep the pitch-type labels readable
fig, ax = plt.subplots(figsize=(10, 6))
pitch_type_summary['Strike_Rate'].plot.barh(ax=ax, color='steelblue')
ax.set_title('Strike Rate by Pitch Type', fontsize=14, fontweight='bold')
ax.set_xlabel('Strike Rate', fontsize=12)
ax.grid(True, axis='x', alpha=0.3)
fig.tight_layout()
plt.show()



## 5. Strike Rate by Batter Stance


In [None]:
# Pitch volume and strike rate per batter stance, highest strike rate first
stance_summary = (
    df.groupby('stand')['is_strike']
    .agg(Count='count', Strike_Rate='mean')
    .round(3)
    .sort_values('Strike_Rate', ascending=False)
)

print("Strike Rate by Batter Stance:")
print(stance_summary)

# Vertical bars on a fixed 0-1 axis so the rates are comparable at a glance
fig, ax = plt.subplots(figsize=(8, 5))
stance_summary['Strike_Rate'].plot.bar(ax=ax, color='coral', edgecolor='black')
ax.set_title('Strike Rate by Batter Stance', fontsize=14, fontweight='bold')
ax.set_xlabel('Batter Stance', fontsize=12)
ax.set_ylabel('Strike Rate', fontsize=12)
ax.set_ylim([0, 1])
ax.grid(True, axis='y', alpha=0.3)
fig.tight_layout()
plt.show()



## 6. Release Speed Distribution


In [None]:
# Figure comes from the project's plotting helper; keep the handle for later reuse
fig = plot_release_speed_distribution(df)
plt.show()

# Descriptive statistics of release speed, split by pitch outcome
speed_by_outcome = df.groupby('is_strike')['release_speed'].describe()
print("\nRelease Speed Statistics:")
print(speed_by_outcome)



import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import patches


def _zone_cell_stats(df: pd.DataFrame, x_edges: np.ndarray, z_edges: np.ndarray):
    """Bin pitches into the zone grid and summarise every cell.

    Returns ``(count_matrix, prob_matrix)``, both shaped
    ``(len(z_edges) - 1, len(x_edges) - 1)``. Index ``[r, c]`` is the cell
    between ``z_edges[r]``/``z_edges[r + 1]`` and ``x_edges[c]``/``x_edges[c + 1]``,
    so row 0 is the LOWEST band of the grid and column 0 the leftmost.
    Cells that received no pitch have count 0 and probability NaN.
    """
    n_rows = len(z_edges) - 1
    n_cols = len(x_edges) - 1

    # labels=False yields the integer bin index directly; pitches outside the
    # edges come back as NaN. Working on plain arrays avoids copying the whole
    # input frame and any index-alignment surprises.
    cells = pd.DataFrame(
        {
            "row": pd.cut(df["plate_z"].to_numpy(), bins=z_edges, labels=False, include_lowest=True),
            "col": pd.cut(df["plate_x"].to_numpy(), bins=x_edges, labels=False, include_lowest=True),
            "is_strike": df["is_strike"].to_numpy(),
        }
    )

    # Keep only pitches that land inside the grid.
    cells = cells.dropna(subset=["row", "col"]).astype({"row": int, "col": int})

    per_cell = cells.groupby(["row", "col"])["is_strike"].agg(count="size", prob="mean")

    count_matrix = np.zeros((n_rows, n_cols), dtype=int)
    # NaN (not 0.0) marks "no data", so empty cells can be rendered blank.
    prob_matrix = np.full((n_rows, n_cols), np.nan, dtype=float)

    rows = per_cell.index.get_level_values("row").to_numpy(dtype=int)
    cols = per_cell.index.get_level_values("col").to_numpy(dtype=int)
    count_matrix[rows, cols] = per_cell["count"].to_numpy()
    prob_matrix[rows, cols] = per_cell["prob"].to_numpy(dtype=float)

    return count_matrix, prob_matrix


def _draw_batter_silhouette(ax, x_offset: float, flip: int = 1) -> None:
    """Draw a simple light-gray batter silhouette centred on ``x_offset``.

    ``flip`` is 1 for a batter facing right and -1 for one facing left. Only
    the torso is drawn at the moment, which is symmetric, so ``flip`` has no
    visible effect yet; it is kept so asymmetric parts can be added later.
    """
    color = (0.85, 0.85, 0.85)  # light gray

    # Torso: rounded box, 0.5 wide and 1.2 tall, starting at height 1.3
    body = patches.FancyBboxPatch(
        (x_offset - 0.25, 1.3),
        0.5,
        1.2,
        boxstyle="round,pad=0.05",
        linewidth=0,
        facecolor=color,
    )
    ax.add_patch(body)


def plot_savant_style_zone(
    df: pd.DataFrame,
    value: str = "count",   # "count" or "prob"
    figsize: tuple = (6, 7),
) -> plt.Figure:
    """
    Savant-style strike zone visualization with per-cell values and batter silhouettes.

    The strike zone is split into a 3x3 grid. Each cell is labelled with either
    the number of pitches that crossed it or the share of those pitches that
    were strikes. Pitches outside the zone are ignored. Cells with no pitches
    are left blank.

    Parameters
    ----------
    df : DataFrame
        Must contain 'plate_x', 'plate_z', and 'is_strike' (0/1).
        The input frame is not modified.
    value : {"count", "prob"}
        - "count": show total pitches in each box.
        - "prob": show strike probability in each box.
    figsize : tuple
        Figure size.

    Returns
    -------
    fig : matplotlib.Figure

    Raises
    ------
    ValueError
        If ``value`` is neither "count" nor "prob".
    """
    if value not in ("count", "prob"):
        raise ValueError(f'value must be "count" or "prob", got {value!r}')

    # -----------------------------
    # 1. Define zone geometry
    # -----------------------------
    # Approximate Statcast zone (feet)
    zone_left, zone_right = -0.83, 0.83
    zone_bottom, zone_top = 1.5, 3.5

    # 3x3 grid inside the strike zone
    x_edges = np.linspace(zone_left, zone_right, 4)   # 3 columns -> 4 edges
    z_edges = np.linspace(zone_bottom, zone_top, 4)   # 3 rows -> 4 edges

    # -----------------------------
    # 2. Aggregate values per cell
    # -----------------------------
    # Row r of both matrices is the band z_edges[r]..z_edges[r + 1], i.e. row 0
    # is the bottom of the zone. The drawing loop below uses the same
    # convention, so the matrices must NOT be flipped before lookup.
    count_matrix, prob_matrix = _zone_cell_stats(df, x_edges, z_edges)

    # -----------------------------
    # 3. Create figure & axis
    # -----------------------------
    fig, ax = plt.subplots(figsize=figsize)

    # Set overall view limits to leave space for silhouettes
    ax.set_xlim(-3.0, 3.0)
    ax.set_ylim(-0.5, 5.0)
    ax.axis("off")  # Hide default axis

    # -----------------------------
    # 4. Draw the 3x3 grid boxes
    # -----------------------------
    for i in range(3):  # rows, bottom to top
        for j in range(3):  # cols, left to right
            # coordinates in data space
            x0, x1 = x_edges[j], x_edges[j + 1]
            z0, z1 = z_edges[i], z_edges[i + 1]

            rect = patches.Rectangle(
                (x0, z0),
                x1 - x0,
                z1 - z0,
                fill=False,
                edgecolor="black",
                linewidth=2,
            )
            ax.add_patch(rect)

            # Label for this cell; blank when the cell has no pitches
            if value == "prob":
                cell_prob = prob_matrix[i, j]
                text_str = "" if np.isnan(cell_prob) else f"{cell_prob:.3f}"
            else:  # "count"
                cell_count = count_matrix[i, j]
                text_str = str(cell_count) if cell_count > 0 else ""

            ax.text(
                0.5 * (x0 + x1),
                0.5 * (z0 + z1),
                text_str,
                ha="center",
                va="center",
                fontsize=11,
                fontweight="bold",
                color="black",
            )

    # -----------------------------
    # 5. Draw outer strike zone frame
    # -----------------------------
    outer_rect = patches.Rectangle(
        (zone_left, zone_bottom),
        zone_right - zone_left,
        zone_top - zone_bottom,
        fill=False,
        edgecolor="black",
        linewidth=2.5,
    )
    ax.add_patch(outer_rect)

    # -----------------------------
    # 6. Draw home plate
    # -----------------------------
    plate_coords = np.array([
        [-0.708, 0.00],
        [ 0.708, 0.00],
        [ 0.354, -0.146],
        [ 0.000, -0.292],
        [-0.354, -0.146],
    ])
    plate = patches.Polygon(
        plate_coords,
        closed=True,
        facecolor="none",
        edgecolor="black",
        linewidth=1.5,
    )
    ax.add_patch(plate)

    # -----------------------------
    # 7. Simple silhouettes
    # -----------------------------
    # One batter on each side of the zone, facing the plate.
    # NOTE(review): the +/-2.0 offsets are chosen to sit inside the margins
    # reserved by the x-limits above; adjust if a different layout is wanted.
    _draw_batter_silhouette(ax, x_offset=-2.0, flip=1)
    _draw_batter_silhouette(ax, x_offset=2.0, flip=-1)

    return fig
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import patches


def plot_savant_style_zone(
    df: pd.DataFrame,
    value: str = "count",   # "count" or "prob"
    figsize: tuple = (6, 7),
) -> plt.Figure:
    """
    Savant-style strike zone visualization: a 3×3 grid over the strike zone
    annotated with a per-cell value, plus home plate and batter silhouettes.

    Parameters
    ----------
    df : DataFrame
        Must contain 'plate_x', 'plate_z', and 'is_strike' (0/1).
        The frame is not modified; pitches whose location falls outside the
        3×3 zone (or is missing) are ignored.
    value : {"count", "prob"}
        - "count": show total pitches in each box.
        - "prob": show strike probability (mean of 'is_strike') in each box.
        Any other value falls back to "count".
    figsize : tuple
        Figure size.

    Returns
    -------
    fig : matplotlib.Figure
        The figure holding the drawing. Cells without any pitch are left blank.
    """
    show_prob = value == "prob"

    # -----------------------------
    # 1. Define zone geometry
    # -----------------------------
    # Approximate Statcast zone (feet)
    zone_left, zone_right = -0.83, 0.83
    zone_bottom, zone_top = 1.5, 3.5

    # 3×3 grid inside the strike zone
    x_edges = np.linspace(zone_left, zone_right, 4)   # 3 columns → 4 edges
    z_edges = np.linspace(zone_bottom, zone_top, 4)   # 3 rows → 4 edges

    # Assign each pitch to a cell. `assign` returns a new frame, so the
    # caller's DataFrame is left untouched. Label 0 is the lowest bin, i.e.
    # col 0 = leftmost column and row 0 = BOTTOM row of the zone.
    binned = df.assign(
        col=pd.cut(df["plate_x"], bins=x_edges, labels=[0, 1, 2], include_lowest=True),
        row=pd.cut(df["plate_z"], bins=z_edges, labels=[0, 1, 2], include_lowest=True),
    )

    # Keep only pitches that land inside the 3×3 zone
    binned = binned.dropna(subset=["row", "col"])
    binned = binned.astype({"row": int, "col": int})

    # -----------------------------
    # 2. Aggregate values per cell
    # -----------------------------
    agg = binned.groupby(["row", "col"]).agg(
        count=("is_strike", "size"),
        prob=("is_strike", "mean"),
    )

    # 3×3 matrices indexed [row, col] with row 0 = bottom, matching z_edges.
    # Probabilities start as NaN so cells with no pitches render blank
    # instead of a misleading "0.000".
    count_matrix = np.zeros((3, 3), dtype=int)
    prob_matrix = np.full((3, 3), np.nan, dtype=float)

    for (r, c), cell in agg.iterrows():
        count_matrix[r, c] = int(cell["count"])
        prob_matrix[r, c] = float(cell["prob"])

    # -----------------------------
    # 3. Create figure & axis
    # -----------------------------
    fig, ax = plt.subplots(figsize=figsize)

    # Set overall view limits to leave space for silhouettes
    ax.set_xlim(-3.0, 3.0)
    ax.set_ylim(-0.5, 5.0)
    ax.axis("off")  # Hide default axis

    # -----------------------------
    # 4. Draw the 3×3 grid boxes
    # -----------------------------
    for i in range(3):  # rows, bottom → top (same order as z_edges)
        for j in range(3):  # cols, left → right
            # coordinates in data space
            x0, x1 = x_edges[j], x_edges[j + 1]
            z0, z1 = z_edges[i], z_edges[i + 1]

            rect = patches.Rectangle(
                (x0, z0),
                x1 - x0,
                z1 - z0,
                fill=False,
                edgecolor="black",
                linewidth=2,
            )
            ax.add_patch(rect)

            # The matrices share the grid's bottom-up row order, so they are
            # indexed directly; flipping them would swap top and bottom rows.
            if show_prob:
                val = prob_matrix[i, j]
                text_str = f"{val:.3f}" if not np.isnan(val) else ""
            else:  # "count"
                val = count_matrix[i, j]
                text_str = str(val) if val > 0 else ""

            ax.text(
                0.5 * (x0 + x1),
                0.5 * (z0 + z1),
                text_str,
                ha="center",
                va="center",
                fontsize=11,
                fontweight="bold",
                color="black",
            )

    # -----------------------------
    # 5. Draw outer strike zone frame
    # -----------------------------
    outer_rect = patches.Rectangle(
        (zone_left, zone_bottom),
        zone_right - zone_left,
        zone_top - zone_bottom,
        fill=False,
        edgecolor="black",
        linewidth=2.5,
    )
    ax.add_patch(outer_rect)

    # -----------------------------
    # 6. Draw home plate
    # -----------------------------
    plate_coords = np.array([
        [-0.708, 0.00],
        [ 0.708, 0.00],
        [ 0.354, -0.146],
        [ 0.000, -0.292],
        [-0.354, -0.146],
    ])
    plate = patches.Polygon(
        plate_coords,
        closed=True,
        facecolor="none",
        edgecolor="black",
        linewidth=1.5,
    )
    ax.add_patch(plate)

    # -----------------------------
    # 7. Simple silhouettes
    # -----------------------------
    def draw_batter(ax, x_offset: float, flip: int = 1):
        """
        Draw a simple batter silhouette at x_offset.
        flip = 1  → face right
        flip = -1 → face left
        """
        color = (0.85, 0.85, 0.85)  # light gray

        # Body (rectangle)
        body = patches.FancyBboxPatch(
            (x_offset - 0.25, 1.3),
            0.5,
            1.2,
            boxstyle="round,pad=0.05",
            linewidth=0,
            facecolor=color,
        )
        ax.add_patch(body)

        # Head (circle)
        head = patches.Circle(
            (x_offset, 2.7),
            radius=0.25,
            facecolor=color,
            edgecolor="none",
        )
        ax.add_patch(head)

        # Legs (lines)
        for side in (-1, 1):
            ax.plot(
                [x_offset + side * 0.15, x_offset + side * 0.25],
                [1.3, 0.6],
                color=color,
                linewidth=6,
                solid_capstyle="round",
            )

        # Bat (line), extending in the direction the batter faces
        ax.plot(
            [x_offset + 0.1 * flip, x_offset + 0.9 * flip],
            [2.9, 3.7],
            color=color,
            linewidth=6,
            solid_capstyle="round",
        )

    # One silhouette on each side of the zone, each with its bat pointing
    # toward the plate.
    draw_batter(ax, x_offset=-2.1, flip=1)
    draw_batter(ax, x_offset=2.1,  flip=-1)

    # -----------------------------
    # 8. Title
    # -----------------------------
    # Keyed on the same flag as the cell text so title and values always agree.
    title_str = "Strike Probability by Zone" if show_prob else "Total Pitches by Zone"
    ax.set_title(title_str, fontsize=14, fontweight="bold", y=0.98)

    return fig

In [None]:
# Plot strike probability heatmap using the helper imported from src.plots.
# NOTE(review): bins=25 presumably sets the heatmap grid resolution — confirm against src.plots.
fig = plot_strike_probability_heatmap(df, bins=25)
plt.show()



## Summary

Key insights:
- Overall strike rate: see the "Strike rate" value printed in Section 2 (Basic Statistics)
- Strike rate varies significantly by count
- Different pitch types have different strike rates
- Plate location is strongly predictive of strike/ball outcome

