In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import joblib
import plotly.graph_objects as go

# Specify your paths
preprocessed_path = r"C:\Users\RaymondCarpenter\Documents\GitHub\bigdatabowl2025\data\combined\final_tracking_data.csv"
model_path = r'data/models/route_predictor_model.pkl'
players_path = r'C:\Users\RaymondCarpenter\Documents\GitHub\bigdatabowl2025\data\raw\players.csv'

# Load the preprocessed data
df = pd.read_csv(preprocessed_path)

# Load player data to get position information
try:
    players_df = pd.read_csv(players_path)
    if 'displayName' not in players_df.columns:
        raise KeyError("The 'displayName' column is missing from players_df.")
    print("Loaded player data.")
except Exception as e:
    print(f"Failed to load player data from {players_path}: {e}")
    exit()

# Load the pre-trained XGBoost model
try:
    xgb_model = joblib.load(model_path)
    print("Loaded pre-trained XGBoost model.")
except Exception as e:
    print(f"Failed to load model from {model_path}: {e}")
    exit()

# Choose a gameId and playId to visualize
gameId = 2022091104
playId = 390

# Filter for the specific game and play
df_play = df[(df['gameId'] == gameId) & (df['playId'] == playId)]

# If no data is found for this play, print a message and exit
if df_play.empty:
    print(f"No data available for gameId: {gameId} and playId: {playId}.")
    exit()

# Merge in the 'position' and 'displayName' information using nflId
df_play = df_play.merge(players_df[['nflId', 'position', 'displayName']], on='nflId', how='left')

# Rename displayName_y to displayName (ensure this is accurate)
df_play.rename(columns={'displayName_y': 'displayName'}, inplace=True)

# Drop duplicate columns (like displayName_x)
df_play.drop(columns=[col for col in df_play.columns if col.endswith('_x') or col.endswith('_y')], inplace=True, errors='ignore')

# Drop duplicate columns (like displayName_x)
df_play.drop(columns=[col for col in df_play.columns if col.endswith('_x') or col.endswith('_y') and col != 'displayName'], inplace=True, errors='ignore')

# Aggregate features (group by frameId and nflId)
df_frame_features = df_play.groupby(['gameId', 'playId', 'frameId', 'nflId']).agg(
    x=('x', 'mean'),
    y=('y', 'mean'),
    s=('s', 'mean'),
    a=('a', 'mean'),
    dis=('dis', 'mean'),
    o=('o', 'mean'),
    dir=('dir', 'mean'),
    quarter=('quarter', 'first'),
    down=('down', 'first'),
    yardsToGo=('yardsToGo', 'first'),
    yardlineNumber=('yardlineNumber', 'first'),
    gameClock=('gameClock', 'first'),
    position=('position', 'first'),
    displayName=('displayName', 'first')  # Include displayName aggregation here
).reset_index()

# Calculate distance-based features
for frame_id in df_frame_features['frameId'].unique():
    frame_data = df_play[df_play['frameId'] == frame_id]
    distances = frame_data.groupby('nflId')[['x', 'y']].apply(lambda x: np.sqrt((x['x'] - x['x'].mean())**2 + (x['y'] - x['y'].mean())**2))
    df_frame_features.loc[df_frame_features['frameId'] == frame_id, 'min_distance'] = distances.min()
    df_frame_features.loc[df_frame_features['frameId'] == frame_id, 'max_distance'] = distances.max()
    df_frame_features.loc[df_frame_features['frameId'] == frame_id, 'mean_distance'] = distances.mean()
    df_frame_features.loc[df_frame_features['frameId'] == frame_id, 'std_distance'] = distances.std()

# Convert gameClock to numeric by converting MM:SS to seconds
try:
    df_frame_features['gameClock'] = df_frame_features['gameClock'].str.split(':').apply(lambda x: int(x[0]) * 60 + int(x[1]))
except Exception as e:
    print(f"Failed to convert gameClock to numeric: {e}")
    exit()

# Extract features for the model
X = df_frame_features[['x', 'y', 's', 'a', 'dis', 'o', 'dir', 'quarter', 'down', 'yardsToGo', 'yardlineNumber', 'gameClock', 'min_distance', 'max_distance', 'mean_distance', 'std_distance']]

# If the dataframe is empty, exit the script
if X.empty:
    print(f"No data available for gameId: {gameId} and playId: {playId} after aggregation.")
    exit()


# Map route prediction indices to route names
route_map = {
    0: 'ANGLE',
    1: 'CORNER',
    2: 'CROSS',
    3: 'FLAT',
    4: 'GO',
    5: 'HITCH',
    6: 'IN',
    7: 'OUT',
    8: 'POST',
    9: 'SCREEN',
    10: 'SLANT',
    11: 'WHEEL'
}
# Make predictions using the loaded XGBoost model
probs = xgb_model.predict_proba(X)
df_frame_features['routePred'] = np.argmax(probs, axis=1)
df_frame_features['routeName'] = df_frame_features['routePred'].map(route_map)

route_colors = {
    'ANGLE': 'blue', 'CORNER': 'red', 'CROSS': 'green', 'FLAT': 'purple', 
    'GO': 'orange', 'HITCH': 'pink', 'IN': 'brown', 'OUT': 'yellow',
    'POST': 'cyan', 'SCREEN': 'magenta', 'SLANT': 'lime', 'WHEEL': 'black'
}


route_shapes = {
    "ANGLE": lambda x, y: [(x, y), (x + 10, y), (x + 5, y + 10)],
    "CORNER": lambda x, y: [(x, y), (x + 10, y), (x + 20, y + 10)],  
    "CROSS": lambda x, y: [(x, y), (x + 10, y + 5)],
    "FLAT": lambda x, y: [(x, y), (x + 5, y)],
    "GO": lambda x, y: [(x, y), (x + 25, y)],
    "HITCH": lambda x, y: [(x, y), (x + 5, y), (x + 5, y + 5)],
    "IN": lambda x, y: [(x, y), (x + 10, y)],
    "OUT": lambda x, y: [(x, y), (x + 10, y)],
    "POST": lambda x, y: [(x, y), (x + 15, y), (x + 20, y + 10)],
    "SCREEN": lambda x, y: [(x, y), (x, y + 5)],
    "SLANT": lambda x, y: [(x, y), (x + 5, y + 5)],
    "WHEEL": lambda x, y: [(x, y), (x + 5, y), (x + 10, y + 10)]
}

frames = []

# Pre-assign labels '1', '2', and '3' to wide receivers
wr_df = df_frame_features[df_frame_features['position'] == 'WR']  # Filter WRs
unique_wrs = wr_df[['nflId', 'displayName']].drop_duplicates().reset_index()

# Assign WR labels '1', '2', '3' based on their appearance
wr_labels = {nflId: str(i + 1) for i, nflId in enumerate(unique_wrs['nflId'])}

# Create frames for animation
frames = []
unique_frames = df_frame_features['frameId'].unique()

for frame_id in unique_frames:
    frame_data = []
    frame_df = df_frame_features[df_frame_features['frameId'] == frame_id]

    for row_index, row in frame_df.iterrows():
        x_start, y_start = row['x'], row['y']
        player_position = row['position']
        
        # Set color for dots (black for non-WR, route color for WR)
        color = 'black' if player_position != 'WR' else route_colors.get(row['routeName'], 'gray')

        # Add a dot for every player with WR labels (1, 2, 3)
        wr_text = wr_labels.get(row['nflId'], '') if player_position == 'WR' else ''
        frame_data.append(go.Scatter(
        x=[x_start],
        y=[y_start],
        mode='markers+text',
    marker=dict(size=10, color=color),  # Dot color
    text=wr_text,  # Pre-assigned WR numbers (1, 2, 3)
    textposition='middle center',
    textfont=dict(size=14, color="black", family="Arial", weight="bold"),  # Black and bold text
    hovertext=f"{row['displayName']}",  # Add WR display name to the hover text
    hoverinfo="text",  # Only show the hovertext
    showlegend=False  # Dots should not appear in the legend
))


        # Add WR route lines with predictions
        if player_position == 'WR' and row['routeName'] in route_shapes:
            points = route_shapes[row['routeName']](x_start, y_start)
            frame_data.append(go.Scatter(
                x=[p[0] for p in points],
                y=[p[1] for p in points],
                mode='lines',
                line=dict(color=color, width=2),
                name=f"{row['displayName']}",  # WR name in the legend
                showlegend=True
            ))

            # Add top-3 predictions for WRs
            if probs is not None and len(probs) > row_index:
                top_3_indices = np.argsort(probs[row_index])[-3:][::-1]  # Top 3 indices
                top_3_routes = [route_map[i] for i in top_3_indices]     # Top 3 routes
                top_3_probs = [probs[row_index][i] for i in top_3_indices]  # Corresponding probabilities

                for i, (route, prob) in enumerate(zip(top_3_routes, top_3_probs), start=1):
                    frame_data.append(go.Scatter(
                        x=[None],
                        y=[None],
                        mode='markers',
                        marker=dict(size=1, color=color),
                        name=f"Prediction {i}: {route} ({prob:.2%})",
                        showlegend=True
                    ))

    # Append the frame data
    frames.append(go.Frame(data=frame_data, name=f"Frame {frame_id}"))


# Create figure with the initial frame
initial_data = frames[0].data if frames else []
fig = go.Figure(
    data=initial_data,
    frames=frames
)

#yardline_number = df_frame_features['yardlineNumber'].iloc[0]
#line_of_scrimmage_x = yardline_number

# Grid line positions
grid_positions = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110]

# Add yard line numbers at the correct mirrored positions
yard_numbers = ['10', '20', '30', '40', '50', '40', '30', '20', '10']


# Ensure plays.csv is loaded for absoluteYardlineNumber and yardsToGo
plays_path = r'C:\Users\RaymondCarpenter\Documents\GitHub\bigdatabowl2025\data\raw\plays.csv'
plays_df = pd.read_csv(plays_path)

# Merge plays data with df_play for absoluteYardlineNumber and yardsToGo
df_play = df_play.merge(
    plays_df[['gameId', 'playId', 'absoluteYardlineNumber', 'yardsToGo']],
    on=['gameId', 'playId'],
    how='left'
)

# Calculate line of scrimmage
if 'absoluteYardlineNumber' in df_play.columns:
    line_of_scrimmage = df_play['absoluteYardlineNumber'].iloc[0]
else:
    # Fallback to x position of the center during line_set
    line_set_frame_id = df_play[df_play['event'] == 'line_set']['frameId'].min()
    center_position = df_play[(df_play['frameId'] == line_set_frame_id) & (df_play['position'] == 'C')]
    if not center_position.empty:
        line_of_scrimmage = center_position['x'].iloc[0]
    else:
        print("No line_set event or center position found. Defaulting to 0.")
        line_of_scrimmage = 0  # Default value if no data is available

# Calculate first down marker
if 'yardsToGo' in df_play.columns:
    first_down_marker = line_of_scrimmage + df_play['yardsToGo'].iloc[0]
else:
    print("No yardsToGo available. First down marker will not be added.")
    first_down_marker = None

# Add line of scrimmage to the figure
fig.add_trace(
    go.Scatter(
        x=[line_of_scrimmage, line_of_scrimmage],
        y=[0, 53.3],
        mode='lines',
        line=dict(color='blue', width=3, dash='dash'),
        name='Line of Scrimmage',
        showlegend=False,
        hoverinfo='none'
    )
)

# Add first down marker to the figure if calculated
if first_down_marker is not None:
    fig.add_trace(
        go.Scatter(
            x=[first_down_marker, first_down_marker],
            y=[0, 53.3],
            mode='lines',
            line=dict(color='yellow', width=3, dash='dash'),
            name='First Down Marker',
            showlegend=False,
            hoverinfo='none'
        )
    )


# Bottom yard numbers
fig.add_trace(
    go.Scatter(
        x=grid_positions,
        y=[5] * len(grid_positions),
        mode='text',
        text=yard_numbers,
        textfont_size=30,
        textfont_family="Courier New, monospace",
        textfont_color="#ffffff",
        showlegend=False,
        hoverinfo='none'
    )
)

# Top yard numbers
fig.add_trace(
    go.Scatter(
        x=grid_positions,
        y=[48.5] * len(grid_positions),
        mode='text',
        text=yard_numbers,
        textfont_size=30,
        textfont_family="Courier New, monospace",
        textfont_color="#ffffff",
        showlegend=False,
        hoverinfo='none'
    )
)

fig.update_layout(
    title="Offensive Player Routes - Frame by Frame (Up to SNAP)",
    autosize=False,
    width=1200,
    height=600,
    xaxis=dict(range=[-10, 110], autorange=False, tickmode='array', tickvals=np.arange(10, 121, 10).tolist(), showticklabels=False),
    yaxis=dict(range=[0, 53.3], autorange=False, showgrid=False, showticklabels=False),
    plot_bgcolor='#00B140',
    sliders=[dict(
        steps=[dict(method="animate", args=[[f"Frame {frame_id}"], {"mode": "immediate", "frame": {"duration": 500, "redraw": True}}], label=str(frame_id)) for frame_id in unique_frames],
        currentvalue={"prefix": "Frame: "},
        transition={"duration": 0},
    )],  
    updatemenus=[dict(
        type="buttons", 
        showactive=False,
        buttons=[
            dict(label="Play", method="animate", args=[None, {"frame": {"duration": 500, "redraw": True}, "fromcurrent": True}]),
            dict(label="Pause", method="animate", args=[[None], {"frame": {"duration": 0, "redraw": False}, "mode": "immediate"}])
        ]
    )],
    showlegend=True,  # Show legend
    legend=dict(
        x=1.05,  # Position legend outside the plot area
        y=1,  # Position legend at the top
        traceorder='normal',
        font=dict(size=12)
    )
)


# Show the figure
fig.show()

Loaded player data.
Loaded pre-trained XGBoost model.
No yardsToGo available. First down marker will not be added.
