In [1]:
''' Imports '''

import pandas as pd
import polars as pl
import numpy as np
import math

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from scipy.spatial import distance
from scipy.stats.mstats import trimmed_var
from scipy.stats import percentileofscore

from sklearn.cluster import KMeans, DBSCAN
from sklearn.decomposition import PCA
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import silhouette_score, davies_bouldin_score
from sklearn.preprocessing import StandardScaler

from prep_data import load_pbp_participation_data, load_stats_team_tendencies_offense, load_stats_team_tendencies_defense

In [2]:
''' Load Data '''

# Source tables come from the project's prep_data loaders.
pbp_data = load_pbp_participation_data()
offense_tendencies = load_stats_team_tendencies_offense()

# Preview the first five rows of each table as plain, untruncated text.
print(pbp_data.head(n=5).to_string())
print(offense_tendencies.head(n=5).to_string())

KeyboardInterrupt: 

In [None]:
''' Constants '''

# Columns of `offense_tendencies` compared when measuring how similar two
# team-seasons are (see get_closest_teams).
OFFENSE_FEATURES = [
    # Volume
    'Plays / Game',
    'Drives / Game',
    # Run/pass mix
    '% Pass',
    'Scrambles / Game',
    # Personnel usage
    '% Plays 11 Personnel',
    '% Plays Mult RBs',
    '% Plays Zero RBs',
    '% Plays Mult TEs',
    '% Plays Zero TEs',
    '% Plays Extra OL',
    # Formation
    '% Under Center',
    '% Shotgun',
    'Shotgun % Pass',
    'Under Center % Pass',
    # Passing profile
    'ADOT',
    'ADOT to Sticks',
    'Avg Time to Throw',
    '% Passes Behind LOS',
    '% Passes Deep',
    'MaxTargetShare',
    # Rushing profile
    '% Rush Inside',
    '% Rush Outside',
    'MaxRushAttemptsShare',
]

# Columns drawn on the team radar chart and listed in its side table.
VIZ_FEATURES = [
    'Plays / Game',
    '% Pass',
    'Scrambles / Game',
    '% Plays 11 Personnel',
    '% Plays Heavy Personnel',
    '% Under Center',
    '% Shotgun',
    'ADOT',
    'Avg Time to Throw',
    'MaxTargetShare',
    '% Rush Outside',
    'MaxRushAttemptsShare',
]


# Offense

In [None]:
''' Get Closest Teams '''


def get_closest_teams(team: str, season: int, standardize: bool = True):
    """
    Rank every other team-season by how close its offensive tendencies are
    to those of the requested team-season.

    Closeness is the Euclidean distance between the OFFENSE_FEATURES vectors
    of two rows of `offense_tendencies`.

    Parameters
    ----------
    team : str
        Value of the 'posteam' index level to look up.
    season : int
        Value of the 'season' index level to look up.
    standardize : bool, default True
        When True, every feature is z-scored (mean 0, population std 1,
        computed over all rows of `offense_tendencies`) before distances are
        measured. The feature list mixes per-game counts with 0-1 shares, so
        without this the large-magnitude columns decide the ranking almost
        on their own. Pass False to measure distance in raw feature units.

    Returns
    -------
    pandas.DataFrame
        One row per remaining team-season (same index as
        `offense_tendencies`, minus the requested row) with a single
        'distance' column, sorted ascending (most similar first).

    Raises
    ------
    IndexError
        If no row matches the requested team and season.
    """

    ## Data ##
    index = offense_tendencies.index
    is_target = (
        (index.get_level_values('posteam') == team)
        & (index.get_level_values('season') == season)
    )
    if not is_target.any():
        raise IndexError(f"No offensive tendencies found for {season} {team}")

    features = offense_tendencies[OFFENSE_FEATURES].astype(float)

    if standardize:
        center = features.mean()
        spread = features.std(ddof=0)
        # A constant column carries no information; dividing by 1 instead of
        # 0 leaves it at zero after centering rather than producing NaN/inf.
        spread = spread.where(spread > 0, 1.0)
        features = (features - center) / spread

    # Requested team-season (first match) versus every non-matching row
    target_vals = features.loc[is_target].to_numpy()[0]
    others = features.loc[~is_target]

    ## Find Distances ##
    distances = np.linalg.norm(others.to_numpy() - target_vals, axis=1)

    df = pd.DataFrame(
        index=others.index,
        data={'distance': distances}
    ).sort_values(by='distance', ascending=True)

    return df


# Example: every other team-season ranked by similarity to DET 2024.
closest_teams = get_closest_teams(team='DET', season=2024)
print(closest_teams)


In [None]:
''' Visualize a team '''
# TODO - % rushes from under center vs shotgun


def get_team_pct_scores(features: list, feature_vals: list):
    """
    Express each feature value as a percentile (a 0-1 fraction) of the
    matching `offense_tendencies` column.

    `features` and `feature_vals` are parallel lists: `feature_vals[i]` is
    scored against column `features[i]`. The 'weak' percentile is used, i.e.
    the share of column values less than or equal to the given value.

    Returns a list of fractions in the same order as `features`.
    """

    def _fraction_at_or_below(column, value):
        population = offense_tendencies[column].tolist()
        return percentileofscore(population, value, kind='weak') / 100

    return [
        _fraction_at_or_below(column, feature_vals[position])
        for position, column in enumerate(features)
    ]


def _offense_viz_profile(team: str, season: int):
    """
    Collect the radar-chart inputs for one team-season over VIZ_FEATURES.

    Returns a tuple `(vals_fmt, pct_scores, pct_scores_fmt)`:
    display-formatted raw values, percentile fractions (0-1), and the same
    percentiles formatted as percentages.

    Raises IndexError if `offense_tendencies` has no row for the team-season.
    """
    index = offense_tendencies.index
    row_mask = (
        (index.get_level_values('posteam') == team)
        & (index.get_level_values('season') == season)
    )
    team_sl = offense_tendencies.loc[row_mask, :]
    if team_sl.empty:
        raise IndexError(f"No offensive tendencies found for {season} {team}")

    feature_vals = team_sl[VIZ_FEATURES].values.tolist()[0]

    # Features whose name starts with '%' are shares and are shown as
    # percentages; everything else is shown as a plain two-decimal number.
    vals_fmt = [
        f'{val:.1%}' if feature.startswith('%') else f'{val:.2f}'
        for feature, val in zip(VIZ_FEATURES, feature_vals)
    ]

    pct_scores = get_team_pct_scores(features=VIZ_FEATURES, feature_vals=feature_vals)
    pct_scores_fmt = [f'{s:.1%}' for s in pct_scores]

    return vals_fmt, pct_scores, pct_scores_fmt


def offense_team_spider_chart(team: str, season: int, show_similar_team: bool = True):
    """
    Show a radar chart of a team-season's offensive tendencies next to a
    table of the underlying values.

    Each radar spoke is one VIZ_FEATURES entry, plotted as the team's
    percentile (0-1) among all rows of `offense_tendencies`. The table lists
    the requested team's raw value and percentile for every feature.

    Parameters
    ----------
    team : str
        Value of the 'posteam' index level.
    season : int
        Value of the 'season' index level.
    show_similar_team : bool, default True
        When True, the nearest team-season according to `get_closest_teams`
        is looked up, printed, and drawn in red beneath the requested team.
        When False, that lookup is skipped entirely.

    Raises
    ------
    IndexError
        If the requested team-season is not present in `offense_tendencies`.
    """

    ## Data - This Team ##
    vals_fmt, pct_scores, pct_scores_fmt = _offense_viz_profile(team, season)

    ## Figure ##
    fig = make_subplots(
        rows=1, cols=2,
        column_widths=[4,3],
        horizontal_spacing=0.1,
        specs=[[{"type": "polar"}, {"type": "domain"}]]
    )

    ## Data + radar - Similar team (added first so it is drawn underneath) ##
    if show_similar_team:
        closest_teams_df = get_closest_teams(team=team, season=season)
        closest_team = closest_teams_df.index[0][0]
        closest_season = closest_teams_df.index[0][1]
        print(closest_team, closest_season)

        _, sim_team_pct_scores, _ = _offense_viz_profile(closest_team, closest_season)

        sim_team_radar = px.line_polar(
            r=sim_team_pct_scores,
            theta=VIZ_FEATURES,
            line_close=True,
            color_discrete_sequence=['red']
        )
        # Plotly Express hides unnamed traces from the legend; name the
        # trace so the legend enabled below actually identifies it.
        sim_team_radar.update_traces(
            name=f'Most similar: {closest_season} {closest_team}',
            showlegend=True
        )
        for trace in sim_team_radar.data:
            fig.add_trace(trace, row=1, col=1)

    ## Radar - This team ##
    team_radar = px.line_polar(
        r=pct_scores,
        theta=VIZ_FEATURES,
        line_close=True,
        color_discrete_sequence=['blue'],
    )
    team_radar.update_traces(name=f'{season} {team}', showlegend=True)
    for trace in team_radar.data:
        fig.add_trace(trace, row=1, col=1)

    ## Table - This team ##
    fig.add_trace(
        go.Table(
            columnwidth=[2,1,1],
            header={
                "values": ['Component', 'Value', 'Percentile'],
            },
            cells={
                "values": [VIZ_FEATURES, vals_fmt, pct_scores_fmt]
            }
        ),
        row=1, col=2
    )

    # Styling for the radar traces only (column 1); the table is in column 2
    fig.update_traces(
        fill='toself',
        opacity=0.6,
        mode='lines+markers+text',
        col=1
    )

    fig.update_layout(
        title_text=f"Team: {season} {team}",
        polar=dict(radialaxis_range=(0,1)),
        margin=dict(b=50, r=50, l=75, t=75),
        showlegend=True,
    )

    fig.show()

# Example: radar chart for IND 2024 (with its most similar team-season).
offense_team_spider_chart(team='IND', season=2024)


In [None]:
''' Team personnel spider chart '''

PERSONNEL_COLS = ['11', '12', '13', '21', '22', 'Other']

# Number of plays for every (team, season, personnel grouping) combination.
offense_personnel = (
    pbp_data
    .groupby(['posteam', 'season', 'OffensePersonnelGroup'])
    .agg(Plays=('posteam', 'size'))
)

# Share of each team-season's plays that used the grouping.
offense_personnel['% Plays'] = (
    offense_personnel['Plays']
    / offense_personnel.groupby(level=['posteam', 'season'])['Plays'].sum()
)

# Percentile rank of that share among all team-seasons, per grouping.
offense_personnel['% Plays %ile'] = (
    offense_personnel
    .groupby('OffensePersonnelGroup')['% Plays']
    .rank(pct=True, ascending=True)
)

# Reindex the personnel level against the groupings in PERSONNEL_COLS.
offense_personnel = offense_personnel.reindex(
    labels=PERSONNEL_COLS,
    level='OffensePersonnelGroup',
)

print(offense_personnel.sort_values(by='% Plays', ascending=False).head(n=5).to_string())
# print(offense_personnel.loc[offense_personnel.index.get_level_values(2) == '11',:].sort_values(by='% Plays', ascending=False).to_string())

def offense_personnel_spider_chart(team: str, season: int):
    """
    Show a radar chart of how often a team-season used each offensive
    personnel grouping, next to a table of the same numbers.

    Reads the module-level `offense_personnel` table. The radar plots the
    '% Plays' share per grouping; the table adds the '% Plays %ile' column.
    The radial axis runs from 0 to the largest '% Plays' in the whole table
    plus 0.05, so charts for different teams share one scale.
    """

    ## Data ##

    # Rows of the personnel table for the requested team-season
    level_vals = offense_personnel.index.get_level_values
    in_team = level_vals('posteam') == team
    in_season = level_vals('season') == season
    team_sl = offense_personnel.loc[in_team & in_season, :]

    # Grouping labels with their share of plays and percentile
    groupings = team_sl.index.get_level_values('OffensePersonnelGroup').tolist()
    shares = team_sl['% Plays'].tolist()
    share_pctiles = team_sl['% Plays %ile'].tolist()

    ## Figure ##

    # Polar panel on the left, table panel on the right
    fig = make_subplots(
        rows=1,
        cols=2,
        column_widths=[4, 3],
        horizontal_spacing=0.1,
        specs=[[{"type": "polar"}, {"type": "domain"}]],
    )

    radar = px.line_polar(r=shares, theta=groupings, line_close=True)
    for radar_trace in radar.data:
        fig.add_trace(radar_trace, row=1, col=1)

    # Common radial scale across teams
    polar_range = [0, offense_personnel['% Plays'].max() + 0.05]
    print(polar_range)
    fig.update_layout(
        title_text=f"Offensive Personnel: {season} {team}",
        polar={'radialaxis': {'range': polar_range}},
        margin={'b': 50, 'r': 50, 'l': 75, 't': 75},
    )

    shares_fmt = [f'{val:.1%}' for val in shares]
    pctiles_fmt = [f'{p:.1%}' for p in share_pctiles]
    summary_table = go.Table(
        columnwidth=[2, 1, 1],
        header={"values": ['Personnel', 'Value', 'Percentile']},
        cells={"values": [groupings, shares_fmt, pctiles_fmt]},
    )
    fig.add_trace(summary_table, row=1, col=2)

    fig.show()


# Peek at the personnel table, then chart one team-season.
print(offense_personnel.head(n=5).to_string())

offense_personnel_spider_chart(team='LA', season=2023)

# Defense

In [None]:
# Defensive tendencies table from the project's prep_data loader.
defense_tendencies = load_stats_team_tendencies_defense()

# Preview the first five rows as plain, untruncated text.
print(defense_tendencies.head(n=5).to_string())

In [None]:
''' Defense Coverage Spider Chart '''

# NOTE: this rebinds PERSONNEL_COLS, which the offensive personnel cell also
# defines with a different meaning; from here on it holds coverage columns.
# `defense_tendencies` column holding the share of plays in each coverage
PERSONNEL_COLS = ['COVER_%d %% Plays' % shell for shell in (1, 2, 3, 4, 6)]
# Display label for each coverage, in the same order as PERSONNEL_COLS
PERSONNEL_TYPES = ['Cover ' + str(shell) for shell in (1, 2, 3, 4, 6)]

def defense_coverage_spider_chart(team: str, season: int):
    """
    Show a radar chart of how often a defense played each coverage in a
    season, next to a table of the same values.

    Reads the module-level `defense_tendencies` table, taking the columns
    named in PERSONNEL_COLS and labelling them with PERSONNEL_TYPES. The
    radial axis runs from 0 to the largest coverage share found anywhere in
    the table plus 0.05, so charts for different teams share one scale.

    Parameters
    ----------
    team : str
        Value of the 'defteam' index level.
    season : int
        Value of the 'season' index level.

    Raises
    ------
    IndexError
        If `defense_tendencies` has no row for the requested team-season.
    """

    ## Data ##

    # Row of the defensive tendencies table for the requested team-season
    index = defense_tendencies.index
    row_mask = (
        (index.get_level_values('defteam') == team)
        & (index.get_level_values('season') == season)
    )
    team_sl = defense_tendencies.loc[row_mask, :]
    if team_sl.empty:
        raise IndexError(f"No defensive tendencies found for {season} {team}")

    # Coverage labels and the share of plays in each coverage
    cols = PERSONNEL_TYPES
    vals = team_sl[PERSONNEL_COLS].values.tolist()[0]

    vals_fmt = [f'{val:.1%}' for val in vals]

    ## Figure ##

    fig = make_subplots(
        rows=1, cols=2,
        column_widths=[5,3],
        horizontal_spacing=0.15,
        specs=[[{"type": "polar"}, {"type": "domain"}]]
    )

    spider = px.line_polar(
        r=vals,
        theta=cols,
        line_close=True,
        text=vals_fmt,
        # `markers` is a boolean switch in Plotly Express; the draw mode
        # (lines + markers + text) is set through update_traces below.
        markers=True
    )
    for trace in spider.data:
        fig.add_trace(
            trace,
            row=1, col=1
        )

    # Common radial scale across teams
    polar_range = [0, defense_tendencies[PERSONNEL_COLS].max().max() + 0.05]
    print(polar_range)

    # Applied before the table is added, so only the radar trace is affected
    fig.update_traces(
        mode='lines+markers+text',
        textposition='top center'
    )
    fig.update_layout(
        title_text=f"Coverages: {season} {team}",
        polar=dict(
            radialaxis=dict(
                range=polar_range,
            ),
            angularaxis=dict(
                direction='clockwise'  # Reverses theta direction
            )
        ),
        margin=dict(b=50, r=50, l=75, t=75)
    )

    fig.add_trace(
        go.Table(
            # Two columns: coverage label and share of plays
            columnwidth=[2,1],
            header={
                "values": ['Coverage', 'Value'],
            },
            cells={
                "values": [cols, vals_fmt]
            }
        ),
        row=1, col=2
    )

    fig.show()



# Example: coverage radar chart for PHI 2024.
defense_coverage_spider_chart(team='PHI', season=2024)