In [1]:
import pandas as pd
import os
import numpy as np
def get_shot_categories_and_mapping():
    # Updated shot categories with separated 2PT and 3PT jump shots
    shot_categories = {
        'TWO_POINT_JUMPERS': [
            '2PT Jump Shot',
            '2PT Pullup Jump Shot',
            '2PT Step Back Jump Shot',
            '2PT Fadeaway Jump Shot',
            '2PT Running Jump Shot',
            '2PT Turnaround Jump Shot',
            '2PT Driving Jump Shot'
        ],
        
        'THREE_POINT_JUMPERS': [
            '3PT Jump Shot',
            '3PT Pullup Jump Shot',
            '3PT Step Back Jump Shot',
            '3PT Fadeaway Jump Shot',
            '3PT Running Jump Shot',
            '3PT Turnaround Jump Shot',
            '3PT Driving Jump Shot'
        ],
        
        'LAYUPS': [
            'Standard Layup',
            'Driving Layup',
            'Finger Roll',
            'Reverse Layup',
            'Putback Layup',
            'Alley Oop Layup'
        ],
        
        'DUNKS': [
            'Standard Dunk',
            'Driving Dunk',
            'Reverse Dunk',
            'Putback Dunk',
            'Alley Oop Dunk',
            'Specialty Dunk'
        ],
        
        'POST_MOVES': [
            'Hook Shot',
            'Running Hook Shot',
            'Turnaround Hook Shot',
            'Driving Hook Shot'
        ],
        
        'FLOATING_SHOTS': [
            '2PT Floating Jump Shot',
            '3PT Floating Jump Shot'
        ],
        
        'TIP_SHOTS': [
            'Tip Shot'
        ],
        
        'NO_SHOT': ['No Shot']
    }
    
    # Updated mapping to include shot type distinction
    shot_mapping = {
        # 2PT Jump Shots
        'Jump Shot': lambda x: '3PT Jump Shot' if x == '3' else '2PT Jump Shot',
        'Pullup Jump shot': lambda x: '3PT Pullup Jump Shot' if x == '3' else '2PT Pullup Jump Shot',
        'Pullup Bank shot': lambda x: '3PT Pullup Jump Shot' if x == '3' else '2PT Pullup Jump Shot',
        'Step Back Jump shot': lambda x: '3PT Step Back Jump Shot' if x == '3' else '2PT Step Back Jump Shot',
        'Step Back Bank Jump Shot': lambda x: '3PT Step Back Jump Shot' if x == '3' else '2PT Step Back Jump Shot',
        'Fadeaway Jump Shot': lambda x: '3PT Fadeaway Jump Shot' if x == '3' else '2PT Fadeaway Jump Shot',
        'Fadeaway Bank shot': lambda x: '3PT Fadeaway Jump Shot' if x == '3' else '2PT Fadeaway Jump Shot',
        'Running Jump Shot': lambda x: '3PT Running Jump Shot' if x == '3' else '2PT Running Jump Shot',
        'Running Pull-Up Jump Shot': lambda x: '3PT Running Jump Shot' if x == '3' else '2PT Running Jump Shot',
        'Running Bank shot': lambda x: '3PT Running Jump Shot' if x == '3' else '2PT Running Jump Shot',
        'Jump Bank Shot': lambda x: '3PT Jump Shot' if x == '3' else '2PT Jump Shot',
        'Turnaround Jump Shot': lambda x: '3PT Turnaround Jump Shot' if x == '3' else '2PT Turnaround Jump Shot',
        'Turnaround Fadeaway shot': lambda x: '3PT Turnaround Jump Shot' if x == '3' else '2PT Turnaround Jump Shot',
        'Turnaround Bank shot': lambda x: '3PT Turnaround Jump Shot' if x == '3' else '2PT Turnaround Jump Shot',
        'Turnaround Fadeaway Bank Jump Shot': lambda x: '3PT Turnaround Jump Shot' if x == '3' else '2PT Turnaround Jump Shot',
        'Driving Jump shot': lambda x: '3PT Driving Jump Shot' if x == '3' else '2PT Driving Jump Shot',
        'Driving Bank shot': lambda x: '3PT Driving Jump Shot' if x == '3' else '2PT Driving Jump Shot',
        'Floating Jump shot': lambda x: '3PT Floating Jump Shot' if x == '3' else '2PT Floating Jump Shot',
        'Driving Floating Jump Shot': lambda x: '3PT Floating Jump Shot' if x == '3' else '2PT Floating Jump Shot',
        'Driving Floating Bank Jump Shot': lambda x: '3PT Floating Jump Shot' if x == '3' else '2PT Floating Jump Shot',
        
        # Non-jump shots (remain the same regardless of shot type)
        'Layup Shot': 'Standard Layup',
        'Running Layup Shot': 'Standard Layup',
        'Cutting Layup Shot': 'Standard Layup',
        'Driving Layup Shot': 'Driving Layup',
        'Driving Reverse Layup Shot': 'Driving Layup',
        'Reverse Layup Shot': 'Reverse Layup',
        'Running Reverse Layup Shot': 'Reverse Layup',
        'Tip Layup Shot': 'Putback Layup',
        'Putback Layup Shot': 'Putback Layup',
        'Alley Oop Layup shot': 'Alley Oop Layup',
        'Running Alley Oop Layup Shot': 'Alley Oop Layup',
        'Driving Finger Roll Layup Shot': 'Finger Roll',
        'Finger Roll Layup Shot': 'Finger Roll',
        'Running Finger Roll Layup Shot': 'Finger Roll',
        'Cutting Finger Roll Layup Shot': 'Finger Roll',
        'Finger Roll Shot': 'Finger Roll',
        'Running Finger Roll Shot': 'Finger Roll',
        'Turnaround Finger Roll Shot': 'Finger Roll',
        'Driving Finger Roll Shot': 'Finger Roll',
        'Dunk Shot': 'Standard Dunk',
        'Slam Dunk Shot': 'Standard Dunk',
        'Running Dunk Shot': 'Standard Dunk',
        'Running Slam Dunk Shot': 'Standard Dunk',
        'Driving Dunk Shot': 'Driving Dunk',
        'Driving Slam Dunk Shot': 'Driving Dunk',
        'Driving Reverse Dunk Shot': 'Reverse Dunk',
        'Reverse Dunk Shot': 'Reverse Dunk',
        'Reverse Slam Dunk Shot': 'Reverse Dunk',
        'Running Reverse Dunk Shot': 'Reverse Dunk',
        'Putback Dunk Shot': 'Putback Dunk',
        'Putback Reverse Dunk Shot': 'Putback Dunk',
        'Putback Slam Dunk Shot': 'Putback Dunk',
        'Alley Oop Dunk Shot': 'Alley Oop Dunk',
        'Running Alley Oop Dunk Shot': 'Alley Oop Dunk',
        'Cutting Dunk Shot': 'Specialty Dunk',
        'Tip Dunk Shot': 'Specialty Dunk',
        'Follow Up Dunk Shot': 'Specialty Dunk',
        'Hook Shot': 'Hook Shot',
        'Jump Hook Shot': 'Hook Shot',
        'Running Hook Shot': 'Running Hook Shot',
        'Running Bank Hook Shot': 'Running Hook Shot',
        'Turnaround Hook Shot': 'Turnaround Hook Shot',
        'Turnaround Bank Hook Shot': 'Turnaround Hook Shot',
        'Driving Hook Shot': 'Driving Hook Shot',
        'Driving Bank Hook Shot': 'Driving Hook Shot',
        'Hook Bank Shot': 'Hook Shot',
        'Jump Bank Hook Shot': 'Hook Shot',
        'Tip Shot': 'Tip Shot',
        'Running Tip Shot': 'Tip Shot',
        'No Shot': 'No Shot'
    }
    
    return shot_categories, shot_mapping

def collect_year(year):
    path = 'team/'+str(year)+'/'
    arr = os.listdir(path)
    csvs = [file for file in arr if '.csv' in file and file !='avg.csv']
    frames= []
    for csv in csvs:
        df=pd.read_csv(path+csv)
        frames.append(df)
    master = pd.concat(frames)
    return master

def collect_types():
    shot_categories, mapping = get_shot_categories_and_mapping()
    category_map = {}
    
    # Build category mapping dictionary
    for shot_type, shotlist in shot_categories.items():
        for shot in shotlist:
            category_map[shot] = shot_type

    all_types = []
    
    for year in range(1997, 2026):
        df = collect_year(year)

        threes = ['24+ ft.', 'Back Court Shot']


        df['SHOT_TYPE'] = df['SHOT_ZONE_RANGE'].apply(lambda x:'3PT Field Goal' if x in threes else '2PT Field Goal')

        #df['SHOT_TYPE'].fillna('2PT Field Goal',inplace=True)
     

        value_map = {'2PT Field Goal': '2', '3PT Field Goal': '3'}
        if year>2004:
            print(df[df.PLAYER_ID==2544]['SHOT_TYPE'].value_counts())
        
            print(f" 3Pt Shots before mapping for player 2544: {df[(df.PLAYER_ID==2544)&(df.SHOT_TYPE.str.contains('3P'))]['SHOT_ATTEMPTED_FLAG'].sum()}")
        df['SHOT_TYPE'] = df['SHOT_TYPE'].map(value_map)

        df['shot_description'] = df.apply(
            lambda row: mapping[row['ACTION_TYPE']](row['SHOT_TYPE']) 
            if callable(mapping[row['ACTION_TYPE']]) 
            else mapping[row['ACTION_TYPE']], 
            axis=1
        )

        
        df['SHOT_ATTEMPTED_FLAG'] = df['SHOT_ATTEMPTED_FLAG'].astype(int)
        df['SHOT_MADE_FLAG'] = df['SHOT_MADE_FLAG'].astype(int)
        
        # Convert shot type to '2' or '3'
        df['SHOT_TYPE'] = np.where(df['SHOT_TYPE'].str.contains('3pt', case=False, na=False), '3', '2')
        
        # Create player breakdown
        player_breakdown = (
            df.groupby(['PLAYER_NAME', 'PLAYER_ID', 'shot_description'])
            .agg(
                shot_count=('SHOT_ATTEMPTED_FLAG', 'sum'),
                shot_makes=('SHOT_MADE_FLAG', 'sum'),
            )
            .reset_index()
        )
        
        # Apply the mapping with shot type consideration

        # Debug unique values before mapping
        print("\nUnique shot descriptions before mapping:")
        print(player_breakdown['shot_description'].unique())
        
        # Apply mappings and check for missing values
        #player_breakdown['old_description'] = player_breakdown['shot_description']
       
        
        # Debug missing mappings
        missing_mappings = player_breakdown[player_breakdown['shot_description'].isna()]
        if not missing_mappings.empty:
            print("\nMissing mappings found for:")
            print(missing_mappings['old_description'].unique())
        
        # Apply category mapping
        player_breakdown['shot_group'] = player_breakdown['shot_description'].map(category_map)
        
        # Debug category mapping
        missing_categories = player_breakdown[player_breakdown['shot_group'].isna()]
        if not missing_categories.empty:
            print("\nMissing categories found for:")
            print(missing_categories['shot_description'].unique())
        
        print(f" 3Pt Shots after mapping for player 2544: {player_breakdown[(player_breakdown.PLAYER_ID==2544)&(player_breakdown.shot_description.str.contains('3P'))]['shot_count'].sum()}")
        
        # Calculate eFG%

        
        # Filter out invalid shot combinations
        '''
        invalid_masks = [
            (player_breakdown['shot_group'] == 'LAYUPS') & (player_breakdown['shot_type'] == '3PT Field Goal'),
            (player_breakdown['shot_group'] == 'POST_MOVES') & (player_breakdown['shot_type'] == '3PT Field Goal')
        ]
        for mask in invalid_masks:
            player_breakdown = player_breakdown[~mask]
        '''
        #print(f"Final shots for player 2544: {player_breakdown[player_breakdown.PLAYER_ID==2544]['shot_count'].sum()}")
        
        # Add year and save breakdown
        player_breakdown['year'] = year
        player_breakdown.to_csv(f'shot_breakdown/{year}_shotbreakdown.csv', index=False)
        
        # Create overall shot breakdown
        shot_breakdown = (
            df.groupby(['shot_description'])
            .agg(
                shot_count=('SHOT_ATTEMPTED_FLAG', 'sum'),
                shot_makes=('SHOT_MADE_FLAG', 'sum')
            )
            .reset_index()
            .rename(columns={})
        )
        
        shot_breakdown['year'] = year
        shot_breakdown['shot_group'] = shot_breakdown['shot_description'].map(category_map)
        all_types.append(shot_breakdown)
    
    return pd.concat(all_types)

all_shots = collect_types()


# Add No Shot category if needed
shot_categories,mapping=get_shot_categories_and_mapping()
category_map={}
for shot_type in shot_categories.keys():
    shotlist = shot_categories[shot_type]
    for shot in shotlist:
        category_map[shot]=shot_type

#all_shots['shot_group'] = all_shots['shot_description'].map(category_map)

other_categories = {
   
}

all_shots.dropna(subset='shot_description',inplace=True)

all_shots.to_csv('shot_types.csv')
all_shots


Unique shot descriptions before mapping:
['2PT Jump Shot' '3PT Jump Shot' 'Driving Layup' 'Hook Shot'
 'Standard Dunk' 'Standard Layup' 'Tip Shot' '2PT Running Jump Shot'
 'Driving Dunk' '3PT Running Jump Shot']
 3Pt Shots after mapping for player 2544: 0

Unique shot descriptions before mapping:
['2PT Jump Shot' '2PT Running Jump Shot' '3PT Jump Shot' 'Driving Dunk'
 'Driving Layup' 'Hook Shot' 'No Shot' 'Standard Dunk' 'Standard Layup'
 'Tip Shot' '3PT Running Jump Shot']
 3Pt Shots after mapping for player 2544: 0

Unique shot descriptions before mapping:
['2PT Jump Shot' '3PT Jump Shot' 'Driving Dunk' 'Driving Layup'
 'Hook Shot' 'Standard Dunk' 'Standard Layup' 'Tip Shot'
 '2PT Running Jump Shot' '3PT Running Jump Shot' 'No Shot']
 3Pt Shots after mapping for player 2544: 0

Unique shot descriptions before mapping:
['2PT Jump Shot' '3PT Jump Shot' 'Driving Layup' 'Hook Shot'
 'Standard Dunk' 'Standard Layup' 'Tip Shot' '2PT Running Jump Shot'
 'Driving Dunk' '3PT Running Jump Sho

Unnamed: 0,shot_description,shot_count,shot_makes,year,shot_group
0,2PT Jump Shot,181610,70352,1997,TWO_POINT_JUMPERS
1,2PT Running Jump Shot,7258,4878,1997,TWO_POINT_JUMPERS
2,3PT Jump Shot,40598,14210,1997,THREE_POINT_JUMPERS
3,3PT Running Jump Shot,68,20,1997,THREE_POINT_JUMPERS
4,Driving Dunk,2082,2028,1997,DUNKS
...,...,...,...,...,...
24,Reverse Layup,1624,1070,2025,LAYUPS
25,Specialty Dunk,3828,3410,2025,DUNKS
26,Standard Dunk,4062,3692,2025,DUNKS
27,Standard Layup,16778,10044,2025,LAYUPS


In [2]:
len(all_shots['shot_description'].unique())

34

In [3]:
player_shots=pd.concat([pd.read_csv('shot_breakdown/'+str(year)+'_shotbreakdown.csv') for year in range(1997,2026)] )
player_shots.dropna(subset='shot_description',inplace=True)
player_shots

player_ids=player_shots['PLAYER_ID'].unique().tolist()


for player_id in player_ids:

    tempframe=player_shots[player_shots.PLAYER_ID==player_id]
    tempframe.to_csv('../extra_data/shot_averages/'+str(player_id)+'.csv',index=False)


In [4]:
import plotly.graph_objects as go
import pandas as pd

def create_shot_efficiency_chart(selected_player, selected_group, selected_year, custom_width=1200, custom_height=800):
    """
    Creates a shot efficiency comparison chart between a player and league averages.
    
    Parameters:
    -----------
    selected_player : str
        Player ID
    selected_group : str
        Shot group category (e.g., 'JUMP_SHOTS')
    selected_year : int
        Selected season year
    custom_width : int, optional
        Width of the chart (default: 1200)
    custom_height : int, optional
        Height of the chart (default: 800)
        
    Returns:
    --------
    plotly.graph_objects.Figure
        The complete figure object
    """
    
    # Load data
    avg_df = pd.read_csv('https://raw.githubusercontent.com/gabriel1200/shot_data/refs/heads/master/shot_types.csv')
    player_df = pd.read_csv(f'https://raw.githubusercontent.com/gabriel1200/shot_averages/refs/heads/main/{selected_player}.csv')
    
    # Filter data
    if selected_player:
        player_df = player_df[player_df['PLAYER_ID'] == int(selected_player)]
    group_df = player_df[player_df['shot_group'] == selected_group]
    
    # Process shot data
    shot_data = []
    chart_data = {'labels': [], 'player_pct': [], 'league_pct': [], 'attempts': []}
    
    for shot_desc in group_df['shot_description'].unique():
        player_shots = group_df[group_df['shot_description'] == shot_desc]
        league_avg = avg_df[(avg_df['shot_description'] == shot_desc) & 
                          (avg_df['year'] == int(selected_year))]
        
        if not player_shots.empty and not league_avg.empty:
            player_pct = (player_shots['shot_makes'].sum() / player_shots['shot_count'].sum()) * 100
            league_pct = league_avg['efg%'].iloc[0] * 100
            attempts = player_shots['shot_count'].sum()
            
            # Format long labels with line breaks
            formatted_desc = '<br>'.join([shot_desc[i:i+20] for i in range(0, len(shot_desc), 20)])
            
            chart_data['labels'].append(formatted_desc)
            chart_data['player_pct'].append(round(player_pct, 1))
            chart_data['league_pct'].append(round(league_pct, 1))
            chart_data['attempts'].append(attempts)
    
    # Create traces
    player_trace = go.Bar(
        name="Player Efficiency",
        x=chart_data["labels"],
        y=chart_data["player_pct"],
        marker=dict(
            color="rgba(59, 130, 246, 0.85)",  # blue-500 with opacity
            line=dict(color="rgba(59, 130, 246, 1)", width=1.5)
        ),
        hovertemplate="<b>%{x}</b><br>" +
                     "Efficiency: %{y:.1f}%<br>" +
                     f"Attempts: %{{text}}<br>" +
                     "<extra></extra>",
        text=chart_data["attempts"]
    )

    league_trace = go.Bar(
        name="League Average",
        x=chart_data["labels"],
        y=chart_data["league_pct"],
        marker=dict(
            color="rgba(239, 68, 68, 0.85)",  # red-500 with opacity
            line=dict(color="rgba(239, 68, 68, 1)", width=1.5)
        ),
        hovertemplate="<b>%{x}</b><br>" +
                     "League Avg: %{y:.1f}%<br>" +
                     "<extra></extra>"
    )

    # Create and customize figure
    fig = go.Figure(data=[player_trace, league_trace])
    
    # Update layout with improved styling
    fig.update_layout(
        title=dict(
            text=f"Shot Efficiency Comparison - {selected_group.replace('_', ' ').title()}",
            font=dict(size=24, color="#F3F4F6"),
            x=0.5,
            y=0.95
        ),
        legend=dict(
            title=dict(text="", font=dict(color="#F3F4F6")),
            font=dict(size=14, color="#F3F4F6"),
            bgcolor="rgba(0,0,0,0.5)",
            bordercolor="rgba(255,255,255,0.2)",
            borderwidth=1
        ),
        xaxis=dict(
            title=dict(
                text="Shot Types",
                font=dict(size=16, color="#F3F4F6")
            ),
            tickangle=45,
            tickfont=dict(size=12, color="#F3F4F6"),
            gridcolor="rgba(107, 114, 128, 0.2)",
            showgrid=True
        ),
        yaxis=dict(
            title=dict(
                text="Efficiency %",
                font=dict(size=16, color="#F3F4F6")
            ),
            tickfont=dict(size=12, color="#F3F4F6"),
            gridcolor="rgba(107, 114, 128, 0.2)",
            showgrid=True,
            range=[0, max(max(chart_data["player_pct"]), max(chart_data["league_pct"])) * 1.1]
        ),
        barmode="group",
        width=custom_width,
        height=custom_height,
        plot_bgcolor="rgba(0, 0, 0, 0)",
        paper_bgcolor="rgba(0, 0, 0, 0)",
        margin=dict(t=100, b=100, l=100, r=50),
        showlegend=True,
        bargap=0.15,
        bargroupgap=0.1
    )
    
    return fig

# Example usage:
fig = create_shot_efficiency_chart('2544', 'JUMP_SHOTS', 2025)
fig.show()

ValueError: max() arg is an empty sequence