In [12]:
import pandas as pd
import os


def collect_year(year):
    """Load every shot CSV found under ``team/<year>/`` into one DataFrame.

    All files whose name ends in ``.csv`` are read, except ``avg.csv``, and
    concatenated row-wise in alphabetical file-name order.

    Parameters
    ----------
    year : int or str
        Name of the season folder under ``team/``.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the folder's CSV files; each file keeps its
        own row index.

    Raises
    ------
    FileNotFoundError
        If ``team/<year>/`` does not exist (raised by ``os.listdir``).
    ValueError
        If the folder holds no CSV file to load.
    """
    path = os.path.join('team', str(year))
    # Test the extension rather than a substring so names such as
    # 'x.csv.bak' are skipped; sort because os.listdir order is arbitrary.
    csv_names = sorted(
        name for name in os.listdir(path)
        if name.endswith('.csv') and name != 'avg.csv'
    )
    if not csv_names:
        raise ValueError(f'no shot CSV files found in {path!r}')
    return pd.concat([pd.read_csv(os.path.join(path, name)) for name in csv_names])


def collect_types(start_year=1997, end_year=2026):
    """Build a per-season breakdown of attempts and makes by shot description.

    For each season the raw shots are loaded with ``collect_year``,
    de-duplicated on ``SHOT_ID``, then grouped by ``ACTION_TYPE`` and
    ``SHOT_TYPE`` with the attempt/made flags summed.

    Parameters
    ----------
    start_year : int, default 1997
        First season to load (inclusive).
    end_year : int, default 2026
        Season to stop before (exclusive, as in ``range``).

    Returns
    -------
    pandas.DataFrame
        Columns ``shot_description``, ``shot_type``, ``shot_count``,
        ``shot_makes`` and ``year``. The row index restarts at 0 for every
        season because the per-season frames are concatenated as-is.
    """
    flag_columns = ['SHOT_ATTEMPTED_FLAG', 'SHOT_MADE_FLAG']
    renamed = {
        'ACTION_TYPE': 'shot_description',
        'SHOT_TYPE': 'shot_type',
        'SHOT_ATTEMPTED_FLAG': 'shot_count',
        'SHOT_MADE_FLAG': 'shot_makes',
    }

    all_types = []
    for year in range(start_year, end_year):
        # The same SHOT_ID can appear in more than one file; count it once.
        season_shots = collect_year(year).drop_duplicates(subset=['SHOT_ID'])

        # Select the two flag columns before summing so unrelated numeric
        # columns are not aggregated and then thrown away.
        shot_breakdown = (
            season_shots.groupby(['ACTION_TYPE', 'SHOT_TYPE'])[flag_columns]
            .sum()
            .reset_index()
            .rename(columns=renamed)
        )
        shot_breakdown['year'] = year
        all_types.append(shot_breakdown)
    return pd.concat(all_types)


# Per-season shot breakdown for every season collect_types() loads.
all_shots = collect_types()

# Coarse shot_group label -> the shot descriptions that belong to it.
# The notebook inverts this dict and applies it to `shot_description` with an
# exact-match Series.map, so each string must match the data character for
# character -- including the inconsistent capitalisation seen in the entries
# (e.g. 'Jump Shot' vs 'Pullup Jump shot').
shot_categories = {
    'JUMP_SHOTS': [
        'Jump Shot',
        'Pullup Jump shot',
        'Step Back Jump shot',
        'Fadeaway Jump Shot',
        'Running Jump Shot',
        'Running Pull-Up Jump Shot',
        'Jump Bank Shot',
        'Turnaround Jump Shot',
        'Turnaround Fadeaway shot',
        'Driving Jump shot',
        'Fadeaway Bank shot',
        'Pullup Bank shot',
        'Running Bank shot',
        'Step Back Bank Jump Shot',
        'Turnaround Bank shot',
        'Turnaround Fadeaway Bank Jump Shot',
        'Driving Bank shot'
    ],
    
    'LAYUPS': [
        'Layup Shot',
        'Driving Layup Shot',
        'Reverse Layup Shot',
        'Running Layup Shot',
        'Cutting Layup Shot',
        'Driving Finger Roll Layup Shot',
        'Finger Roll Layup Shot',
        'Running Finger Roll Layup Shot',
        'Driving Reverse Layup Shot',
        'Tip Layup Shot',
        'Putback Layup Shot',
        'Running Reverse Layup Shot',
        'Alley Oop Layup shot',
        'Running Alley Oop Layup Shot',
        'Cutting Finger Roll Layup Shot',
        'Finger Roll Shot',
        'Running Finger Roll Shot',
        'Turnaround Finger Roll Shot',
        'Driving Finger Roll Shot'
    ],
    
    'DUNKS': [
        'Dunk Shot',
        'Driving Dunk Shot',
        'Slam Dunk Shot',
        'Alley Oop Dunk Shot',
        'Running Dunk Shot',
        'Reverse Dunk Shot',
        'Putback Dunk Shot',
        'Cutting Dunk Shot',
        'Tip Dunk Shot',
        'Follow Up Dunk Shot',
        'Driving Reverse Dunk Shot',
        'Driving Slam Dunk Shot',
        'Putback Reverse Dunk Shot',
        'Putback Slam Dunk Shot',
        'Reverse Slam Dunk Shot',
        'Running Alley Oop Dunk Shot',
        'Running Reverse Dunk Shot',
        'Running Slam Dunk Shot'
    ],
    
    'POST_MOVES': [
        'Hook Shot',
        'Jump Hook Shot',
        'Running Hook Shot',
        'Turnaround Hook Shot',
        'Hook Bank Shot',
        'Driving Hook Shot',
        'Driving Bank Hook Shot',
        'Jump Bank Hook Shot',
        'Running Bank Hook Shot',
        'Turnaround Bank Hook Shot'
    ],
    
    'SPECIALTY_SHOTS': [
        'Floating Jump shot',
        'Driving Floating Jump Shot',
        'Tip Shot',
        'Running Tip Shot',
        'Driving Floating Bank Jump Shot'
    ],
     'NO_SHOT': ['No Shot']
}

# Empty placeholder for extra categories. 'No Shot' is already covered by
# shot_categories above, and nothing in the visible cells reads this dict.
other_categories = {
   
}

# Invert shot_categories into a flat {shot_description: shot_group} lookup.
category_map = {
    description: group
    for group, descriptions in shot_categories.items()
    for description in descriptions
}
# Descriptions absent from shot_categories end up as NaN (Series.map default).
all_shots['shot_group'] = all_shots['shot_description'].map(category_map)

# Effective FG% = (makes x shot weight) / attempts, where a made three is
# weighted 1.5 and a made two is weighted 1.
value_map = {'2PT Field Goal': 1, '3PT Field Goal': 1.5}
shot_value = all_shots['shot_type'].map(value_map)
all_shots['efg%'] = (all_shots['shot_makes'] * shot_value) / all_shots['shot_count']

# Remove layups and post moves that are recorded as three-point attempts.
# NOTE(review): presumably these are mis-tagged rows in the source data -- confirm.
mask = (
    (all_shots['shot_type'] == '3PT Field Goal')
    & all_shots['shot_group'].isin(['LAYUPS', 'POST_MOVES'])
)
all_shots = all_shots[~mask]

# The row index is written as well, so it comes back as an unnamed first
# column when the file is re-read with pd.read_csv.
all_shots.to_csv('shot_types.csv')
all_shots

Unnamed: 0,shot_description,shot_type,shot_count,shot_makes,year,shot_group,efg%
0,Driving Dunk Shot,2PT Field Goal,93,92,1997,DUNKS,0.989247
1,Driving Layup Shot,2PT Field Goal,1003,815,1997,LAYUPS,0.812562
2,Dunk Shot,2PT Field Goal,315,282,1997,DUNKS,0.895238
3,Hook Shot,2PT Field Goal,459,268,1997,POST_MOVES,0.583878
4,Jump Shot,2PT Field Goal,5663,2163,1997,JUMP_SHOTS,0.381953
...,...,...,...,...,...,...,...
57,Turnaround Fadeaway shot,2PT Field Goal,1555,661,2025,JUMP_SHOTS,0.425080
58,Turnaround Fadeaway shot,3PT Field Goal,15,4,2025,JUMP_SHOTS,0.400000
59,Turnaround Hook Shot,2PT Field Goal,1124,553,2025,POST_MOVES,0.491993
60,Turnaround Jump Shot,2PT Field Goal,1106,469,2025,JUMP_SHOTS,0.424051


In [13]:
# Distinct (shot_group, shot_type) combinations remaining in all_shots.
group_type_pairs = all_shots.loc[:, ['shot_group', 'shot_type']]
group_type_pairs.drop_duplicates()


Unnamed: 0,shot_group,shot_type
0,DUNKS,2PT Field Goal
1,LAYUPS,2PT Field Goal
3,POST_MOVES,2PT Field Goal
4,JUMP_SHOTS,2PT Field Goal
5,JUMP_SHOTS,3PT Field Goal
10,SPECIALTY_SHOTS,2PT Field Goal
7,NO_SHOT,2PT Field Goal
17,SPECIALTY_SHOTS,3PT Field Goal


In [11]:
# Rows in the LAYUPS group that are recorded as three-point attempts.
# NOTE(review): the first cell removes exactly these rows from all_shots, so
# running this after it returns an empty frame; the table below presumably
# comes from an earlier run (execution count 11 vs 12) -- confirm.
is_layup_group = all_shots['shot_group'] == 'LAYUPS'
is_three_pointer = all_shots['shot_type'] == '3PT Field Goal'
all_shots[is_layup_group & is_three_pointer]


Unnamed: 0,shot_description,shot_type,shot_count,shot_makes,year,shot_group,efg%
7,Layup Shot,3PT Field Goal,1,0,2000,LAYUPS,0.0
12,Layup Shot,3PT Field Goal,2,1,2001,LAYUPS,0.75
9,Driving Layup Shot,3PT Field Goal,1,0,2008,LAYUPS,0.0
26,Layup Shot,3PT Field Goal,1,1,2008,LAYUPS,1.5


In [None]:
# Total attempts per season. all_shots stores attempts in 'shot_count' (it has
# no 'count' column), and grouping on 'year' lists only seasons that are
# actually present instead of a hard-coded range.
all_shots.groupby('year')['shot_count'].sum()

In [None]:
# Reload the breakdown that the first cell saved to disk.
df = pd.read_csv(filepath_or_buffer='shot_types.csv')
def organize_shottypes(df):
    """Total shot attempts per keyword-based category for every year in ``df``.

    Each shot description is assigned to the first category (in the order
    listed below) that has a keyword occurring in the description; anything
    unmatched becomes ``'Other'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``year`` column plus either the current schema
        (``shot_description`` / ``shot_count``) or the legacy one in which the
        description is in ``shot_type`` and the attempts in ``count``.

    Returns
    -------
    pandas.DataFrame
        Columns ``category``, ``count`` and ``year``; within each year rows
        are ordered by ``count`` descending. Empty (same columns) when ``df``
        has no rows.
    """
    # Built once instead of once per year.
    # NOTE(review): matching is case-sensitive, so e.g. 'Jump Bank Shot' does
    # not hit the 'Bank shot' keyword and falls through to 'Other' -- confirm
    # whether that is intended.
    categories = {
        'Dunk': ['Dunk', 'Slam Dunk', 'Alley Oop Dunk'],
        'Layup': ['Layup', 'Finger Roll', 'Alley Oop Layup'],
        'Jump Shot': ['Jump Shot', 'Fadeaway', 'Pull-Up', 'Step Back'],
        'Hook Shot': ['Hook Shot', 'Bank Hook'],
        'Bank Shot': ['Bank shot'],
        'Floating Shot': ['Floating'],
        'Tip Shot': ['Tip'],
        'Other': ['No Shot']
    }

    def categorize_shot(description):
        for category, keywords in categories.items():
            if any(keyword in description for keyword in keywords):
                return category
        return 'Other'

    # In the current schema 'shot_type' only holds '2PT Field Goal' /
    # '3PT Field Goal'; the text to categorise lives in 'shot_description'.
    desc_col = 'shot_description' if 'shot_description' in df.columns else 'shot_type'
    count_col = 'shot_count' if 'shot_count' in df.columns else 'count'

    frames = []
    for year in df['year'].unique().tolist():
        yeardf = df[df.year == year].reset_index(drop=True)
        yeardf['category'] = yeardf[desc_col].apply(categorize_shot)

        result = (
            yeardf.groupby('category')[count_col].sum()
            .reset_index(name='count')
            .sort_values('count', ascending=False)
        )
        result['year'] = year
        frames.append(result)

    if not frames:
        return pd.DataFrame(columns=['category', 'count', 'year'])
    return pd.concat(frames)

# Categorise the reloaded breakdown and display the per-year totals.
shot_breakdown = organize_shottypes(df=df)
shot_breakdown


In [None]:
# Total attempts per shot description across every season in shot_types.csv.
# The file written by the first cell stores the description in
# 'shot_description' and the attempts in 'shot_count'; there is no 'count'
# column. The output keeps the 'shot_type' / 'count' headers this cell
# originally produced.
# NOTE(review): presumably other readers of shot_counts.csv rely on those
# headers -- confirm before renaming them.
df = pd.read_csv('shot_types.csv')
total_count = (
    df.groupby('shot_description')['shot_count'].sum()
    .rename_axis('shot_type')
    .reset_index(name='count')
)
total_count.to_csv('shot_counts.csv', index=False)

In [None]:
shot_totals = pd.read_csv('shot_types.csv')

start_year = 2008
shot_totals = shot_totals[shot_totals.year >= start_year]

# The saved file keeps the description in 'shot_description'; its 'shot_type'
# column only distinguishes 2PT from 3PT. A description can appear on two rows
# per season (one per shot_type), hence nunique() rather than len().
print(shot_totals['shot_description'].nunique())

# Iterate over the seasons actually present rather than a hard-coded end year.
for year, season_rows in shot_totals.groupby('year'):
    print(str(year) + ': ' + str(season_rows['shot_description'].nunique()) + ' unique shot descriptions')
    print('')

# Attempts per description over the whole period. The result is exposed as
# 'shot_type' / 'count' because the cells below read those column names.
shot_totals = (
    shot_totals.groupby('shot_description')['shot_count'].sum()
    .rename_axis('shot_type')
    .reset_index(name='count')
)


In [None]:
# Most frequently attempted descriptions first.
shot_totals = shot_totals.sort_values('count', ascending=False)
shot_totals

In [None]:
# Share of all attempts contributed by each description, in percent.
total = shot_totals['count'].sum()
shot_totals = shot_totals.assign(percentage=100 * shot_totals['count'] / total)
shot_totals

In [None]:
# Keep only descriptions with a non-zero share of attempts.
has_share = shot_totals['percentage'] > 0
shots = shot_totals[has_share]
def type_filter(df, term):
    """Return the rows of ``df`` whose ``shot_type`` contains ``term``, ignoring case.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string ``shot_type`` column.
    term : str
        Pattern to search for; interpreted as a regular expression, which is
        the ``Series.str.contains`` default.

    Returns
    -------
    pandas.DataFrame
        Matching rows. Rows whose ``shot_type`` is missing never match.
    """
    # case=False ignores case on both sides, so a term with capitals still
    # matches; na=False keeps the mask boolean when shot_type has missing
    # values, which would otherwise make the row selection raise.
    return df[df['shot_type'].str.contains(term, case=False, na=False)]
tips = type_filter(shots, 'tip')
# numeric_only keeps the totals to the numeric columns; a plain sum() would
# also "add up" the shot_type strings into one long concatenation.
print(tips.sum(numeric_only=True))

In [None]:
# Every distinct description that survived the percentage filter.
shots['shot_type'].unique()

In [None]:
# Reduce each description to its second-to-last word (descriptions with a
# single word yield NaN), then list the distinct words apart from 'Fadeaway'.
small = shots.reset_index(drop=True).assign(
    shot_type=lambda frame: frame['shot_type'].str.split(' ').str[-2]
)
desc = [word for word in small['shot_type'].unique() if word != 'Fadeaway']
desc

In [None]:
# Reduce each description to its first word, then keep the first words that
# do not already appear in desc.
small = shots.reset_index(drop=True).assign(
    shot_type=lambda frame: frame['shot_type'].str.split(' ').str[0]
)
qual = set(small['shot_type'].unique()).difference(desc)
qual

In [None]:
unique_shots = shots['shot_type'].unique()

# Qualifier words; a description counts as "qualified" when it contains at
# least one of them (case-insensitive substring match).
qual = ['Alley',
 'Cutting',
 'Driving',
 'Fadeaway',
 'Finger',
 'Floating',
 'Pullup',
 'Putback',
 'Reverse',
 'Running',
 'Slam',
 'Step',
 'Turnaround']

# any() stops at the first matching qualifier, so a description that contains
# several of them (e.g. 'Driving Reverse ...') is listed once, not once per hit.
found_shots = [
    shot for shot in unique_shots
    if any(val.lower() in shot.lower() for val in qual)
]

shot_qual = shots[shots.shot_type.isin(found_shots)]
shot_qual.percentage.sum()

In [None]:
# Descriptions that contain none of the qualifier words, and their combined share.
is_qualified = shots['shot_type'].isin(found_shots)
non_qual = shots[~is_qualified]
print(non_qual['percentage'].sum())
non_qual

In [None]:
# Combined share of every description that mentions 'layup' (any capitalisation).
mentions_layup = shots['shot_type'].str.lower().str.contains('layup')
layups = shots[mentions_layup]
layups['percentage'].sum()

In [None]:
# Just the description text of the layup rows.
layup_shots = layups.loc[:, 'shot_type']
layup_shots

In [None]:
# NOTE(review): no `shot_types` variable is assigned in any visible cell, so
# this raises NameError on a fresh kernel -- TODO confirm which frame was meant.
shot_types

In [None]:
# Look up the single 'Turnaround Fadeaway shot' entry.
is_turnaround_fadeaway = shots['shot_type'] == 'Turnaround Fadeaway shot'
shots.loc[is_turnaround_fadeaway]