In [1]:
import pandas as pd
import pandas as pd
import numpy as np

def calculate_basketball_percentages(df):
    """
    Calculate basketball percentage statistics from raw totals.

    Every derived column is ``numerator / denominator * 100``. Ratios that are
    undefined because the denominator is zero (``0/0`` -> NaN, ``x/0`` -> +/-inf
    in pandas) are reported as 0 so downstream comparisons never see NaN/inf.

    Args:
        df (pandas.DataFrame): DataFrame containing the raw totals columns
            (own-team columns such as ``FG2A``/``FG3M`` and opponent columns
            prefixed with ``opp_``). The input frame is not modified.

    Returns:
        pandas.DataFrame: Copy of ``df`` with ``FGA``/``FGM`` totals and the
        percentage columns added (existing columns of the same name are
        overwritten).

    Raises:
        KeyError: If a raw totals column read below is missing from ``df``.
    """

    def _pct(numerator, denominator):
        """Return ``numerator / denominator * 100`` with NaN and +/-inf mapped to 0."""
        ratio = numerator / denominator * 100
        return ratio.replace([np.inf, -np.inf], np.nan).fillna(0)

    # Make a copy to avoid modifying the original
    result = df.copy()

    # Basic shooting percentages
    result['Fg3Pct'] = _pct(result['FG3M'], result['FG3A'])
    result['Fg2Pct'] = _pct(result['FG2M'], result['FG2A'])
    result['FGA'] = result['FG2A'] + result['FG3A']
    result['FGM'] = result['FG2M'] + result['FG3M']
    result['NonHeaveFg3Pct'] = _pct(result['FG3M'], result['FG3A'] - result['HeaveAttempts'])

    # Advanced shooting percentages
    result['EfgPct'] = _pct(result['FG2M'] + 1.5 * result['FG3M'],
                            result['FG2A'] + result['FG3A'])
    result['TsPct'] = _pct(result['Points'],
                           2 * (result['FG2A'] + result['FG3A'] + 0.44 * result['FTA']))

    # Second chance percentages
    result['SecondChanceFg3Pct'] = _pct(result['SecondChanceFG3M'], result['SecondChanceFG3A'])
    result['SecondChanceFg2Pct'] = _pct(result['SecondChanceFG2M'], result['SecondChanceFG2A'])
    result['SecondChanceEfgPct'] = _pct(
        result['SecondChanceFG2M'] + 1.5 * result['SecondChanceFG3M'],
        result['SecondChanceFG2A'] + result['SecondChanceFG3A'])
    # Unlike TsPct, no free-throw term is included in this denominator.
    result['SecondChanceTsPct'] = _pct(
        result['SecondChancePoints'],
        2 * (result['SecondChanceFG2A'] + result['SecondChanceFG3A']))
    result['SecondChancePointsPct'] = _pct(result['SecondChancePoints'], result['Points'])

    # Shot distribution
    result['FG3APct'] = _pct(result['FG3A'], result['FG2A'] + result['FG3A'])

    # Share of own attempts blocked by the opponent, overall and by shot location
    result['FG2APctBlocked'] = _pct(result['opp_Fg2aBlocked'], result['FG2A'])
    result['AtRimPctBlocked'] = _pct(result['opp_BlockedAtRim'], result['AtRimFGA'])
    result['LongMidRangePctBlocked'] = _pct(result['opp_BlockedLongMidRange'], result['LongMidRangeFGA'])
    result['ShortMidRangePctBlocked'] = _pct(result['opp_BlockedShortMidRange'], result['ShortMidRangeFGA'])
    result['FG3APctBlocked'] = _pct(result['opp_Fg3aBlocked'], result['FG3A'])
    # NOTE(review): numerator is 'opp_Blocked3s', not a corner-specific column
    # like the other locations use -- confirm this is the intended source.
    result['Corner3PctBlocked'] = _pct(result['opp_Blocked3s'], result['Corner3FGA'])
    result['Arc3PctBlocked'] = _pct(result['opp_BlockedArc3'], result['Arc3FGA'])

    # Rebound percentages - Field Goals (relative to missed shots)
    result['DefFGReboundPct'] = _pct(
        result['DefRebounds'],
        result['opp_FG2A'] - result['opp_FG2M'] + result['opp_FG3A'] - result['opp_FG3M'])
    result['OffFGReboundPct'] = _pct(
        result['OffRebounds'],
        result['FG2A'] - result['FG2M'] + result['FG3A'] - result['FG3M'])

    # Rebound percentages by shot location (relative to missed shots of that type).
    # NOTE(review): the numerators are the overall two-/three-point rebound totals
    # and the Def* rows divide by the team's own misses (not opp_ misses) --
    # confirm that is intended.
    long_mid_misses = result['LongMidRangeFGA'] - result['LongMidRangeFGM']
    arc3_misses = result['Arc3FGA'] - result['Arc3FGM']
    at_rim_misses = result['AtRimFGA'] - result['AtRimFGM']
    short_mid_misses = result['ShortMidRangeFGA'] - result['ShortMidRangeFGM']
    corner3_misses = result['Corner3FGA'] - result['Corner3FGM']
    result['OffLongMidRangeReboundPct'] = _pct(result['OffTwoPtRebounds'], long_mid_misses)
    result['DefLongMidRangeReboundPct'] = _pct(result['DefTwoPtRebounds'], long_mid_misses)
    result['DefArc3ReboundPct'] = _pct(result['DefThreePtRebounds'], arc3_misses)
    result['OffArc3ReboundPct'] = _pct(result['OffThreePtRebounds'], arc3_misses)
    result['DefAtRimReboundPct'] = _pct(result['DefTwoPtRebounds'], at_rim_misses)
    result['OffAtRimReboundPct'] = _pct(result['OffTwoPtRebounds'], at_rim_misses)
    result['DefShortMidRangeReboundPct'] = _pct(result['DefTwoPtRebounds'], short_mid_misses)
    result['OffShortMidRangeReboundPct'] = _pct(result['OffTwoPtRebounds'], short_mid_misses)
    result['DefCorner3ReboundPct'] = _pct(result['DefThreePtRebounds'], corner3_misses)
    result['OffCorner3ReboundPct'] = _pct(result['OffThreePtRebounds'], corner3_misses)

    # Free throw rebound percentages are not computed here.

    # Assist percentages
    result['Assisted2sPct'] = _pct(result['PtsAssisted2s'], 2 * result['FG2M'])
    result['Assisted3sPct'] = _pct(result['PtsAssisted3s'], 3 * result['FG3M'])
    result['NonPutbacksAssisted2sPct'] = _pct(
        result['PtsAssisted2s'], 2 * (result['FG2M'] - result['PtsPutbacks'] / 2))
    result['Corner3PctAssisted'] = _pct(result['Corner3Assists'], result['Corner3FGM'])
    result['Arc3PctAssisted'] = _pct(result['Arc3Assists'], result['Arc3FGM'])
    # NOTE(review): the SecondChance* rows divide the overall assist totals by
    # second-chance makes -- confirm whether second-chance assist columns exist.
    result['SecondChanceCorner3PctAssisted'] = _pct(result['Corner3Assists'], result['SecondChanceCorner3FGM'])
    result['SecondChanceArc3PctAssisted'] = _pct(result['Arc3Assists'], result['SecondChanceArc3FGM'])
    result['SecondChanceAtRimPctAssisted'] = _pct(result['AtRimAssists'], result['SecondChanceAtRimFGM'])
    result['AtRimPctAssisted'] = _pct(result['AtRimAssists'], result['AtRimFGM'])
    result['ShortMidRangePctAssisted'] = _pct(result['ShortMidRangeAssists'], result['ShortMidRangeFGM'])
    result['LongMidRangePctAssisted'] = _pct(result['LongMidRangeAssists'], result['LongMidRangeFGM'])

    # Penalty percentages
    result['PenaltyPointsPct'] = _pct(result['PenaltyPoints'], result['Points'])
    result['PenaltyOffPossPct'] = _pct(result['PenaltyOffPoss'], result['OffPoss'])
    result['PenaltyFg2Pct'] = _pct(result['PenaltyFG2M'], result['PenaltyFG2A'])
    result['PenaltyFg3Pct'] = _pct(result['PenaltyFG3M'], result['PenaltyFG3A'])
    result['PenaltyEfgPct'] = _pct(
        result['PenaltyFG2M'] + 1.5 * result['PenaltyFG3M'],
        result['PenaltyFG2A'] + result['PenaltyFG3A'])
    # NOTE(review): uses the overall 'FTA', not a penalty-only free-throw count --
    # confirm; a penalty-specific attempts column is not visible here.
    result['PenaltyTsPct'] = _pct(
        result['PenaltyPoints'],
        2 * (result['PenaltyFG2A'] + result['PenaltyFG3A'] + 0.44 * result['FTA']))

    # Miscellaneous percentages
    result['BlocksRecoveredPct'] = _pct(result['RecoveredBlocks'], result['Blocks'])
    result['LiveBallTurnoverPct'] = _pct(result['LiveBallTurnovers'], result['Turnovers'])
    result['SelfORebPct'] = _pct(result['SelfOReb'], result['FGA'] - result['FGM'])

    # Fouls percentages
    result['ShootingFoulsDrawnPct'] = _pct(result['ShootingFouls'], result['FG2A'] + result['FG3A'])
    result['TwoPtShootingFoulsDrawnPct'] = _pct(
        result['TwoPtShootingFoulsDrawn'],
        result['FG2A'] + result['2pt And 1 Free Throw Trips'])
    # Assigned exactly once: a second, unguarded assignment used to overwrite
    # this column and let 0/0 through as NaN.
    result['ThreePtShootingFoulsDrawnPct'] = _pct(result['ThreePtShootingFoulsDrawn'], result['FG3A'])

    # Rebound split between two- and three-point shots
    total_def_rebounds = result['DefTwoPtRebounds'] + result['DefThreePtRebounds']
    total_off_rebounds = result['OffTwoPtRebounds'] + result['OffThreePtRebounds']
    result['DefTwoPtReboundPct'] = _pct(result['DefTwoPtRebounds'], total_def_rebounds)
    result['DefThreePtReboundPct'] = _pct(result['DefThreePtRebounds'], total_def_rebounds)
    # NOTE(review): this row divides by own two-point misses while its three
    # siblings divide by total rebounds -- confirm which denominator is intended.
    result['OffTwoPtReboundPct'] = _pct(result['OffTwoPtRebounds'], result['FG2A'] - result['FG2M'])
    result['OffThreePtReboundPct'] = _pct(result['OffThreePtRebounds'], total_off_rebounds)
    return result

def calculate_weighted_average(df, value_col, weight_col, group_by=None):
    """
    Calculate weighted average of a value column based on a weight column.

    Rows where either the value or the weight is null are ignored.

    Parameters:
    -----------
    df : pandas.DataFrame
        Input dataframe (not modified)
    value_col : str
        Name of the column containing the values to average
    weight_col : str
        Name of the column containing the weights (must be non-negative)
    group_by : str or list, optional
        Column(s) to group by before calculating weighted average

    Returns:
    --------
    If group_by is None: returns a float (weighted average)
    If group_by is specified: returns a Series with weighted averages per group,
        each group's weighted sum divided by that group's own weight total
        (NaN for a group whose weights sum to zero)
    In both modes a scalar NaN is returned when no rows remain after dropping
    nulls or when every remaining weight is zero.

    Raises:
    -------
    ValueError
        If value_col or weight_col is not a column of df, or if any weight is
        negative.

    Examples:
    --------
    # Single weighted average
    df = pd.DataFrame({
        'value': [10, 20, 30],
        'weight': [1, 2, 3]
    })
    result = calculate_weighted_average(df, 'value', 'weight')

    # Grouped weighted averages
    df = pd.DataFrame({
        'category': ['A', 'A', 'B', 'B'],
        'value': [10, 20, 30, 40],
        'weight': [1, 2, 3, 4]
    })
    result = calculate_weighted_average(df, 'value', 'weight', 'category')
    """

    # Input validation
    if value_col not in df.columns:
        raise ValueError(f"Value column '{value_col}' not found in dataframe")
    if weight_col not in df.columns:
        raise ValueError(f"Weight column '{weight_col}' not found in dataframe")

    # Handle negative weights
    if (df[weight_col] < 0).any():
        raise ValueError("Negative weights found. Please ensure all weights are non-negative")

    # Remove rows where either value or weight is null
    df = df.dropna(subset=[value_col, weight_col])

    # If all weights are zero (or nothing is left), return nan
    if (df[weight_col] == 0).all():
        return np.nan

    if group_by is None:
        # Calculate single weighted average
        weighted_sum = (df[value_col] * df[weight_col]).sum()
        weight_sum = df[weight_col].sum()
        return weighted_sum / weight_sum if weight_sum != 0 else np.nan

    # Calculate grouped weighted averages. The value*weight products live in a
    # temporary column so both sums come from the same vectorised groupby.
    product_col = '__weighted_value__'
    grouped = df.assign(**{product_col: df[value_col] * df[weight_col]}).groupby(group_by)
    weighted_sums = grouped[product_col].sum()
    weight_sums = grouped[weight_col].sum()
    # Divide by each group's OWN weight total; dividing by the overall total
    # only gives the right answer when there is a single group.
    return weighted_sums / weight_sums
# Player and season under inspection; used for the file paths and filters below.
PLAYER_ID = '2544'
SEASON = 2016

# Load the team table and its "vs" counterpart; the 'team_vs' column is dropped
# from the latter before the two are joined.
df1 = pd.read_csv(f'data/{SEASON}/1610612739_ps.csv')
df2 = pd.read_csv(f'data/{SEASON}/1610612739_vs_ps.csv').drop(columns='team_vs')

# Prefix every "vs" column except the join key with 'opp_' so both sides can
# live in one row after the merge.
id_col = ['EntityId']
oppnames = [col if col in id_col else 'opp_' + col for col in df2.columns]
df2.columns = oppnames
df = df1.merge(df2, on=id_col)
print(df.Minutes.sum())

# Keep rows whose EntityId contains the player id and zero-fill gaps. fillna()
# returns a fresh frame, so the later column assignments do not write into a
# filtered slice.
# NOTE(review): this is a substring/regex match -- confirm no other id contains
# the same digits.
df = df[df.EntityId.str.contains(PLAYER_ID)].fillna(0)

# Reference on/off tables for the same player, restricted to the same season.
comp = pd.read_csv(f'../../contract/nba_rapm/on-off/players_ps/{PLAYER_ID}.csv')
comp2 = pd.read_csv(f'../../contract/nba_rapm/on-off/players_ps/{PLAYER_ID}vs.csv')
comp = comp[comp.year == SEASON]
comp2 = comp2[comp2.year == SEASON]

print(comp.Minutes.sum())

# Keep only the rows flagged player_on, then apply the same 'opp_' prefixing
# (join key here is 'player_id').
comp = comp[comp.player_on == True]
comp2 = comp2[comp2.player_on == True]
oppnames = [col if col == 'player_id' else 'opp_' + col for col in comp2.columns]
comp2.columns = oppnames
print(comp.columns)
comp = comp.merge(comp2, on='player_id')

# Identifier columns; later cells exclude these when choosing columns to sum.
id_col = [
    'EntityId',
    'TeamId',
    'Name',
    'ShortName',
    'RowId',
    'TeamAbbreviation',
    'team_id',
    'year',
    'season',
    'team_vs',
]

# Total field-goal attempts/makes for both sides.
df['FGA'] = df['FG2A'] + df['FG3A']
df['FGM'] = df['FG2M'] + df['FG3M']
df['opp_FGA'] = df['opp_FG2A'] + df['opp_FG3A']
df['opp_FGM'] = df['opp_FG2M'] + df['opp_FG3M']

1004.0
1008.0
Index(['player_id', 'year', 'eFG%', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%',
       'BLK%', 'TOV%',
       ...
       'rOffFGReboundPct', 'rdrtg', 'rortg', 'rAST%', 'rSecondChancePer100',
       'rRimAstPer100', 'rAtRimFrequency', 'rAtRimAccuracy', 'rFG3APct',
       'rFg3Pct'],
      dtype='object', length=278)


In [2]:
# Drop the opponent-side identifier columns. Rebinding `df` (instead of
# inplace=True) together with errors='ignore' keeps this cell re-runnable.
df = df.drop(
    columns=['opp_Name', 'opp_ShortName', 'opp_RowId', 'opp_TeamAbbreviation', 'opp_season'],
    errors='ignore',
)

# Missed shots = attempts - makes, for the team and (with the 'opp_' prefix)
# for the opponent: two-pointers, each shot location, then the three-point zones.
miss_specs = [
    ('two_point_misses', 'FG2A', 'FG2M'),
    ('at_rim_misses', 'AtRimFGA', 'AtRimFGM'),
    ('short_midrange_misses', 'ShortMidRangeFGA', 'ShortMidRangeFGM'),
    ('long_midrange_misses', 'LongMidRangeFGA', 'LongMidRangeFGM'),
    ('corner3_misses', 'Corner3FGA', 'Corner3FGM'),
    ('arc3_misses', 'Arc3FGA', 'Arc3FGM'),
]
for miss_col, attempts_col, makes_col in miss_specs:
    for prefix in ('', 'opp_'):
        df[prefix + miss_col] = df[prefix + attempts_col] - df[prefix + makes_col]

# Free throw misses
df['ft_misses'] = df['FTA'] - df['FtPoints'] / 1  # Assuming 1 point per FT
df['opp_ft_misses'] = df['opp_FTA'] - df['opp_FtPoints'] / 1  # Assuming 1 point per FT

# Total misses
df['fg_misses'] = df['FGA'] - df['FGM']
df['opp_fg_misses'] = df['opp_FGA'] - df['opp_FGM']

# Percentage column -> column used as its weight when averaging across rows.
# NOTE(review): both *ThreePtReboundPct entries are weighted by 'opp_FG3A'
# (attempts, opponent side) while the two-point entries use misses -- confirm.
# NOTE(review): the FT rebound entries are weighted by rebound counts;
# 'ft_misses' / 'opp_ft_misses' are computed above but not used here -- confirm.
weight_mapping = {
    'DefTwoPtReboundPct': 'opp_two_point_misses',
    'OffTwoPtReboundPct': 'two_point_misses',
    'DefThreePtReboundPct': 'opp_FG3A',
    'DefFGReboundPct': 'opp_fg_misses',
    'OffFGReboundPct': 'fg_misses',
    'OffLongMidRangeReboundPct': 'long_midrange_misses',
    'DefLongMidRangeReboundPct': 'opp_long_midrange_misses',
    'OffThreePtReboundPct': 'opp_FG3A',
    'OffArc3ReboundPct': 'arc3_misses',
    'DefArc3ReboundPct': 'opp_arc3_misses',
    'DefAtRimReboundPct': 'opp_at_rim_misses',
    'DefShortMidRangeReboundPct': 'opp_short_midrange_misses',
    'DefCorner3ReboundPct': 'opp_corner3_misses',
    'OffAtRimReboundPct': 'at_rim_misses',
    'SelfORebPct': 'fg_misses',
    'OffShortMidRangeReboundPct': 'short_midrange_misses',
    'DefFTReboundPct': 'FTDefRebounds',
    'OffFTReboundPct': 'FTOffRebounds',
    'OffCorner3ReboundPct': 'corner3_misses',
    'SecondChanceTsPct': 'SecondChancePoints',
    'SecondChanceCorner3PctAssisted': 'SecondChanceCorner3FGM',
    'SecondChanceArc3PctAssisted': 'SecondChanceArc3FGM',
    'SecondChanceAtRimPctAssisted': 'SecondChanceAtRimFGM',
}

# One weighted average per mapped column; .iloc[0] takes the first 'team_id' group.
values = [
    calculate_weighted_average(df, stat_col, weight_col, 'team_id').iloc[0]
    for stat_col, weight_col in weight_mapping.items()
]

# Column names in the same order as `values`.
weight_list = list(weight_mapping)
values


[0.7258030574562834,
 0.34889901290812453,
 0.7767002514198669,
 0.7520298793461992,
 0.31540745257162367,
 0.23333333333333334,
 0.811111111111111,
 0.265391901224586,
 0.25284494584467965,
 0.7872351982618142,
 0.6642156862745098,
 0.7264457439896037,
 0.7432432432432432,
 0.43617021276595747,
 0.03956004471006045,
 0.32061068702290074,
 0.7250728438228439,
 0.313953488372093,
 0.7047072584588967,
 0.9166666666666666,
 1.0,
 0.2]

In [3]:
# Columns whose name contains 'pct' (case-insensitive) are ratios and must not be summed.
pct = [col for col in df.columns if 'pct' in col.lower()]

# Everything that is neither an identifier nor a ratio gets summed per team.
# Named `sum_cols` rather than `sum` so the builtin sum() stays usable in the kernel.
sum_cols = [col for col in df.columns if col not in id_col and col not in pct]
sums = df.groupby('TeamId').sum(numeric_only=True)[sum_cols].reset_index(drop=True)

# Location-specific rebound percentage columns.
rebounds = [
    "OffAtRimReboundPct",
    "OffShortMidRangeReboundPct",
    "OffLongMidRangeReboundPct",
    "OffArc3ReboundPct",
    "DefAtRimReboundPct",
    "DefShortMidRangeReboundPct",
    "DefLongMidRangeReboundPct",
    "DefArc3ReboundPct",
    "DefCorner3ReboundPct",
]

# Percentage columns left out of the comparison lists.
exclude = ["PenaltyTsPct"]

# Final comparison list: drop excluded columns and anything opponent-prefixed.
pct = [col for col in pct if col not in exclude and 'opp_' not in col.lower()]


In [4]:
# Recompute the percentage columns from the per-team summed totals in `sums`.
newframe=calculate_basketball_percentages(sums)

# Set the columns named in weight_list to the weighted averages held in `values`
# (the two lists are in the same order).
# NOTE(review): calculate_basketball_percentages multiplies its ratios by 100,
# while the `values` shown above are on a 0-1 scale -- confirm the mixed scales
# in newframe are intended.
newframe[weight_list]=values

In [5]:
# Display the FG2APctBlocked column of `comp` for a manual check.
comp['FG2APctBlocked']

0    0.082965
Name: FG2APctBlocked, dtype: float64

In [11]:
# Display the recomputed values for the 41st-50th columns listed in `pct`.
newframe[pct[40:50]]

Unnamed: 0,OffAtRimReboundPct,SelfORebPct,ShortMidRangePctAssisted,ShootingFoulsDrawnPct,TwoPtShootingFoulsDrawnPct,OffShortMidRangeReboundPct,PenaltyFg3Pct,ShortMidRangePctBlocked,DefFTReboundPct,SecondChanceArc3PctAssisted
0,0.43617,0.03956,41.463415,9.199438,14.072495,0.320611,40.594059,6.103286,0.725073,1.0


In [12]:
# Display the same 41st-50th `pct` columns from `comp` to compare with the cell above.
comp[pct[40:50]]

Unnamed: 0,OffAtRimReboundPct,SelfORebPct,ShortMidRangePctAssisted,ShootingFoulsDrawnPct,TwoPtShootingFoulsDrawnPct,OffShortMidRangeReboundPct,PenaltyFg3Pct,ShortMidRangePctBlocked,DefFTReboundPct,SecondChanceArc3PctAssisted
0,0.43617,0.039877,0.414634,0.091028,0.131737,0.323077,0.405941,0.061033,0.88,1.0


In [13]:
# Print every column whose lower-cased name contains 'ft' but neither 'opp'
# nor 'pct'.
# NOTE(review): the 'ft' substring also matches names such as OffTwoPtRebounds
# and DefThreePtRebounds, as the printed output shows.
ft_like_columns = [
    name
    for name in df.columns
    if 'ft' in name.lower()
    and 'opp' not in name.lower()
    and 'pct' not in name.lower()
]
for name in ft_like_columns:
    print(name)

OffTwoPtRebounds
DefThreePtRebounds
DefTwoPtRebounds
FtPoints
PenaltyFtPoints
OffThreePtRebounds
FTA
SecondChanceFtPoints
FTDefRebounds
FTOffRebounds
ft_misses
