In [77]:
import pandas as pd 
import numpy as np 


In [78]:
team_data = pd.read_csv('high_school_gbb_data_2021_2022.csv')


In [79]:
def get_ppa_two(row):
    total_points = 2 * row['Two_Points_Made']
    total_attempts = row['Two_Point_Attempts']
    return total_points / total_attempts

def get_ppa_three(row):
    total_points = 3 * row['Three_Points_Made']
    total_attempts = row['Three_Point_Attempts']
    return total_points / total_attempts

def get_total_ppa(row):
    total_points = 2 * row['Two_Points_Made'] + 3 * row['Three_Points_Made']
    total_attempts = row['Total_FGA']
    return total_points / total_attempts

def team_scoring_poss(row):
    team_attempts = row['Total_FGM']
    inner_part = (1 - (1 - (row['Free_Throws_Made'] / row['Free_Throw_Attempts']) ** 2))
    return team_attempts + inner_part *  row['Free_Throw_Attempts'] * 0.4

def team_play_percent(row):
    numerator = row['Scoring_Possesions']
    denom = row['Total_FGA'] + row['Free_Throw_Attempts'] * 0.4 + row['Turnovers']
    return numerator / denom

def possesions(row):
    fga = row['Total_FGA']
    other_team_def_rebounds = (row['Total_FGA'] - row['Total_FGM']) - row['Offensive_Rebounds']
    reb_part = row['Offensive_Rebounds']/(row['Offensive_Rebounds'] + other_team_def_rebounds)
    fg_part = row['Total_FGA'] - row['Total_FGM']
    return fga - reb_part * fg_part * 1.07 + row['Turnovers'] + row['Free_Throw_Attempts'] * 0.4

def won_game(row):
    if row['Points_Scored'] > row['Points_Allowed']:
        return 1
    else:
        return 0

def winning_first_half(row):
    if row['Points_Scored_First_Half'] > row['Points_Allowed_First_Half']:
        return 1
    else:
        return 0

def winning_second_half(row):
    if row['Points_Scored_Second_Half'] > row['Points_Allowed_Second_Half']:
        return 1
    else:
        return 0

def final_difference(row):
    return row['Points_Scored'] - row['Points_Allowed']

def first_half_difference(row):
    return row['Points_Scored_First_Half'] - row['Points_Allowed_First_Half']

def second_half_difference(row):
    return row['Points_Scored_Second_Half'] - row['Points_Allowed_Second_Half']

def offensive_efficiency(row):
    num = row['Total_FGM'] + row['Assists']
    denom = row['Total_FGA'] - row['Offensive_Rebounds'] + row['Assists'] + row['Turnovers']
    return num / denom

def efficient_offense(row):
    inner = .76 * row['Assists'] + row['Points_Scored']
    return inner * row['Offensive_Efficiency']

def effective_fgp(row):
    num = row['Total_FGM'] + (.5 * row['Three_Points_Made'])
    return num / row['Total_FGA']

In [80]:
team_data

Unnamed: 0,Game_ID,Points_Scored,Points_Allowed,Points_Scored_First_Half,Points_Allowed_First_Half,Points_Scored_Second_Half,Points_Allowed_Second_Half,Two_Point_Attempts,Two_Points_Made,Three_Point_Attempts,...,Free_Throw_Attempts,Free_Throws_Made,Total_FGA,Total_FGM,Assists,Turnovers,Offensive_Rebounds,Defensive_Rebounds,Total_Rebounds,Steals
0,1,67,60,26,27,41,33,53,18,10,...,23,13,63,24,15,13,14,19,32,6


In [81]:
team_data['Points_Per_Attempt_Two'] = team_data.apply(get_ppa_two, axis = 1)
team_data['Points_Per_Attempt_Three'] = team_data.apply(get_ppa_three, axis = 1)
team_data['Points_Per_Attempt_Total'] = team_data.apply(get_total_ppa, axis = 1)
team_data['Scoring_Possesions'] = team_data.apply(team_scoring_poss, axis = 1)
team_data['Team_Play_Percentage'] = team_data.apply(team_play_percent, axis = 1)
team_data['Possesions'] = team_data.apply(possesions, axis = 1)
team_data['Winning_First_Half'] = team_data.apply(winning_first_half, axis = 1)
team_data['Winning_Second_Half'] = team_data.apply(winning_second_half, axis = 1)
team_data['Won_Game'] = team_data.apply(won_game, axis = 1)
team_data['First_Half_Score_Difference'] = team_data.apply(first_half_difference, axis = 1)
team_data['Second_Half_Score_Difference'] = team_data.apply(second_half_difference, axis = 1)
team_data['Final_Score_Difference'] = team_data.apply(final_difference, axis = 1)
team_data['Offensive_Efficiency'] = team_data.apply(offensive_efficiency, axis = 1)
team_data['Raw_EOP'] = team_data.apply(efficient_offense, axis = 1)
team_data['Effective_Field_Goal_Percentage'] = team_data.apply(effective_fgp, axis = 1)

In [82]:
team_data.to_csv('Full_NDA_Team_Game_Data.csv', index = False)

In [83]:
spot_details = pd.read_csv('spots_on_court.csv')
player_raw_data = pd.read_csv('NDA_Player_Raw_Data.csv')
player_spot_data = pd.read_csv('NDA_Player_Spot_Data.csv')

In [84]:
player_spot_data_with_details = pd.merge(player_spot_data, spot_details, left_on = 'Spot_ID', right_on = 'SpotID')
player_spot_data_with_details = player_spot_data_with_details[['Player_ID', 'Game_ID', 'Spot_ID', 'Spot_FGA', 'Spot_FGM', 'SpotOnCourt', 'PointValue']]
player_spot_data_with_details

Unnamed: 0,Player_ID,Game_ID,Spot_ID,Spot_FGA,Spot_FGM,SpotOnCourt,PointValue
0,1,1,1,1,0,3LeftCorner,3
1,3,1,1,0,0,3LeftCorner,3
2,4,1,1,0,0,3LeftCorner,3
3,5,1,1,0,0,3LeftCorner,3
4,10,1,1,0,0,3LeftCorner,3
...,...,...,...,...,...,...,...
105,11,1,11,7,4,Paint,2
106,15,1,11,0,0,Paint,2
107,20,1,11,1,0,Paint,2
108,24,1,11,8,3,Paint,2


In [86]:
def spot_points_per_attempt(row):
    num = row['Spot_FGM'] * row['PointValue']
    denom = row['Spot_FGA'] 
    if denom > 0:
        return num / denom
    else:
        return 0

player_spot_data_with_details['Points_Per_Attempt'] = player_spot_data_with_details.apply(spot_points_per_attempt, axis = 1)
average = pd.DataFrame(player_spot_data_with_details.groupby(['Spot_ID', 'SpotOnCourt'], as_index = False)['Points_Per_Attempt'].mean())
sum = pd.DataFrame(player_spot_data_with_details.groupby(['Spot_ID', 'SpotOnCourt'], as_index = False)['Spot_FGA'].sum())
final_shot_data = pd.merge(average, sum, on = ['Spot_ID', 'SpotOnCourt'])
player_spot_data_with_details.to_csv('Final_Player_Spot_Data.csv', index = False)
final_shot_data.to_csv('Final_Player_Spot_Data_Overview.csv', index = False)


In [87]:
player_raw_data

Unnamed: 0,Player_ID,Game_ID,Points_Scored,Total_FGA,Total_FGM,First_Half_FGA,First_Half_FGM,Second_Half_FGA,Second_Half_FGM,Two_FGA,...,Three_FGA,Three_FGM,Offensive_Rebounds,Defensive_Rebounds,Total_Rebounds,Turnovers,Assists,Steals,Free_Throw_Attempts,Free_Throw_Makes
0,1,1,23,14,7,9,4,5,3,11,...,3,1,3,1,4,1,3,0,12,8
1,3,1,9,7,4,1,0,6,4,6,...,1,1,1,7,8,2,1,2,0,0
2,4,1,3,3,1,2,1,1,0,2,...,1,1,0,0,0,0,3,1,1,0
3,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,10,1,5,2,2,0,0,2,2,2,...,0,0,0,2,2,3,4,2,4,1
5,11,1,10,8,4,4,1,4,3,7,...,1,0,9,5,14,4,0,0,2,2
6,15,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
7,20,1,3,3,1,2,0,1,1,2,...,1,1,0,1,1,1,2,0,0,0
8,24,1,14,10,5,7,3,3,2,8,...,2,2,1,4,5,2,3,1,4,2
9,25,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [88]:
def get_player_ppa_two(row):
    total_points = 2 * row['Two_FGM']
    total_attempts = row['Two_FGA']
    return total_points / total_attempts

def get_player_ppa_three(row):
    total_points = 3 * row['Three_FGM']
    total_attempts = row['Three_FGA']
    return total_points / total_attempts

def get_player_total_ppa(row):
    total_points = 2 * row['Two_FGM'] + 3 * row['Three_FGM']
    total_attempts = row['Total_FGA']
    return total_points / total_attempts

def offensive_player_efficiency(row):
    num = row['Total_FGM'] + row['Assists']
    denom = row['Total_FGA'] - row['Offensive_Rebounds'] + row['Assists'] + row['Turnovers']
    return num / denom

def efficient_player_offense(row):
    inner = .76 * row['Assists'] + row['Points_Scored']
    return inner * row['Offensive_Efficiency']

def effective_player_fgp(row):
    num = row['Total_FGM'] + (.5 * row['Three_FGM'])
    return num / row['Total_FGA']

def first_half_percent(row):
    num = row['First_Half_FGM']
    denom = row['First_Half_FGA']
    if denom > 0:
        return num / row['First_Half_FGA']
    else: 
        return 0

def second_half_percent(row):
    num = row['Second_Half_FGM']
    denom = row['Second_Half_FGA'] 
    if denom > 0:
        return num / row['Second_Half_FGA']
    else: 
        return 0

def total_percent(row):
    num = row['Total_FGM']
    denom = row['Total_FGA']
    if denom> 0:
        return num / row['Total_FGA']
    else:
        return 0




In [91]:
player_raw_data['Two_PPA'] = player_raw_data.apply(get_player_ppa_two, axis = 1)
player_raw_data['Three_PPA'] = player_raw_data.apply(get_player_ppa_three, axis = 1)
player_raw_data['Total_PPA'] = player_raw_data.apply(get_player_total_ppa, axis = 1)
player_raw_data['Offensive_Efficiency'] = player_raw_data.apply(offensive_player_efficiency, axis = 1)
player_raw_data['Raw_EOP'] = player_raw_data.apply(efficient_player_offense, axis = 1)
player_raw_data['Effective_FG_Percent'] = player_raw_data.apply(effective_player_fgp, axis = 1)
player_raw_data['First_Half_FG_Percent'] = player_raw_data.apply(first_half_percent, axis = 1)
player_raw_data['Second_Half_FG_Percent'] = player_raw_data.apply(second_half_percent, axis = 1)
player_raw_data['Total_FG_Percent'] = player_raw_data.apply(total_percent, axis = 1)
player_raw_data = player_raw_data.fillna(0)
player_raw_data.to_csv('Final_Player_Data.csv', index = False)

  return total_points / total_attempts
  return total_points / total_attempts
  return total_points / total_attempts
  return num / denom
  return num / row['Total_FGA']


In [10]:
lower = 3
upper = 23
prime = []
for numbers in range(lower, upper):
    if numbers > 1:
        for i in range(2, numbers):
            if (numbers % i) == 0:
                break 
        else:
            prime.append(numbers)

prime


[3, 5, 7, 11, 13, 17, 19]