    The data in the box scores and anthropometrics files both went through an initial cleaning in MS Excel and were then uploaded to the MySQL server using MySQL Workbench. The anthropometrics file is based on NBA Draft Combine information for the years 2000 to 2023. No combine measurements were available for players who did not attend the combine. Missing height and weight values were collected from the internet and imputed using MS Excel.

In [1]:
import numpy as np
import scipy
from scipy import stats
import sklearn
import pandas as pd
import seaborn as sns

In [5]:
# Read the 2023-2024 NBA box-score CSV into a DataFrame, then echo it so the
# notebook renders a preview of the loaded rows.
box_scores = pd.read_csv(filepath_or_buffer='NBA-BoxScores-2023-2024.csv')
box_scores

Unnamed: 0,500,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,NICKNAME,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS
0,0,22300061,1610612747,LAL,Los Angeles,1627752,Taurean Prince,Taurean,29.883333,6,...,1,2,3,1,0,1,1,0,18,-14
1,1,22300061,1610612747,LAL,Los Angeles,2544,LeBron James,LeBron,29.016667,10,...,1,7,8,5,1,0,0,1,21,7
2,2,22300061,1610612747,LAL,Los Angeles,203076,Anthony Davis,Anthony,34.150000,6,...,1,7,8,4,0,2,2,3,17,-17
3,3,22300061,1610612747,LAL,Los Angeles,1630559,Austin Reaves,Austin,31.333333,4,...,4,4,8,4,2,0,2,2,14,-14
4,4,22300061,1610612747,LAL,Los Angeles,1626156,D'Angelo Russell,D'Angelo,36.183333,4,...,0,4,4,7,1,0,3,3,11,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13183,20,22300581,1610612762,UTA,Utah,203903,Jordan Clarkson,Jordan,34.950000,7,...,2,0,2,5,0,0,1,1,18,-3
13184,21,22300581,1610612762,UTA,Utah,1641718,Keyonte George,Keyonte,13.916667,1,...,0,1,1,2,0,0,2,0,3,-9
13185,22,22300581,1610612762,UTA,Utah,1631117,Walker Kessler,Walker,24.183333,3,...,5,6,11,1,1,3,1,1,7,15
13186,23,22300581,1610612762,UTA,Utah,1630534,Ochai Agbaji,Ochai,14.300000,1,...,0,1,1,1,2,0,2,1,4,-8


In [11]:
# Order the rows by game, then team, then minutes played (ascending), so each
# team's players within a game sit together with the lowest minutes first.
box_scores_sorted = box_scores.sort_values(['GAME_ID', 'TEAM_ID', 'MIN'])
# DataFrame.drop returns a new frame instead of modifying in place; keep the
# result so NICKNAME is really removed from box_scores_sorted, not just from
# the preview rendered below.
box_scores_sorted = box_scores_sorted.drop(columns=['NICKNAME'])
box_scores_sorted

Unnamed: 0,500,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,MIN,FGM,FGA,...,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS
1601,8,22300001,1610612739,CLE,Cleveland,1629731,Dean Wade,2.766667,0,0,...,0,0,0,0,0,0,0,1,0,-9
1602,9,22300001,1610612739,CLE,Cleveland,202684,Tristan Thompson,2.800000,0,0,...,0,1,1,0,0,0,0,1,0,-8
1595,2,22300001,1610612739,CLE,Cleveland,1628386,Jarrett Allen,21.116667,4,6,...,2,5,7,0,1,2,1,4,10,-1
1599,6,22300001,1610612739,CLE,Cleveland,1630171,Isaac Okoro,23.016667,2,3,...,0,1,1,2,2,0,1,4,5,-19
1598,5,22300001,1610612739,CLE,Cleveland,1627777,Georges Niang,27.133333,4,9,...,0,5,5,1,0,0,0,2,12,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6596,14,22301230,1610612747,LAL,Los Angeles,2544,LeBron James,22.533333,9,12,...,0,5,5,8,0,0,0,1,30,36
6603,21,22301230,1610612747,LAL,Los Angeles,1630559,Austin Reaves,23.200000,5,9,...,0,5,5,7,1,0,2,0,17,21
6598,16,22301230,1610612747,LAL,Los Angeles,1627752,Taurean Prince,23.283333,5,9,...,0,3,3,1,1,0,0,3,15,23
6595,13,22301230,1610612747,LAL,Los Angeles,1629629,Cam Reddish,23.433333,3,6,...,2,3,5,2,0,0,1,1,9,31


In [None]:
# Read the anthropometric measurements CSV into a DataFrame, then echo it so
# the notebook renders a preview of the loaded rows.
anthropometric = pd.read_csv(filepath_or_buffer='NBA_Anthropometric.csv')
anthropometric

Function to find the potential matchups for a given player.
Pseudocode:
`box_scores` and `anthropometrics` are pandas DataFrames.
- Given a player in `box_scores`,
- find their position (guard or forward classification) in `anthropometrics`.
- In the opposition lineup, find the players with the same position classification.
- Because the players in `box_scores` are sorted by minutes played within each game and team, the opponents of interest for a player are ranked as follows: the opponent whose rank in minutes played equals that of the player of interest is ranked first, the opponent one rank higher in minutes played is ranked second, and the opponent one rank lower in minutes played is ranked third.
- In case the player is ranked highest by minutes, the two opponents ranked just below the equivalent opponent are taken as matchup ranks 2 and 3.
- If the player has the lowest minutes played, the second-to-last and third-to-last opponents are taken as matchup ranks 2 and 3.

In [5]:
def _pick_column(frame, candidates):
    """Return the first name in ``candidates`` that is a column of ``frame``."""
    for column in candidates:
        if column in frame.columns:
            return column
    raise KeyError(f"expected one of the columns {list(candidates)}")


def Opponents_of_interest(player_name, box_scores, anthropometrics, game_id=None):
    """Return up to three opposing players most likely to match up with a player.

    Within one game, the player and every opponent sharing the player's
    position are ranked by minutes played (most minutes first). The player's
    rank is taken among same-position teammates, and the opponent holding the
    same rank in the opposing lineup is the "equivalent" opponent.

    Args:
        player_name: Name of the player of interest.
        box_scores: Per-game player rows. Player, game, team and minutes are
            read from the columns ``player``/``game_id``/``team``/``minutes``
            or, when those are absent, from
            ``PLAYER_NAME``/``GAME_ID``/``TEAM_ID``/``MIN``.
        anthropometrics: One row per player with a ``position`` column and a
            ``player_name`` (or ``player``) column.
        game_id: Game to analyse. When ``None``, the first game in
            ``box_scores`` that lists the player is used.

    Returns:
        A list of opponent names ordered by matchup priority: the equivalent
        opponent, then the opponent one rank higher in minutes, then the one
        one rank lower. If the equivalent opponent is the top of the opposing
        lineup, the next two below follow instead; if it is the bottom, the
        two above follow. The list is shorter than three when fewer
        same-position opponents exist, and empty when there are none.

    Raises:
        IndexError: If the player has no position in ``anthropometrics`` or no
            row in ``box_scores`` (for the requested game).
        KeyError: If a required column is missing from either frame.
    """
    player_col = _pick_column(box_scores, ('player', 'PLAYER_NAME'))
    game_col = _pick_column(box_scores, ('game_id', 'GAME_ID'))
    team_col = _pick_column(box_scores, ('team', 'TEAM_ID'))
    minutes_col = _pick_column(box_scores, ('minutes', 'MIN'))
    name_col = _pick_column(anthropometrics, ('player_name', 'player'))
    position_col = _pick_column(anthropometrics, ('position',))

    # Name -> position lookup. Rows without a name or position cannot be
    # classified; for repeated names the first remaining row wins.
    position_by_name = (
        anthropometrics.dropna(subset=[name_col, position_col])
        .drop_duplicates(subset=name_col)
        .set_index(name_col)[position_col]
    )
    if player_name not in position_by_name.index:
        raise IndexError(f"no position found for player {player_name!r}")
    player_position = position_by_name.loc[player_name]

    # Locate the player's row for the requested (or first) game.
    player_rows = box_scores.loc[box_scores[player_col] == player_name]
    if game_id is not None:
        player_rows = player_rows.loc[player_rows[game_col] == game_id]
    if player_rows.empty:
        raise IndexError(f"no box score row found for player {player_name!r}")
    player_row = player_rows.iloc[0]
    game_id = player_row[game_col]
    team = player_row[team_col]

    # Everyone in that game who shares the player's position, most minutes
    # first. Players missing from anthropometrics map to NaN and drop out.
    # NOTE(review): positions are compared for exact equality (e.g. 'C/PF'
    # does not match 'C'); a guard/forward grouping would need a mapping.
    game_rows = box_scores.loc[box_scores[game_col] == game_id]
    same_position = game_rows[player_col].map(position_by_name) == player_position
    ranked = game_rows.loc[same_position].sort_values(
        by=minutes_col, ascending=False, kind='mergesort'
    )

    # The opposition is whoever played in this game for a different team.
    teammates = ranked.loc[ranked[team_col] == team, player_col].tolist()
    opponents = ranked.loc[ranked[team_col] != team, player_col].tolist()
    if not opponents:
        return []

    # Positional rank among same-position teammates, clamped so a deeper
    # bench than the opposition still maps to the opposition's last player.
    last = len(opponents) - 1
    equivalent = min(teammates.index(player_name), last)

    if equivalent == 0:
        order = (0, 1, 2)
    elif equivalent == last:
        order = (last, last - 1, last - 2)
    else:
        order = (equivalent, equivalent - 1, equivalent + 1)

    # Skip ranks that fall outside a short opposing lineup.
    return [opponents[rank] for rank in order if 0 <= rank <= last]

Unnamed: 0,player_name,position,height,height_with_shoes,weight,wingspan,draft_year
0,Tyler Ulis,PG,174.62,177.80,67.68,187.96,2016
1,John Lucas III,PG,175.90,179.07,70.03,182.88,2005
2,Cordell Henry,PG,172.72,175.26,70.31,185.42,2002
3,Russ Smith,PG,181.61,184.78,72.57,191.77,2014
4,Daryl Dorsey,PG,180.98,184.15,72.94,181.61,2005
...,...,...,...,...,...,...,...
1626,Pavel Podkolzin,C,222.25,224.79,137.44,227.96,2003
1627,Zach Edey,C,221.62,224.16,138.98,240.03,2023
1628,DeSagana Diop,C,208.28,210.82,142.43,229.87,2001
1629,James Lang,C/PF,203.84,206.38,143.34,229.24,2003
