# NBA recommendation engine

In [145]:
import pandas as pd
import numpy as np
import seaborn as sns
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors

In [33]:
from nba_api.stats.static import players
from nba_api.stats.endpoints import commonplayerinfo

## List of all active players

In [2]:
# get_players returns a list of dictionaries, each representing a player.
nba_players = pd.DataFrame(players.get_active_players())
print(f'Number of players fetched: {len(nba_players)}')

Number of players fetched: 519


In [3]:
nba_players

Unnamed: 0,id,full_name,first_name,last_name,is_active
0,203500,Steven Adams,Steven,Adams,True
1,1628389,Bam Adebayo,Bam,Adebayo,True
2,200746,LaMarcus Aldridge,LaMarcus,Aldridge,True
3,1629734,Kyle Alexander,Kyle,Alexander,True
4,1629638,Nickeil Alexander-Walker,Nickeil,Alexander-Walker,True
...,...,...,...,...,...
514,201152,Thaddeus Young,Thaddeus,Young,True
515,1629027,Trae Young,Trae,Young,True
516,203469,Cody Zeller,Cody,Zeller,True
517,1627790,Ante Zizic,Ante,Zizic,True


## Preparation of dataset (time-consuming, only executed once)

In [32]:
from nba_api.stats.endpoints import playercareerstats
import time

stats = list()

for player in tqdm(nba_players['id']):
    time.sleep(.600)
    call_career = playercareerstats.PlayerCareerStats(player_id=player) 
    # how to select data more efficiently, i.e. only last season?
    stats.append(call_career.get_data_frames()[0])

100%|██████████| 519/519 [20:12<00:00,  2.34s/it]


In [91]:
stats_df = pd.concat(stats)
stats_df.to_csv('playercareerstats.csv', index = False)

  3%|▎         | 15/592 [11:26<7:20:22, 45.79s/it]


## Loading dataset

In [92]:
playercareerstats = pd.read_csv('playercareerstats.csv')

In [98]:
stats_lastSeason = playercareerstats[playercareerstats['SEASON_ID'] == '2020-21'].reset_index().drop(columns=['index'])

In [99]:
stats_lastSeason

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,203500,2020-21,0,1610612740,NOP,27.0,58,58,1605.0,189,...,0.444,213,301,514,111,54,38,78,113,438
1,1628389,2020-21,0,1610612748,MIA,23.0,64,64,2143.0,456,...,0.799,142,431,573,346,75,66,169,145,1197
2,200746,2020-21,0,1610612759,SAS,35.0,21,18,544.0,115,...,0.838,17,77,94,36,8,18,20,36,288
3,200746,2020-21,0,1610612751,BKN,35.0,5,5,130.0,25,...,1.000,2,22,24,13,3,11,7,11,64
4,200746,2020-21,0,0,TOT,35.0,26,23,674.0,140,...,0.872,19,99,118,49,11,29,27,47,352
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
587,1626153,2020-21,0,0,TOT,29.0,63,39,1748.0,240,...,0.802,65,204,269,278,101,30,83,75,645
588,201152,2020-21,0,1610612741,CHI,33.0,68,23,1652.0,370,...,0.628,168,255,423,291,74,40,137,152,823
589,1629027,2020-21,0,1610612737,ATL,22.0,63,63,2125.0,487,...,0.886,38,207,245,594,53,12,261,111,1594
590,203469,2020-21,0,1610612766,CHA,28.0,48,21,1005.0,181,...,0.714,119,209,328,86,27,17,51,121,451


In [79]:
stats_lastSeason['PLAYER_ID'].nunique() # -> where duplicates?

435

In [100]:
positions = []
for i in tqdm(range(stats_lastSeason.shape[0])):
    time.sleep(.600)
    positions.append(commonplayerinfo.CommonPlayerInfo(stats_lastSeason['PLAYER_ID'][i]).get_data_frames()[0]['POSITION'][0])
print(len(positions))

100%|██████████| 592/592 [13:22<00:00,  1.36s/it]

592





In [106]:
def adj_position(pos):
    if pos[0] in ['C', 'F', 'G']:
        return pos[0]
    else:
        print('Please enter a valid position.')
        pass

In [109]:
from collections import Counter

Counter([adj_position(position) for position in positions])

Counter({'C': 88, 'G': 266, 'F': 238})

In [110]:
stats_lastSeason['POSITION'] = [adj_position(position) for position in positions]

In [112]:
#stats_lastSeason.to_csv('stats_lastSeason.csv', index = False)
stats_lS = pd.read_csv('stats_lastSeason.csv')

## Class definition

In [251]:
class RecommendationEngine:
    def __init__(self, replacing_player):
        self.player_name = replacing_player
        try:
            self.player_id = nba_players[nba_players["full_name"] == replacing_player]['id'][0]
        except KeyError:
            print("Please provide the full name of a valid active NBA player.")
        self.position = adj_position(commonplayerinfo.CommonPlayerInfo(self.player_id).get_data_frames()[0]['POSITION'][0])
            
    def recommend(self):
        stats = stats_lastSeason[(stats_lastSeason['POSITION'] == self.position)].drop(columns=['POSITION'])
        stats_repl_player = stats[stats['PLAYER_ID'] == self.player_id].iloc[:,5:]
        stats_num = stats.iloc[:,5:]
        
        model = NearestNeighbors().fit(stats_num)
        res_players = model.kneighbors(stats_repl_player, return_distance = False)[0]
        res_player_0, res_player_1 = res_players[0], res_players[1]
        if stats.reset_index()['PLAYER_ID'][res_player_0] == self.player_id:
            print("Clustering worked.")
            
        #print(stats['PLAYER_ID'].head(10))
            
        res_player_id = stats.reset_index()['PLAYER_ID'][res_player_1]
        
        rec_player = nba_players[nba_players['id'] == res_player_id]['full_name'].iloc[0]
        
        print('Input Player:', self.player_name,'\nRecommended Player:', rec_player)
        return rec_player

## Exemplary execution

In [262]:
r1 = RecommendationEngine("Steven Adams")
#print(r1.position)
rec_player = r1.recommend()

Clustering worked.
Input Player: Steven Adams 
Recommended Player: Khem Birch
