## MAKING FEATURE FROM PLAYER NAME

Candidate models to try: gradient boosting, random forest, maybe deep learning, or a linear model with lots of features.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta

In [2]:
from nba_api.stats.static import teams
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelogs
from nba_api.stats.endpoints import playerdashboardbyopponent

Get player ID and info based on their name

In [3]:
name = 'Karl-Anthony Towns'
# Look the player up by full name; the result is indexed below as a list of
# dict-like records. (The original note says players can also be searched by
# player number, first name, last name, etc.)
player_info = players.find_players_by_full_name(name)
if not player_info:
    # Fail with a message that names the player instead of a bare IndexError on [0].
    raise ValueError(f"No player found matching {name!r}")
# Use the first match's id for every request that follows.
player_id = player_info[0]['id']

Get the base stats (`PTS`, `REB`, etc.) and the advanced stats (`AST_PCT`, etc.) and join them into one dataframe. Also drop every column with `RANK` in its name; we don't care about rankings.

In [4]:
# Request the 2021-22 game logs twice: once with the default measure type and
# once with the 'Advanced' measure type. Only the first returned frame is used.
base_logs = playergamelogs.PlayerGameLogs(
    player_id_nullable=player_id,
    season_nullable='2021-22',
).get_data_frames()[0]
adv_logs = playergamelogs.PlayerGameLogs(
    player_id_nullable=player_id,
    season_nullable='2021-22',
    measure_type_player_game_logs_nullable='Advanced',
).get_data_frames()[0]

# Join the two logs on player and game (pd.merge defaults to an inner join).
# Columns present in both frames keep their plain name from base_logs, while
# the adv_logs copy is suffixed with '_RANK' so the filter below removes it.
merge_logs = pd.merge(
    base_logs,
    adv_logs,
    on=['PLAYER_ID', 'GAME_ID'],
    suffixes=['', '_RANK'],
)
# Discard every column whose name contains 'RANK' (rankings and the suffixed duplicates).
rank_columns = [col for col in merge_logs.columns if 'RANK' in col]
merge_logs = merge_logs.drop(columns=rank_columns)


Now, get the data from the opposing team, based on *previous* matchups.

In [5]:
# Split MATCHUP on ' vs. ' or ' @ ' and keep the second piece, which is
# stored as the opponent abbreviation.
matchup_parts = merge_logs['MATCHUP'].str.split(' vs. | @ ', expand=True)
merge_logs['OPPONENT'] = matchup_parts[1]

In [6]:
#get the opponent ID from the teams API
def GetTeamID(s):
    """Return the team id for the team abbreviation ``s``.

    The abbreviation is looked up with ``teams.find_team_by_abbreviation`` and
    the ``'id'`` entry of the matching record is returned.

    Raises:
        ValueError: if the lookup returns ``None`` (no team matches ``s``).
            This replaces the opaque ``TypeError`` that indexing ``None``
            would otherwise produce, and names the offending abbreviation.
    """
    team = teams.find_team_by_abbreviation(s)
    if team is None:
        raise ValueError(f"No NBA team found for abbreviation {s!r}")
    return team['id']
# Translate each opponent abbreviation into its team id, one row at a time.
opponent_ids = merge_logs['OPPONENT'].apply(GetTeamID)
merge_logs['OPPONENT_ID'] = opponent_ids

In [7]:
def GetPriorTeamDefense(row):
    """Fetch the player's per-game dashboard against one opponent, up to the day before a game.

    Args:
        row: a game-log record providing ``'PLAYER_ID'``, ``'OPPONENT_ID'`` and
            ``'GAME_DATE'`` (a string formatted as ``'%Y-%m-%dT%H:%M:%S'``).

    Returns:
        The first data frame returned by ``PlayerDashboardByOpponent`` for that
        player and opponent, with every column whose name contains ``'RANK'``
        removed.
    """
    who, versus, played_on = row['PLAYER_ID'], row['OPPONENT_ID'], row['GAME_DATE']

    stamp_format = '%Y-%m-%dT%H:%M:%S'
    # Cut off at the previous day so only PRIOR matchups are counted.
    cutoff = datetime.strptime(played_on, stamp_format) - timedelta(days=1)

    # NOTE(review): no season argument is passed, so the endpoint's own default
    # season applies. Confirm it matches the season of the game log.
    dashboard = playerdashboardbyopponent.PlayerDashboardByOpponent(
        player_id=who,
        opponent_team_id=versus,
        date_to_nullable=cutoff.strftime(stamp_format),
        per_mode_detailed='PerGame',
    )
    overall = dashboard.get_data_frames()[0]
    rank_columns = [col for col in overall.columns if 'RANK' in col]
    return overall.drop(columns=rank_columns)

In [8]:
# Trial run on the first three games only; each row triggers one
# GetPriorTeamDefense call, and each call returns a whole data frame.
first_games = merge_logs.iloc[:3]
B = first_games.apply(GetPriorTeamDefense, axis=1)
B

0      GROUP_SET GROUP_VALUE  GP  W  L  W_PCT   MIN...
1      GROUP_SET GROUP_VALUE  GP  W  L  W_PCT   MIN...
2      GROUP_SET GROUP_VALUE  GP  W  L  W_PCT   MIN...
dtype: object