# Collect Player Stats

Get stats from individual players across their careers.

Collecting the data player by player makes it easier to gather a fair amount of data.

# Preliminaries

In [1]:
# Black code formatter
%load_ext lab_black

## Imports

In [2]:
import numpy as np
import pandas as pd
import os

from time import sleep

In [3]:
from pathlib import Path

from nba_api.stats.static import players, teams

from nba_anomaly_generator.data import (
    get_team_roster_dataframe,
    get_plyr_stats_dataframe,
)

In [4]:
# Root data folder: "data" inside the grandparent of the current working
# directory (resolved to an absolute path).
DATA_DIR = Path().resolve().parent.parent / "data"

# Sub-folders of DATA_DIR. TEAM_DIR holds the roster CSV files written below;
# PLYR_DIR is presumably for per-player stat files (not used in this part of
# the notebook -- confirm).
PLYR_DIR = DATA_DIR / "players"
TEAM_DIR = DATA_DIR / "rosters"

# Implementation


In [5]:
def _fn_roster(season_id=2018, **team_kwargs):
    """Build the file name of a roster CSV: ``<abbreviation>-<season_id>.csv``.

    The keyword arguments in ``team_kwargs`` are forwarded to ``get_team``;
    the ``"abbreviation"`` of the team it returns is used in the name.
    """
    team = get_team(**team_kwargs)
    return "{abbr}-{season}.csv".format(
        abbr=team["abbreviation"], season=season_id
    )


def _fn_player(**player_kwargs):
    """Build the file name of a player CSV: ``<player id>.csv``.

    The keyword arguments in ``player_kwargs`` are forwarded to
    ``get_player``; the ``"id"`` of the player it returns is used in the name.
    """
    player = get_player(**player_kwargs)
    return "{pid}.csv".format(pid=player["id"])


def get_team(**kwargs):
    """Return the first team record that matches every given field.

    Each keyword argument names a key of the team dictionaries returned by
    ``teams.get_teams()`` (e.g. ``abbreviation="LAL"``) and the value that
    key must have. All given criteria must match; previously only the first
    keyword argument was honoured and the rest were silently ignored.

    Returns:
        The first matching team dictionary, or ``None`` when called without
        any criteria (kept for backward compatibility).

    Raises:
        IndexError: if no team matches the criteria.
        KeyError: if a criterion names a key the team records do not have.
    """
    if not kwargs:
        # The original implementation fell through and returned None here.
        return None

    for team in teams.get_teams():
        if all(team[key] == value for key, value in kwargs.items()):
            return team

    # Same exception type as the original `[...][0]` on an empty list, but
    # with a message that says what was searched for.
    raise IndexError("No team matches {}".format(kwargs))


def get_player(**kwargs):
    """Return the first player record that matches every given field.

    Each keyword argument names a key of the player dictionaries returned by
    ``players.get_players()`` (e.g. ``full_name="LeBron James"``) and the
    value that key must have. All given criteria must match; previously only
    the first keyword argument was honoured and the rest were silently
    ignored.

    Returns:
        The first matching player dictionary, or ``None`` when called without
        any criteria (kept for backward compatibility).

    Raises:
        IndexError: if no player matches the criteria.
        KeyError: if a criterion names a key the player records do not have.
    """
    if not kwargs:
        # The original implementation fell through and returned None here.
        return None

    for player in players.get_players():
        if all(player[key] == value for key, value in kwargs.items()):
            return player

    # Same exception type as the original `[...][0]` on an empty list, but
    # with a message that says what was searched for.
    raise IndexError("No player matches {}".format(kwargs))

In [6]:
get_team(abbreviation="LAL")

{'id': 1610612747,
 'full_name': 'Los Angeles Lakers',
 'abbreviation': 'LAL',
 'nickname': 'Lakers',
 'city': 'Los Angeles',
 'state': 'California',
 'year_founded': 1948}

In [7]:
get_player(full_name="LeBron James")

{'id': 2544,
 'full_name': 'LeBron James',
 'first_name': 'LeBron',
 'last_name': 'James',
 'is_active': True}

# Collect Rosters

Download the roster of a team for each season and save it as a CSV file in `TEAM_DIR`. Seasons whose file already exists are skipped.

In [8]:
# Collect Rosters
#
# Download the roster of one team for every season in the range below and
# save each season as a CSV file in TEAM_DIR. Seasons whose file already
# exists are skipped, so the cell can be re-run after an interruption and
# only the missing seasons are requested again.

full_name = "Chicago Bulls"

# DataFrame.to_csv does not create missing directories, so make sure the
# target folder exists before the first write.
TEAM_DIR.mkdir(parents=True, exist_ok=True)

for season_id in range(1980, 2019):  # seasons 1980 through 2018
    fn = TEAM_DIR / _fn_roster(season_id=season_id, full_name=full_name)
    if fn.exists():
        print("{} already exists.".format(fn))
        continue

    # NOTE(review): the team's full name is passed as `team_id`; confirm that
    # get_team_roster_dataframe accepts a name here rather than a numeric id.
    df = get_team_roster_dataframe(
        team_id=full_name, season_id=season_id, timeout_s=60
    )

    df.to_csv(fn)
    print("Collection of {} done".format(fn))
    sleep(3)  # pause before the next download

ReadTimeout: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=10)