In [9]:
from bs4 import BeautifulSoup
from urllib.request import urlopen
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re 
import seaborn as sns
from nba_api.stats.endpoints import teamyearbyyearstats
from nba_api.stats.static import teams
%matplotlib inline

In [2]:
# Scrape per-game player stats from basketball-reference for every season
# from 1990-91 through 2018-19 (range end 2020 is exclusive) and stack the
# cleaned per-season tables into a single DataFrame, `full_player_data`.
season_frames = []  # one cleaned frame per season; concatenated once after the loop
for season_end_year in range(1991, 2020):
    url = 'https://www.basketball-reference.com/leagues/NBA_{}_per_game.html'.format(season_end_year)
    # Use a context manager so the HTTP response is closed once the page is parsed.
    with urlopen(url) as html:
        soup = BeautifulSoup(html, 'html.parser')

    table_rows = soup.findAll('tr')
    # The first <tr> holds the column names; its first <th> is skipped so the
    # remaining names line up with the <td> cells collected below.
    column_headers = [th.getText() for th in table_rows[0].findAll('th')[1:]]

    data_rows = table_rows[1:]
    player_data = [[td.getText() for td in row.findAll('td')] for row in data_rows]
    # Rows without any <td> cells (presumably the header rows the site repeats
    # inside the table -- verify) would otherwise turn into all-zero fake players.
    player_data = [row for row in player_data if row]

    df = pd.DataFrame(player_data, columns=column_headers).fillna(0)
    # Make column names identifier-friendly: 'FG%' -> 'FG_Perc', etc.
    df.columns = df.columns.str.replace('%', '_Perc', regex=False)
    df = df.rename(columns={'PS/G': 'PS_per_game', 'MP': 'MP_per_game'})

    # Everything except the three text columns is numeric; scraped cells are
    # strings, and blanks are coerced to NaN and then zero-filled.
    cols = df.columns.drop(['Player', 'Pos', 'Tm'])
    df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')
    df = df.fillna(0)

    # Label rows with the season, e.g. 1991 -> '1990-91'.
    df.insert(0, 'Season', '{}-{}'.format(season_end_year - 1, str(season_end_year)[-2:]))
    season_frames.append(df)

# Single concat instead of DataFrame.append in the loop (quadratic, and removed in pandas 2.0).
full_player_data = pd.concat(season_frames, ignore_index=True)


In [3]:
# Preview the first five scraped rows as a sanity check on the cleaning step.
full_player_data.head(n=5)

Unnamed: 0,Season,Player,Pos,Age,Tm,G,GS,MP_per_game,FG,FGA,...,FT_Perc,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1990-91,Alaa Abdelnaby,PF,22,POR,43,0,6.7,1.3,2.7,...,0.568,0.6,1.4,2.1,0.3,0.1,0.3,0.5,0.9,3.1
1,1990-91,Mahmoud Abdul-Rauf,PG,21,DEN,67,19,22.5,6.2,15.1,...,0.857,0.5,1.3,1.8,3.1,0.8,0.1,1.6,2.2,14.1
2,1990-91,Mark Acres,C,28,ORL,68,0,19.3,1.6,3.1,...,0.653,2.1,3.2,5.3,0.4,0.4,0.4,0.6,3.2,4.2
3,1990-91,Michael Adams,PG,28,DEN,66,66,35.5,8.5,21.5,...,0.879,0.9,3.0,3.9,10.5,2.2,0.1,3.6,2.5,26.5
4,1990-91,Mark Aguirre,SF,31,DET,78,13,25.7,5.4,11.7,...,0.757,1.7,3.1,4.8,1.8,0.6,0.3,1.6,2.7,14.2


In [None]:
# Persist the combined player table (without the index column) so the scrape
# does not have to be repeated.
full_player_data.to_csv(
    path_or_buf='full_player_data_mvp.csv',
    index=False,
)

In [19]:
# Load the team list from nba_api's static `teams` module; the team-stats cell iterates over it.
nba_teams = teams.get_teams()
# Peek at the 'id' field of the first entry to see what a team id looks like.
nba_teams[0]['id']

1610612737

In [None]:
# Grab team standings info: fetch the regular-season, year-by-year totals for
# every team in `nba_teams` and stack them into `full_team_info`.
team_frames = []  # one frame per team; concatenated once after the loop
for team in nba_teams:
    team_info = teamyearbyyearstats.TeamYearByYearStats(
        league_id='00',
        per_mode_simple='Totals',
        season_type_all_star='Regular Season',
        # Use the current team's id; a fixed nba_teams[0]['id'] would
        # download the first team's history once per team.
        team_id=team['id'],
    ).get_data_frames()[0]
    team_frames.append(team_info)
    print(team)  # progress indicator: shows which team was just fetched

# Single concat instead of DataFrame.append in the loop (removed in pandas 2.0);
# fall back to an empty frame if there were no teams to fetch.
full_team_info = pd.concat(team_frames) if team_frames else pd.DataFrame()
full_team_info.head()

In [None]:
#Merge player stats with team standings