In [5]:
import datetime
import warnings
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
warnings.filterwarnings('ignore')

In [13]:
# Scrape the ESPN game-log page for every season from 2010 up to the current
# calendar year and collect all game rows into `total_stats`.
#
# For each season page:
#   1. download and parse the HTML,
#   2. read every <table> with pandas and keep the 17-column game tables,
#   3. tag rows with the season label / season type and normalise `OPP`,
#   4. drop non-game rows, then split `Result` into W/L plus the two scores.

# Game-log URL from the original notebook; `{}` is the season's ending year.
ESPN_GAMELOG_URL = "https://www.espn.com/nba/player/gamelog/_/id/3975/type/nba/year/{}"
FIRST_SEASON_END_YEAR = 2010
GAME_TABLE_WIDTH = 17          # column count that identifies a per-game table
REQUEST_TIMEOUT_S = 30         # never hang forever on a stalled connection

# ESPN opponent abbreviations rewritten to the three-letter codes used here.
OPP_ABBREVIATIONS = {
    'SA': 'SAS',
    'NO': 'NOP',
    'NY': 'NYK',
    'NJ': 'BKN',
    'UTAH': 'UTA',
}

current_year = datetime.datetime.today().year
season_frames = []  # one cleaned frame per season, concatenated once at the end

# A distinct loop name is used so the range bound (`current_year`) is not shadowed.
for season_end_year in range(FIRST_SEASON_END_YEAR, current_year + 1):
    response = requests.get(
        ESPN_GAMELOG_URL.format(season_end_year), timeout=REQUEST_TIMEOUT_S)
    # Fail loudly on an HTTP error instead of parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table')
    if not tables:
        # pd.read_html raises ValueError when there is nothing to parse.
        continue

    # One label per section title: 'Regular' for the regular season, otherwise
    # the title text with 'season' removed (later cells filter on 'Post'/'Pre').
    season_labels = []
    for title in soup.find_all('div', attrs={'class': 'Table__Title'}):
        title_text = title.get_text()
        if 'Regular Season' in title_text:
            season_labels.append('Regular')
        else:
            season_labels.append(title_text.replace('season', ''))

    # Wrap the markup in StringIO: passing a literal HTML string to read_html
    # is deprecated in recent pandas releases.
    tables_html = ''.join(str(table) for table in tables)
    parsed_tables = pd.read_html(StringIO(tables_html))

    game_tables = []
    label_idx = 0
    for parsed in parsed_tables:
        if parsed.shape[1] != GAME_TABLE_WIDTH:
            # NOTE(review): as in the original logic, every non-game table is
            # assumed to mark the start of the next season section - confirm
            # against the live page layout.
            label_idx += 1
            continue

        games = parsed.copy()

        # Season columns
        games['Season_year'] = '{}-{}'.format(season_end_year - 1, season_end_year)
        games['Season_div'] = season_labels[label_idx]

        # Strip the home/away markers, then normalise team abbreviations.
        games['OPP'] = (
            games['OPP']
            .astype(str)
            .str.replace('@', '', regex=False)
            .str.replace('vs', '', regex=False)
            .replace(OPP_ABBREVIATIONS)
        )
        game_tables.append(games)

    if not game_tables:
        # No game rows on this page (e.g. a season with no games yet); the
        # original code raised KeyError on the empty frame here.
        continue

    season_games = pd.concat(game_tables, ignore_index=True)

    # Unnecessary rows are dropped: real games have an opponent code of at
    # most three characters, anything longer is a non-game row.
    season_games = season_games[season_games['OPP'].str.len() <= 3].copy()

    # Result is divided into a Result column (W/L) and a temporary Score
    # column. Any overtime suffix (OT, 2OT, 3OT, 4OT, ...) is removed first.
    result_text = (
        season_games['Result']
        .astype(str)
        .str.replace(r'\s*\d*OT', '', regex=True)
    )
    season_games['Result'] = result_text.str[0]
    season_games['Score'] = result_text.str[1:]

    # Score is divided into T Score and O Score. T Score is the score of the
    # player's own team (Stephen Curry's, e.g. the Golden State Warriors) and
    # O Score is the opponent's. The first number is used as T Score on a win
    # and the second on a loss. Score itself is then dropped.
    score_parts = season_games['Score'].str.split('-')
    first_score = score_parts.str[0]
    second_score = score_parts.str[1]
    team_won = season_games['Result'] == 'W'
    season_games['T Score'] = first_score.where(team_won, second_score)
    season_games['O Score'] = second_score.where(team_won, first_score)
    season_games = season_games.drop(columns=['Score'])

    season_frames.append(season_games)

# Concatenate once instead of growing the frame inside the loop.
total_stats = pd.concat(season_frames) if season_frames else pd.DataFrame()

In [14]:
# Each shooting column holds a "made-attempted" string (e.g. "13-25").
# Break every one of them into separate made / attempted columns.
shooting_splits = (
    ('FG', 'FGM', 'FGA'),
    ('3PT', '3PTM', '3PTA'),
    ('FT', 'FTM', 'FTA'),
)
for combined_col, made_col, attempted_col in shooting_splits:
    pieces = [str(value).split('-') for value in total_stats[combined_col]]
    total_stats[made_col] = [piece[0] for piece in pieces]
    total_stats[attempted_col] = [piece[1] for piece in pieces]

# Final column layout: season info, game info, then the box-score stats with
# each combined shooting column followed by its made / attempted / % columns.
ordered_columns = [
    'Season_year', 'Season_div', 'Date', 'OPP', 'Result',
    'T Score', 'O Score', 'MIN',
    'FG', 'FGM', 'FGA', 'FG%',
    '3PT', '3PTM', '3PTA', '3P%',
    'FT', 'FTM', 'FTA', 'FT%',
    'REB', 'AST', 'BLK', 'STL', 'PF', 'TO', 'PTS',
]

# Renumber the rows from 0 and apply the column layout.
total_stats = total_stats.reset_index(drop=True)[ordered_columns]

In [15]:
# Convert the stat columns to numeric dtypes.
#
# 'MIN' used to appear in both lists, so it was cast to int and then straight
# to float. Only the float cast determines the final dtype, and the extra int
# cast would truncate or fail on fractional minutes, so 'MIN' is float-only.
int_col_list = ['T Score', 'O Score', 'FGM', 'FGA', '3PTM', '3PTA', 'FTM', 'FTA', 'REB', 'AST', 'BLK', 'STL', 'PF', 'TO', 'PTS']
float_col_list = ['MIN', 'FG%', '3P%', 'FT%']

# Build one column -> dtype mapping and cast everything in a single call.
column_dtypes = {col: int for col in int_col_list}
column_dtypes.update({col: float for col in float_col_list})

total_stats = total_stats.astype(column_dtypes)

In [16]:
# Preview the first rows of the cleaned table (DataFrame.head() defaults to five rows).
total_stats.head()

Unnamed: 0,Season_year,Season_div,Date,OPP,Result,T Score,O Score,MIN,FG,FGM,...,FTM,FTA,FT%,REB,AST,BLK,STL,PF,TO,PTS
0,2009-2010,Regular,Wed 4/14,POR,W,122,116,48.0,13-25,13,...,12,12,100.0,9,8,1,2,0,2,42
1,2009-2010,Regular,Tue 4/13,UTA,L,94,103,41.0,5-15,5,...,6,6,100.0,5,6,0,2,4,2,17
2,2009-2010,Regular,Sun 4/11,OKC,W,120,117,35.0,9-16,9,...,4,6,66.7,7,7,0,1,2,5,25
3,2009-2010,Regular,Sat 4/10,LAC,L,104,107,41.0,10-19,10,...,4,4,100.0,9,4,0,3,5,3,29
4,2009-2010,Regular,Wed 4/7,MIN,W,116,107,44.0,12-22,12,...,0,0,0.0,8,14,0,7,4,4,27


In [21]:
# Split the full table by season type; each subset gets a fresh 0-based index.
season_type = total_stats['Season_div']
Regular_stats = total_stats[season_type == 'Regular'].reset_index(drop=True)
Post_stats = total_stats[season_type == 'Post'].reset_index(drop=True)
Pre_stats = total_stats[season_type == 'Pre'].reset_index(drop=True)

# Write the full table and the three subsets to CSV, without the index column.
csv_exports = (
    ('./Stephen Curry Stats.csv', total_stats),
    ('./Stephen Curry Regularseason Stats.csv', Regular_stats),
    ('./Stephen Curry Postseason Stats.csv', Post_stats),
    ('./Stephen Curry Preseason Stats.csv', Pre_stats),
)
for csv_path, stats_frame in csv_exports:
    stats_frame.to_csv(csv_path, index=False)