In [1]:
import pandas as pd
import numpy as np
import requests
import time
import re
import glob
import os
from bs4 import BeautifulSoup

pd.options.display.max_columns = 999

In [2]:
players = [
    '4900668/cole-anderson',
    '4900669/ariel-bland',
    '4702594/gage-gomez',
    #'4900670/henry-hartwell',
    #'4592899/zach-harvey',
    '4397646/robinson-idehen',
    '4431850/jakov-kukic',
    '4900671/ajay-mitchell',
    '4397643/jay-nagle',
    '4397104/miles-norris',
    '4900672/david-pickles',
    '4431795/josh-pierre-louis',
    '4397676/ajare-sanni',
    '4900673/max-sheldon',
    '4397644/amadou-sow',
    '4397642/sekou-toure',
    '4397521/calvin-wishart'
]

In [3]:
for player in players:
    url = f'https://www.espn.com/mens-college-basketball/player/gamelog/_/id/{player}'
    res = requests.get(url)
    with open(f'./Data/espn_player_stats/{player[7:]}.xls', 'wb') as f:
        f.write(res.content)
    df = pd.read_html(f'./Data/espn_player_stats/{player[7:]}.xls')
    stats = pd.DataFrame(df[0]) 
    stats['Player'] = player[8:]
    
    stats = stats[:-1] #drops last row which is an averages row.         
    stats.to_csv(f'./Data/espn_player_stats/{player[7:]}.csv')
    
    time.sleep(3)

In [4]:
def change_col_types(df):
    numcols_to_change = df.columns
    for col in numcols_to_change:
        try:
            df[col] = df[col].astype(float)
        except:
            continue

In [5]:
path = 'Data/espn_player_stats' # use your path
files = glob.glob(os.path.join(path, "*.csv"))

lst = []
lst2 = []
for file in files:
    df = pd.read_csv(file, index_col=0)
    df = df[df["Date"].str.contains('Hercules') == False]
    df = df[df["Date"].str.contains('Skyline') == False]
    change_col_types(df)
    lst.append(df)
    
    total_df = df.groupby('Player').sum()
    lst2.append(total_df)
    
all_games = pd.concat(lst, axis=0, ignore_index=True)
all_games = all_games[['Player','Date', 'OPP', 'Result', 'MIN', 'FG', 'FG%', '3PT', '3P%', 'FT', 'FT%',
       'REB', 'AST', 'BLK', 'STL', 'PF', 'TO', 'PTS']]
all_games['rebounds/min'] = all_games['REB'] / all_games['MIN']
all_games['assists/min'] = all_games['AST'] / all_games['MIN']
all_games['blocks/min'] = all_games['BLK'] / all_games['MIN']
all_games['stls/min'] = all_games['STL'] / all_games['MIN']
all_games['fouls/min'] = all_games['PF'] / all_games['MIN']
all_games['points/min'] = all_games['PTS'] / all_games['MIN']
all_games['turnovers/min'] = all_games['TO'] / all_games['MIN']
all_games['rebounds/min'] = all_games['rebounds/min'].apply(lambda x: round(x, 2))
all_games['assists/min'] = all_games['assists/min'].apply(lambda x: round(x, 2))
all_games['stls/min'] = all_games['stls/min'].apply(lambda x: round(x, 2))
all_games['fouls/min'] = all_games['fouls/min'].apply(lambda x: round(x, 2))
all_games['points/min'] = all_games['points/min'].apply(lambda x: round(x, 2))
all_games['turnovers/min'] = all_games['turnovers/min'].apply(lambda x: round(x,2))
all_games.to_csv('./Data/player_all_games.csv', index = False)

player_totals = pd.concat(lst2, ignore_index=True)
player_totals = player_totals.drop(columns = ['FG%', '3P%', 'FT%'])
player_totals['rebounds/min'] = player_totals['REB'] / all_games['MIN']
player_totals['assists/min'] = player_totals['AST'] / all_games['MIN']
player_totals['blocks/min'] = player_totals['BLK'] / all_games['MIN']
player_totals['stls/min'] = player_totals['STL'] / all_games['MIN']
player_totals['fouls/min'] = player_totals['PF'] / all_games['MIN']
player_totals['points/min'] = player_totals['PTS'] / all_games['MIN']
player_totals['turnovers/min'] = player_totals['TO'] / all_games['MIN']
player_totals['rebounds/min'] = player_totals['rebounds/min'].apply(lambda x: round(x, 2))
player_totals['assists/min'] = player_totals['assists/min'].apply(lambda x: round(x, 2))
player_totals['stls/min'] = player_totals['stls/min'].apply(lambda x: round(x, 2))
player_totals['fouls/min'] = player_totals['fouls/min'].apply(lambda x: round(x, 2))
player_totals['points/min'] = player_totals['points/min'].apply(lambda x: round(x, 2))
player_totals['turnovers/min'] = player_totals['turnovers/min'].apply(lambda x: round(x,2))
player_totals.to_csv('./Data/player_totals.csv')

In [6]:
player_totals

Unnamed: 0,MIN,REB,AST,BLK,STL,PF,TO,PTS,rebounds/min,assists/min,blocks/min,stls/min,fouls/min,points/min,turnovers/min
0,583.0,59.0,64.0,0.0,23.0,41.0,31.0,224.0,1.59,1.73,0.0,0.62,1.11,6.05,0.84
1,866.0,60.0,101.0,6.0,22.0,79.0,48.0,312.0,2.22,3.74,0.222222,0.81,2.93,11.56,1.78
2,832.0,235.0,20.0,23.0,21.0,69.0,55.0,438.0,9.79,0.83,0.958333,0.88,2.88,18.25,2.29
3,38.0,9.0,2.0,2.0,0.0,4.0,2.0,10.0,0.45,0.1,0.1,0.0,0.2,0.5,0.1
4,714.0,75.0,72.0,4.0,30.0,60.0,42.0,182.0,2.78,2.67,0.148148,1.11,2.22,6.74,1.56
5,283.0,19.0,9.0,0.0,6.0,22.0,6.0,111.0,2.71,1.29,0.0,0.86,3.14,15.86,0.86
6,3.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.04,0.09,0.04
7,11.0,3.0,2.0,0.0,0.0,0.0,0.0,3.0,0.11,0.07,0.0,0.0,0.0,0.11,0.0
8,36.0,11.0,5.0,0.0,1.0,3.0,6.0,22.0,0.3,0.14,0.0,0.03,0.08,0.59,0.16
9,164.0,23.0,19.0,2.0,4.0,7.0,3.0,26.0,0.66,0.54,0.057143,0.11,0.2,0.74,0.09
