# Harden Stats

* Load a flat file of James Harden's 2018-19 game-log stats, convert the data types, and calculate basic stats.

### Import data and load csv

In [1]:
import pandas as pd
import os

In [5]:
# Move into the folder that holds the game-log CSVs.
# Guarded so that re-running this cell does not try to descend into
# ./game_logs/game_logs (which would raise FileNotFoundError).
if os.path.basename(os.getcwd()) != 'game_logs':
    os.chdir('./game_logs')

In [6]:
# Show the files available in the current working directory.
os.listdir()

['2018_19_Harden_James.csv']

In [50]:
# Read the game-log CSV into a DataFrame and preview the first three rows.
df = pd.read_csv(filepath_or_buffer='2018_19_Harden_James.csv')
df.head(n=3)

Unnamed: 0,game_season,date_game,age,team_id,game_location,opp_id,game_result,gs,mp,fg,...,drb,trb,ast,stl,blk,tov,pf,pts,game_score,plus_minus
0,1.0,2018-10-17,29-052,HOU,,NOP,L (-19),1.0,34:43,6.0,...,7.0,9.0,10.0,3.0,1.0,4.0,2.0,18.0,18.9,-23.0
1,2.0,2018-10-20,29-055,HOU,@,LAL,W (+9),1.0,37:50,10.0,...,6.0,7.0,5.0,2.0,1.0,6.0,2.0,36.0,27.0,-3.0
2,3.0,2018-10-21,29-056,HOU,@,LAC,L (-3),1.0,39:45,11.0,...,4.0,4.0,14.0,2.0,0.0,3.0,1.0,31.0,26.4,5.0


### Check out the new dtypes

In [51]:
# Show the dtype pandas assigned to each column of df.
df.dtypes

game_season      float64
date_game         object
age               object
team_id           object
game_location     object
opp_id            object
game_result       object
gs               float64
mp                object
fg               float64
fga              float64
fg_pct           float64
fg3              float64
fg3a             float64
fg3_pct          float64
ft               float64
fta              float64
ft_pct           float64
orb              float64
drb              float64
trb              float64
ast              float64
stl              float64
blk              float64
tov              float64
pf               float64
pts              float64
game_score       float64
plus_minus       float64
dtype: object

In [52]:
# Points per game: average `pts` over the rows where a game number is recorded.
# game_season is float64, so "missing" is NaN and must be tested with notna();
# comparing the column to '' never excludes a row and appears to be what
# triggers the comparison warning in this cell's output.
df.loc[df['game_season'].notna(), 'pts'].mean().round(1)

  result = method(y)


36.1

### Transform minutes from string to float

In [54]:
def str_min_to_float(x):
    """Convert a 'MM:SS' minutes-played string to decimal minutes.

    Parameters
    ----------
    x : str, float or NaN
        A 'MM:SS' string, a missing value, or an already-numeric minutes value.

    Returns
    -------
    float
        Minutes rounded to one decimal place; 0.0 when ``x`` is missing.
    """
    if pd.isna(x):
        # Missing minutes are counted as zero minutes played.
        return 0.0
    if not isinstance(x, str):
        # Already numeric (e.g. this cell was re-run after the column was
        # converted): pass the value through instead of failing on .split().
        return round(float(x), 1)
    parts = x.split(':')
    return round(int(parts[0]) + int(parts[1]) / 60.0, 1)
    
# Replace the 'MM:SS' strings in mp with decimal minutes, then preview two rows.
df['mp'] = df['mp'].map(str_min_to_float)
df.head(n=2)

Unnamed: 0,game_season,date_game,age,team_id,game_location,opp_id,game_result,gs,mp,fg,...,drb,trb,ast,stl,blk,tov,pf,pts,game_score,plus_minus
0,1.0,2018-10-17,29-052,HOU,,NOP,L (-19),1.0,34.7,6.0,...,7.0,9.0,10.0,3.0,1.0,4.0,2.0,18.0,18.9,-23.0
1,2.0,2018-10-20,29-055,HOU,@,LAL,W (+9),1.0,37.8,10.0,...,6.0,7.0,5.0,2.0,1.0,6.0,2.0,36.0,27.0,-3.0


### Create new column for games played

In [61]:
# Flag rows with a recorded game number: 1 when game_season is populated,
# 0 when it is NaN. Vectorized, so it does not rely on df having a default
# 0..n-1 index the way per-position label lookups do.
df['g'] = df['game_season'].notna().astype(int)

In [63]:
# Confirm the number of games Harden played last season (sum of the 0/1 flag).
df['g'].sum()

78

### Create Home Game Binary Field

In [65]:
# home_game is 1 when game_location is missing and 0 otherwise (the preview
# above shows '@' in that column for some games and a blank for others).
# The guard makes the cell safe to re-run: once game_location has been
# dropped there is nothing left to derive or remove.
if 'game_location' in df.columns:
    df['home_game'] = df['game_location'].isna().astype(int)
    df = df.drop(columns=['game_location'])

### Create Binary win field

In [70]:
# win is 1 when game_result starts with 'W' (e.g. 'W (+9)'), otherwise 0.
# na=False maps a missing result to 0; slicing a NaN value would raise instead.
df['win'] = df['game_result'].str.startswith('W', na=False).astype(int)
df['win'].sum()

## Calculating Stats

In [107]:
def stat_per_time(stat, minutes, per=36):
    """Scale a counting stat to a fixed number of minutes.

    Parameters
    ----------
    stat : number
        Raw stat total for the game.
    minutes : number
        Minutes played in the game.
    per : number, default 36
        Number of minutes to scale the stat to.

    Returns
    -------
    number
        ``stat / minutes * per`` rounded to one decimal place, or 0 when
        ``minutes`` is 0.
    """
    if minutes != 0:
        return round(stat / minutes * per, 1)
    # No minutes played: report 0 rather than dividing by zero.
    return 0

### Applying Stats per time with lambda

In [109]:
# Points scaled to 36 minutes, computed game by game from pts and mp.
df['pts_per_36'] = [
    stat_per_time(stat=game_stat, minutes=game_mp)
    for game_stat, game_mp in zip(df['pts'], df['mp'])
]

In [110]:
# Total rebounds scaled to 36 minutes, computed game by game from trb and mp.
df['trb_per_36'] = [
    stat_per_time(stat=game_stat, minutes=game_mp)
    for game_stat, game_mp in zip(df['trb'], df['mp'])
]

In [112]:
# Turnovers scaled to 12 minutes, computed game by game from tov and mp.
df['tov_per_12'] = [
    stat_per_time(stat=game_stat, minutes=game_mp, per=12)
    for game_stat, game_mp in zip(df['tov'], df['mp'])
]

### Reviewing in which of two games Harden had the higher pts per 36

In [117]:
# Pull the two games of interest and show opponent, points and points per 36.
df.loc[
    df['date_game'].isin(['2018-11-23', '2018-11-30']),
    ['opp_id', 'pts', 'pts_per_36'],
]

Unnamed: 0,opp_id,pts,pts_per_36
16,DET,33.0,29.0
20,SAS,23.0,30.0


In [118]:
# Save the engineered frame under its own file name. Writing back to
# '2018_19_Harden_James.csv' would replace the raw log that read_csv loads
# with already-transformed data (numeric 'mp', no 'game_location'), so a
# re-run would no longer start from the source data.
df.to_csv('2018_19_Harden_James_processed.csv', index=False)