In [7]:
import typing
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd 
import seaborn as sns

def load_data(file_path: str) -> pd.DataFrame:  # type: ignore
    
    df = pd.read_csv(file_path, skiprows=0)
    if df.columns.str.contains('Unnamed:').sum() >= 1:
        print("File's columns are not correctly imported, fixing by skiping one row.....")
        df = pd.read_csv(file_path, skiprows=1)
        print("Done! New columns name as below:")
        print(df.columns.values)
    else:
        df = df

    return df

In [9]:
path = "/Users/lucaslee/Documents/GitHub/kaggle-data-project/dataset/kaggle_curry/Stephen Curry Regularseason Stats.csv"

df = load_data(file_path=path)
display(df.head())

Unnamed: 0,Season_year,Season_div,Date,OPP,Result,T Score,O Score,MIN,FG,FGM,...,FTM,FTA,FT%,REB,AST,BLK,STL,PF,TO,PTS
0,2009-2010,Regular,Wed 4/14,POR,W,122,116,48.0,13-25,13,...,12,12,100.0,9,8,1,2,0,2,42
1,2009-2010,Regular,Tue 4/13,UTA,L,94,103,41.0,5-15,5,...,6,6,100.0,5,6,0,2,4,2,17
2,2009-2010,Regular,Sun 4/11,OKC,W,120,117,35.0,9-16,9,...,4,6,66.7,7,7,0,1,2,5,25
3,2009-2010,Regular,Sat 4/10,LAC,L,104,107,41.0,10-19,10,...,4,4,100.0,9,4,0,3,5,3,29
4,2009-2010,Regular,Wed 4/7,MIN,W,116,107,44.0,12-22,12,...,0,0,0.0,8,14,0,7,4,4,27


In [46]:
df.columns[df.columns.str.contains('FT')]

free_throw_columns = ['Season_year', 'FT', 'FTM', 'FTA', 'FT%']
df[free_throw_columns][df['FT%'] != 0]['FT%'].mean()


91.1144157814871

In [87]:
df['FTM'] = df['FTM'].astype(int)
df['FTA'] = df['FTA'].astype(int)

avg_ft_rate = df.FTM.sum() / df.FTA.sum()
avg_ft_rate

0.9069910686787804

In [61]:
# group by season

curry_by_season_ft = df.groupby('Season_year', as_index=False)[['FTM', 'FTA']].sum()

curry_by_season_ft['rate'] = curry_by_season_ft.FTM / curry_by_season_ft.FTA

curry_by_season_ft


Unnamed: 0,Season_year,FTM,FTA,rate
0,2009-2010,177,200,0.885
1,2010-2011,212,227,0.933921
2,2011-2012,38,47,0.808511
3,2012-2013,262,291,0.900344
4,2013-2014,308,348,0.885057
5,2014-2015,308,337,0.913947
6,2015-2016,363,400,0.9075
7,2016-2017,325,362,0.89779
8,2017-2018,278,302,0.92053
9,2018-2019,263,287,0.916376


In [76]:
# practice quantile

fta_stats = np.quantile(df['FTA'], np.linspace(0, 1, 5))
ftm_stats = np.quantile(df['FTM'], np.linspace(0, 1, 5))
print(ftm_stats)
print(df.FTM.describe())

interquantile = ftm_stats[3] - ftm_stats[1]

df[free_throw_columns].describe()

[ 0.  2.  3.  6. 18.]
count    766.000000
mean       3.844648
std        2.972755
min        0.000000
25%        2.000000
50%        3.000000
75%        6.000000
max       18.000000
Name: FTM, dtype: float64


Unnamed: 0,FTM,FTA,FT%
count,766.0,766.0,766.0
mean,3.844648,4.238903,78.386945
std,2.972755,3.160889,34.33452
min,0.0,0.0,0.0
25%,2.0,2.0,75.0
50%,3.0,4.0,100.0
75%,6.0,6.0,100.0
max,18.0,19.0,100.0


In [90]:
# self-asked question:
# We know that Curry's free throw rate is 0.91, mean fta = 4
# in the next 82 games, please generate the simulation of curry ftm, fta using the probability that given above
from scipy.stats import binom
np.random.seed(420)

ft_prob = 0.907
ft_every_game = 8

x = binom.rvs(ft_every_game, ft_prob, size=82)
x

array([8, 8, 8, 8, 6, 7, 8, 8, 7, 7, 8, 8, 8, 8, 7, 8, 8, 7, 7, 6, 7, 8,
       7, 8, 8, 7, 8, 8, 8, 8, 8, 7, 6, 7, 7, 7, 6, 8, 8, 8, 7, 8, 7, 7,
       8, 6, 8, 7, 7, 6, 7, 8, 7, 8, 7, 7, 8, 8, 7, 8, 7, 8, 8, 7, 8, 8,
       7, 8, 8, 8, 8, 7, 8, 7, 8, 8, 8, 8, 8, 7, 5, 8])

In [97]:
def double_arg(func):
    def wrapper(x, y):
        return func(x*2, y*2)        
    return wrapper

@double_arg
def addition(x, y):
    return x + y


In [100]:
addition(5, 5)


20