In [31]:
from scipy import stats, interpolate, integrate, linalg
import numpy as np
import pandas as pd

In [4]:
#filtering the data to only include regular season games
with open("C:/Users/chemi/OneDrive/Coding/players_stats_by_season_full_details.csv", 'r') as f:
    data = pd.read_csv(f)
reg_season = data[data['Stage'] == 'Regular_Season']

      League       Season           Stage            Player Team  GP     MIN  \
0        NBA  1999 - 2000  Regular_Season  Shaquille O'Neal  LAL  79  3163.0   
1        NBA  1999 - 2000  Regular_Season      Vince Carter  TOR  82  3126.0   
2        NBA  1999 - 2000  Regular_Season       Karl Malone  UTA  82  2947.0   
3        NBA  1999 - 2000  Regular_Season     Allen Iverson  PHI  70  2853.0   
4        NBA  1999 - 2000  Regular_Season       Gary Payton  SEA  82  3425.0   
...      ...          ...             ...               ...  ...  ..     ...   
51971    NBA  2019 - 2020  Regular_Season  Matt Dellavedova  CLE  57   820.7   
51972    NBA  2019 - 2020  Regular_Season      Goga Bitadze  IND  54   471.3   
51973    NBA  2019 - 2020  Regular_Season     Nassir Little  POR  48   572.5   
51974    NBA  2019 - 2020  Regular_Season       David Nwaba  BRK  20   268.5   
51975    NBA  2019 - 2020  Regular_Season   Al-Farouq Aminu  ORL  18   380.1   

       FGM   FGA  3PM  ...    birth_dat

In [50]:
#determing the player who has played the most games
class Player:
    def __init__(self, name, games):
        self.name = name
        self.games = games

        player_games = reg_season['Player'].value_counts()
        max_games = player_games.max()
        player_most_games = player_games[player_games == max_games].index[0]
    print(player_most_games)

Vince Carter


In [54]:
# calculating Vince Carter's three point accuracy for each season
class ThreePointAccuracy:
    def __init__(self, player, season, accuracy):
        self.player = player
        self.season = season
        self.accuracy = accuracy

    vc = reg_season[reg_season['Player'] == 'Vince Carter']
    vc_3pt_accuracy = vc['3PM'] / vc['3PA']
    # have to drop the rows with NaN values
    vc_3pt_accuracy = vc_3pt_accuracy.dropna()
    for season, accuracy in zip(vc['Season'], vc_3pt_accuracy):
        vc_3pt_accuracy_percent = accuracy * 100
        print("Vince Carter had a three point accuracy of", vc_3pt_accuracy_percent, "percent in", season)


Vince Carter had a three point accuracy of 40.25423728813559 percent in 1999 - 2000
Vince Carter had a three point accuracy of 40.80604534005038 percent in 2000 - 2001
Vince Carter had a three point accuracy of 38.65814696485623 percent in 2001 - 2002
Vince Carter had a three point accuracy of 38.2716049382716 percent in 2003 - 2004
Vince Carter had a three point accuracy of 40.57507987220447 percent in 2004 - 2005
Vince Carter had a three point accuracy of 34.05994550408719 percent in 2005 - 2006
Vince Carter had a three point accuracy of 35.6979405034325 percent in 2006 - 2007
Vince Carter had a three point accuracy of 35.8974358974359 percent in 2007 - 2008
Vince Carter had a three point accuracy of 38.52040816326531 percent in 2008 - 2009
Vince Carter had a three point accuracy of 36.7283950617284 percent in 2009 - 2010
Vince Carter had a three point accuracy of 36.13707165109034 percent in 2010 - 2011
Vince Carter had a three point accuracy of 36.09756097560975 percent in 2011 - 2

In [56]:
#perform linear regression for Vince Carter's three point accuracy across the years played
class LinearRegression:
    def __init__(self, slope, intercept, r_value, p_value, std_err):
        self.slope = slope
        self.intercept = intercept
        self.r_value = r_value
        self.p_value = p_value
        self.std_err = std_err
    vc = reg_season[reg_season['Player'] == 'Vince Carter']
    vc_3pt_accuracy = vc['3PM'] / vc['3PA']
    vc_3pt_accuracy = vc_3pt_accuracy.dropna()
    x = np.arange(0, len(vc_3pt_accuracy))
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, vc_3pt_accuracy)
    print("The slope is:", slope)
    print("The intercept is:", intercept)
    print("The r value is:", r_value)
    print("The p value is:", p_value)
    print("The standard error is:", std_err)
#Create a line of best fit for Vince Carter's three point accuracy
    line = slope * x + intercept
    print("Line:", line)

The slope is: -0.0027582745098713715
The intercept is: 0.39477458520913344
The r value is: -0.4833590654433425
The p value is: 0.036032411549838425
The standard error is: 0.0012116040287473214
Line: [0.39477459 0.39201631 0.38925804 0.38649976 0.38374149 0.38098321
 0.37822494 0.37546666 0.37270839 0.36995011 0.36719184 0.36443357
 0.36167529 0.35891702 0.35615874 0.35340047 0.35064219 0.34788392
 0.34512564]


In [70]:
#calculate average three point accuracy by integrating the fit line over played seasons and dividing by the number of seasons
class Integration:
    def __init__(self,  player, reg_seaon, intercept, slope):
        self.player = player
        self.reg_season = reg_season
        self.intercept = intercept
        self.slope = slope
    def f(self, x):
        return self.slope * x + self.intercept
    def calculate_avg_accuracy(self):
        vc = self.reg_season[self.reg_season['Player'] == self.player]
        return integrate.quad(self.f, 0, len(vc['3PM'] / vc['3PA'])) / len(vc['3PM'] / vc['3PA'])
    print(calculate_avg_accuracy)
#

<function Integration.calculate_avg_accuracy at 0x00000200B89B0EA0>


In [75]:
# using interpolation to determine the three point accuracy for a the seasons in which Vince Carter did not play
class Interpolation:
    def __init__(self, player, reg_season, intercept, slope):
        self.player = player
        self.reg_season = reg_season
        self.intercept = intercept
        self.slope = slope
    def f(self, x):
        return self.slope * x + self.intercept
    def interpolate(self):
        vc = self.reg_season[self.reg_season['Player'] == self.player]
        x = np.arange(0, len(vc['3PM'] / vc['3PA']))
        y = self.f(x)
        f = interpolate.interp1d(x, y, fill_value='extrapolate')
        return f
    def missing_years(self, missing_seasons):
        f = self.interpolate()
        estimations = {}
        for season in missing_seasons:
            x = season - min(self.reg_season[self.reg_season['Player'] == self.player]['Season'])
            estimations[season] = f(season)
        return estimations
    print(missing_years)
        

<function Interpolation.missing_years at 0x00000200B89CF6A0>


In [None]:
#