In [377]:
import pandas
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import HistGradientBoostingRegressor

In [378]:
columns_to_select = ['Age', 'G', 'MP', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%',
                     'FT', 'FTA', 'FT%', 'PTS']

In [379]:
file_path = '2019-2020 NBA Player Stats.csv'

In [380]:
df = pandas.read_csv(file_path, usecols=columns_to_select)
df

Unnamed: 0,Age,G,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,PTS
0,26,63,26.7,4.5,7.6,0.592,0.0,0.0,0.333,4.5,7.5,0.594,0.593,1.9,3.2,0.582,10.9
1,22,72,33.6,6.1,11.0,0.557,0.0,0.2,0.143,6.1,10.8,0.564,0.558,3.7,5.3,0.691,15.9
2,34,53,33.1,7.4,15.0,0.493,1.2,3.0,0.389,6.2,12.0,0.519,0.532,3.0,3.6,0.827,18.9
3,23,2,6.5,0.5,1.0,0.5,0.0,0.0,,0.5,1.0,0.5,0.5,0.0,0.0,,1.0
4,21,47,12.6,2.1,5.7,0.368,1.0,2.8,0.346,1.1,2.8,0.391,0.455,0.5,0.8,0.676,5.7
5,24,38,18.9,3.1,6.6,0.466,1.5,3.7,0.404,1.6,2.9,0.545,0.58,1.0,1.2,0.867,8.7
6,21,70,26.5,4.3,6.6,0.649,0.0,0.1,0.0,4.3,6.6,0.658,0.649,2.4,3.9,0.633,11.1
7,27,10,11.7,1.9,4.4,0.432,0.5,1.6,0.313,1.4,2.8,0.5,0.489,0.7,1.1,0.636,5.0
8,29,18,21.1,1.4,4.8,0.291,0.5,2.0,0.25,0.9,2.8,0.32,0.343,1.1,1.6,0.655,4.3
9,26,10,10.7,1.0,3.8,0.263,0.6,2.9,0.207,0.4,0.9,0.444,0.342,0.2,0.4,0.5,2.8


In [381]:
pandas.set_option('display.max_rows', None)
df

Unnamed: 0,Age,G,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,PTS
0,26,63,26.7,4.5,7.6,0.592,0.0,0.0,0.333,4.5,7.5,0.594,0.593,1.9,3.2,0.582,10.9
1,22,72,33.6,6.1,11.0,0.557,0.0,0.2,0.143,6.1,10.8,0.564,0.558,3.7,5.3,0.691,15.9
2,34,53,33.1,7.4,15.0,0.493,1.2,3.0,0.389,6.2,12.0,0.519,0.532,3.0,3.6,0.827,18.9
3,23,2,6.5,0.5,1.0,0.5,0.0,0.0,,0.5,1.0,0.5,0.5,0.0,0.0,,1.0
4,21,47,12.6,2.1,5.7,0.368,1.0,2.8,0.346,1.1,2.8,0.391,0.455,0.5,0.8,0.676,5.7
5,24,38,18.9,3.1,6.6,0.466,1.5,3.7,0.404,1.6,2.9,0.545,0.58,1.0,1.2,0.867,8.7
6,21,70,26.5,4.3,6.6,0.649,0.0,0.1,0.0,4.3,6.6,0.658,0.649,2.4,3.9,0.633,11.1
7,27,10,11.7,1.9,4.4,0.432,0.5,1.6,0.313,1.4,2.8,0.5,0.489,0.7,1.1,0.636,5.0
8,29,18,21.1,1.4,4.8,0.291,0.5,2.0,0.25,0.9,2.8,0.32,0.343,1.1,1.6,0.655,4.3
9,26,10,10.7,1.0,3.8,0.263,0.6,2.9,0.207,0.4,0.9,0.444,0.342,0.2,0.4,0.5,2.8


In [408]:
data = pandas.read_csv('2019-2020 NBA Player Stats.csv')
player_names = data['Player']

In [409]:
data = pandas.get_dummies(data, columns=['Player'])

In [410]:
features = data.drop(['PTS', 'Pos', 'Tm'], axis=1)
target = data['PTS']

In [411]:
model = HistGradientBoostingRegressor()

In [412]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.20, random_state=42)

In [413]:
encoder = OneHotEncoder(drop='first', sparse_output=False)
encoded_features = encoder.fit_transform(features)

In [414]:
imputer = SimpleImputer(strategy='mean')
imputed_features = imputer.fit_transform(features)

In [415]:
model.fit(X_train, y_train)

In [416]:
y_pred = model.predict(X_test)

In [417]:
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 0.19171253642229386


In [421]:
predicted_data = pandas.DataFrame({'Player': player_names.iloc[X_test.index], 'Predicted_PTS': y_pred})
predicted_data = pandas.concat([predicted_data, X_test.reset_index(drop=True)], axis=1)

In [422]:
print(predicted_data)

                    Player  Predicted_PTS   Age     G    GS    MP   FG   FGA  \
638    Nigel Williams-Goss       1.701798   NaN   NaN   NaN   NaN  NaN   NaN   
264             Joe Harris      14.967779   NaN   NaN   NaN   NaN  NaN   NaN   
259       Maurice Harkless       5.327635   NaN   NaN   NaN   NaN  NaN   NaN   
495           Jakob Poeltl       6.036494   NaN   NaN   NaN   NaN  NaN   NaN   
72           Avery Bradley       9.041972  22.0  16.0   3.0  17.9  2.6   5.8   
440       Johnathan Motley       2.231776   NaN   NaN   NaN   NaN  NaN   NaN   
250          Rui Hachimura      13.888430   NaN   NaN   NaN   NaN  NaN   NaN   
131        Jordan Clarkson      14.737251   NaN   NaN   NaN   NaN  NaN   NaN   
327          James Johnson       5.711038   NaN   NaN   NaN   NaN  NaN   NaN   
278       Juan Hernangómez      12.783542   NaN   NaN   NaN   NaN  NaN   NaN   
274            John Henson       7.341789   NaN   NaN   NaN   NaN  NaN   NaN   
490          Elfrid Payton      10.90910