In [10]:
import pandas as pd

from sklearn.ensemble import RandomForestRegressor

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import mean_squared_error, r2_score

import matplotlib.pyplot as plt

In [11]:
F_selection = pd.read_csv("F_Selection/top_N_features_SelectFromModel_RFAll.csv")

# Extract top 10, 20, 30, and 50 features
top_10 = F_selection["Feature"].head(10).tolist()
top_20 = F_selection["Feature"].head(20).tolist()
top_30 = F_selection["Feature"].head(30).tolist()
top_40 = F_selection["Feature"].head(40).tolist()
top_50 = F_selection["Feature"].head(50).tolist()

In [12]:
def Data_Organizer(raw_Data,top):
    
    All_players_dataSet = raw_Data.copy()
    
    All_players_dataSet = All_players_dataSet.drop(columns=All_players_dataSet.columns[All_players_dataSet.columns.str.contains('Unnamed:')])
    All_players_dataSet.reset_index(drop=True, inplace=True)
    
    All_players_dataSet.fillna(0, inplace=True)
    All_players_dataSet.replace('', 0, inplace=True)
    All_players_dataSet.replace('--', 0, inplace=True)
    
    # check if any NaN,empty Strings exists in the dataframe
    any_missing_values = All_players_dataSet.isna().any().any()
    any_empty_values = (All_players_dataSet.applymap(lambda x: x == '')).any().any()
    
    if any_missing_values or any_empty_values:
        print("DataFrame contains missing values or empty strings/spaces.")
    else:
        print("DataFrame does not contain missing values or empty strings/spaces.")
        
    ## seperating our dataFrame
    Y = All_players_dataSet['Points_won']  # Target
    X = All_players_dataSet.drop('Points_won', axis=1)  # Features
    
    X = X.apply(pd.to_numeric, errors='coerce')

    ## keep the specified columns
    columns_to_keep = top
    
    ## keep only wanted columns
    X = X[columns_to_keep]

    return X, Y

In [13]:
all_players_w_add_sorted = pd.read_csv("all_players_w_add_sorted.csv")

  all_players_w_add_sorted = pd.read_csv("all_players_w_add_sorted.csv")


In [14]:
top = top_10

rank_data_10 = pd.DataFrame()
    
for year in range(1998, 2024):
    print("Selected Features Top ", top, " Test year:", year)
    
    # Define the range of years for training data
    train_years = range(year - 9, year - 4)  # 5 to 9 years prior to the test year
    
    # Filter the data for training and testing
    Test_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] == year]
    Train_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'].isin(train_years)]
    
    X_train, y_train = Data_Organizer(Train_Data,top)
    X_test , y_test = Data_Organizer(Test_Data,top)
    
    # Initialize and train a RandomForest Regressor model
    rf_model = RandomForestRegressor(n_estimators=200,random_state=42, n_jobs=-1)
    rf_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_rf = rf_model.predict(X_test)
    
    # Evaluate the model
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)
    
    # Mean Squared Error (MAE)
    print(f'Mean Squared Error (RandomForest Regressor): {mse_rf}')
    print(f'R-squared (RandomForest Regressor): {r2_rf}')
    
    Test_Data_copy = Test_Data.copy()
    Test_Data_copy.loc[:, 'Predicted_points'] = y_pred_rf
    
    # Select only the desired columns
    Test_Data_concatenated = Test_Data_copy[['Player_name','Rank','Pos','Points_won', 'Predicted_points']]
    
    # Display the concatenated DataFrame sorted by 'Points_won'
    display(Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10))

    Test_Data_concatenated = Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10)
    
    if rank_data_10.empty:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_10 = pd.DataFrame(new_rank_column)
    else:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_10 = pd.concat([rank_data_10.reset_index(drop=True), new_rank_column.reset_index(drop=True)], axis=1)

    print("---------------------------------------------------------------------")

display(rank_data_10)

Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 1998
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 2.4570160931174088
R-squared (RandomForest Regressor): 0.6052487015218588


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13764,Gary Payton,2,PG,37.0,28.69
13894,Dikembe Mutombo,1,C,39.0,28.3
13740,Hakeem Olajuwon,-1,C,0.0,28.105
13506,Tom Chambers,-1.0,PF,0.0,7.88
13866,Sean Higgins,-1.0,SF,0.0,6.215
13889,Tim Duncan,5T,PF,4.0,4.12
13836,Michael Jordan,4,SG,6.0,2.1
13637,Dennis Rodman,5T,PF,4.0,1.66
13605,David Robinson,3,C,10.0,1.625
13628,Zydrunas Ilgauskas,-1.0,C,0.0,1.46


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 1999
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 11.592800316455696
R-squared (RandomForest Regressor): 0.31748821314107367


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13182,Dikembe Mutombo,2,C,10.0,26.845
13424,Gary Payton,3,PG,6.0,25.49
13039,Hakeem Olajuwon,7T,C,1.0,24.395
13269,Alonzo Mourning,1,C,89.0,23.71
13434,David Robinson,4,C,3.0,2.275
13387,Gary Grant,-1.0,PG,0.0,1.735
13056,Trevor Winter,-1.0,C,0.0,1.39
13102,Adonis Jordan,-1.0,PG,0.0,0.895
13461,Peter Aluma,-1.0,C,0.0,0.835
13070,Tyson Wheeler,-1.0,PG,0.0,0.695


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2000
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 8.867781303418804
R-squared (RandomForest Regressor): 0.08541613843329254


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12708,Gary Payton,5T,PG,4.0,36.005
12909,Dikembe Mutombo,3T,C,11.0,28.48
12750,Hakeem Olajuwon,-1,C,0.0,28.425
12956,Alonzo Mourning,1,C,62.0,21.83
12987,David Robinson,-1,C,0.0,8.455
12725,Shaquille O'Neal,2,C,21.0,6.85
12843,Marcus Camby,-1.0,C,0.0,3.68
12830,Tim Duncan,-1,PF,0.0,2.755
12741,Eddie Jones,3T,SG,11.0,1.935
12841,Kevin Garnett,7,PF,2.0,1.56


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2001
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 9.585290204081632
R-squared (RandomForest Regressor): 0.16208379933219452


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12427,Gary Payton,-1,PG,0.0,35.545
12210,Alonzo Mourning,-1,C,0.0,27.43
12512,Dikembe Mutombo,1,C,48.0,23.155
12510,Dikembe Mutombo,1,C,48.0,23.155
12391,Hakeem Olajuwon,-1,C,0.0,23.125
12274,Shawn Marion,11T,SF,1.0,5.31
12188,Marcus Camby,-1.0,C,0.0,4.49
12271,Tim Duncan,3,PF,14.0,4.415
12418,Ben Wallace,5T,C,6.0,4.03
12106,David Robinson,5T,C,6.0,3.51


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2002
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 20.537302606382976
R-squared (RandomForest Regressor): 0.2813437357233144


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
11941,Ben Wallace,1,C,116.0,40.555
11950,Gary Payton,-1,PG,0.0,36.405
12003,Dikembe Mutombo,3T,C,1.0,32.425
12035,Alonzo Mourning,-1,C,0.0,30.97
11892,Hakeem Olajuwon,-1,C,0.0,23.94
11852,Tim Duncan,-1,PF,0.0,8.995
11600,Rasheed Wallace,-1.0,PF,0.0,4.24
11862,David Robinson,-1,C,0.0,1.46
11855,Shawn Marion,-1,SF,0.0,1.17
12041,Eddie Jones,-1,SG,0.0,1.15


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2003
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 660.4571778508772
R-squared (RandomForest Regressor): 0.076757279150939


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
11177,Gary Payton,11.0,PG,5.0,32.515
11176,Gary Payton,11.0,PG,5.0,31.83
11461,Dikembe Mutombo,-1.0,C,0.0,31.045
11137,Ben Wallace,1.0,C,531.0,22.82
11183,Kevin Garnett,3.0,PF,121.0,13.24
11405,Tim Duncan,4.0,PF,90.0,11.235
11559,Jason Kidd,-1.0,PG,0.0,5.155
11505,Dirk Nowitzki,-1.0,PF,0.0,4.365
11514,Metta World Peace,2.0,SF,122.0,4.045
11231,Tracy McGrady,-1.0,SG,0.0,2.575


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2004
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 585.7462591876209
R-squared (RandomForest Regressor): 0.15261344241496877


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
10722,Metta World Peace,1,SF,476.0,37.99
10663,Ben Wallace,2,C,325.0,35.56
11106,Dikembe Mutombo,-1,C,0.0,19.3
10721,Gary Payton,-1,PG,0.0,19.14
10865,Alonzo Mourning,-1,C,0.0,18.275
11127,Kevin Garnett,6,PF,36.0,9.87
10818,Tim Duncan,7,PF,8.0,7.43
10871,Andrei Kirilenko,5,PF,67.0,3.52
10990,Jason Kidd,-1,PG,0.0,2.325
10852,Bruce Bowen,4,SF,76.0,2.21


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2005
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 383.4980967205323
R-squared (RandomForest Regressor): 0.0800258069613925


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
10455,Ben Wallace,1,C,339.0,27.13
10261,Metta World Peace,-1,SF,0.0,17.275
10343,Kevin Garnett,9,PF,30.0,10.635
10240,Alonzo Mourning,-1,C,0.0,8.58
10380,Gary Payton,-1,PG,0.0,8.355
10276,Dikembe Mutombo,-1,C,0.0,6.435
10592,Andrei Kirilenko,10,PF,25.0,5.695
10151,Manu Ginóbili,18T,SG,2.0,5.62
10326,Tim Duncan,4,PF,81.0,4.415
10241,Alonzo Mourning,-1,PF,0.0,3.475


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2006
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 533.4860287597655
R-squared (RandomForest Regressor): 0.08680455310039303


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9841,Ben Wallace,1,C,420.0,27.85
10029,Dikembe Mutombo,-1,C,0.0,16.96
9730,Metta World Peace,4,SF,65.0,13.435
9731,Metta World Peace,4,SF,65.0,12.42
9745,Alonzo Mourning,8,C,29.0,7.915
9825,Dwyane Wade,-1,SG,0.0,7.85
9884,Vince Carter,-1.0,SG,0.0,7.815
9630,LeBron James,-1.0,SF,0.0,6.405
9704,Marcus Camby,5,C,55.0,5.455
10019,Shane Battier,10T,SF,3.0,5.24


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2007
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 528.2022031827516
R-squared (RandomForest Regressor): 0.047798563369676916


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9344,Dikembe Mutombo,-1,C,0.0,26.425
9414,Ben Wallace,6,C,42.0,23.095
9407,Marcus Camby,1,C,431.0,13.14
9438,Tim Duncan,3,C,158.0,7.24
9489,LeBron James,-1.0,SF,0.0,7.025
9418,Shawn Marion,4,SF,93.0,5.2
9253,Rasheed Wallace,-1,C,0.0,3.215
9203,Manu Ginóbili,-1,SG,0.0,2.55
9356,Kevin Garnett,13T,PF,7.0,2.495
9440,Dirk Nowitzki,-1.0,PF,0.0,1.565


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2008
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 526.4016310721063
R-squared (RandomForest Regressor): 0.11699143046130345


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8699,Chris Paul,7T,PG,24.0,79.275
8762,Marcus Camby,2,C,178.0,50.09
8731,Kevin Garnett,1,PF,493.0,27.075
8966,LeBron James,-1.0,SF,0.0,22.12
9081,Ben Wallace,-1,C,0.0,20.12
9030,Ben Wallace,-1,PF,0.0,19.67
8592,Dikembe Mutombo,-1,C,0.0,17.9
8682,Manu Ginóbili,-1,SG,0.0,12.095
8573,Tim Duncan,9,C,22.0,7.09
8949,Rasheed Wallace,12,C,9.0,5.185


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2009
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 241.10951519417475
R-squared (RandomForest Regressor): 0.6293004489071199


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8212,Dwight Howard,1,C,542.0,208.95
8484,LeBron James,2,SF,148.0,138.38
8556,Chris Paul,6,PG,49.0,74.905
8507,Dwyane Wade,3,SG,90.0,67.18
8365,Kevin Garnett,8,PF,31.0,47.665
8558,Jason Kidd,-1,PG,0.0,42.04
8483,Ben Wallace,-1,PF,0.0,21.11
8254,Gerald Wallace,-1,SF,0.0,8.96
8324,Chuck Hayes,-1.0,C,0.0,8.275
8093,Marcus Camby,16T,C,1.0,8.27


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2010
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 409.44965981445307
R-squared (RandomForest Regressor): 0.4337421117689463


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8018,Dwight Howard,1,C,576.0,156.21
8041,LeBron James,4,SF,61.0,69.67
7905,Raja Bell,-1,SG,0.0,24.67
7977,Ben Wallace,17T,C,1.0,14.945
7544,Dwyane Wade,10T,SG,13.0,13.155
7892,Andrew Bogut,7.0,C,23.0,11.55
7553,Josh Smith,2,PF,136.0,9.98
7564,Kobe Bryant,12,SG,9.0,9.935
7777,Kevin Durant,-1.0,SF,0.0,9.645
8050,Metta World Peace,6,SF,29.0,8.8


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2011
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 249.58411503690039
R-squared (RandomForest Regressor): 0.6251306798891441


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
7366,Dwight Howard,1.0,C,585.0,249.885
7121,Kevin Garnett,2.0,PF,77.0,113.575
7527,Orien Greene,-1.0,SG,0.0,63.805
7400,Luol Deng,10.0,SF,24.0,58.13
7110,Hassan Whiteside,-1.0,C,0.0,44.08
7086,Chris Bosh,-1.0,PF,0.0,32.03
7523,Andrew Bogut,6.0,C,32.0,24.755
7091,Paul Pierce,-1.0,SF,0.0,22.64
7508,Josh Smith,25.0,PF,1.0,12.64
7228,Nenê,-1.0,C,0.0,11.995


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2012
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 306.73240961165044
R-squared (RandomForest Regressor): 0.3222128548759361


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
6510,Dwight Howard,3,C,186.0,136.585
6527,Serge Ibaka,2.0,PF,294.0,98.185
6987,Kevin Garnett,5,C,44.0,93.51
6897,Morris Almond,-1.0,SG,0.0,61.785
6938,Marc Gasol,12T,C,5.0,33.85
6943,Luol Deng,9.0,SF,16.0,28.85
6523,Andre Iguodala,7,SF,33.0,28.215
6747,Josh Smith,10,PF,9.0,26.65
6795,Greg Stiemsma,-1.0,C,0.0,23.78
6971,Kyrylo Fesenko,-1.0,C,0.0,23.395


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2013
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 115.55174741873803
R-squared (RandomForest Regressor): 0.5196879635832896


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
6217,Marc Gasol,1.0,C,212.0,210.845
6005,Serge Ibaka,3.0,PF,122.0,82.325
5996,Dwight Howard,14.0,C,9.0,72.465
6094,Mike Conley,21.0,PG,1.0,67.36
6242,Paul George,8.0,SF,57.0,60.865
6436,Zach Randolph,-1.0,PF,0.0,42.81
6315,Josh Smith,-1.0,PF,0.0,42.22
6419,Tim Duncan,6.0,C,94.0,31.95
6330,Joakim Noah,4.0,C,107.0,23.755
6265,Kevin Garnett,-1.0,C,0.0,23.205


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2014
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 189.4287660127737
R-squared (RandomForest Regressor): 0.7114850053759207


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
5847,Joakim Noah,1.0,C,555.0,328.97
5952,Paul George,7.0,SF,30.0,104.025
5461,Kawhi Leonard,11.0,SF,9.0,42.91
5813,Andrew Bogut,10.0,C,11.0,41.08
5572,Draymond Green,-1.0,SF,0.0,35.195
5895,Jimmy Butler,-1.0,SG,0.0,30.67
5898,DeAndre Jordan,3.0,C,121.0,28.92
5828,Seth Curry,-1.0,PG,0.0,23.735
5730,Maalik Wayns,-1.0,PG,0.0,23.52
5805,Andre Iguodala,5.0,SF,47.0,22.095


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2015
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 255.3747543913043
R-squared (RandomForest Regressor): 0.49687487485300585


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
5107,Draymond Green,2,PF,317.0,118.27
5314,Kawhi Leonard,1,SF,333.0,94.515
5393,Anthony Davis,4,PF,107.0,75.225
5011,DeAndre Jordan,3.0,C,261.0,55.555
4915,Tim Duncan,8,C,12.0,52.185
5134,Rudy Gobert,5.0,C,33.0,46.04
5073,Stephen Curry,-1.0,PG,0.0,40.415
5349,Chris Paul,15T,PG,1.0,35.565
5216,Tony Allen,7.0,SG,29.0,26.06
4890,Marc Gasol,10T,C,7.0,23.49


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2016
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 637.2632542140152
R-squared (RandomForest Regressor): 0.30496941906270714


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
4570,Hassan Whiteside,3.0,C,83.0,227.6
4426,Kawhi Leonard,1.0,SF,547.0,111.655
4597,Draymond Green,2.0,PF,421.0,82.11
4571,Paul Millsap,5.0,PF,21.0,71.655
4816,Stephen Curry,-1.0,PG,0.0,50.97
4820,Andre Drummond,10.0,C,3.0,49.135
4474,DeAndre Jordan,4.0,C,50.0,46.865
4782,Tim Duncan,-1.0,C,0.0,25.0
4580,Danny Green,-1.0,SG,0.0,23.3
4720,Cole Aldrich,-1.0,C,0.0,18.37


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2017
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 227.97730096863458
R-squared (RandomForest Regressor): 0.5774033023849032


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
3977,Rudy Gobert,2.0,C,269.0,262.305
3989,Draymond Green,1,PF,434.0,127.48
4054,Dwight Howard,-1,C,0.0,61.895
4268,Kawhi Leonard,3,SF,182.0,58.605
4113,Stephen Curry,-1.0,PG,0.0,35.18
3870,Hassan Whiteside,5T,C,3.0,34.355
4129,Anthony Davis,-1,C,0.0,32.345
4131,Edy Tavares,-1.0,C,0.0,26.665
3951,Marc Gasol,-1,C,0.0,26.4
3834,Andrew Bogut,-1.0,C,0.0,23.97


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2018
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 327.7407545041322
R-squared (RandomForest Regressor): 0.2523069148416227


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
3403,Rudy Gobert,1.0,C,466.0,76.4
3324,Andre Drummond,15T,C,1.0,61.505
3270,Draymond Green,6,PF,20.0,48.945
3595,Ben Simmons,-1.0,PG,0.0,46.01
3177,Anthony Davis,3,PF,139.0,36.485
3304,James Harden,-1.0,SG,0.0,29.11
3583,Clint Capela,14.0,C,2.0,25.005
3363,Kyle Anderson,-1.0,SF,0.0,22.605
3469,Kawhi Leonard,-1,SF,0.0,21.795
3400,Jonathan Isaac,-1.0,PF,0.0,20.66


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2019
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 248.88106752411576
R-squared (RandomForest Regressor): 0.42560881577854126


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
2825,Rudy Gobert,1.0,C,411.0,120.36
3114,Giannis Antetokounmpo,2.0,PF,280.0,117.11
2562,Myles Turner,5.0,C,15.0,89.715
3148,Andre Drummond,-1,C,0.0,75.41
2738,Brook Lopez,-1.0,C,0.0,62.17
2980,Marc Gasol,-1,C,0.0,56.24
2979,Marc Gasol,-1,C,0.0,53.225
3099,James Harden,-1.0,PG,0.0,49.295
2876,Draymond Green,6T,PF,6.0,46.77
2702,Kawhi Leonard,6T,SF,6.0,43.46


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2020
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 193.31697064297796
R-squared (RandomForest Regressor): 0.5634621290503126


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
2127,Giannis Antetokounmpo,1.0,PF,432.0,166.22
2524,Rudy Gobert,3.0,C,187.0,121.825
2466,Jonathan Isaac,-1.0,PF,0.0,81.17
2017,Hassan Whiteside,11,C,3.0,79.01
2270,Anthony Davis,2,PF,200.0,66.165
2094,Brook Lopez,10.0,C,4.0,64.455
2198,Kawhi Leonard,8T,SF,5.0,53.85
2326,Myles Turner,-1.0,C,0.0,19.2
2324,Marc Gasol,-1,C,0.0,19.06
1987,Patrick Beverley,6T,PG,7.0,17.4


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2021
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 215.9038313897764
R-squared (RandomForest Regressor): 0.554605779021605


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
1379,Rudy Gobert,1.0,C,464.0,327.49
1719,Myles Turner,9.0,C,3.0,139.78
1610,Spencer Dinwiddie,-1.0,SG,0.0,67.385
1389,Henry Ellenson,-1.0,PF,0.0,60.68
1895,Giannis Antetokounmpo,5.0,PF,12.0,59.1
1422,Draymond Green,3.0,PF,76.0,56.62
1554,Mike Conley,-1.0,PG,0.0,37.48
1371,Nerlens Noel,-1.0,C,0.0,29.855
1586,Kawhi Leonard,-1.0,SF,0.0,25.39
1517,LeBron James,-1.0,PG,0.0,17.09


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2022
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 222.9872642307692
R-squared (RandomForest Regressor): -0.036111205946715685


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
1022,Rudy Gobert,3.0,C,136.0,117.495
1046,Sekou Doumbouya,-1.0,PF,0.0,96.065
1184,Nikola Jokić,-1.0,C,0.0,91.41
610,Michael Porter Jr.,-1.0,SF,0.0,69.705
1026,Myles Turner,-1.0,C,0.0,48.705
1123,Draymond Green,10.0,PF,2.0,23.175
1275,Stephen Curry,-1.0,PG,0.0,18.125
992,Paul George,-1.0,PF,0.0,17.36
813,Ricky Rubio,-1.0,PG,0.0,16.93
997,Jaren Jackson Jr.,5.0,PF,99.0,15.82


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced']  Test year: 2023
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 319.5929200328407
R-squared (RandomForest Regressor): 0.248930596208641


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
59,Jaren Jackson Jr.,1.0,C,391.0,56.665
76,Mac McClung,-1.0,SG,0.0,46.475
323,Brook Lopez,2.0,C,309.0,46.075
206,Nic Claxton,9T,C,7.0,44.44
208,Walker Kessler,-1.0,C,0.0,32.895
75,Myles Turner,-1.0,C,0.0,20.76
137,Evan Mobley,3.0,PF,101.0,18.38
197,Kawhi Leonard,-1,SF,0.0,13.24
396,Rudy Gobert,-1.0,C,0.0,8.44
564,Trey Murphy III,-1.0,SF,0.0,4.42


---------------------------------------------------------------------


Unnamed: 0,Rank_1998,Rank_1999,Rank_2000,Rank_2001,Rank_2002,Rank_2003,Rank_2004,Rank_2005,Rank_2006,Rank_2007,...,Rank_2014,Rank_2015,Rank_2016,Rank_2017,Rank_2018,Rank_2019,Rank_2020,Rank_2021,Rank_2022,Rank_2023
0,2,2,5T,-1,1,11.0,1,1,1,-1,...,1.0,2,3.0,2.0,1.0,1.0,1.0,1.0,3.0,1.0
1,1,3,3T,-1,-1,11.0,2,-1,-1,6,...,7.0,1,1.0,1,15T,2.0,3.0,9.0,-1.0,-1.0
2,-1,7T,-1,1,3T,-1.0,-1,9,4,1,...,11.0,4,2.0,-1,6,5.0,-1.0,-1.0,-1.0,2.0
3,-1.0,1,1,1,-1,1.0,-1,-1,4,3,...,10.0,3.0,5.0,3,-1.0,-1,11,-1.0,-1.0,9T
4,-1.0,4,-1,-1,-1,3.0,-1,-1,8,-1.0,...,-1.0,8,-1.0,-1.0,3,-1.0,2,5.0,-1.0,-1.0
5,5T,-1.0,2,11T,-1,4.0,6,-1,-1,4,...,-1.0,5.0,10.0,5T,-1.0,-1,10.0,3.0,10.0,-1.0
6,4,-1.0,-1.0,-1.0,-1.0,-1.0,7,10,-1.0,-1,...,3.0,-1.0,4.0,-1,14.0,-1,8T,-1.0,-1.0,3.0
7,5T,-1.0,-1,3,-1,-1.0,5,18T,-1.0,-1,...,-1.0,15T,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1
8,3,-1.0,3T,5T,-1,2.0,-1,4,5,13T,...,-1.0,7.0,-1.0,-1,-1,6T,-1,-1.0,-1.0,-1.0
9,-1.0,-1.0,7,5T,-1,-1.0,4,-1,10T,-1.0,...,5.0,10T,-1.0,-1.0,-1.0,6T,6T,-1.0,5.0,-1.0


In [15]:
top = top_20

rank_data_20 = pd.DataFrame()
    
for year in range(1998, 2024):
    print("Selected Features Top ", top, " Test year:", year)
    
    # Define the range of years for training data
    train_years = range(year - 9, year - 4)  # 5 to 9 years prior to the test year
    
    # Filter the data for training and testing
    Test_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] == year]
    Train_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'].isin(train_years)]
    
    X_train, y_train = Data_Organizer(Train_Data,top)
    X_test , y_test = Data_Organizer(Test_Data,top)
    
    # Initialize and train a RandomForest Regressor model
    rf_model = RandomForestRegressor(n_estimators=200,random_state=42, n_jobs=-1)
    rf_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_rf = rf_model.predict(X_test)
    
    # Evaluate the model
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)
    
    # Mean Squared Error (MAE)
    print(f'Mean Squared Error (RandomForest Regressor): {mse_rf}')
    print(f'R-squared (RandomForest Regressor): {r2_rf}')
    
    Test_Data_copy = Test_Data.copy()
    Test_Data_copy.loc[:, 'Predicted_points'] = y_pred_rf
    
    # Select only the desired columns
    Test_Data_concatenated = Test_Data_copy[['Player_name','Rank','Pos','Points_won', 'Predicted_points']]
    
    # Display the concatenated DataFrame sorted by 'Points_won'
    display(Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10))

    Test_Data_concatenated = Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10)
    
    if rank_data_20.empty:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_20 = pd.DataFrame(new_rank_column)
    else:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_20 = pd.concat([rank_data_20.reset_index(drop=True), new_rank_column.reset_index(drop=True)], axis=1)

    print("---------------------------------------------------------------------")

display(rank_data_20)

Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 1998
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 2.3706259615384617
R-squared (RandomForest Regressor): 0.6191283894539057


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13764,Gary Payton,2,PG,37.0,29.21
13740,Hakeem Olajuwon,-1,C,0.0,28.5
13894,Dikembe Mutombo,1,C,39.0,28.395
13506,Tom Chambers,-1.0,PF,0.0,6.65
13866,Sean Higgins,-1.0,SF,0.0,5.475
13889,Tim Duncan,5T,PF,4.0,5.075
13836,Michael Jordan,4,SG,6.0,4.005
13605,David Robinson,3,C,10.0,3.18
13666,Brevin Knight,11T,PG,1.0,2.04
13687,Joe Dumars,-1,SG,0.0,1.97


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 1999
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 11.917808069620254
R-squared (RandomForest Regressor): 0.2983537834691955


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13182,Dikembe Mutombo,2,C,10.0,27.8
13424,Gary Payton,3,PG,6.0,27.535
13039,Hakeem Olajuwon,7T,C,1.0,27.135
13269,Alonzo Mourning,1,C,89.0,26.565
13488,Tim Duncan,5T,PF,2.0,6.325
13445,Kevin Garnett,7T,PF,1.0,6.04
13121,Bo Outlaw,-1,PF,0.0,5.995
13434,David Robinson,4,C,3.0,5.405
13087,Horace Grant,-1,PF,0.0,5.23
13450,Theo Ratliff,5T,PF,2.0,5.195


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2000
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 8.438983493589744
R-squared (RandomForest Regressor): 0.12964045377512612


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12708,Gary Payton,5T,PG,4.0,31.935
12909,Dikembe Mutombo,3T,C,11.0,29.065
12750,Hakeem Olajuwon,-1,C,0.0,25.23
12956,Alonzo Mourning,1,C,62.0,23.665
12725,Shaquille O'Neal,2,C,21.0,9.665
12841,Kevin Garnett,7,PF,2.0,9.19
12568,Dennis Rodman,-1,PF,0.0,9.115
13004,Horace Grant,-1,PF,0.0,8.92
12987,David Robinson,-1,C,0.0,7.985
12827,Scottie Pippen,8T,SF,1.0,7.42


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2001
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 7.111018367346938
R-squared (RandomForest Regressor): 0.3783769331564848


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12210,Alonzo Mourning,-1,C,0.0,29.62
12512,Dikembe Mutombo,1,C,48.0,24.935
12510,Dikembe Mutombo,1,C,48.0,24.26
12427,Gary Payton,-1,PG,0.0,23.805
12391,Hakeem Olajuwon,-1,C,0.0,17.71
12271,Tim Duncan,3,PF,14.0,11.095
12460,Kevin Garnett,2,PF,26.0,10.035
12106,David Robinson,5T,C,6.0,8.25
12411,Scottie Pippen,-1,SF,0.0,6.585
12274,Shawn Marion,11T,SF,1.0,6.4


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2002
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 15.990880372340424
R-squared (RandomForest Regressor): 0.4404354568301556


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
11941,Ben Wallace,1,C,116.0,40.765
11950,Gary Payton,-1,PG,0.0,21.515
12035,Alonzo Mourning,-1,C,0.0,20.625
12003,Dikembe Mutombo,3T,C,1.0,20.32
11892,Hakeem Olajuwon,-1,C,0.0,16.185
11852,Tim Duncan,-1,PF,0.0,13.055
11936,Kevin Garnett,2,PF,2.0,5.295
11741,Jason Kidd,-1,PG,0.0,4.79
12005,Kobe Bryant,3T,SG,1.0,4.165
12041,Eddie Jones,-1,SG,0.0,4.0


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2003
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 644.9324908442984
R-squared (RandomForest Regressor): 0.09845899540591785


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
11137,Ben Wallace,1,C,531.0,27.955
11177,Gary Payton,11,PG,5.0,25.4
11176,Gary Payton,11,PG,5.0,25.385
11461,Dikembe Mutombo,-1,C,0.0,22.825
11405,Tim Duncan,4,PF,90.0,13.93
11183,Kevin Garnett,3,PF,121.0,12.795
11463,Kobe Bryant,8,SG,16.0,8.445
11559,Jason Kidd,-1,PG,0.0,8.13
11494,Shaquille O'Neal,13T,C,3.0,5.35
11231,Tracy McGrady,-1.0,SG,0.0,4.875


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2004
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 581.6980411508704
R-squared (RandomForest Regressor): 0.15846991267441746


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
10722,Metta World Peace,1,SF,476.0,38.215
10663,Ben Wallace,2,C,325.0,35.91
10865,Alonzo Mourning,-1,C,0.0,14.175
11127,Kevin Garnett,6,PF,36.0,14.125
10721,Gary Payton,-1,PG,0.0,13.875
11106,Dikembe Mutombo,-1,C,0.0,13.01
10818,Tim Duncan,7,PF,8.0,12.26
10764,Shaquille O'Neal,10T,C,1.0,3.965
10871,Andrei Kirilenko,5,PF,67.0,3.57
11115,Kobe Bryant,10T,SG,1.0,3.395


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2005
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 379.0443944391635
R-squared (RandomForest Regressor): 0.09070980043456556


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
10455,Ben Wallace,1,C,339.0,29.835
10261,Metta World Peace,-1,SF,0.0,21.66
10240,Alonzo Mourning,-1,C,0.0,9.485
10343,Kevin Garnett,9,PF,30.0,9.445
10241,Alonzo Mourning,-1,PF,0.0,8.03
10326,Tim Duncan,4,PF,81.0,7.3
10592,Andrei Kirilenko,10,PF,25.0,7.29
10338,Shawn Marion,5,PF,57.0,6.23
10151,Manu Ginóbili,18T,SG,2.0,6.115
10276,Dikembe Mutombo,-1,C,0.0,5.805


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2006
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 528.7746736328126
R-squared (RandomForest Regressor): 0.09486922174909729


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9841,Ben Wallace,1,C,420.0,29.355
10029,Dikembe Mutombo,-1,C,0.0,18.97
9730,Metta World Peace,4,SF,65.0,16.945
9731,Metta World Peace,4,SF,65.0,16.79
10028,Kevin Garnett,13T,PF,1.0,14.765
10044,Tim Duncan,6,PF,42.0,12.71
9745,Alonzo Mourning,8,C,29.0,11.275
9825,Dwyane Wade,-1,SG,0.0,10.975
9863,Shawn Marion,7,PF,33.0,10.765
10009,Gary Payton,-1,PG,0.0,9.665


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2007
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 529.8015548767967
R-squared (RandomForest Regressor): 0.04491537777982069


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9344,Dikembe Mutombo,-1,C,0.0,20.91
9414,Ben Wallace,6,C,42.0,17.585
9356,Kevin Garnett,13T,PF,7.0,17.04
9438,Tim Duncan,3,C,158.0,16.55
9489,LeBron James,-1.0,SF,0.0,10.785
9499,Gary Payton,-1,PG,0.0,9.925
9407,Marcus Camby,1,C,431.0,9.105
9418,Shawn Marion,4,SF,93.0,4.585
9214,Kobe Bryant,24T,SG,1.0,3.01
9566,Metta World Peace,8,SF,20.0,2.815


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2008
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 536.1301176470588
R-squared (RandomForest Regressor): 0.10067245174379924


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8699,Chris Paul,7T,PG,24.0,63.765
8762,Marcus Camby,2,C,178.0,47.255
8571,Kobe Bryant,5,SG,40.0,29.15
8731,Kevin Garnett,1,PF,493.0,18.175
8966,LeBron James,-1.0,SF,0.0,16.235
9081,Ben Wallace,-1,C,0.0,15.595
8592,Dikembe Mutombo,-1,C,0.0,15.26
9030,Ben Wallace,-1,PF,0.0,15.1
9035,Jason Kidd,-1,PG,0.0,10.8
8949,Rasheed Wallace,12,C,9.0,10.66


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2009
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 222.08565320388342
R-squared (RandomForest Regressor): 0.6585491373886779


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8212,Dwight Howard,1,C,542.0,224.6
8484,LeBron James,2,SF,148.0,100.445
8556,Chris Paul,6,PG,49.0,64.91
8507,Dwyane Wade,3,SG,90.0,41.445
8365,Kevin Garnett,8,PF,31.0,31.59
8558,Jason Kidd,-1,PG,0.0,25.475
8483,Ben Wallace,-1,PF,0.0,21.8
8442,Metta World Peace,5,SF,54.0,17.345
8194,Rajon Rondo,10,PG,8.0,14.875
8093,Marcus Camby,16T,C,1.0,8.785


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2010
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 460.8320481933594
R-squared (RandomForest Regressor): 0.3626816479529734


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8018,Dwight Howard,1,C,576.0,134.28
7911,Raja Bell,-1,SG,0.0,71.03
7905,Raja Bell,-1,SG,0.0,67.51
8041,LeBron James,4,SF,61.0,63.905
8016,Rajon Rondo,5,PG,55.0,34.445
7564,Kobe Bryant,12,SG,9.0,29.77
7683,Tim Duncan,14,C,7.0,25.585
7895,Tayshaun Prince,-1,SF,0.0,24.485
7936,Shane Battier,15,SG,5.0,24.21
7977,Ben Wallace,17T,C,1.0,23.065


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2011
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 253.1645085332103
R-squared (RandomForest Regressor): 0.6197530152269015


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
7366,Dwight Howard,1.0,C,585.0,244.815
7121,Kevin Garnett,2.0,PF,77.0,87.72
7400,Luol Deng,10.0,SF,24.0,83.85
7527,Orien Greene,-1.0,SG,0.0,75.96
7321,Rajon Rondo,5.0,PG,45.0,53.995
7523,Andrew Bogut,6.0,C,32.0,37.605
7110,Hassan Whiteside,-1.0,C,0.0,37.45
7091,Paul Pierce,-1.0,SF,0.0,22.99
7288,Dwyane Wade,16.0,SG,8.0,21.155
7186,LeBron James,9.0,SF,25.0,20.635


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2012
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 301.13720941747573
R-squared (RandomForest Regressor): 0.3345765785881084


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
6510,Dwight Howard,3,C,186.0,132.845
6987,Kevin Garnett,5,C,44.0,114.89
6527,Serge Ibaka,2.0,PF,294.0,114.295
6943,Luol Deng,9.0,SF,16.0,85.785
6523,Andre Iguodala,7,SF,33.0,80.95
6484,Rajon Rondo,-1,PG,0.0,80.775
6572,LeBron James,4,SF,112.0,67.395
6779,Dwyane Wade,18T,SG,1.0,49.585
6938,Marc Gasol,12T,C,5.0,44.225
6747,Josh Smith,10,PF,9.0,38.445


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2013
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 123.96829770554494
R-squared (RandomForest Regressor): 0.4847030282779813


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
6217,Marc Gasol,1.0,C,212.0,145.655
6265,Kevin Garnett,-1.0,C,0.0,124.635
6419,Tim Duncan,6.0,C,94.0,110.15
6005,Serge Ibaka,3.0,PF,122.0,86.28
5996,Dwight Howard,14.0,C,9.0,84.885
6242,Paul George,8.0,SF,57.0,73.505
6094,Mike Conley,21.0,PG,1.0,49.21
6330,Joakim Noah,4.0,C,107.0,35.055
6315,Josh Smith,-1.0,PF,0.0,34.395
6084,LeBron James,2.0,PF,149.0,27.645


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2014
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 269.03216354927
R-squared (RandomForest Regressor): 0.5902427342271346


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
5847,Joakim Noah,1.0,C,555.0,261.515
5952,Paul George,7.0,SF,30.0,93.635
5805,Andre Iguodala,5,SF,47.0,62.425
5514,Tim Duncan,13T,C,5.0,60.575
5641,Chris Paul,13T,PG,5.0,51.13
5708,Kevin Garnett,-1,C,0.0,45.415
5461,Kawhi Leonard,11,SF,9.0,41.005
5813,Andrew Bogut,10.0,C,11.0,34.635
5932,Tony Mitchell,-1.0,SF,0.0,34.445
5730,Maalik Wayns,-1.0,PG,0.0,31.79


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2015
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 303.65230278260873
R-squared (RandomForest Regressor): 0.4017611364803133


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
5107,Draymond Green,2,PF,317.0,107.04
5314,Kawhi Leonard,1,SF,333.0,80.285
4915,Tim Duncan,8,C,12.0,79.83
5393,Anthony Davis,4,PF,107.0,70.915
5349,Chris Paul,15T,PG,1.0,51.79
5073,Stephen Curry,-1.0,PG,0.0,45.345
5134,Rudy Gobert,5.0,C,33.0,44.715
5011,DeAndre Jordan,3.0,C,261.0,35.83
4890,Marc Gasol,10T,C,7.0,33.865
5216,Tony Allen,7.0,SG,29.0,33.75


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2016
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 627.7460339488636
R-squared (RandomForest Regressor): 0.3153493665742815


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
4570,Hassan Whiteside,3,C,83.0,225.515
4426,Kawhi Leonard,1,SF,547.0,128.585
4571,Paul Millsap,5.0,PF,21.0,72.635
4597,Draymond Green,2,PF,421.0,72.515
4524,LeBron James,11T,SF,2.0,59.405
4816,Stephen Curry,-1.0,PG,0.0,58.065
4474,DeAndre Jordan,4.0,C,50.0,49.405
4820,Andre Drummond,10,C,3.0,32.985
4703,Kevin Durant,-1.0,SF,0.0,32.685
4782,Tim Duncan,-1,C,0.0,29.55


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2017
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 318.1582223247232
R-squared (RandomForest Regressor): 0.4102368371664513


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
3977,Rudy Gobert,2.0,C,269.0,200.115
4147,LeBron James,5T,SF,3.0,86.215
3989,Draymond Green,1,PF,434.0,83.55
4113,Stephen Curry,-1.0,PG,0.0,69.26
4133,Chris Paul,-1,PG,0.0,63.75
4129,Anthony Davis,-1,C,0.0,42.19
4268,Kawhi Leonard,3,SF,182.0,39.01
3870,Hassan Whiteside,5T,C,3.0,36.12
3982,Andre Drummond,-1,C,0.0,28.55
4054,Dwight Howard,-1,C,0.0,28.535


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2018
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 328.1828914876033
R-squared (RandomForest Regressor): 0.25129824332094397


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
3403,Rudy Gobert,1.0,C,466.0,77.465
3270,Draymond Green,6,PF,20.0,77.235
3324,Andre Drummond,15T,C,1.0,66.275
3177,Anthony Davis,3,PF,139.0,46.915
3655,Robert Covington,8.0,SF,8.0,28.045
3304,James Harden,-1.0,SG,0.0,26.62
3469,Kawhi Leonard,-1,SF,0.0,21.805
3598,Victor Oladipo,15T,SG,1.0,19.365
3595,Ben Simmons,-1.0,PG,0.0,16.735
3627,Kyle Lowry,-1,PG,0.0,16.71


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2019
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 254.96950361736336
R-squared (RandomForest Regressor): 0.4115573491384833


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
2825,Rudy Gobert,1.0,C,411.0,119.07
3114,Giannis Antetokounmpo,2.0,PF,280.0,107.655
2562,Myles Turner,5.0,C,15.0,82.125
2738,Brook Lopez,-1.0,C,0.0,74.01
3148,Andre Drummond,-1,C,0.0,68.27
2671,Anthony Davis,-1,C,0.0,51.325
3099,James Harden,-1.0,PG,0.0,50.33
2876,Draymond Green,6T,PF,6.0,38.115
2702,Kawhi Leonard,6T,SF,6.0,35.88
2980,Marc Gasol,-1,C,0.0,33.205


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2020
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 206.49224953468698
R-squared (RandomForest Regressor): 0.5337104307000574


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
2127,Giannis Antetokounmpo,1.0,PF,432.0,171.815
2017,Hassan Whiteside,11,C,3.0,82.055
2037,Kris Dunn,-1.0,PG,0.0,75.46
2524,Rudy Gobert,3.0,C,187.0,73.44
2270,Anthony Davis,2,PF,200.0,68.24
2466,Jonathan Isaac,-1.0,PF,0.0,56.99
2094,Brook Lopez,10.0,C,4.0,52.64
2259,LeBron James,-1,PG,0.0,38.565
2198,Kawhi Leonard,8T,SF,5.0,36.935
2324,Marc Gasol,-1,C,0.0,25.71


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2021
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 207.4031577875399
R-squared (RandomForest Regressor): 0.572142062988815


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
1379,Rudy Gobert,1.0,C,464.0,325.36
1719,Myles Turner,9.0,C,3.0,128.865
1389,Henry Ellenson,-1.0,PF,0.0,62.415
1610,Spencer Dinwiddie,-1.0,SG,0.0,58.85
1554,Mike Conley,-1.0,PG,0.0,37.125
1895,Giannis Antetokounmpo,5.0,PF,12.0,34.915
1422,Draymond Green,3.0,PF,76.0,30.935
1517,LeBron James,-1.0,PG,0.0,26.735
1371,Nerlens Noel,-1.0,C,0.0,22.775
1586,Kawhi Leonard,-1.0,SF,0.0,22.02


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2022
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 215.30761237762238
R-squared (RandomForest Regressor): -0.00042767321105130485


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
1022,Rudy Gobert,3.0,C,136.0,105.39
1046,Sekou Doumbouya,-1.0,PF,0.0,97.845
1184,Nikola Jokić,-1.0,C,0.0,93.105
610,Michael Porter Jr.,-1.0,SF,0.0,75.725
1026,Myles Turner,-1.0,C,0.0,47.41
992,Paul George,-1.0,PF,0.0,38.265
933,Al Horford,9.0,C,3.0,35.82
1123,Draymond Green,10.0,PF,2.0,29.975
1212,Jayson Tatum,-1.0,SF,0.0,27.16
1007,Joel Embiid,-1.0,C,0.0,24.745


---------------------------------------------------------------------
Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor']  Test year: 2023
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 319.5479682676519
R-squared (RandomForest Regressor): 0.24903623651968498


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
76,Mac McClung,-1.0,SG,0.0,64.075
59,Jaren Jackson Jr.,1.0,C,391.0,57.5
323,Brook Lopez,2.0,C,309.0,46.58
206,Nic Claxton,9T,C,7.0,42.47
208,Walker Kessler,-1.0,C,0.0,20.695
137,Evan Mobley,3.0,PF,101.0,18.57
75,Myles Turner,-1.0,C,0.0,17.48
197,Kawhi Leonard,-1,SF,0.0,12.255
396,Rudy Gobert,-1.0,C,0.0,8.42
497,Joel Embiid,9T,C,7.0,5.235


---------------------------------------------------------------------


Unnamed: 0,Rank_1998,Rank_1999,Rank_2000,Rank_2001,Rank_2002,Rank_2003,Rank_2004,Rank_2005,Rank_2006,Rank_2007,...,Rank_2014,Rank_2015,Rank_2016,Rank_2017,Rank_2018,Rank_2019,Rank_2020,Rank_2021,Rank_2022,Rank_2023
0,2,2,5T,-1,1,1,1,1,1,-1,...,1.0,2,3,2.0,1.0,1.0,1.0,1.0,3.0,-1.0
1,-1,3,3T,1,-1,11,2,-1,-1,6,...,7.0,1,1,5T,6,2.0,11,9.0,-1.0,1.0
2,1,7T,-1,1,-1,11,-1,-1,4,13T,...,5,8,5.0,1,15T,5.0,-1.0,-1.0,-1.0,2.0
3,-1.0,1,1,-1,3T,-1,6,9,4,3,...,13T,4,2,-1.0,3,-1.0,3.0,-1.0,-1.0,9T
4,-1.0,5T,2,-1,-1,4,-1,-1,13T,-1.0,...,13T,15T,11T,-1,8.0,-1,2,-1.0,-1.0,-1.0
5,5T,7T,7,3,-1,3,-1,4,6,-1,...,-1,-1.0,-1.0,-1,-1.0,-1,-1.0,5.0,-1.0,3.0
6,4,-1,-1,2,2,8,7,10,8,1,...,11,5.0,4.0,3,-1,-1.0,10.0,3.0,9.0,-1.0
7,3,4,-1,5T,-1,-1,10T,5,-1,4,...,10.0,3.0,10,5T,15T,6T,-1,-1.0,10.0,-1
8,11T,-1,-1,-1,3T,13T,5,18T,7,24T,...,-1.0,10T,-1.0,-1,-1.0,6T,8T,-1.0,-1.0,-1.0
9,-1,5T,8T,11T,-1,-1.0,10T,-1,-1,8,...,-1.0,7.0,-1,-1,-1,-1,-1,-1.0,-1.0,9T


In [16]:
top = top_30

rank_data_30 = pd.DataFrame()

for year in range(1998, 2024):
    print("Selected Features Top ", top, " Test year:", year)
    
    # Define the range of years for training data
    train_years = range(year - 9, year - 4)  # 5 to 9 years prior to the test year
    
    # Filter the data for training and testing
    Test_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] == year]
    Train_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'].isin(train_years)]
    
    X_train, y_train = Data_Organizer(Train_Data,top)
    X_test , y_test = Data_Organizer(Test_Data,top)
    
    # Initialize and train a RandomForest Regressor model
    rf_model = RandomForestRegressor(n_estimators=200,random_state=42, n_jobs=-1)
    rf_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_rf = rf_model.predict(X_test)
    
    # Evaluate the model
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)
    
    # Mean Squared Error (MAE)
    print(f'Mean Squared Error (RandomForest Regressor): {mse_rf}')
    print(f'R-squared (RandomForest Regressor): {r2_rf}')
    
    Test_Data_copy = Test_Data.copy()
    Test_Data_copy.loc[:, 'Predicted_points'] = y_pred_rf
    
    # Select only the desired columns
    Test_Data_concatenated = Test_Data_copy[['Player_name','Rank','Pos','Points_won', 'Predicted_points']]
    
    # Display the concatenated DataFrame sorted by 'Points_won'
    display(Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10))

    Test_Data_concatenated = Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10)

    if rank_data_30.empty:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_30 = pd.DataFrame(new_rank_column)
    else:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_30 = pd.concat([rank_data_30.reset_index(drop=True), new_rank_column.reset_index(drop=True)], axis=1)

    print("---------------------------------------------------------------------")

display(rank_data_30)

Selected Features Top  ['D-LEBRON_bball', 'DWS_advanced', 'Dpoy_titles', 'predator_defense_raptor', 'DEF WS_nba', 'BLK_nba', 'TOV_100_poss', 'war_reg_season_raptor', 'predator_total_raptor', 'DBPM_advanced', 'ORB_opponent', 'Age', 'WINS_espn', 'war_total_raptor', 'Dpoy_votes', 'STL%_nba', 'SOS_misc', 'OPP PTS OFF TOV_nba', 'RPM_espn', 'pace_impact_raptor', 'TOV%.1_misc', 'won_dpoy_last_season', 'W_nba', 'DRPM_espn', 'PER_advanced', 'STL_nba', 'DREB_nba', 'DEF RTG_nba', 'OPP PTS PAINT_nba', 'AST_opponent']  Test year: 1998
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (RandomForest Regressor): 2.475951012145749
R-squared (RandomForest Regressor): 0.6022065627691036


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13764,Gary Payton,2,PG,37.0,31.605
13740,Hakeem Olajuwon,-1,C,0.0,31.17
13894,Dikembe Mutombo,1,C,39.0,31.1
13506,Tom Chambers,-1.0,PF,0.0,5.645
13889,Tim Duncan,5T,PF,4.0,4.885
13836,Michael Jordan,4,SG,6.0,4.29
13866,Sean Higgins,-1.0,SF,0.0,4.015
13605,David Robinson,3,C,10.0,3.375
13783,Mario Elie,-1,SF,0.0,2.045
13956,Charles Oakley,-1,PF,0.0,1.98


NameError: name 'rank_data_30' is not defined

In [None]:
top = top_40

rank_data_40 = pd.DataFrame()

for year in range(1998, 2024):
    print("Selected Features Top ", top, " Test year:", year)
    
    # Define the range of years for training data
    train_years = range(year - 9, year - 4)  # 5 to 9 years prior to the test year
    
    # Filter the data for training and testing
    Test_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] == year]
    Train_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'].isin(train_years)]
    
    X_train, y_train = Data_Organizer(Train_Data,top)
    X_test , y_test = Data_Organizer(Test_Data,top)
    
    # Initialize and train a RandomForest Regressor model
    rf_model = RandomForestRegressor(n_estimators=200,random_state=42, n_jobs=-1)
    rf_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_rf = rf_model.predict(X_test)
    
    # Evaluate the model
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)
    
    # Mean Squared Error (MAE)
    print(f'Mean Squared Error (RandomForest Regressor): {mse_rf}')
    print(f'R-squared (RandomForest Regressor): {r2_rf}')
    
    Test_Data_copy = Test_Data.copy()
    Test_Data_copy.loc[:, 'Predicted_points'] = y_pred_rf
    
    # Select only the desired columns
    Test_Data_concatenated = Test_Data_copy[['Player_name','Rank','Pos','Points_won', 'Predicted_points']]
    
    # Display the concatenated DataFrame sorted by 'Points_won'
    display(Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10))

    Test_Data_concatenated = Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10)

    if rank_data_40.empty:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_40 = pd.DataFrame(new_rank_column)
    else:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_40 = pd.concat([rank_data_40.reset_index(drop=True), new_rank_column.reset_index(drop=True)], axis=1)

    print("---------------------------------------------------------------------")

display(rank_data_40)

In [None]:
top = top_50

rank_data_50 = pd.DataFrame()

for year in range(1998, 2024):
    print("Selected Features Top ", top, " Test year:", year)
    
    # Define the range of years for training data
    train_years = range(year - 9, year - 4)  # 5 to 9 years prior to the test year
    
    # Filter the data for training and testing
    Test_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] == year]
    Train_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'].isin(train_years)]
    
    X_train, y_train = Data_Organizer(Train_Data,top)
    X_test , y_test = Data_Organizer(Test_Data,top)
    
    # Initialize and train a RandomForest Regressor model
    rf_model = RandomForestRegressor(n_estimators=200,random_state=42, n_jobs=-1)
    rf_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_rf = rf_model.predict(X_test)
    
    # Evaluate the model
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)
    
    # Mean Squared Error (MAE)
    print(f'Mean Squared Error (RandomForest Regressor): {mse_rf}')
    print(f'R-squared (RandomForest Regressor): {r2_rf}')
    
    Test_Data_copy = Test_Data.copy()
    Test_Data_copy.loc[:, 'Predicted_points'] = y_pred_rf
    
    # Select only the desired columns
    Test_Data_concatenated = Test_Data_copy[['Player_name','Rank','Pos','Points_won', 'Predicted_points']]
    
    # Display the concatenated DataFrame sorted by 'Points_won'
    display(Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10))

    Test_Data_concatenated = Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10)

    if rank_data_50.empty:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_50 = pd.DataFrame(new_rank_column)
    else:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_50 = pd.concat([rank_data_50.reset_index(drop=True), new_rank_column.reset_index(drop=True)], axis=1)

    print("---------------------------------------------------------------------")

display(rank_data_50)

In [None]:
rank_data_10.to_csv('RDF+RDF_Period_rank_data_10.csv', index=False)
rank_data_20.to_csv('RDF+RDF_Period_rank_data_20.csv', index=False)
rank_data_30.to_csv('RDF+RDF_Period_rank_data_30.csv', index=False)
rank_data_40.to_csv('RDF+RDF_Period_rank_data_40.csv', index=False)
rank_data_50.to_csv('RDF+RDF_Period_rank_data_50.csv', index=False)