In [59]:
import pandas as pd

from sklearn.ensemble import RandomForestRegressor

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import mean_squared_error, r2_score

import matplotlib.pyplot as plt

In [60]:
F_selection = pd.read_csv("F_Selection/top_N_features_Mutual_Information_ScoreAll.csv")

# Extract top 10, 20, 30, and 50 features
top_10 = F_selection["Feature"].head(10).tolist()
top_20 = F_selection["Feature"].head(20).tolist()
top_30 = F_selection["Feature"].head(30).tolist()
top_40 = F_selection["Feature"].head(40).tolist()
top_50 = F_selection["Feature"].head(50).tolist()

In [61]:
def Data_Organizer(raw_Data,top):
    
    All_players_dataSet = raw_Data.copy()
    
    All_players_dataSet = All_players_dataSet.drop(columns=All_players_dataSet.columns[All_players_dataSet.columns.str.contains('Unnamed:')])
    All_players_dataSet.reset_index(drop=True, inplace=True)
    
    All_players_dataSet.fillna(0, inplace=True)
    All_players_dataSet.replace('', 0, inplace=True)
    All_players_dataSet.replace('--', 0, inplace=True)
    
    # check if any NaN,empty Strings exists in the dataframe
    any_missing_values = All_players_dataSet.isna().any().any()
    any_empty_values = (All_players_dataSet.applymap(lambda x: x == '')).any().any()
    
    if any_missing_values or any_empty_values:
        print("DataFrame contains missing values or empty strings/spaces.")
    else:
        print("DataFrame does not contain missing values or empty strings/spaces.")
        
    ## seperating our dataFrame
    Y = All_players_dataSet['Points_won']  # Target
    X = All_players_dataSet.drop('Points_won', axis=1)  # Features
    
    X = X.apply(pd.to_numeric, errors='coerce')

    ## keep the specified columns
    columns_to_keep = top
    
    ## keep only wanted columns
    X = X[columns_to_keep]

    return X, Y

In [62]:
all_players_w_add_sorted = pd.read_csv("all_players_w_add_sorted.csv")

  all_players_w_add_sorted = pd.read_csv("all_players_w_add_sorted.csv")


In [63]:
top = top_10

rank_data_10 = pd.DataFrame()

for year in range(1994,2024):
    
    print("Selected Features Top ",10," Test year : ", year)
    
    Test_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] == year]
    Train_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] < year]
    
    X_train, y_train = Data_Organizer(Train_Data,top)
    X_test , y_test = Data_Organizer(Test_Data,top)
    
    # Initialize and train a RandomForest Regressor model
    rf_model = RandomForestRegressor(n_estimators=200,random_state=42, n_jobs=-1)
    rf_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_rf = rf_model.predict(X_test)
    
    # Evaluate the model
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)
    
    # Mean Squared Error (MAE)
    print(f'Mean Squared Error (XGBoost): {mse_rf}')
    print(f'R-squared (XGBoost): {r2_rf}')
    
    Test_Data_copy = Test_Data.copy()
    Test_Data_copy.loc[:, 'Predicted_points'] = y_pred_rf
    
    # Select only the desired columns
    Test_Data_concatenated = Test_Data_copy[['Player_name','Rank','Pos','Points_won', 'Predicted_points']]
    
    # Display the concatenated DataFrame sorted by 'Points_won'
    display(Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10))

    Test_Data_concatenated = Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10)
    
    if rank_data_10.empty:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_10 = pd.DataFrame(new_rank_column)
    else:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_10 = pd.concat([rank_data_10.reset_index(drop=True), new_rank_column.reset_index(drop=True)], axis=1)
        
    print("---------------------------------------------------------------------")
    
display(rank_data_10)

Selected Features Top  10  Test year :  1994
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 3.144670495495496
R-squared (XGBoost): 0.1486508487704119


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
15651,Hakeem Olajuwon,1,C,23.0,29.46
15539,David Robinson,2,C,22.0,26.3
15643,John Stockton,-1.0,PG,0.0,17.225
15739,Patrick Ewing,-1,C,0.0,14.11
15677,Nate McMillan,7T,PG,3.0,13.13
15741,Tracy Moore,-1.0,SG,0.0,12.63
15646,Dell Demps,-1.0,PG,0.0,10.585
15616,Shaquille O'Neal,-1.0,C,0.0,6.135
15477,Scottie Pippen,4,SF,11.0,5.225
15649,Charles Oakley,9,PF,2.0,3.705


---------------------------------------------------------------------
Selected Features Top  10  Test year :  1995
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 6.014223662790698
R-squared (XGBoost): 0.044077043208789246


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
15113,David Robinson,4,C,12.0,33.01
15264,John Stockton,-1.0,PG,0.0,17.375
15199,Scottie Pippen,2,SF,16.0,13.25
15051,Patrick Ewing,-1,C,0.0,10.265
15026,Dikembe Mutombo,1,C,45.0,5.83
14994,Hakeem Olajuwon,3,C,13.0,5.62
15151,Gary Payton,6T,PG,2.0,5.505
15249,Nate McMillan,6T,PG,2.0,5.045
15077,Mookie Blaylock,-1,PG,0.0,4.895
15045,Charles Oakley,-1,PF,0.0,4.835


---------------------------------------------------------------------
Selected Features Top  10  Test year :  1996
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 6.147749846625766
R-squared (XGBoost): 0.18061002032511264


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
14887,Dikembe Mutombo,3,C,11.0,22.585
14909,Dennis Rodman,7,PF,4.0,18.67
14897,David Robinson,4,C,9.0,18.32
14619,Michael Jordan,6,SG,7.0,16.065
14838,Charles Oakley,-1,PF,0.0,14.85
14949,Patrick Ewing,-1,C,0.0,10.9
14749,Scottie Pippen,2,SF,15.0,10.51
14844,Gary Payton,1,PG,56.0,10.5
14797,Hakeem Olajuwon,5,C,8.0,8.77
14555,Horace Grant,8T,PF,1.0,4.455


---------------------------------------------------------------------
Selected Features Top  10  Test year :  1997
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 7.407246966731899
R-squared (XGBoost): 0.16867912100003224


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
14269,Dennis Rodman,-1,PF,0.0,19.72
14333,Mookie Blaylock,3,PG,18.0,13.4
14081,Michael Jordan,5,SG,3.0,12.77
14164,Scottie Pippen,4,SF,4.0,12.51
14210,Hakeem Olajuwon,-1,C,0.0,9.95
14149,Gary Payton,2,PG,25.0,9.64
14279,Horace Grant,6T,PF,1.0,8.41
14489,Dikembe Mutombo,1,C,60.0,8.41
14189,Charles Oakley,-1,PF,0.0,6.06
14277,David Robinson,-1,C,0.0,5.595


---------------------------------------------------------------------
Selected Features Top  10  Test year :  1998
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 3.0879892206477733
R-squared (XGBoost): 0.5038747373483474


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13836,Michael Jordan,4,SG,6.0,16.1
13764,Gary Payton,2,PG,37.0,14.09
13605,David Robinson,3,C,10.0,14.04
13894,Dikembe Mutombo,1,C,39.0,13.77
13637,Dennis Rodman,5T,PF,4.0,11.695
13889,Tim Duncan,5T,PF,4.0,7.805
13744,Bo Outlaw,5T,PF,4.0,7.745
13815,Mookie Blaylock,5T,PG,4.0,7.57
13842,Scottie Pippen,9T,SF,2.0,6.925
13956,Charles Oakley,-1,PF,0.0,5.945


---------------------------------------------------------------------
Selected Features Top  10  Test year :  1999
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 17.187988238396624
R-squared (XGBoost): -0.011921558628627205


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13406,Dennis Rodman,-1,PF,0.0,14.415
13121,Bo Outlaw,-1,PF,0.0,13.81
13050,Charles Oakley,-1,PF,0.0,13.615
13087,Horace Grant,-1,PF,0.0,12.985
13434,David Robinson,4,C,3.0,10.465
13424,Gary Payton,3,PG,6.0,9.12
13039,Hakeem Olajuwon,7T,C,1.0,6.9
13182,Dikembe Mutombo,2,C,10.0,6.6
13397,Scottie Pippen,7T,SF,1.0,6.27
13488,Tim Duncan,5T,PF,2.0,5.465


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2000
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 10.558752831196582
R-squared (XGBoost): -0.08898320868165244


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12909,Dikembe Mutombo,3T,C,11.0,31.2
12837,Anthony Mason,8T,SF,1.0,25.695
12725,Shaquille O'Neal,2,C,21.0,24.075
12708,Gary Payton,5T,PG,4.0,24.045
12987,David Robinson,-1,C,0.0,21.955
12830,Tim Duncan,-1,PF,0.0,13.24
12989,Clifford Robinson,8T,SF,1.0,13.0
12956,Alonzo Mourning,1,C,62.0,12.33
12841,Kevin Garnett,7,PF,2.0,10.955
12827,Scottie Pippen,8T,SF,1.0,10.815


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2001
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 12.19856581632653
R-squared (XGBoost): -0.06636061139390459


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12354,Anthony Mason,8T,PF,2.0,28.04
12106,David Robinson,5T,C,6.0,22.64
12271,Tim Duncan,3,PF,14.0,14.95
12113,Theo Ratliff,7,C,4.0,13.42
12427,Gary Payton,-1,PG,0.0,11.265
12460,Kevin Garnett,2,PF,26.0,10.655
12383,Bo Outlaw,-1,PF,0.0,10.035
12403,Shaquille O'Neal,8T,C,2.0,9.685
12492,Kobe Bryant,11T,SG,1.0,9.635
12488,Eddie Jones,-1,SG,0.0,9.11


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2002
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 24.946818829787244
R-squared (XGBoost): 0.12704272954374685


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12003,Dikembe Mutombo,3T,C,1.0,34.94
11941,Ben Wallace,1,C,116.0,19.35
11936,Kevin Garnett,2,PF,2.0,16.715
11852,Tim Duncan,-1,PF,0.0,14.085
12041,Eddie Jones,-1,SG,0.0,12.655
11927,Shaquille O'Neal,-1,C,0.0,11.785
12035,Alonzo Mourning,-1,C,0.0,10.595
11741,Jason Kidd,-1,PG,0.0,10.405
11862,David Robinson,-1,C,0.0,10.28
11950,Gary Payton,-1,PG,0.0,9.97


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2003
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 548.6691073464913
R-squared (XGBoost): 0.2330240680240231


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
11137,Ben Wallace,1,C,531.0,71.645
11176,Gary Payton,11,PG,5.0,28.795
11461,Dikembe Mutombo,-1,C,0.0,24.82
11405,Tim Duncan,4,PF,90.0,24.41
11177,Gary Payton,11,PG,5.0,20.225
11183,Kevin Garnett,3,PF,121.0,12.58
11559,Jason Kidd,-1,PG,0.0,12.435
11463,Kobe Bryant,8,SG,16.0,11.885
11427,Eddie Jones,-1,SG,0.0,9.26
11412,Shawn Marion,13T,SF,3.0,8.805


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2004
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 474.3809750967118
R-squared (XGBoost): 0.31372321177338813


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
10663,Ben Wallace,2,C,325.0,285.91
11127,Kevin Garnett,6,PF,36.0,142.635
10818,Tim Duncan,7,PF,8.0,34.97
10722,Metta World Peace,1,SF,476.0,21.31
10721,Gary Payton,-1,PG,0.0,14.75
11110,James Posey,10T,SG,1.0,6.625
10764,Shaquille O'Neal,10T,C,1.0,6.31
10852,Bruce Bowen,4,SF,76.0,6.2
10871,Andrei Kirilenko,5,PF,67.0,6.05
11106,Dikembe Mutombo,-1,C,0.0,5.72


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2005
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 382.6698259505703
R-squared (XGBoost): 0.08201274702636108


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
10338,Shawn Marion,5,PF,57.0,42.175
10097,Theo Ratliff,-1,C,0.0,37.15
10202,Eddie Jones,-1,SF,0.0,32.245
10592,Andrei Kirilenko,10,PF,25.0,26.885
10276,Dikembe Mutombo,-1,C,0.0,22.33
10455,Ben Wallace,1,C,339.0,22.305
10343,Kevin Garnett,9,PF,30.0,20.16
10153,Dirk Nowitzki,-1.0,PF,0.0,15.905
10151,Manu Ginóbili,18T,SG,2.0,12.515
10326,Tim Duncan,4,PF,81.0,12.125


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2006
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 359.26824301757813
R-squared (XGBoost): 0.38502208857841946


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9841,Ben Wallace,1,C,420.0,138.895
9704,Marcus Camby,5,C,55.0,45.3
10044,Tim Duncan,6,PF,42.0,38.75
9863,Shawn Marion,7,PF,33.0,38.32
9691,Andrei Kirilenko,3,SF,121.0,28.845
9860,Allen Iverson,-1,PG,0.0,28.345
10037,Gerald Wallace,9,SF,27.0,22.41
10019,Shane Battier,10T,SF,3.0,22.08
10028,Kevin Garnett,13T,PF,1.0,19.665
9726,Rasheed Wallace,13T,PF,1.0,19.085


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2007
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 459.15927340862424
R-squared (XGBoost): 0.1722637331928032


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9414,Ben Wallace,6,C,42.0,136.825
9339,Bruce Bowen,2,SF,206.0,39.315
9438,Tim Duncan,3,C,158.0,34.43
9356,Kevin Garnett,13T,PF,7.0,33.62
9407,Marcus Camby,1,C,431.0,31.245
9418,Shawn Marion,4,SF,93.0,30.065
9566,Metta World Peace,8,SF,20.0,24.715
9344,Dikembe Mutombo,-1,C,0.0,19.25
9559,Emeka Okafor,11T,C,10.0,16.925
9203,Manu Ginóbili,-1,SG,0.0,15.89


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2008
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 494.36943723908917
R-squared (XGBoost): 0.1707235999419946


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8949,Rasheed Wallace,12,C,9.0,168.99
8630,Josh Smith,6,PF,34.0,86.245
8588,Shawn Marion,-1,PF,0.0,81.22
8731,Kevin Garnett,1,PF,493.0,71.22
8762,Marcus Camby,2,C,178.0,68.0
9030,Ben Wallace,-1,PF,0.0,56.02
8659,Metta World Peace,16T,SF,1.0,47.33
8573,Tim Duncan,9,C,22.0,47.2
8568,Shawn Marion,-1,PF,0.0,38.51
9081,Ben Wallace,-1,C,0.0,31.46


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2009
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 678.9920866990291
R-squared (XGBoost): -0.0439325114657656


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8365,Kevin Garnett,8,PF,31.0,303.8
8558,Jason Kidd,-1,PG,0.0,68.9
8556,Chris Paul,6,PG,49.0,57.835
8212,Dwight Howard,1,C,542.0,55.055
8352,Emeka Okafor,-1,C,0.0,44.415
8442,Metta World Peace,5,SF,54.0,26.72
8426,Kobe Bryant,7,SG,43.0,26.42
8254,Gerald Wallace,-1,SF,0.0,25.035
8255,Joel Przybilla,16T,C,1.0,24.525
8274,Tayshaun Prince,-1,SF,0.0,21.42


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2010
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 386.03374667968745
R-squared (XGBoost): 0.46612569105608714


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8018,Dwight Howard,1,C,576.0,200.695
7683,Tim Duncan,14,C,7.0,150.365
7939,Kevin Garnett,-1,PF,0.0,109.62
8016,Rajon Rondo,5,PG,55.0,33.54
7799,Marcus Camby,10T,C,13.0,33.24
8041,LeBron James,4,SF,61.0,31.975
7544,Dwyane Wade,10T,SG,13.0,31.72
8050,Metta World Peace,6,SF,29.0,30.995
7797,Marcus Camby,10T,PF,13.0,29.135
7713,Jason Kidd,-1,PG,0.0,28.325


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2011
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 240.51575498154978
R-squared (XGBoost): 0.6387511379377948


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
7366,Dwight Howard,1,C,585.0,246.225
7121,Kevin Garnett,2,PF,77.0,98.38
7508,Josh Smith,25,PF,1.0,86.7
7523,Andrew Bogut,6.0,C,32.0,73.87
7321,Rajon Rondo,5,PG,45.0,54.295
7391,Tyson Chandler,3,C,70.0,50.735
7258,Tim Duncan,17,C,7.0,39.53
7231,Chris Paul,12T,PG,13.0,35.51
7288,Dwyane Wade,16,SG,8.0,20.54
7186,LeBron James,9,SF,25.0,20.115


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2012
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 392.3181282038834
R-squared (XGBoost): 0.1330939419398517


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
6987,Kevin Garnett,5,C,44.0,64.095
6510,Dwight Howard,3,C,186.0,53.43
6572,LeBron James,4,SF,112.0,38.25
6938,Marc Gasol,12T,C,5.0,34.08
6511,Chris Paul,12T,PG,5.0,28.275
6916,Tim Duncan,-1,C,0.0,26.53
6747,Josh Smith,10,PF,9.0,25.75
6779,Dwyane Wade,18T,SG,1.0,23.965
6523,Andre Iguodala,7,SF,33.0,21.365
6487,Elton Brand,-1,C,0.0,15.64


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2013
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 201.59292194072657
R-squared (XGBoost): 0.16204203720381272


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
6315,Josh Smith,-1.0,PF,0.0,113.655
5996,Dwight Howard,14.0,C,9.0,111.82
6419,Tim Duncan,6.0,C,94.0,69.21
6084,LeBron James,2.0,PF,149.0,66.87
6124,Kirk Hinrich,-1.0,PG,0.0,60.99
6265,Kevin Garnett,-1.0,C,0.0,52.83
6373,Tony Allen,5.0,SG,102.0,43.75
6094,Mike Conley,21.0,PG,1.0,41.155
6291,Chris Paul,11.0,PG,19.0,36.615
6002,Andre Iguodala,9.0,SG,43.0,32.2


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2014
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 333.80960296532845
R-squared (XGBoost): 0.4915815700424646


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
5847,Joakim Noah,1.0,C,555.0,197.73
5813,Andrew Bogut,10.0,C,11.0,123.69
5641,Chris Paul,13T,PG,5.0,77.29
5804,Serge Ibaka,4.0,PF,79.0,54.74
5514,Tim Duncan,13T,C,5.0,45.075
5461,Kawhi Leonard,11,SF,9.0,43.58
5948,Roy Hibbert,2.0,C,166.0,41.63
5640,Kirk Hinrich,-1,PG,0.0,37.155
5952,Paul George,7.0,SF,30.0,31.61
5754,Dwight Howard,8T,C,25.0,22.325


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2015
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 389.3468613478261
R-squared (XGBoost): 0.23293048755689916


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
4915,Tim Duncan,8,C,12.0,84.725
5314,Kawhi Leonard,1,SF,333.0,66.455
5393,Anthony Davis,4,PF,107.0,64.855
4890,Marc Gasol,10T,C,7.0,52.345
5076,Nerlens Noel,15T,C,1.0,51.305
5169,Larry Sanders,-1.0,C,0.0,48.72
5107,Draymond Green,2,PF,317.0,46.915
5216,Tony Allen,7.0,SG,29.0,46.46
4856,Roy Hibbert,-1.0,C,0.0,44.185
4860,Trevor Ariza,13T,SF,3.0,42.025


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2016
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 358.87955970643947
R-squared (XGBoost): 0.6085883389323458


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
4597,Draymond Green,2,PF,421.0,239.41
4426,Kawhi Leonard,1,SF,547.0,172.54
4782,Tim Duncan,-1,C,0.0,68.585
4626,Chuck Hayes,-1,C,0.0,50.085
4474,DeAndre Jordan,4.0,C,50.0,46.415
4362,Chris Paul,13T,PG,1.0,41.445
4570,Hassan Whiteside,3,C,83.0,34.795
4648,Thabo Sefolosha,-1.0,SF,0.0,33.065
4524,LeBron James,11T,SF,2.0,31.19
4354,Kevin Garnett,-1,PF,0.0,27.015


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2017
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 86.87569972324722
R-squared (XGBoost): 0.8389603541665925


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
3989,Draymond Green,1,PF,434.0,360.515
3977,Rudy Gobert,2.0,C,269.0,355.615
4129,Anthony Davis,-1,C,0.0,60.35
4054,Dwight Howard,-1,C,0.0,54.905
4268,Kawhi Leonard,3,SF,182.0,53.21
4143,Robert Covington,4.0,SF,4.0,52.885
4127,Michael Kidd-Gilchrist,-1.0,SF,0.0,43.84
3970,Andre Roberson,5T,SF,3.0,41.99
4154,DeAndre Jordan,-1.0,C,0.0,38.8
3988,Russell Westbrook,-1,PG,0.0,29.415


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2018
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 374.22384570247937
R-squared (XGBoost): 0.1462624715181966


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
3312,Dwight Howard,-1,C,0.0,62.17
3655,Robert Covington,8.0,SF,8.0,47.14
3583,Clint Capela,14.0,C,2.0,46.84
3403,Rudy Gobert,1.0,C,466.0,46.68
3270,Draymond Green,6,PF,20.0,34.19
3177,Anthony Davis,3,PF,139.0,34.075
3650,Joakim Noah,-1.0,C,0.0,33.515
3339,Luol Deng,-1.0,SF,0.0,32.085
3598,Victor Oladipo,15T,SG,1.0,31.735
3324,Andre Drummond,15T,C,1.0,29.155


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2019
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 235.40250831993566
R-squared (XGBoost): 0.4567159050397117


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
2932,Paul George,3.0,SF,150.0,216.505
2825,Rudy Gobert,1.0,C,411.0,152.205
3139,Russell Westbrook,-1,PG,0.0,51.165
2980,Marc Gasol,-1,C,0.0,45.715
2876,Draymond Green,6T,PF,6.0,45.065
3120,Hassan Whiteside,-1,C,0.0,37.605
2979,Marc Gasol,-1,C,0.0,34.96
3114,Giannis Antetokounmpo,2.0,PF,280.0,30.055
2963,Joel Embiid,4,C,26.0,22.405
3148,Andre Drummond,-1,C,0.0,21.46


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2020
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 305.05983472927244
R-squared (XGBoost): 0.3111304697044849


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
2524,Rudy Gobert,3.0,C,187.0,169.28
2270,Anthony Davis,2,PF,200.0,114.825
2198,Kawhi Leonard,8T,SF,5.0,26.995
2259,LeBron James,-1,PG,0.0,23.245
2094,Brook Lopez,10.0,C,4.0,23.015
2127,Giannis Antetokounmpo,1.0,PF,432.0,20.53
2314,Robert Covington,-1.0,PF,0.0,17.795
2311,Robert Covington,-1.0,C,0.0,17.755
2340,Joel Embiid,-1,C,0.0,16.475
2206,Ben Simmons,4.0,PG,32.0,14.485


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2021
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 176.54042180511186
R-squared (XGBoost): 0.635809688346233


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
1379,Rudy Gobert,1.0,C,464.0,280.03
1719,Myles Turner,9.0,C,3.0,49.055
1422,Draymond Green,3.0,PF,76.0,46.685
1716,Clint Capela,6.0,C,10.0,45.05
1399,Joel Embiid,7.0,C,7.0,35.785
1788,Bam Adebayo,4.0,C,31.0,32.855
1785,Ben Simmons,2.0,PG,287.0,20.695
1820,Russell Westbrook,-1.0,PG,0.0,19.045
1402,Marc Gasol,-1.0,C,0.0,10.67
1895,Giannis Antetokounmpo,5.0,PF,12.0,8.985


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2022
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 182.43854297202796
R-squared (XGBoost): 0.15229858789474982


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
1022,Rudy Gobert,3.0,C,136.0,170.59
1184,Nikola Jokić,-1.0,C,0.0,100.915
1007,Joel Embiid,-1.0,C,0.0,46.085
1123,Draymond Green,10.0,PF,2.0,44.44
1178,Matisse Thybulle,11.0,SG,1.0,41.33
713,Marcus Smart,1.0,PG,257.0,31.365
933,Al Horford,9.0,C,3.0,22.385
1306,Bam Adebayo,4.0,C,128.0,20.9
981,Robert Williams,7.0,C,8.0,15.045
678,Chris Paul,-1.0,PG,0.0,14.705


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2023
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 384.2328073891626
R-squared (XGBoost): 0.09702159380375519


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
497,Joel Embiid,9T,C,7.0,61.605
176,Giannis Antetokounmpo,6.0,PF,14.0,41.735
524,Bam Adebayo,5.0,C,18.0,34.475
408,Jarrett Allen,-1,C,0.0,34.03
137,Evan Mobley,3.0,PF,101.0,33.445
323,Brook Lopez,2.0,C,309.0,23.205
301,Anthony Davis,-1,C,0.0,22.515
396,Rudy Gobert,-1.0,C,0.0,21.41
206,Nic Claxton,9T,C,7.0,20.315
59,Jaren Jackson Jr.,1.0,C,391.0,16.44


---------------------------------------------------------------------


Unnamed: 0,Rank_1994,Rank_1995,Rank_1996,Rank_1997,Rank_1998,Rank_1999,Rank_2000,Rank_2001,Rank_2002,Rank_2003,...,Rank_2014,Rank_2015,Rank_2016,Rank_2017,Rank_2018,Rank_2019,Rank_2020,Rank_2021,Rank_2022,Rank_2023
0,1,4,3,-1,4,-1,3T,8T,3T,1,...,1.0,8,2,1,-1,3.0,3.0,1.0,3.0,9T
1,2,-1.0,7,3,2,-1,8T,5T,1,11,...,10.0,1,1,2.0,8.0,1.0,2,9.0,-1.0,6.0
2,-1.0,2,4,5,3,-1,2,3,2,-1,...,13T,4,-1,-1,14.0,-1,8T,3.0,-1.0,5.0
3,-1,-1,6,4,1,-1,5T,7,-1,4,...,4.0,10T,-1,-1,1.0,-1,-1,6.0,10.0,-1
4,7T,1,-1,-1,5T,4,-1,-1,-1,11,...,13T,15T,4.0,3,6,6T,10.0,7.0,11.0,3.0
5,-1.0,3,-1,2,5T,3,-1,2,-1,3,...,11,-1.0,13T,4.0,3,-1,1.0,4.0,1.0,2.0
6,-1.0,6T,2,6T,5T,7T,8T,-1,-1,-1,...,2.0,2,3,-1.0,-1.0,-1,-1.0,2.0,9.0,-1
7,-1.0,6T,1,1,5T,2,1,8T,-1,8,...,-1,7.0,-1.0,5T,-1.0,2.0,-1.0,-1.0,4.0,-1.0
8,4,-1,5,-1,9T,7T,7,11T,-1,-1,...,7.0,-1.0,11T,-1.0,15T,4,-1,-1.0,7.0,9T
9,9,-1,8T,-1,-1,5T,8T,-1,-1,13T,...,8T,13T,-1,-1,15T,-1,4.0,5.0,-1.0,1.0


In [64]:
top = top_20

rank_data_20 = pd.DataFrame()

for year in range(1994,2024):
    print("Selected Features Top ",20," Test year : ", year)
    
    Test_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] == year]
    Train_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] < year]
    
    X_train, y_train = Data_Organizer(Train_Data,top)
    X_test , y_test = Data_Organizer(Test_Data,top)
    
    # Initialize and train a RandomForest Regressor model
    rf_model = RandomForestRegressor(n_estimators=200,random_state=42, n_jobs=-1)
    rf_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_rf = rf_model.predict(X_test)
    
    # Evaluate the model
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)
    
    # Mean Squared Error (MAE)
    print(f'Mean Squared Error (XGBoost): {mse_rf}')
    print(f'R-squared (XGBoost): {r2_rf}')
    
    Test_Data_copy = Test_Data.copy()
    Test_Data_copy.loc[:, 'Predicted_points'] = y_pred_rf
    
    # Select only the desired columns
    Test_Data_concatenated = Test_Data_copy[['Player_name','Rank','Pos','Points_won', 'Predicted_points']]
    
    # Display the concatenated DataFrame sorted by 'Points_won'
    display(Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10))

    Test_Data_concatenated = Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10)
    
    if rank_data_20.empty:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_20 = pd.DataFrame(new_rank_column)
    else:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_20 = pd.concat([rank_data_20.reset_index(drop=True), new_rank_column.reset_index(drop=True)], axis=1)

    print("---------------------------------------------------------------------")

display(rank_data_20)

Selected Features Top  20  Test year :  1994
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 3.7304938063063062
R-squared (XGBoost): -0.009947700471455656


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
15539,David Robinson,2,C,22.0,32.215
15651,Hakeem Olajuwon,1,C,23.0,30.255
15613,Karl Malone,-1.0,PF,0.0,15.73
15710,Latrell Sprewell,-1.0,SG,0.0,14.325
15741,Tracy Moore,-1.0,SG,0.0,11.54
15643,John Stockton,-1.0,PG,0.0,10.845
15646,Dell Demps,-1.0,PG,0.0,10.68
15739,Patrick Ewing,-1,C,0.0,9.765
15616,Shaquille O'Neal,-1.0,C,0.0,9.405
15677,Nate McMillan,7T,PG,3.0,7.465


---------------------------------------------------------------------
Selected Features Top  20  Test year :  1995
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 5.547170523255815
R-squared (XGBoost): 0.11831219693029804


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
15113,David Robinson,4,C,12.0,26.325
15264,John Stockton,-1.0,PG,0.0,17.19
15199,Scottie Pippen,2,SF,16.0,10.84
15051,Patrick Ewing,-1,C,0.0,7.12
14994,Hakeem Olajuwon,3,C,13.0,5.925
15151,Gary Payton,6T,PG,2.0,5.735
15026,Dikembe Mutombo,1,C,45.0,5.285
15045,Charles Oakley,-1,PF,0.0,4.685
15121,Dennis Rodman,5,PF,11.0,4.605
15077,Mookie Blaylock,-1,PG,0.0,4.505


---------------------------------------------------------------------
Selected Features Top  20  Test year :  1996
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 4.979816513292434
R-squared (XGBoost): 0.3362755718092666


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
14619,Michael Jordan,6.0,SG,7.0,19.445
14897,David Robinson,4.0,C,9.0,18.86
14887,Dikembe Mutombo,3.0,C,11.0,15.785
14844,Gary Payton,1.0,PG,56.0,14.41
14909,Dennis Rodman,7.0,PF,4.0,14.13
14838,Charles Oakley,-1.0,PF,0.0,10.425
14949,Patrick Ewing,-1.0,C,0.0,10.115
14797,Hakeem Olajuwon,5.0,C,8.0,9.8
14749,Scottie Pippen,2.0,SF,15.0,9.36
14830,Karl Malone,-1.0,PF,0.0,4.305


---------------------------------------------------------------------
Selected Features Top  20  Test year :  1997
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 6.634055528375734
R-squared (XGBoost): 0.25545497565377395


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
14149,Gary Payton,2,PG,25.0,20.915
14081,Michael Jordan,5,SG,3.0,16.24
14164,Scottie Pippen,4,SF,4.0,13.05
14333,Mookie Blaylock,3,PG,18.0,12.79
14269,Dennis Rodman,-1,PF,0.0,11.18
14210,Hakeem Olajuwon,-1,C,0.0,10.235
14489,Dikembe Mutombo,1,C,60.0,8.43
14279,Horace Grant,6T,PF,1.0,8.04
14277,David Robinson,-1,C,0.0,5.905
14189,Charles Oakley,-1,PF,0.0,4.455


---------------------------------------------------------------------
Selected Features Top  20  Test year :  1998
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 4.977903137651822
R-squared (XGBoost): 0.20023571160525955


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13836,Michael Jordan,4,SG,6.0,29.95
13889,Tim Duncan,5T,PF,4.0,15.145
13764,Gary Payton,2,PG,37.0,13.895
13605,David Robinson,3,C,10.0,8.86
13815,Mookie Blaylock,5T,PG,4.0,6.43
13637,Dennis Rodman,5T,PF,4.0,6.355
13705,Shawn Kemp,-1,PF,0.0,5.855
13894,Dikembe Mutombo,1,C,39.0,5.495
13601,Kevin Garnett,11T,PF,1.0,4.815
13842,Scottie Pippen,9T,SF,2.0,4.285


---------------------------------------------------------------------
Selected Features Top  20  Test year :  1999
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 13.259234440928273
R-squared (XGBoost): 0.21937896421680703


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13488,Tim Duncan,5T,PF,2.0,13.79
13269,Alonzo Mourning,1,C,89.0,13.415
13406,Dennis Rodman,-1,PF,0.0,10.25
13434,David Robinson,4,C,3.0,8.765
13050,Charles Oakley,-1,PF,0.0,7.985
13121,Bo Outlaw,-1,PF,0.0,7.885
13087,Horace Grant,-1,PF,0.0,7.625
13424,Gary Payton,3,PG,6.0,7.595
13397,Scottie Pippen,7T,SF,1.0,6.455
13182,Dikembe Mutombo,2,C,10.0,6.14


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2000
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 11.349825053418803
R-squared (XGBoost): -0.17057090948559783


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12725,Shaquille O'Neal,2,C,21.0,28.68
12708,Gary Payton,5T,PG,4.0,23.325
12830,Tim Duncan,-1,PF,0.0,21.96
12987,David Robinson,-1,C,0.0,20.985
12837,Anthony Mason,8T,SF,1.0,19.505
12841,Kevin Garnett,7,PF,2.0,18.2
12826,Anfernee Hardaway,-1.0,SG,0.0,17.215
12909,Dikembe Mutombo,3T,C,11.0,16.445
12827,Scottie Pippen,8T,SF,1.0,9.51
12956,Alonzo Mourning,1,C,62.0,8.225


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2001
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 12.027979234693879
R-squared (XGBoost): -0.05144846399687797


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12271,Tim Duncan,3,PF,14.0,36.43
12106,David Robinson,5T,C,6.0,25.15
12354,Anthony Mason,8T,PF,2.0,21.5
12427,Gary Payton,-1,PG,0.0,15.135
12460,Kevin Garnett,2,PF,26.0,11.465
12113,Theo Ratliff,7,C,4.0,10.72
12156,Jason Kidd,11T,PG,1.0,9.305
12274,Shawn Marion,11T,SF,1.0,7.655
12492,Kobe Bryant,11T,SG,1.0,7.16
12403,Shaquille O'Neal,8T,C,2.0,6.755


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2002
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 28.23195553191489
R-squared (XGBoost): 0.012086831233346307


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12003,Dikembe Mutombo,3T,C,1.0,34.14
11852,Tim Duncan,-1,PF,0.0,28.115
11936,Kevin Garnett,2,PF,2.0,19.055
12041,Eddie Jones,-1,SG,0.0,17.045
11941,Ben Wallace,1,C,116.0,15.33
11927,Shaquille O'Neal,-1,C,0.0,12.665
11950,Gary Payton,-1,PG,0.0,11.17
12005,Kobe Bryant,3T,SG,1.0,8.335
11831,Anthony Mason,-1,PF,0.0,7.865
12035,Alonzo Mourning,-1,C,0.0,7.78


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2003
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 551.7724252741227
R-squared (XGBoost): 0.22868598860986744


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
11137,Ben Wallace,1,C,531.0,69.68
11176,Gary Payton,11,PG,5.0,28.395
11461,Dikembe Mutombo,-1,C,0.0,24.82
11405,Tim Duncan,4,PF,90.0,24.06
11177,Gary Payton,11,PG,5.0,19.975
11183,Kevin Garnett,3,PF,121.0,15.73
11559,Jason Kidd,-1,PG,0.0,11.695
11463,Kobe Bryant,8,SG,16.0,10.545
11412,Shawn Marion,13T,SF,3.0,8.63
11411,Allen Iverson,6,SG,29.0,8.625


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2004
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 513.9914392166344
R-squared (XGBoost): 0.25641960238887607


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
10663,Ben Wallace,2,C,325.0,238.11
11127,Kevin Garnett,6,PF,36.0,163.855
10818,Tim Duncan,7,PF,8.0,79.435
10852,Bruce Bowen,4,SF,76.0,20.405
10722,Metta World Peace,1,SF,476.0,13.66
10721,Gary Payton,-1,PG,0.0,12.14
11110,James Posey,10T,SG,1.0,9.24
10990,Jason Kidd,-1,PG,0.0,8.98
10871,Andrei Kirilenko,5,PF,67.0,7.4
11009,Bo Outlaw,-1,PF,0.0,5.91


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2005
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 367.6891176806083
R-squared (XGBoost): 0.1179499918775353


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
10326,Tim Duncan,4,PF,81.0,37.34
10097,Theo Ratliff,-1,C,0.0,35.655
10455,Ben Wallace,1,C,339.0,25.93
10343,Kevin Garnett,9,PF,30.0,19.2
10202,Eddie Jones,-1,SF,0.0,17.765
10352,Tyson Chandler,18T,C,2.0,14.5
10592,Andrei Kirilenko,10,PF,25.0,14.47
10261,Metta World Peace,-1,SF,0.0,12.93
10223,Rasheed Wallace,16T,PF,3.0,11.255
10338,Shawn Marion,5,PF,57.0,10.94


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2006
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 362.2279070800782
R-squared (XGBoost): 0.3799558795297763


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9841,Ben Wallace,1,C,420.0,133.825
10044,Tim Duncan,6,PF,42.0,70.0
9704,Marcus Camby,5,C,55.0,56.08
9863,Shawn Marion,7,PF,33.0,33.47
9731,Metta World Peace,4,SF,65.0,28.16
10019,Shane Battier,10T,SF,3.0,27.34
10037,Gerald Wallace,9,SF,27.0,25.905
10028,Kevin Garnett,13T,PF,1.0,23.85
9691,Andrei Kirilenko,3,SF,121.0,23.47
9730,Metta World Peace,4,SF,65.0,23.36


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2007
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 389.76861622176597
R-squared (XGBoost): 0.297355758678331


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9414,Ben Wallace,6,C,42.0,103.365
9438,Tim Duncan,3,C,158.0,82.41
9339,Bruce Bowen,2,SF,206.0,74.95
9407,Marcus Camby,1,C,431.0,38.905
9418,Shawn Marion,4,SF,93.0,32.515
9356,Kevin Garnett,13T,PF,7.0,31.155
9355,Shane Battier,5,SF,86.0,25.985
9526,Kirk Hinrich,13T,PG,7.0,16.62
9566,Metta World Peace,8,SF,20.0,14.675
9344,Dikembe Mutombo,-1,C,0.0,12.59


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2008
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 421.06026954459196
R-squared (XGBoost): 0.29369552760899564


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8731,Kevin Garnett,1,PF,493.0,88.97
8630,Josh Smith,6,PF,34.0,64.365
8949,Rasheed Wallace,12,C,9.0,63.32
8588,Shawn Marion,-1,PF,0.0,53.49
8573,Tim Duncan,9,C,22.0,42.46
9030,Ben Wallace,-1,PF,0.0,38.74
8762,Marcus Camby,2,C,178.0,34.23
8568,Shawn Marion,-1,PF,0.0,30.225
8722,Shane Battier,3,SF,175.0,28.625
8659,Metta World Peace,16T,SF,1.0,25.935


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2009
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 612.9828516504855
R-squared (XGBoost): 0.0575549548450236


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8365,Kevin Garnett,8,PF,31.0,282.095
8484,LeBron James,2,SF,148.0,77.92
8212,Dwight Howard,1,C,542.0,60.885
8556,Chris Paul,6,PG,49.0,51.28
8558,Jason Kidd,-1,PG,0.0,39.08
8254,Gerald Wallace,-1,SF,0.0,34.31
8352,Emeka Okafor,-1,C,0.0,33.695
8442,Metta World Peace,5,SF,54.0,33.2
8426,Kobe Bryant,7,SG,43.0,25.025
8255,Joel Przybilla,16T,C,1.0,21.175


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2010
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 453.6796019531251
R-squared (XGBoost): 0.3725732891025023


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8018,Dwight Howard,1.0,C,576.0,160.03
7683,Tim Duncan,14.0,C,7.0,140.81
7939,Kevin Garnett,-1.0,PF,0.0,111.47
7911,Raja Bell,-1.0,SG,0.0,69.555
7905,Raja Bell,-1.0,SG,0.0,67.415
7813,Gerald Wallace,3.0,SF,113.0,57.79
8050,Metta World Peace,6.0,SF,29.0,48.32
7564,Kobe Bryant,12.0,SG,9.0,39.555
8041,LeBron James,4.0,SF,61.0,34.095
7892,Andrew Bogut,7.0,C,23.0,31.735


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2011
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 331.2633148523985
R-squared (XGBoost): 0.5024504920995182


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
7366,Dwight Howard,1,C,585.0,187.345
7121,Kevin Garnett,2,PF,77.0,103.765
7523,Andrew Bogut,6.0,C,32.0,90.15
7400,Luol Deng,10.0,SF,24.0,88.315
7508,Josh Smith,25,PF,1.0,77.45
7321,Rajon Rondo,5,PG,45.0,61.565
7231,Chris Paul,12T,PG,13.0,31.255
7186,LeBron James,9,SF,25.0,28.695
7258,Tim Duncan,17,C,7.0,28.03
7391,Tyson Chandler,3,C,70.0,22.59


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2012
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 392.59852383495144
R-squared (XGBoost): 0.13247435122060736


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
6987,Kevin Garnett,5,C,44.0,61.03
6510,Dwight Howard,3,C,186.0,53.88
6572,LeBron James,4,SF,112.0,46.555
6747,Josh Smith,10,PF,9.0,41.67
6484,Rajon Rondo,-1,PG,0.0,34.75
6523,Andre Iguodala,7,SF,33.0,34.55
6943,Luol Deng,9.0,SF,16.0,34.335
6779,Dwyane Wade,18T,SG,1.0,32.595
6938,Marc Gasol,12T,C,5.0,25.26
6511,Chris Paul,12T,PG,5.0,24.62


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2013
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 180.60406625239003
R-squared (XGBoost): 0.24928606633293515


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
6094,Mike Conley,21.0,PG,1.0,125.84
5996,Dwight Howard,14.0,C,9.0,86.725
6315,Josh Smith,-1.0,PF,0.0,74.56
6265,Kevin Garnett,-1.0,C,0.0,59.6
6217,Marc Gasol,1.0,C,212.0,59.24
6084,LeBron James,2.0,PF,149.0,58.545
6419,Tim Duncan,6.0,C,94.0,46.7
6373,Tony Allen,5.0,SG,102.0,42.72
6330,Joakim Noah,4.0,C,107.0,37.255
6253,Emeka Okafor,-1.0,C,0.0,25.255


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2014
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 311.02846313868616
R-squared (XGBoost): 0.526279048007194


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
5847,Joakim Noah,1.0,C,555.0,202.45
5813,Andrew Bogut,10.0,C,11.0,76.29
5641,Chris Paul,13T,PG,5.0,73.955
5514,Tim Duncan,13T,C,5.0,56.195
5804,Serge Ibaka,4.0,PF,79.0,52.26
5952,Paul George,7.0,SF,30.0,46.995
5948,Roy Hibbert,2.0,C,166.0,45.815
5754,Dwight Howard,8T,C,25.0,40.03
5895,Jimmy Butler,-1,SG,0.0,39.805
5461,Kawhi Leonard,11,SF,9.0,38.375


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2015
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 364.95612504347827
R-squared (XGBoost): 0.28098375846381474


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
5393,Anthony Davis,4,PF,107.0,104.27
5314,Kawhi Leonard,1,SF,333.0,77.15
4915,Tim Duncan,8,C,12.0,51.935
4890,Marc Gasol,10T,C,7.0,49.645
5107,Draymond Green,2,PF,317.0,44.625
5169,Larry Sanders,-1.0,C,0.0,43.125
5076,Nerlens Noel,15T,C,1.0,40.26
5216,Tony Allen,7.0,SG,29.0,37.8
5134,Rudy Gobert,5.0,C,33.0,35.805
5349,Chris Paul,15T,PG,1.0,34.83


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2016
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 245.84706827651513
R-squared (XGBoost): 0.7318671215450748


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
4597,Draymond Green,2,PF,421.0,259.435
4426,Kawhi Leonard,1,SF,547.0,245.765
4474,DeAndre Jordan,4.0,C,50.0,58.48
4626,Chuck Hayes,-1,C,0.0,52.44
4782,Tim Duncan,-1,C,0.0,49.735
4524,LeBron James,11T,SF,2.0,38.245
4570,Hassan Whiteside,3,C,83.0,38.015
4362,Chris Paul,13T,PG,1.0,35.9
4571,Paul Millsap,5.0,PF,21.0,33.545
4354,Kevin Garnett,-1,PF,0.0,22.055


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2017
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 107.67427061808115
R-squared (XGBoost): 0.8004064835052342


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
3989,Draymond Green,1,PF,434.0,341.97
3977,Rudy Gobert,2.0,C,269.0,332.34
4129,Anthony Davis,-1,C,0.0,94.215
3870,Hassan Whiteside,5T,C,3.0,54.31
3988,Russell Westbrook,-1,PG,0.0,52.91
3970,Andre Roberson,5T,SF,3.0,47.37
4143,Robert Covington,4.0,SF,4.0,45.405
4154,DeAndre Jordan,-1.0,C,0.0,42.685
4127,Michael Kidd-Gilchrist,-1.0,SF,0.0,41.425
4268,Kawhi Leonard,3,SF,182.0,39.745


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2018
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 360.8766473553719
R-squared (XGBoost): 0.17671217230523661


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
3324,Andre Drummond,15T,C,1.0,93.695
3403,Rudy Gobert,1.0,C,466.0,62.14
3655,Robert Covington,8.0,SF,8.0,48.07
3583,Clint Capela,14.0,C,2.0,44.315
3177,Anthony Davis,3,PF,139.0,39.675
3598,Victor Oladipo,15T,SG,1.0,37.93
3650,Joakim Noah,-1.0,C,0.0,36.395
3339,Luol Deng,-1.0,SF,0.0,35.48
3312,Dwight Howard,-1,C,0.0,28.67
3270,Draymond Green,6,PF,20.0,26.625


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2019
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 237.28133179260448
R-squared (XGBoost): 0.45237977915378125


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
2932,Paul George,3.0,SF,150.0,199.1
2825,Rudy Gobert,1.0,C,411.0,154.755
3148,Andre Drummond,-1,C,0.0,58.305
2876,Draymond Green,6T,PF,6.0,48.945
2980,Marc Gasol,-1,C,0.0,45.595
2979,Marc Gasol,-1,C,0.0,39.975
2963,Joel Embiid,4,C,26.0,38.435
3139,Russell Westbrook,-1,PG,0.0,38.345
2671,Anthony Davis,-1,C,0.0,32.875
3114,Giannis Antetokounmpo,2.0,PF,280.0,29.505


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2020
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 238.19841201353637
R-squared (XGBoost): 0.46211329870245577


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
2270,Anthony Davis,2,PF,200.0,112.26
2524,Rudy Gobert,3.0,C,187.0,89.75
2127,Giannis Antetokounmpo,1.0,PF,432.0,86.08
2198,Kawhi Leonard,8T,SF,5.0,40.295
2094,Brook Lopez,10.0,C,4.0,28.47
2187,Andre Drummond,8T,C,5.0,22.73
2340,Joel Embiid,-1,C,0.0,20.02
2206,Ben Simmons,4.0,PG,32.0,14.845
2259,LeBron James,-1,PG,0.0,14.215
2324,Marc Gasol,-1,C,0.0,13.9


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2021
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 145.47917388178914
R-squared (XGBoost): 0.6998868296937125


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
1379,Rudy Gobert,1.0,C,464.0,377.83
1554,Mike Conley,-1.0,PG,0.0,43.245
1716,Clint Capela,6.0,C,10.0,39.185
1895,Giannis Antetokounmpo,5.0,PF,12.0,30.505
1399,Joel Embiid,7.0,C,7.0,29.71
1422,Draymond Green,3.0,PF,76.0,29.475
1517,LeBron James,-1.0,PG,0.0,28.185
1610,Spencer Dinwiddie,-1.0,SG,0.0,19.47
1820,Russell Westbrook,-1.0,PG,0.0,16.93
1719,Myles Turner,9.0,C,3.0,16.27


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2022
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 184.66940968531466
R-squared (XGBoost): 0.1419328568805437


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
1022,Rudy Gobert,3.0,C,136.0,167.315
1184,Nikola Jokić,-1.0,C,0.0,79.39
1046,Sekou Doumbouya,-1.0,PF,0.0,71.615
1007,Joel Embiid,-1.0,C,0.0,62.68
1306,Bam Adebayo,4.0,C,128.0,36.585
678,Chris Paul,-1.0,PG,0.0,34.195
1111,Giannis Antetokounmpo,6.0,PF,58.0,29.585
992,Paul George,-1.0,PF,0.0,28.675
933,Al Horford,9.0,C,3.0,27.53
713,Marcus Smart,1.0,PG,257.0,27.005


---------------------------------------------------------------------
Selected Features Top  20  Test year :  2023
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 362.5988993021346
R-squared (XGBoost): 0.1478630406259548


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
176,Giannis Antetokounmpo,6.0,PF,14.0,54.405
497,Joel Embiid,9T,C,7.0,43.965
323,Brook Lopez,2.0,C,309.0,33.97
408,Jarrett Allen,-1,C,0.0,32.805
137,Evan Mobley,3.0,PF,101.0,31.01
396,Rudy Gobert,-1.0,C,0.0,29.29
59,Jaren Jackson Jr.,1.0,C,391.0,25.945
524,Bam Adebayo,5.0,C,18.0,25.895
301,Anthony Davis,-1,C,0.0,23.595
206,Nic Claxton,9T,C,7.0,16.67


---------------------------------------------------------------------


Unnamed: 0,Rank_1994,Rank_1995,Rank_1996,Rank_1997,Rank_1998,Rank_1999,Rank_2000,Rank_2001,Rank_2002,Rank_2003,...,Rank_2014,Rank_2015,Rank_2016,Rank_2017,Rank_2018,Rank_2019,Rank_2020,Rank_2021,Rank_2022,Rank_2023
0,2,4,6.0,2,4,5T,2,3,3T,1,...,1.0,4,2,1,15T,3.0,2,1.0,3.0,6.0
1,1,-1.0,4.0,5,5T,1,5T,5T,-1,11,...,10.0,1,1,2.0,1.0,1.0,3.0,-1.0,-1.0,9T
2,-1.0,2,3.0,4,2,-1,-1,8T,2,-1,...,13T,8,4.0,-1,8.0,-1,1.0,6.0,-1.0,2.0
3,-1.0,-1,1.0,3,3,4,-1,-1,-1,4,...,13T,10T,-1,5T,14.0,6T,8T,5.0,-1.0,-1
4,-1.0,3,7.0,-1,5T,-1,8T,2,1,11,...,4.0,2,-1,-1,3,-1,10.0,7.0,4.0,3.0
5,-1.0,6T,-1.0,-1,5T,-1,7,7,-1,3,...,7.0,-1.0,11T,5T,15T,-1,8T,3.0,-1.0,-1.0
6,-1.0,1,-1.0,1,-1,-1,-1.0,11T,-1,-1,...,2.0,15T,3,4.0,-1.0,4,-1,-1.0,6.0,1.0
7,-1,-1,5.0,6T,1,3,3T,11T,3T,8,...,8T,7.0,13T,-1.0,-1.0,-1,4.0,-1.0,-1.0,5.0
8,-1.0,5,2.0,-1,11T,7T,8T,11T,-1,13T,...,-1,5.0,5.0,-1.0,-1,-1,-1,-1.0,9.0,-1
9,7T,-1,-1.0,-1,9T,2,1,8T,-1,6,...,11,15T,-1,3,6,2.0,-1,9.0,1.0,9T


In [65]:
top = top_30

rank_data_30 = pd.DataFrame()

for year in range(1994,2024):
    print("Selected Features Top ",30," Test year : ", year)
    
    Test_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] == year]
    Train_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] < year]
    
    X_train, y_train = Data_Organizer(Train_Data,top)
    X_test , y_test = Data_Organizer(Test_Data,top)
    
    # Initialize and train a RandomForest Regressor model
    rf_model = RandomForestRegressor(n_estimators=200,random_state=42, n_jobs=-1)
    rf_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_rf = rf_model.predict(X_test)
    
    # Evaluate the model
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)
    
    # Mean Squared Error (MAE)
    print(f'Mean Squared Error (XGBoost): {mse_rf}')
    print(f'R-squared (XGBoost): {r2_rf}')
    
    Test_Data_copy = Test_Data.copy()
    Test_Data_copy.loc[:, 'Predicted_points'] = y_pred_rf
    
    # Select only the desired columns
    Test_Data_concatenated = Test_Data_copy[['Player_name','Rank','Pos','Points_won', 'Predicted_points']]
    
    # Display the concatenated DataFrame sorted by 'Points_won'
    display(Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10))

    Test_Data_concatenated = Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10)
    
    if rank_data_30.empty:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_30 = pd.DataFrame(new_rank_column)
    else:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_30 = pd.concat([rank_data_30.reset_index(drop=True), new_rank_column.reset_index(drop=True)], axis=1)

    print("---------------------------------------------------------------------")

display(rank_data_30)

Selected Features Top  30  Test year :  1994
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 3.417408614864865
R-squared (XGBoost): 0.074813107498101


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
15539,David Robinson,2,C,22.0,30.815
15651,Hakeem Olajuwon,1,C,23.0,30.425
15613,Karl Malone,-1.0,PF,0.0,13.22
15741,Tracy Moore,-1.0,SG,0.0,12.185
15710,Latrell Sprewell,-1.0,SG,0.0,11.64
15643,John Stockton,-1.0,PG,0.0,11.525
15646,Dell Demps,-1.0,PG,0.0,11.005
15739,Patrick Ewing,-1,C,0.0,10.07
15616,Shaquille O'Neal,-1.0,C,0.0,9.77
15677,Nate McMillan,7T,PG,3.0,8.255


---------------------------------------------------------------------
Selected Features Top  30  Test year :  1995
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 5.526855
R-squared (XGBoost): 0.12154122134779832


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
15113,David Robinson,4,C,12.0,27.405
15264,John Stockton,-1.0,PG,0.0,16.915
15199,Scottie Pippen,2,SF,16.0,12.55
15051,Patrick Ewing,-1,C,0.0,7.045
15151,Gary Payton,6T,PG,2.0,6.09
14994,Hakeem Olajuwon,3,C,13.0,5.345
15026,Dikembe Mutombo,1,C,45.0,5.17
15121,Dennis Rodman,5,PF,11.0,5.065
15045,Charles Oakley,-1,PF,0.0,3.935
15249,Nate McMillan,6T,PG,2.0,3.755


---------------------------------------------------------------------
Selected Features Top  30  Test year :  1996
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 5.196320603271983
R-squared (XGBoost): 0.3074192770162635


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
14897,David Robinson,4,C,9.0,18.785
14619,Michael Jordan,6,SG,7.0,17.435
14887,Dikembe Mutombo,3,C,11.0,15.695
14909,Dennis Rodman,7,PF,4.0,13.935
14844,Gary Payton,1,PG,56.0,12.03
14797,Hakeem Olajuwon,5,C,8.0,9.825
14838,Charles Oakley,-1,PF,0.0,9.16
14949,Patrick Ewing,-1,C,0.0,9.06
14749,Scottie Pippen,2,SF,15.0,8.27
14555,Horace Grant,8T,PF,1.0,3.475


---------------------------------------------------------------------
Selected Features Top  30  Test year :  1997
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 6.671022113502936
R-squared (XGBoost): 0.25130618809753424


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
14149,Gary Payton,2,PG,25.0,21.235
14081,Michael Jordan,5,SG,3.0,17.945
14164,Scottie Pippen,4,SF,4.0,14.485
14333,Mookie Blaylock,3,PG,18.0,12.675
14269,Dennis Rodman,-1,PF,0.0,10.38
14210,Hakeem Olajuwon,-1,C,0.0,8.965
14489,Dikembe Mutombo,1,C,60.0,8.02
14279,Horace Grant,6T,PF,1.0,6.995
14277,David Robinson,-1,C,0.0,4.825
14189,Charles Oakley,-1,PF,0.0,4.15


---------------------------------------------------------------------
Selected Features Top  30  Test year :  1998
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 4.2524202429149796
R-squared (XGBoost): 0.316793887810646


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13836,Michael Jordan,4,SG,6.0,27.35
13889,Tim Duncan,5T,PF,4.0,17.285
13764,Gary Payton,2,PG,37.0,15.55
13637,Dennis Rodman,5T,PF,4.0,10.445
13894,Dikembe Mutombo,1,C,39.0,9.155
13605,David Robinson,3,C,10.0,8.73
13815,Mookie Blaylock,5T,PG,4.0,7.245
13705,Shawn Kemp,-1,PF,0.0,4.745
13842,Scottie Pippen,9T,SF,2.0,3.84
13601,Kevin Garnett,11T,PF,1.0,3.395


---------------------------------------------------------------------
Selected Features Top  30  Test year :  1999
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 13.147983333333334
R-squared (XGBoost): 0.22592873564061688


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13488,Tim Duncan,5T,PF,2.0,14.42
13269,Alonzo Mourning,1,C,89.0,13.35
13406,Dennis Rodman,-1,PF,0.0,11.43
13182,Dikembe Mutombo,2,C,10.0,8.815
13087,Horace Grant,-1,PF,0.0,7.65
13050,Charles Oakley,-1,PF,0.0,7.14
13121,Bo Outlaw,-1,PF,0.0,6.96
13424,Gary Payton,3,PG,6.0,6.89
13434,David Robinson,4,C,3.0,6.255
13397,Scottie Pippen,7T,SF,1.0,4.375


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2000
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 11.563085149572649
R-squared (XGBoost): -0.19256561544246176


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12725,Shaquille O'Neal,2,C,21.0,34.03
12708,Gary Payton,5T,PG,4.0,27.285
12830,Tim Duncan,-1,PF,0.0,22.355
12987,David Robinson,-1,C,0.0,21.185
12841,Kevin Garnett,7,PF,2.0,15.62
12837,Anthony Mason,8T,SF,1.0,13.265
12909,Dikembe Mutombo,3T,C,11.0,11.87
12741,Eddie Jones,3T,SG,11.0,6.165
12826,Anfernee Hardaway,-1.0,SG,0.0,5.73
12956,Alonzo Mourning,1,C,62.0,4.41


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2001
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 10.83779943877551
R-squared (XGBoost): 0.05259334501197621


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12271,Tim Duncan,3,PF,14.0,28.73
12106,David Robinson,5T,C,6.0,21.06
12354,Anthony Mason,8T,PF,2.0,18.005
12403,Shaquille O'Neal,8T,C,2.0,16.265
12427,Gary Payton,-1,PG,0.0,9.975
12153,John Stockton,-1.0,PG,0.0,9.9
12274,Shawn Marion,11T,SF,1.0,9.085
12418,Ben Wallace,5T,C,6.0,8.945
12156,Jason Kidd,11T,PG,1.0,6.84
12510,Dikembe Mutombo,1,C,48.0,6.55


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2002
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 26.427199255319156
R-squared (XGBoost): 0.07524017851202369


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12003,Dikembe Mutombo,3T,C,1.0,35.795
11852,Tim Duncan,-1,PF,0.0,29.38
11941,Ben Wallace,1,C,116.0,19.49
11927,Shaquille O'Neal,-1,C,0.0,17.795
11936,Kevin Garnett,2,PF,2.0,17.13
12041,Eddie Jones,-1,SG,0.0,13.145
11950,Gary Payton,-1,PG,0.0,8.57
11741,Jason Kidd,-1,PG,0.0,7.165
11884,Antoine Walker,-1.0,PF,0.0,6.625
12005,Kobe Bryant,3T,SG,1.0,6.07


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2003
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 541.2889037828947
R-squared (XGBoost): 0.24334073872877127


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
11137,Ben Wallace,1,C,531.0,73.29
11176,Gary Payton,11,PG,5.0,25.37
11405,Tim Duncan,4,PF,90.0,24.765
11461,Dikembe Mutombo,-1,C,0.0,23.245
11183,Kevin Garnett,3,PF,121.0,22.1
11177,Gary Payton,11,PG,5.0,16.92
11559,Jason Kidd,-1,PG,0.0,8.81
11411,Allen Iverson,6,SG,29.0,7.57
11463,Kobe Bryant,8,SG,16.0,7.495
11412,Shawn Marion,13T,SF,3.0,7.16


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2004
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 637.2413321566731
R-squared (XGBoost): 0.07811662415725618


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
11127,Kevin Garnett,6,PF,36.0,191.145
10663,Ben Wallace,2,C,325.0,74.93
10818,Tim Duncan,7,PF,8.0,37.565
11020,Baron Davis,-1.0,PG,0.0,16.695
10852,Bruce Bowen,4,SF,76.0,12.275
10722,Metta World Peace,1,SF,476.0,9.46
10721,Gary Payton,-1,PG,0.0,7.195
10990,Jason Kidd,-1,PG,0.0,7.155
10764,Shaquille O'Neal,10T,C,1.0,6.81
10871,Andrei Kirilenko,5,PF,67.0,4.545


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2005
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 378.11779724334605
R-squared (XGBoost): 0.09293261591861524


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
10326,Tim Duncan,4,PF,81.0,88.79
10343,Kevin Garnett,9,PF,30.0,37.535
10097,Theo Ratliff,-1,C,0.0,30.47
10261,Metta World Peace,-1,SF,0.0,21.685
10455,Ben Wallace,1,C,339.0,16.255
10592,Andrei Kirilenko,10,PF,25.0,13.72
10338,Shawn Marion,5,PF,57.0,11.215
10276,Dikembe Mutombo,-1,C,0.0,10.05
10202,Eddie Jones,-1,SF,0.0,9.97
10337,Allen Iverson,11,PG,10.0,8.555


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2006
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 412.19564526367185
R-squared (XGBoost): 0.294423534648675


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9841,Ben Wallace,1,C,420.0,100.285
10044,Tim Duncan,6,PF,42.0,72.34
9704,Marcus Camby,5,C,55.0,49.78
9863,Shawn Marion,7,PF,33.0,42.595
10028,Kevin Garnett,13T,PF,1.0,25.815
9731,Metta World Peace,4,SF,65.0,24.46
9764,Pavel Podkolzin,-1.0,C,0.0,20.45
9730,Metta World Peace,4,SF,65.0,19.39
9880,Eddie Jones,13T,SG,1.0,16.525
10037,Gerald Wallace,9,SF,27.0,15.435


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2007
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 393.5774345995893
R-squared (XGBoost): 0.2904895201254173


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9414,Ben Wallace,6,C,42.0,95.4
9339,Bruce Bowen,2,SF,206.0,75.59
9438,Tim Duncan,3,C,158.0,60.31
9407,Marcus Camby,1,C,431.0,43.25
9488,Renaldo Major,-1.0,PF,0.0,42.71
9356,Kevin Garnett,13T,PF,7.0,38.115
9355,Shane Battier,5,SF,86.0,32.435
9418,Shawn Marion,4,SF,93.0,28.225
9489,LeBron James,-1.0,SF,0.0,19.52
9526,Kirk Hinrich,13T,PG,7.0,19.44


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2008
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 387.3835281309298
R-squared (XGBoost): 0.3501863314118605


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8731,Kevin Garnett,1,PF,493.0,103.675
8588,Shawn Marion,-1,PF,0.0,46.375
8630,Josh Smith,6,PF,34.0,42.82
8573,Tim Duncan,9,C,22.0,42.13
8762,Marcus Camby,2,C,178.0,36.035
8891,Tayshaun Prince,10,SF,13.0,34.39
8568,Shawn Marion,-1,PF,0.0,32.155
8659,Metta World Peace,16T,SF,1.0,28.575
8784,Bruce Bowen,4,SF,80.0,26.705
8722,Shane Battier,3,SF,175.0,25.915


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2009
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 576.7776605825243
R-squared (XGBoost): 0.1132194858168375


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8365,Kevin Garnett,8.0,PF,31.0,288.19
8212,Dwight Howard,1.0,C,542.0,80.515
8484,LeBron James,2.0,SF,148.0,77.47
8556,Chris Paul,6.0,PG,49.0,52.6
8442,Metta World Peace,5.0,SF,54.0,26.025
8170,Tim Duncan,11.0,C,7.0,22.385
8558,Jason Kidd,-1.0,PG,0.0,21.71
8194,Rajon Rondo,10.0,PG,8.0,20.7
8254,Gerald Wallace,-1.0,SF,0.0,19.25
8428,Troy Murphy,-1.0,PF,0.0,18.38


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2010
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 488.4510352539062
R-squared (XGBoost): 0.32448533025405724


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8018,Dwight Howard,1,C,576.0,150.685
7683,Tim Duncan,14,C,7.0,145.91
7939,Kevin Garnett,-1,PF,0.0,108.21
7905,Raja Bell,-1,SG,0.0,101.765
7911,Raja Bell,-1,SG,0.0,97.83
7813,Gerald Wallace,3,SF,113.0,67.06
8050,Metta World Peace,6,SF,29.0,58.44
8041,LeBron James,4,SF,61.0,40.345
7564,Kobe Bryant,12,SG,9.0,33.405
7544,Dwyane Wade,10T,SG,13.0,30.21


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2011
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 209.40038685424352
R-squared (XGBoost): 0.685485670274349


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
7366,Dwight Howard,1,C,585.0,268.73
7121,Kevin Garnett,2,PF,77.0,95.275
7321,Rajon Rondo,5,PG,45.0,65.615
7400,Luol Deng,10.0,SF,24.0,63.28
7523,Andrew Bogut,6.0,C,32.0,58.605
7508,Josh Smith,25,PF,1.0,45.64
7231,Chris Paul,12T,PG,13.0,38.525
7258,Tim Duncan,17,C,7.0,29.745
7428,Metta World Peace,18,SF,6.0,22.565
7391,Tyson Chandler,3,C,70.0,20.255


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2012
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 379.25427902912617
R-squared (XGBoost): 0.16196115244330211


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
6510,Dwight Howard,3,C,186.0,90.825
6987,Kevin Garnett,5,C,44.0,41.59
6943,Luol Deng,9.0,SF,16.0,36.995
6572,LeBron James,4,SF,112.0,36.945
6484,Rajon Rondo,-1,PG,0.0,36.465
6511,Chris Paul,12T,PG,5.0,29.955
6523,Andre Iguodala,7,SF,33.0,27.96
6779,Dwyane Wade,18T,SG,1.0,25.85
6747,Josh Smith,10,PF,9.0,25.665
6916,Tim Duncan,-1,C,0.0,19.065


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2013
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 206.31413221797325
R-squared (XGBoost): 0.14241746056804538


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
6094,Mike Conley,21.0,PG,1.0,159.115
5996,Dwight Howard,14.0,C,9.0,70.34
6084,LeBron James,2.0,PF,149.0,54.565
6373,Tony Allen,5.0,SG,102.0,52.115
6265,Kevin Garnett,-1.0,C,0.0,41.19
6242,Paul George,8.0,SF,57.0,31.675
6419,Tim Duncan,6.0,C,94.0,31.535
6330,Joakim Noah,4.0,C,107.0,31.025
6253,Emeka Okafor,-1.0,C,0.0,30.44
6217,Marc Gasol,1.0,C,212.0,29.31


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2014
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 332.262211359489
R-squared (XGBoost): 0.4939383698582329


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
5847,Joakim Noah,1.0,C,555.0,188.85
5641,Chris Paul,13T,PG,5.0,66.69
5813,Andrew Bogut,10.0,C,11.0,63.68
5514,Tim Duncan,13T,C,5.0,63.35
5754,Dwight Howard,8T,C,25.0,60.37
5952,Paul George,7.0,SF,30.0,48.515
5640,Kirk Hinrich,-1,PG,0.0,47.92
5804,Serge Ibaka,4.0,PF,79.0,36.745
5948,Roy Hibbert,2.0,C,166.0,30.55
5461,Kawhi Leonard,11,SF,9.0,28.995


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2015
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 365.0473762608696
R-squared (XGBoost): 0.28080398039499477


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
5314,Kawhi Leonard,1,SF,333.0,61.625
5393,Anthony Davis,4,PF,107.0,59.43
4915,Tim Duncan,8,C,12.0,45.93
5107,Draymond Green,2,PF,317.0,44.13
5011,DeAndre Jordan,3.0,C,261.0,39.965
4856,Roy Hibbert,-1.0,C,0.0,39.565
4890,Marc Gasol,10T,C,7.0,39.24
5134,Rudy Gobert,5.0,C,33.0,30.675
5349,Chris Paul,15T,PG,1.0,30.19
5076,Nerlens Noel,15T,C,1.0,28.965


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2016
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 286.24187613636366
R-squared (XGBoost): 0.6878105615786467


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
4426,Kawhi Leonard,1,SF,547.0,249.285
4597,Draymond Green,2,PF,421.0,229.925
4474,DeAndre Jordan,4.0,C,50.0,116.92
4820,Andre Drummond,10,C,3.0,99.855
4626,Chuck Hayes,-1,C,0.0,45.5
4782,Tim Duncan,-1,C,0.0,45.185
4524,LeBron James,11T,SF,2.0,42.32
4362,Chris Paul,13T,PG,1.0,30.935
4570,Hassan Whiteside,3,C,83.0,24.315
4648,Thabo Sefolosha,-1.0,SF,0.0,19.35


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2017
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 157.52973699261995
R-squared (XGBoost): 0.7079904606888257


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
3989,Draymond Green,1,PF,434.0,316.475
3977,Rudy Gobert,2.0,C,269.0,298.74
3982,Andre Drummond,-1,C,0.0,136.88
4129,Anthony Davis,-1,C,0.0,95.025
3870,Hassan Whiteside,5T,C,3.0,93.865
4154,DeAndre Jordan,-1.0,C,0.0,67.29
3988,Russell Westbrook,-1,PG,0.0,38.215
4143,Robert Covington,4.0,SF,4.0,34.325
4054,Dwight Howard,-1,C,0.0,31.73
4268,Kawhi Leonard,3,SF,182.0,27.58


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2018
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 387.8623429338843
R-squared (XGBoost): 0.11514821449727253


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
3324,Andre Drummond,15T,C,1.0,124.44
3598,Victor Oladipo,15T,SG,1.0,51.86
3177,Anthony Davis,3,PF,139.0,51.26
3403,Rudy Gobert,1.0,C,466.0,48.065
3270,Draymond Green,6,PF,20.0,42.905
3692,DeAndre Jordan,-1.0,C,0.0,42.24
3339,Luol Deng,-1.0,SF,0.0,36.825
3650,Joakim Noah,-1.0,C,0.0,36.355
3583,Clint Capela,14.0,C,2.0,33.1
3655,Robert Covington,8.0,SF,8.0,32.24


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2019
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 216.29867596463018
R-squared (XGBoost): 0.5008055298508431


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
2932,Paul George,3.0,SF,150.0,171.92
2825,Rudy Gobert,1.0,C,411.0,150.74
3148,Andre Drummond,-1,C,0.0,92.71
3114,Giannis Antetokounmpo,2.0,PF,280.0,54.735
2963,Joel Embiid,4,C,26.0,49.995
2876,Draymond Green,6T,PF,6.0,30.21
3120,Hassan Whiteside,-1,C,0.0,28.695
2671,Anthony Davis,-1,C,0.0,25.91
2981,Jusuf Nurkić,-1,C,0.0,25.15
3139,Russell Westbrook,-1,PG,0.0,23.695


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2020
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 219.304163071066
R-squared (XGBoost): 0.504779264236191


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
2127,Giannis Antetokounmpo,1.0,PF,432.0,104.445
2524,Rudy Gobert,3.0,C,187.0,103.435
2270,Anthony Davis,2,PF,200.0,102.295
2198,Kawhi Leonard,8T,SF,5.0,39.73
2173,Clint Capela,-1.0,C,0.0,35.835
2187,Andre Drummond,8T,C,5.0,29.12
2340,Joel Embiid,-1,C,0.0,27.475
2183,Russell Westbrook,-1,PG,0.0,22.205
2132,Kyle Lowry,-1,PG,0.0,14.08
2259,LeBron James,-1,PG,0.0,13.87


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2021
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 158.77758686102234
R-squared (XGBoost): 0.6724531512313718


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
1379,Rudy Gobert,1.0,C,464.0,352.19
1716,Clint Capela,6.0,C,10.0,51.11
1517,LeBron James,-1.0,PG,0.0,33.4
1554,Mike Conley,-1.0,PG,0.0,32.105
1399,Joel Embiid,7.0,C,7.0,28.525
1895,Giannis Antetokounmpo,5.0,PF,12.0,21.985
1422,Draymond Green,3.0,PF,76.0,19.34
1719,Myles Turner,9.0,C,3.0,18.815
1820,Russell Westbrook,-1.0,PG,0.0,17.745
1788,Bam Adebayo,4.0,C,31.0,12.185


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2022
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 191.36157419580422
R-squared (XGBoost): 0.11083768799151983


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
1022,Rudy Gobert,3.0,C,136.0,190.895
1184,Nikola Jokić,-1.0,C,0.0,69.39
1046,Sekou Doumbouya,-1.0,PF,0.0,65.16
1007,Joel Embiid,-1.0,C,0.0,59.975
1111,Giannis Antetokounmpo,6.0,PF,58.0,35.125
1083,Russell Westbrook,-1.0,PG,0.0,33.515
1306,Bam Adebayo,4.0,C,128.0,31.45
1178,Matisse Thybulle,11.0,SG,1.0,30.24
992,Paul George,-1.0,PF,0.0,27.39
678,Chris Paul,-1.0,PG,0.0,23.94


---------------------------------------------------------------------
Selected Features Top  30  Test year :  2023
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 322.7679867405583
R-squared (XGBoost): 0.24146893072832254


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
137,Evan Mobley,3.0,PF,101.0,67.12
323,Brook Lopez,2.0,C,309.0,55.785
497,Joel Embiid,9T,C,7.0,55.01
176,Giannis Antetokounmpo,6.0,PF,14.0,44.645
59,Jaren Jackson Jr.,1.0,C,391.0,43.94
301,Anthony Davis,-1,C,0.0,38.405
396,Rudy Gobert,-1.0,C,0.0,32.025
524,Bam Adebayo,5.0,C,18.0,30.38
408,Jarrett Allen,-1,C,0.0,29.095
392,Jacob Gilyard,-1.0,PG,0.0,28.145


---------------------------------------------------------------------


Unnamed: 0,Rank_1994,Rank_1995,Rank_1996,Rank_1997,Rank_1998,Rank_1999,Rank_2000,Rank_2001,Rank_2002,Rank_2003,...,Rank_2014,Rank_2015,Rank_2016,Rank_2017,Rank_2018,Rank_2019,Rank_2020,Rank_2021,Rank_2022,Rank_2023
0,2,4,4,2,4,5T,2,3,3T,1,...,1.0,1,1,1,15T,3.0,1.0,1.0,3.0,3.0
1,1,-1.0,6,5,5T,1,5T,5T,-1,11,...,13T,4,2,2.0,15T,1.0,3.0,6.0,-1.0,2.0
2,-1.0,2,3,4,2,-1,-1,8T,1,4,...,10.0,8,4.0,-1,3,-1,2,-1.0,-1.0,9T
3,-1.0,-1,7,3,5T,2,-1,8T,-1,-1,...,13T,2,10,-1,1.0,2.0,8T,-1.0,-1.0,6.0
4,-1.0,6T,1,-1,1,-1,7,-1,2,3,...,8T,3.0,-1,5T,6,4,-1.0,7.0,6.0,1.0
5,-1.0,3,5,-1,3,-1,8T,-1.0,-1,11,...,7.0,-1.0,-1,-1.0,-1.0,6T,8T,5.0,-1.0,-1
6,-1.0,1,-1,1,5T,-1,3T,11T,-1,-1,...,-1,10T,11T,-1,-1.0,-1,-1,3.0,4.0,-1.0
7,-1,5,-1,6T,-1,3,3T,5T,-1,6,...,4.0,5.0,13T,4.0,-1.0,-1,-1,9.0,11.0,5.0
8,-1.0,-1,2,-1,9T,4,-1.0,11T,-1.0,8,...,2.0,15T,3,-1,14.0,-1,-1,-1.0,-1.0,-1
9,7T,6T,8T,-1,11T,7T,1,1,3T,13T,...,11,15T,-1.0,3,8.0,-1,-1,4.0,-1.0,-1.0


In [66]:
top = top_40

rank_data_40 = pd.DataFrame()

for year in range(1994,2024):
    print("Selected Features Top ",40," Test year : ", year)
    
    Test_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] == year]
    Train_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] < year]
    
    X_train, y_train = Data_Organizer(Train_Data,top)
    X_test , y_test = Data_Organizer(Test_Data,top)
    
    # Initialize and train a RandomForest Regressor model
    rf_model = RandomForestRegressor(n_estimators=200,random_state=42, n_jobs=-1)
    rf_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_rf = rf_model.predict(X_test)
    
    # Evaluate the model
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)
    
    # Mean Squared Error (MAE)
    print(f'Mean Squared Error (XGBoost): {mse_rf}')
    print(f'R-squared (XGBoost): {r2_rf}')
    
    Test_Data_copy = Test_Data.copy()
    Test_Data_copy.loc[:, 'Predicted_points'] = y_pred_rf
    
    # Select only the desired columns
    Test_Data_concatenated = Test_Data_copy[['Player_name','Rank','Pos','Points_won', 'Predicted_points']]
    
    # Display the concatenated DataFrame sorted by 'Points_won'
    display(Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10))

    Test_Data_concatenated = Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10)

    if rank_data_40.empty:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_40 = pd.DataFrame(new_rank_column)
    else:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_40 = pd.concat([rank_data_40.reset_index(drop=True), new_rank_column.reset_index(drop=True)], axis=1)

    print("---------------------------------------------------------------------")

display(rank_data_40)

Selected Features Top  40  Test year :  1994
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 1.6963472972972975
R-squared (XGBoost): 0.5407519376629937


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
15539,David Robinson,2.0,C,22.0,26.07
15651,Hakeem Olajuwon,1.0,C,23.0,24.195
15743,Dikembe Mutombo,3.0,C,19.0,17.095
15613,Karl Malone,-1.0,PF,0.0,13.145
15710,Latrell Sprewell,-1.0,SG,0.0,11.68
15616,Shaquille O'Neal,-1.0,C,0.0,8.08
15741,Tracy Moore,-1.0,SG,0.0,7.44
15739,Patrick Ewing,-1.0,C,0.0,7.235
15643,John Stockton,-1.0,PG,0.0,7.145
15646,Dell Demps,-1.0,PG,0.0,5.855


---------------------------------------------------------------------
Selected Features Top  40  Test year :  1995
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 2.488118139534884
R-squared (XGBoost): 0.6045292988511184


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
15026,Dikembe Mutombo,1,C,45.0,16.49
15113,David Robinson,4,C,12.0,15.115
15199,Scottie Pippen,2,SF,16.0,10.47
15151,Gary Payton,6T,PG,2.0,6.79
14994,Hakeem Olajuwon,3,C,13.0,6.675
15264,John Stockton,-1.0,PG,0.0,5.945
15121,Dennis Rodman,5,PF,11.0,5.095
15249,Nate McMillan,6T,PG,2.0,4.855
15077,Mookie Blaylock,-1,PG,0.0,4.32
15051,Patrick Ewing,-1,C,0.0,3.425


---------------------------------------------------------------------
Selected Features Top  40  Test year :  1996
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 6.923379805725971
R-squared (XGBoost): 0.0772318035339441


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
14887,Dikembe Mutombo,3,C,11.0,39.86
14897,David Robinson,4,C,9.0,14.415
14619,Michael Jordan,6,SG,7.0,11.515
14839,Shawn Bradley,-1.0,C,0.0,9.57
14797,Hakeem Olajuwon,5,C,8.0,8.49
14749,Scottie Pippen,2,SF,15.0,8.425
14844,Gary Payton,1,PG,56.0,7.895
14909,Dennis Rodman,7,PF,4.0,7.29
14802,Bo Outlaw,8T,PF,1.0,3.02
14555,Horace Grant,8T,PF,1.0,2.76


---------------------------------------------------------------------
Selected Features Top  40  Test year :  1997
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 6.728096624266145
R-squared (XGBoost): 0.24490067297575357


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
14149,Gary Payton,2,PG,25.0,17.655
14081,Michael Jordan,5,SG,3.0,16.05
14141,Shawn Bradley,-1.0,C,0.0,15.44
14164,Scottie Pippen,4,SF,4.0,12.815
14210,Hakeem Olajuwon,-1,C,0.0,9.255
14489,Dikembe Mutombo,1,C,60.0,9.175
14333,Mookie Blaylock,3,PG,18.0,8.84
14279,Horace Grant,6T,PF,1.0,6.99
14277,David Robinson,-1,C,0.0,6.43
14269,Dennis Rodman,-1,PF,0.0,6.125


---------------------------------------------------------------------
Selected Features Top  40  Test year :  1998
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 2.0394713562753033
R-squared (XGBoost): 0.672332644318508


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13894,Dikembe Mutombo,1,C,39.0,26.19
13836,Michael Jordan,4,SG,6.0,19.17
13764,Gary Payton,2,PG,37.0,12.78
13605,David Robinson,3,C,10.0,7.07
13889,Tim Duncan,5T,PF,4.0,6.63
13815,Mookie Blaylock,5T,PG,4.0,6.595
13637,Dennis Rodman,5T,PF,4.0,6.055
13578,Marcus Camby,-1.0,C,0.0,3.91
13842,Scottie Pippen,9T,SF,2.0,3.4
13601,Kevin Garnett,11T,PF,1.0,3.01


---------------------------------------------------------------------
Selected Features Top  40  Test year :  1999
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 9.25856671940928
R-squared (XGBoost): 0.45491333043606075


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13269,Alonzo Mourning,1,C,89.0,23.89
13424,Gary Payton,3,PG,6.0,7.365
13182,Dikembe Mutombo,2,C,10.0,6.97
13406,Dennis Rodman,-1,PF,0.0,6.275
13434,David Robinson,4,C,3.0,5.89
13237,Eddie Jones,7T,SF,1.0,5.495
13488,Tim Duncan,5T,PF,2.0,5.295
13397,Scottie Pippen,7T,SF,1.0,4.775
13380,Mookie Blaylock,-1,PG,0.0,3.39
13345,Kendall Gill,-1,SF,0.0,2.955


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2000
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 6.031665865384616
R-squared (XGBoost): 0.3779205789934411


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12725,Shaquille O'Neal,2,C,21.0,25.545
12708,Gary Payton,5T,PG,4.0,20.115
12956,Alonzo Mourning,1,C,62.0,13.515
12909,Dikembe Mutombo,3T,C,11.0,8.235
12841,Kevin Garnett,7,PF,2.0,7.955
12987,David Robinson,-1,C,0.0,5.42
12741,Eddie Jones,3T,SG,11.0,5.08
12830,Tim Duncan,-1,PF,0.0,4.855
12837,Anthony Mason,8T,SF,1.0,4.34
12714,Shawn Bradley,-1.0,C,0.0,3.765


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2001
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 6.573886581632653
R-squared (XGBoost): 0.42533132009324937


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12462,Shawn Bradley,-1.0,C,0.0,35.47
12510,Dikembe Mutombo,1,C,48.0,28.58
12512,Dikembe Mutombo,1,C,48.0,27.535
12113,Theo Ratliff,7,C,4.0,22.15
12403,Shaquille O'Neal,8T,C,2.0,11.005
12271,Tim Duncan,3,PF,14.0,8.655
12427,Gary Payton,-1,PG,0.0,6.455
12335,Ray Allen,-1.0,SG,0.0,5.275
12153,John Stockton,-1.0,PG,0.0,4.68
12460,Kevin Garnett,2,PF,26.0,4.13


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2002
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 22.517323244680842
R-squared (XGBoost): 0.21205741013410928


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12003,Dikembe Mutombo,3T,C,1.0,18.845
11941,Ben Wallace,1,C,116.0,18.68
11852,Tim Duncan,-1,PF,0.0,16.55
12041,Eddie Jones,-1,SG,0.0,11.65
11936,Kevin Garnett,2,PF,2.0,11.375
11741,Jason Kidd,-1,PG,0.0,9.455
11950,Gary Payton,-1,PG,0.0,7.255
11884,Antoine Walker,-1.0,PF,0.0,7.195
11927,Shaquille O'Neal,-1,C,0.0,5.795
12005,Kobe Bryant,3T,SG,1.0,5.255


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2003
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 590.7123888706141
R-squared (XGBoost): 0.1742524247903744


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
11137,Ben Wallace,1,C,531.0,50.89
11405,Tim Duncan,4,PF,90.0,21.225
11183,Kevin Garnett,3,PF,121.0,12.12
11559,Jason Kidd,-1,PG,0.0,10.785
11463,Kobe Bryant,8,SG,16.0,10.745
11461,Dikembe Mutombo,-1,C,0.0,7.555
11176,Gary Payton,11,PG,5.0,7.495
11412,Shawn Marion,13T,SF,3.0,6.38
11505,Dirk Nowitzki,-1.0,PF,0.0,4.835
11411,Allen Iverson,6,SG,29.0,4.79


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2004
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 614.84360188588
R-squared (XGBoost): 0.11051894044043387


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
11127,Kevin Garnett,6.0,PF,36.0,105.4
10663,Ben Wallace,2.0,C,325.0,64.065
10664,Theo Ratliff,3.0,C,90.0,25.55
11009,Bo Outlaw,-1.0,PF,0.0,23.525
11017,Rasheed Wallace,-1.0,PF,0.0,20.33
10650,Theo Ratliff,3.0,C,90.0,18.68
10818,Tim Duncan,7.0,PF,8.0,13.735
11020,Baron Davis,-1.0,PG,0.0,7.16
10852,Bruce Bowen,4.0,SF,76.0,4.64
10871,Andrei Kirilenko,5.0,PF,67.0,4.31


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2005
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 392.19879980988605
R-squared (XGBoost): 0.05915367650769221


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
10326,Tim Duncan,4.0,PF,81.0,64.955
10276,Dikembe Mutombo,-1.0,C,0.0,24.515
10343,Kevin Garnett,9.0,PF,30.0,23.185
10592,Andrei Kirilenko,10.0,PF,25.0,14.18
10261,Metta World Peace,-1.0,SF,0.0,12.885
10390,Richard Hamilton,-1.0,SG,0.0,12.435
10338,Shawn Marion,5.0,PF,57.0,9.655
10337,Allen Iverson,11.0,PG,10.0,8.735
10225,Bruce Bowen,2.0,SF,247.0,6.04
10515,Tayshaun Prince,7.0,SF,46.0,5.57


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2006
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 257.90027719726567
R-squared (XGBoost): 0.5585388441414204


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9841,Ben Wallace,1,C,420.0,169.14
9610,Bruce Bowen,2,SF,308.0,78.41
9704,Marcus Camby,5,C,55.0,65.82
10044,Tim Duncan,6,PF,42.0,37.025
9691,Andrei Kirilenko,3,SF,121.0,33.97
9764,Pavel Podkolzin,-1.0,C,0.0,29.49
9863,Shawn Marion,7,PF,33.0,29.32
9625,Tayshaun Prince,13T,SF,1.0,19.65
9868,Dirk Nowitzki,-1.0,PF,0.0,17.63
9726,Rasheed Wallace,13T,PF,1.0,16.275


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2007
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 343.44216155030796
R-squared (XGBoost): 0.38086945177985243


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9339,Bruce Bowen,2,SF,206.0,99.405
9407,Marcus Camby,1,C,431.0,75.125
9414,Ben Wallace,6,C,42.0,60.865
9438,Tim Duncan,3,C,158.0,56.775
9488,Renaldo Major,-1.0,PF,0.0,56.755
9210,Eddie Jones,-1,SG,0.0,41.535
9577,Theo Ratliff,-1,C,0.0,36.265
9499,Gary Payton,-1,PG,0.0,30.55
9356,Kevin Garnett,13T,PF,7.0,24.835
9489,LeBron James,-1.0,SF,0.0,24.68


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2008
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 342.0327494307401
R-squared (XGBoost): 0.4262596637569024


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8731,Kevin Garnett,1,PF,493.0,106.495
8762,Marcus Camby,2,C,178.0,103.42
8891,Tayshaun Prince,10,SF,13.0,49.345
8722,Shane Battier,3,SF,175.0,39.275
8784,Bruce Bowen,4,SF,80.0,34.99
8738,James Posey,-1,PF,0.0,34.73
8949,Rasheed Wallace,12,C,9.0,28.595
8573,Tim Duncan,9,C,22.0,25.925
8630,Josh Smith,6,PF,34.0,25.06
8571,Kobe Bryant,5,SG,40.0,22.975


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2009
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 608.3387736893204
R-squared (XGBoost): 0.0646951028149525


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8365,Kevin Garnett,8,PF,31.0,294.19
8484,LeBron James,2,SF,148.0,77.08
8212,Dwight Howard,1,C,542.0,67.955
8426,Kobe Bryant,7,SG,43.0,56.145
8556,Chris Paul,6,PG,49.0,45.76
8098,Bruce Bowen,-1,SF,0.0,45.365
8255,Joel Przybilla,16T,C,1.0,17.335
8370,Shane Battier,4,SG,71.0,13.625
8442,Metta World Peace,5,SF,54.0,13.485
8194,Rajon Rondo,10,PG,8.0,13.305


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2010
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 596.6887330566406
R-squared (XGBoost): 0.17479549973242048


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
7683,Tim Duncan,14,C,7.0,136.31
7939,Kevin Garnett,-1,PF,0.0,106.14
8018,Dwight Howard,1,C,576.0,87.675
7911,Raja Bell,-1,SG,0.0,85.405
7905,Raja Bell,-1,SG,0.0,75.06
8041,LeBron James,4,SF,61.0,52.005
8050,Metta World Peace,6,SF,29.0,49.905
7564,Kobe Bryant,12,SG,9.0,33.125
7813,Gerald Wallace,3,SF,113.0,30.32
8016,Rajon Rondo,5,PG,55.0,16.27


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2011
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 258.65907158671587
R-squared (XGBoost): 0.6115003140649332


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
7366,Dwight Howard,1,C,585.0,230.925
7400,Luol Deng,10.0,SF,24.0,77.645
7121,Kevin Garnett,2,PF,77.0,74.66
7258,Tim Duncan,17,C,7.0,51.36
7428,Metta World Peace,18,SF,6.0,39.83
7321,Rajon Rondo,5,PG,45.0,35.285
7186,LeBron James,9,SF,25.0,27.055
7270,Shawn Marion,-1,SF,0.0,23.85
7523,Andrew Bogut,6.0,C,32.0,18.995
7231,Chris Paul,12T,PG,13.0,18.705


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2012
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 350.49823509708733
R-squared (XGBoost): 0.22550343330770561


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
6510,Dwight Howard,3,C,186.0,45.03
6527,Serge Ibaka,2.0,PF,294.0,43.21
6987,Kevin Garnett,5,C,44.0,40.795
6572,LeBron James,4,SF,112.0,37.155
6943,Luol Deng,9.0,SF,16.0,27.15
6484,Rajon Rondo,-1,PG,0.0,25.085
6523,Andre Iguodala,7,SF,33.0,22.31
6511,Chris Paul,12T,PG,5.0,16.43
6916,Tim Duncan,-1,C,0.0,15.605
6779,Dwyane Wade,18T,SG,1.0,13.23


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2013
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 163.85646457934993
R-squared (XGBoost): 0.3189005450783198


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
6094,Mike Conley,21.0,PG,1.0,122.045
6084,LeBron James,2.0,PF,149.0,62.74
6005,Serge Ibaka,3.0,PF,122.0,54.045
6245,Shane Battier,-1.0,SF,0.0,49.285
6217,Marc Gasol,1.0,C,212.0,44.7
5996,Dwight Howard,14.0,C,9.0,41.275
6419,Tim Duncan,6.0,C,94.0,39.125
6265,Kevin Garnett,-1.0,C,0.0,37.515
6242,Paul George,8.0,SF,57.0,32.81
6291,Chris Paul,11.0,PG,19.0,25.045


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2014
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 390.34342604926997
R-squared (XGBoost): 0.4054760856091123


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
5847,Joakim Noah,1.0,C,555.0,140.085
5641,Chris Paul,13T,PG,5.0,49.515
5813,Andrew Bogut,10.0,C,11.0,48.715
5514,Tim Duncan,13T,C,5.0,38.625
5805,Andre Iguodala,5,SF,47.0,34.145
5804,Serge Ibaka,4.0,PF,79.0,30.71
5952,Paul George,7.0,SF,30.0,30.58
5616,LeBron James,6,PF,31.0,28.63
5895,Jimmy Butler,-1,SG,0.0,23.67
5948,Roy Hibbert,2.0,C,166.0,19.075


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2015
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 386.8579682608695
R-squared (XGBoost): 0.23783396616752883


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
5393,Anthony Davis,4,PF,107.0,87.61
5314,Kawhi Leonard,1,SF,333.0,48.545
5011,DeAndre Jordan,3.0,C,261.0,40.515
4915,Tim Duncan,8,C,12.0,38.235
5073,Stephen Curry,-1.0,PG,0.0,35.64
5285,Andrew Bogut,6.0,C,31.0,34.995
4890,Marc Gasol,10T,C,7.0,28.145
5227,Andre Iguodala,-1,SF,0.0,25.19
4860,Trevor Ariza,13T,SF,3.0,24.8
5216,Tony Allen,7.0,SG,29.0,24.715


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2016
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 302.08194228219696
R-squared (XGBoost): 0.6705346080341382


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
4426,Kawhi Leonard,1,SF,547.0,242.965
4597,Draymond Green,2,PF,421.0,184.2
4570,Hassan Whiteside,3,C,83.0,125.66
4474,DeAndre Jordan,4.0,C,50.0,81.085
4820,Andre Drummond,10,C,3.0,45.12
4782,Tim Duncan,-1,C,0.0,45.03
4524,LeBron James,11T,SF,2.0,30.55
4773,Andrew Bogut,-1.0,C,0.0,30.195
4626,Chuck Hayes,-1,C,0.0,28.96
4362,Chris Paul,13T,PG,1.0,21.64


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2017
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 129.6774551199262
R-squared (XGBoost): 0.7596196460964754


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
3989,Draymond Green,1,PF,434.0,303.94
3977,Rudy Gobert,2.0,C,269.0,271.79
4131,Edy Tavares,-1.0,C,0.0,91.11
3982,Andre Drummond,-1,C,0.0,62.6
3870,Hassan Whiteside,5T,C,3.0,61.555
4134,Edy Tavares,-1.0,C,0.0,57.245
4154,DeAndre Jordan,-1.0,C,0.0,52.43
3988,Russell Westbrook,-1,PG,0.0,38.025
4129,Anthony Davis,-1,C,0.0,36.375
4133,Chris Paul,-1,PG,0.0,26.33


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2018
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 378.222684338843
R-squared (XGBoost): 0.13713970007160936


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
3324,Andre Drummond,15T,C,1.0,94.69
3403,Rudy Gobert,1.0,C,466.0,46.53
3177,Anthony Davis,3,PF,139.0,36.325
3692,DeAndre Jordan,-1.0,C,0.0,33.19
3598,Victor Oladipo,15T,SG,1.0,30.455
3337,Andre Ingram,-1.0,SG,0.0,27.6
3312,Dwight Howard,-1,C,0.0,27.515
3655,Robert Covington,8.0,SF,8.0,27.355
3469,Kawhi Leonard,-1,SF,0.0,25.99
3270,Draymond Green,6,PF,20.0,22.66


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2019
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 178.06205060289392
R-squared (XGBoost): 0.5890516175932619


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
2932,Paul George,3.0,SF,150.0,153.345
3114,Giannis Antetokounmpo,2.0,PF,280.0,153.18
2825,Rudy Gobert,1.0,C,411.0,113.875
3148,Andre Drummond,-1,C,0.0,45.53
2963,Joel Embiid,4,C,26.0,37.84
2671,Anthony Davis,-1,C,0.0,33.88
2562,Myles Turner,5.0,C,15.0,32.57
3120,Hassan Whiteside,-1,C,0.0,23.62
2543,Kevin Love,-1.0,PF,0.0,18.745
2876,Draymond Green,6T,PF,6.0,18.665


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2020
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 180.35653426395936
R-squared (XGBoost): 0.5927286817210786


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
2127,Giannis Antetokounmpo,1.0,PF,432.0,160.05
2270,Anthony Davis,2,PF,200.0,91.765
2524,Rudy Gobert,3.0,C,187.0,60.905
2017,Hassan Whiteside,11,C,3.0,35.28
2340,Joel Embiid,-1,C,0.0,32.05
2324,Marc Gasol,-1,C,0.0,20.045
2466,Jonathan Isaac,-1.0,PF,0.0,19.85
2094,Brook Lopez,10.0,C,4.0,19.25
2198,Kawhi Leonard,8T,SF,5.0,18.005
2259,LeBron James,-1,PG,0.0,17.84


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2021
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 160.9176055111821
R-squared (XGBoost): 0.6680384452327256


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
1379,Rudy Gobert,1.0,C,464.0,354.98
1719,Myles Turner,9.0,C,3.0,43.97
1517,LeBron James,-1,PG,0.0,37.6
1716,Clint Capela,6.0,C,10.0,21.98
1554,Mike Conley,-1,PG,0.0,20.825
1741,Matisse Thybulle,11T,SG,1.0,18.7
1399,Joel Embiid,7,C,7.0,18.43
1610,Spencer Dinwiddie,-1.0,SG,0.0,17.845
1422,Draymond Green,3,PF,76.0,17.565
1371,Nerlens Noel,-1,C,0.0,14.945


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2022
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 211.59903311188813
R-squared (XGBoost): 0.016804255027649817


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
1022,Rudy Gobert,3.0,C,136.0,203.415
1184,Nikola Jokić,-1.0,C,0.0,102.655
1046,Sekou Doumbouya,-1.0,PF,0.0,83.62
1007,Joel Embiid,-1.0,C,0.0,66.59
627,Clint Capela,-1.0,C,0.0,55.6
933,Al Horford,9.0,C,3.0,53.72
992,Paul George,-1.0,PF,0.0,38.275
1111,Giannis Antetokounmpo,6.0,PF,58.0,34.515
1176,Jae Crowder,-1.0,PF,0.0,32.925
678,Chris Paul,-1.0,PG,0.0,30.085


---------------------------------------------------------------------
Selected Features Top  40  Test year :  2023
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 320.8448412561576
R-squared (XGBoost): 0.24598847932227896


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
497,Joel Embiid,9T,C,7.0,87.85
524,Bam Adebayo,5.0,C,18.0,73.47
59,Jaren Jackson Jr.,1.0,C,391.0,63.78
323,Brook Lopez,2.0,C,309.0,63.17
176,Giannis Antetokounmpo,6.0,PF,14.0,62.13
301,Anthony Davis,-1,C,0.0,58.08
137,Evan Mobley,3.0,PF,101.0,50.97
191,Jayson Tatum,-1.0,SF,0.0,41.595
206,Nic Claxton,9T,C,7.0,41.135
408,Jarrett Allen,-1,C,0.0,40.705


---------------------------------------------------------------------


Unnamed: 0,Rank_1994,Rank_1995,Rank_1996,Rank_1997,Rank_1998,Rank_1999,Rank_2000,Rank_2001,Rank_2002,Rank_2003,...,Rank_2014,Rank_2015,Rank_2016,Rank_2017,Rank_2018,Rank_2019,Rank_2020,Rank_2021,Rank_2022,Rank_2023
0,2.0,1,3,2,1,1,2,-1.0,3T,1,...,1.0,4,1,1,15T,3.0,1.0,1.0,3.0,9T
1,1.0,4,4,5,4,3,5T,1,1,4,...,13T,1,2,2.0,1.0,2.0,2,9.0,-1.0,5.0
2,3.0,2,6,-1.0,2,2,1,1,-1,3,...,10.0,3.0,3,-1.0,3,1.0,3.0,-1,-1.0,1.0
3,-1.0,6T,-1.0,4,3,-1,3T,7,-1,-1,...,13T,8,4.0,-1,-1.0,-1,11,6.0,-1.0,2.0
4,-1.0,3,5,-1,5T,4,7,8T,2,8,...,5,-1.0,10,5T,15T,4,-1,-1,-1.0,6.0
5,-1.0,-1.0,2,1,5T,7T,-1,3,-1,-1,...,4.0,6.0,-1,-1.0,-1.0,-1,-1,11T,9.0,-1
6,-1.0,5,1,3,5T,5T,3T,-1,-1,11,...,7.0,10T,11T,-1.0,-1,5.0,-1.0,7,-1.0,3.0
7,-1.0,6T,7,6T,-1.0,7T,-1,-1.0,-1.0,13T,...,6,-1,-1.0,-1,8.0,-1,10.0,-1.0,6.0,-1.0
8,-1.0,-1,8T,-1,9T,-1,8T,-1.0,-1,-1.0,...,-1,13T,-1,-1,-1,-1.0,8T,3,-1.0,9T
9,-1.0,-1,8T,-1,11T,-1,-1.0,2,3T,6,...,2.0,7.0,13T,-1,6,6T,-1,-1,-1.0,-1


In [67]:
top = top_50

rank_data_50 = pd.DataFrame()

for year in range(1994,2024):
    print("Selected Features Top ",50," Test year : ", year)
    
    Test_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] == year]
    Train_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] < year]
    
    X_train, y_train = Data_Organizer(Train_Data,top)
    X_test , y_test = Data_Organizer(Test_Data,top)
    
    # Initialize and train a RandomForest Regressor model
    rf_model = RandomForestRegressor(n_estimators=200,random_state=42, n_jobs=-1)
    rf_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_rf = rf_model.predict(X_test)
    
    # Evaluate the model
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)
    
    # Mean Squared Error (MAE)
    print(f'Mean Squared Error (XGBoost): {mse_rf}')
    print(f'R-squared (XGBoost): {r2_rf}')
    
    Test_Data_copy = Test_Data.copy()
    Test_Data_copy.loc[:, 'Predicted_points'] = y_pred_rf
    
    # Select only the desired columns
    Test_Data_concatenated = Test_Data_copy[['Player_name','Rank','Pos','Points_won', 'Predicted_points']]
    
    # Display the concatenated DataFrame sorted by 'Points_won'
    display(Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10))

    Test_Data_concatenated = Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10)

    if rank_data_50.empty:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_50 = pd.DataFrame(new_rank_column)
    else:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_50 = pd.concat([rank_data_50.reset_index(drop=True), new_rank_column.reset_index(drop=True)], axis=1)

    print("---------------------------------------------------------------------")

display(rank_data_50)

Selected Features Top  50  Test year :  1994
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 1.6601403716216219
R-squared (XGBoost): 0.550554152390029


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
15651,Hakeem Olajuwon,1.0,C,23.0,25.795
15539,David Robinson,2.0,C,22.0,23.415
15743,Dikembe Mutombo,3.0,C,19.0,17.26
15613,Karl Malone,-1.0,PF,0.0,10.795
15710,Latrell Sprewell,-1.0,SG,0.0,9.205
15739,Patrick Ewing,-1.0,C,0.0,9.09
15616,Shaquille O'Neal,-1.0,C,0.0,8.995
15741,Tracy Moore,-1.0,SG,0.0,8.39
15646,Dell Demps,-1.0,PG,0.0,7.305
15643,John Stockton,-1.0,PG,0.0,7.19


---------------------------------------------------------------------
Selected Features Top  50  Test year :  1995
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 2.5389855813953486
R-squared (XGBoost): 0.5964442394728814


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
15113,David Robinson,4,C,12.0,16.095
15026,Dikembe Mutombo,1,C,45.0,15.625
15199,Scottie Pippen,2,SF,16.0,11.145
14994,Hakeem Olajuwon,3,C,13.0,6.21
15151,Gary Payton,6T,PG,2.0,5.765
15264,John Stockton,-1.0,PG,0.0,5.065
15121,Dennis Rodman,5,PF,11.0,5.06
15249,Nate McMillan,6T,PG,2.0,4.3
15051,Patrick Ewing,-1,C,0.0,3.275
15045,Charles Oakley,-1,PF,0.0,3.25


---------------------------------------------------------------------
Selected Features Top  50  Test year :  1996
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 7.085261758691205
R-squared (XGBoost): 0.055655706025240725


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
14887,Dikembe Mutombo,3,C,11.0,40.93
14897,David Robinson,4,C,9.0,14.305
14619,Michael Jordan,6,SG,7.0,11.145
14839,Shawn Bradley,-1.0,C,0.0,10.065
14749,Scottie Pippen,2,SF,15.0,9.05
14797,Hakeem Olajuwon,5,C,8.0,8.775
14909,Dennis Rodman,7,PF,4.0,8.3
14844,Gary Payton,1,PG,56.0,7.81
14555,Horace Grant,8T,PF,1.0,3.78
14802,Bo Outlaw,8T,PF,1.0,2.965


---------------------------------------------------------------------
Selected Features Top  50  Test year :  1997
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 6.826649266144814
R-squared (XGBoost): 0.2338400361099503


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
14149,Gary Payton,2,PG,25.0,17.6
14081,Michael Jordan,5,SG,3.0,16.565
14141,Shawn Bradley,-1.0,C,0.0,16.335
14164,Scottie Pippen,4,SF,4.0,13.0
14333,Mookie Blaylock,3,PG,18.0,9.885
14489,Dikembe Mutombo,1,C,60.0,9.385
14210,Hakeem Olajuwon,-1,C,0.0,9.325
14277,David Robinson,-1,C,0.0,8.0
14279,Horace Grant,6T,PF,1.0,7.955
14269,Dennis Rodman,-1,PF,0.0,6.585


---------------------------------------------------------------------
Selected Features Top  50  Test year :  1998
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 1.8945124493927128
R-squared (XGBoost): 0.695622160768224


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13894,Dikembe Mutombo,1,C,39.0,28.325
13836,Michael Jordan,4,SG,6.0,19.135
13764,Gary Payton,2,PG,37.0,12.985
13815,Mookie Blaylock,5T,PG,4.0,7.17
13605,David Robinson,3,C,10.0,6.54
13637,Dennis Rodman,5T,PF,4.0,5.235
13842,Scottie Pippen,9T,SF,2.0,4.36
13889,Tim Duncan,5T,PF,4.0,3.79
13578,Marcus Camby,-1.0,C,0.0,3.26
13613,Horace Grant,11T,C,1.0,3.005


---------------------------------------------------------------------
Selected Features Top  50  Test year :  1999
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 8.97783639240506
R-squared (XGBoost): 0.4714409813813778


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13269,Alonzo Mourning,1,C,89.0,24.745
13182,Dikembe Mutombo,2,C,10.0,7.28
13424,Gary Payton,3,PG,6.0,6.85
13488,Tim Duncan,5T,PF,2.0,5.88
13434,David Robinson,4,C,3.0,5.73
13237,Eddie Jones,7T,SF,1.0,5.45
13406,Dennis Rodman,-1,PF,0.0,4.47
13397,Scottie Pippen,7T,SF,1.0,4.39
13380,Mookie Blaylock,-1,PG,0.0,4.025
13039,Hakeem Olajuwon,7T,C,1.0,3.23


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2000
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 6.026996527777777
R-squared (XGBoost): 0.3784021538849841


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12725,Shaquille O'Neal,2,C,21.0,23.01
12708,Gary Payton,5T,PG,4.0,21.13
12956,Alonzo Mourning,1,C,62.0,13.985
12841,Kevin Garnett,7,PF,2.0,7.68
12909,Dikembe Mutombo,3T,C,11.0,7.48
12987,David Robinson,-1,C,0.0,5.805
12830,Tim Duncan,-1,PF,0.0,5.435
12837,Anthony Mason,8T,SF,1.0,5.02
12741,Eddie Jones,3T,SG,11.0,4.87
12904,Kobe Bryant,5T,SG,4.0,4.225


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2001
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 6.990727857142855
R-squared (XGBoost): 0.3888923547181269


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12462,Shawn Bradley,-1.0,C,0.0,32.355
12113,Theo Ratliff,7,C,4.0,26.56
12510,Dikembe Mutombo,1,C,48.0,26.355
12512,Dikembe Mutombo,1,C,48.0,24.345
12403,Shaquille O'Neal,8T,C,2.0,10.88
12271,Tim Duncan,3,PF,14.0,8.865
12427,Gary Payton,-1,PG,0.0,5.875
12153,John Stockton,-1.0,PG,0.0,5.255
12106,David Robinson,5T,C,6.0,4.95
12335,Ray Allen,-1.0,SG,0.0,4.785


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2002
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 22.571587553191492
R-squared (XGBoost): 0.21015855389260363


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
11941,Ben Wallace,1,C,116.0,18.765
12003,Dikembe Mutombo,3T,C,1.0,17.99
11852,Tim Duncan,-1,PF,0.0,17.965
11936,Kevin Garnett,2,PF,2.0,12.455
12041,Eddie Jones,-1,SG,0.0,10.645
11741,Jason Kidd,-1,PG,0.0,9.66
11884,Antoine Walker,-1.0,PF,0.0,8.285
11927,Shaquille O'Neal,-1,C,0.0,6.89
11950,Gary Payton,-1,PG,0.0,6.87
12005,Kobe Bryant,3T,SG,1.0,4.78


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2003
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 592.5626688048245
R-squared (XGBoost): 0.17166594751662967


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
11137,Ben Wallace,1,C,531.0,50.47
11405,Tim Duncan,4,PF,90.0,20.4
11183,Kevin Garnett,3,PF,121.0,11.64
11559,Jason Kidd,-1,PG,0.0,10.44
11463,Kobe Bryant,8,SG,16.0,9.32
11176,Gary Payton,11,PG,5.0,8.895
11461,Dikembe Mutombo,-1,C,0.0,8.225
11177,Gary Payton,11,PG,5.0,5.935
11412,Shawn Marion,13T,SF,3.0,5.755
11231,Tracy McGrady,-1.0,SG,0.0,3.94


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2004
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 617.4934409090909
R-squared (XGBoost): 0.10668547512535476


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
11127,Kevin Garnett,6.0,PF,36.0,113.68
10663,Ben Wallace,2.0,C,325.0,62.165
10664,Theo Ratliff,3.0,C,90.0,26.165
11009,Bo Outlaw,-1.0,PF,0.0,24.015
10650,Theo Ratliff,3.0,C,90.0,18.285
11017,Rasheed Wallace,-1.0,PF,0.0,18.27
10818,Tim Duncan,7.0,PF,8.0,13.585
11020,Baron Davis,-1.0,PG,0.0,8.3
10852,Bruce Bowen,4.0,SF,76.0,5.12
10871,Andrei Kirilenko,5.0,PF,67.0,4.005


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2005
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 392.88979876425867
R-squared (XGBoost): 0.05749603801906433


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
10326,Tim Duncan,4,PF,81.0,39.08
10276,Dikembe Mutombo,-1,C,0.0,27.21
10343,Kevin Garnett,9,PF,30.0,23.88
10592,Andrei Kirilenko,10,PF,25.0,16.56
10261,Metta World Peace,-1,SF,0.0,13.105
10338,Shawn Marion,5,PF,57.0,10.485
10337,Allen Iverson,11,PG,10.0,8.245
10225,Bruce Bowen,2,SF,247.0,6.7
10515,Tayshaun Prince,7,SF,46.0,5.28
10455,Ben Wallace,1,C,339.0,5.025


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2006
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 256.65780532226563
R-squared (XGBoost): 0.5606656470902972


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9841,Ben Wallace,1,C,420.0,179.2
9704,Marcus Camby,5,C,55.0,68.615
9610,Bruce Bowen,2,SF,308.0,64.465
9691,Andrei Kirilenko,3,SF,121.0,34.34
9863,Shawn Marion,7,PF,33.0,31.205
9731,Metta World Peace,4,SF,65.0,29.62
10044,Tim Duncan,6,PF,42.0,23.18
9730,Metta World Peace,4,SF,65.0,22.575
9764,Pavel Podkolzin,-1.0,C,0.0,22.055
9625,Tayshaun Prince,13T,SF,1.0,18.1


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2007
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 355.38711940451753
R-squared (XGBoost): 0.35933602014944377


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9488,Renaldo Major,-1.0,PF,0.0,121.585
9339,Bruce Bowen,2,SF,206.0,97.235
9407,Marcus Camby,1,C,431.0,84.035
9414,Ben Wallace,6,C,42.0,69.895
9438,Tim Duncan,3,C,158.0,56.86
9210,Eddie Jones,-1,SG,0.0,36.12
9499,Gary Payton,-1,PG,0.0,29.03
9577,Theo Ratliff,-1,C,0.0,23.765
9356,Kevin Garnett,13T,PF,7.0,23.665
9418,Shawn Marion,4,SF,93.0,21.035


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2008
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 373.2114147058824
R-squared (XGBoost): 0.3739592395187449


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8762,Marcus Camby,2,C,178.0,114.465
8731,Kevin Garnett,1,PF,493.0,90.81
8949,Rasheed Wallace,12,C,9.0,65.955
8738,James Posey,-1,PF,0.0,54.68
8891,Tayshaun Prince,10,SF,13.0,48.375
8722,Shane Battier,3,SF,175.0,36.925
8784,Bruce Bowen,4,SF,80.0,34.55
8699,Chris Paul,7T,PG,24.0,31.785
8571,Kobe Bryant,5,SG,40.0,26.155
8630,Josh Smith,6,PF,34.0,24.58


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2009
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 630.6310582524271
R-squared (XGBoost): 0.030421300415560237


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
8365,Kevin Garnett,8,PF,31.0,295.13
8212,Dwight Howard,1,C,542.0,59.545
8484,LeBron James,2,SF,148.0,59.22
8426,Kobe Bryant,7,SG,43.0,53.595
8098,Bruce Bowen,-1,SF,0.0,46.225
8556,Chris Paul,6,PG,49.0,44.765
8483,Ben Wallace,-1,PF,0.0,16.81
8255,Joel Przybilla,16T,C,1.0,14.27
8442,Metta World Peace,5,SF,54.0,13.295
8370,Shane Battier,4,SG,71.0,13.045


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2010
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 560.6082243652343
R-squared (XGBoost): 0.2246938747051983


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
7683,Tim Duncan,14,C,7.0,150.655
7939,Kevin Garnett,-1,PF,0.0,118.07
8018,Dwight Howard,1,C,576.0,111.975
7911,Raja Bell,-1,SG,0.0,77.5
7905,Raja Bell,-1,SG,0.0,74.355
8050,Metta World Peace,6,SF,29.0,54.29
8041,LeBron James,4,SF,61.0,48.235
7564,Kobe Bryant,12,SG,9.0,30.745
7813,Gerald Wallace,3,SF,113.0,30.19
7544,Dwyane Wade,10T,SG,13.0,16.495


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2011
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 177.6216235239852
R-squared (XGBoost): 0.7332166062027692


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
7366,Dwight Howard,1,C,585.0,340.535
7121,Kevin Garnett,2,PF,77.0,233.54
7400,Luol Deng,10.0,SF,24.0,53.565
7258,Tim Duncan,17,C,7.0,51.085
7321,Rajon Rondo,5,PG,45.0,40.175
7231,Chris Paul,12T,PG,13.0,32.995
7428,Metta World Peace,18,SF,6.0,29.075
7270,Shawn Marion,-1,SF,0.0,27.825
7186,LeBron James,9,SF,25.0,25.845
7523,Andrew Bogut,6.0,C,32.0,25.54


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2012
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 344.28610228155344
R-squared (XGBoost): 0.23923039411860625


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
6527,Serge Ibaka,2.0,PF,294.0,63.92
6510,Dwight Howard,3,C,186.0,33.095
6987,Kevin Garnett,5,C,44.0,32.83
6572,LeBron James,4,SF,112.0,28.835
6484,Rajon Rondo,-1,PG,0.0,27.655
6943,Luol Deng,9.0,SF,16.0,25.815
6523,Andre Iguodala,7,SF,33.0,23.045
6916,Tim Duncan,-1,C,0.0,20.265
6779,Dwyane Wade,18T,SG,1.0,18.38
6511,Chris Paul,12T,PG,5.0,15.32


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2013
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 127.75910721797322
R-squared (XGBoost): 0.468945833105639


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
6206,Larry Sanders,7.0,C,90.0,143.46
6005,Serge Ibaka,3.0,PF,122.0,86.745
6094,Mike Conley,21.0,PG,1.0,70.32
6242,Paul George,8.0,SF,57.0,69.33
6419,Tim Duncan,6.0,C,94.0,68.885
5996,Dwight Howard,14.0,C,9.0,63.815
6084,LeBron James,2.0,PF,149.0,59.36
6245,Shane Battier,-1.0,SF,0.0,48.17
6217,Marc Gasol,1.0,C,212.0,40.055
6265,Kevin Garnett,-1.0,C,0.0,39.265


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2014
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 416.3604785583941
R-squared (XGBoost): 0.36585005666534476


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
5847,Joakim Noah,1.0,C,555.0,117.74
5952,Paul George,7.0,SF,30.0,85.095
5813,Andrew Bogut,10.0,C,11.0,53.105
5804,Serge Ibaka,4.0,PF,79.0,46.62
5805,Andre Iguodala,5,SF,47.0,43.97
5641,Chris Paul,13T,PG,5.0,43.115
5514,Tim Duncan,13T,C,5.0,39.0
5898,DeAndre Jordan,3.0,C,121.0,33.08
5948,Roy Hibbert,2.0,C,166.0,27.11
5616,LeBron James,6,PF,31.0,26.375


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2015
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 298.6082389130434
R-squared (XGBoost): 0.4116986703280635


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
5393,Anthony Davis,4,PF,107.0,93.96
5314,Kawhi Leonard,1,SF,333.0,76.6
5011,DeAndre Jordan,3.0,C,261.0,74.605
5107,Draymond Green,2,PF,317.0,69.94
5285,Andrew Bogut,6.0,C,31.0,61.34
4915,Tim Duncan,8,C,12.0,48.555
5227,Andre Iguodala,-1,SF,0.0,41.32
4959,DeMarcus Cousins,-1.0,C,0.0,34.95
5073,Stephen Curry,-1.0,PG,0.0,31.64
5349,Chris Paul,15T,PG,1.0,31.295


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2016
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 301.9360070075757
R-squared (XGBoost): 0.670693772206917


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
4597,Draymond Green,2,PF,421.0,230.115
4426,Kawhi Leonard,1,SF,547.0,223.84
4570,Hassan Whiteside,3,C,83.0,118.665
4474,DeAndre Jordan,4.0,C,50.0,118.59
4820,Andre Drummond,10,C,3.0,69.33
4782,Tim Duncan,-1,C,0.0,50.565
4330,Paul George,-1.0,SF,0.0,35.325
4373,Rudy Gobert,7.0,C,13.0,34.605
4773,Andrew Bogut,-1.0,C,0.0,28.65
4524,LeBron James,11T,SF,2.0,27.52


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2017
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 177.22482984317347
R-squared (XGBoost): 0.6714820839227909


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
3977,Rudy Gobert,2.0,C,269.0,392.45
3989,Draymond Green,1,PF,434.0,290.195
4129,Anthony Davis,-1,C,0.0,79.46
3870,Hassan Whiteside,5T,C,3.0,77.65
4154,DeAndre Jordan,-1.0,C,0.0,77.555
3982,Andre Drummond,-1,C,0.0,69.03
4131,Edy Tavares,-1.0,C,0.0,60.03
4134,Edy Tavares,-1.0,C,0.0,51.595
3988,Russell Westbrook,-1,PG,0.0,44.68
3837,Andrew Bogut,-1.0,C,0.0,35.86


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2018
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 317.5078967355371
R-squared (XGBoost): 0.2756516984544014


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
3403,Rudy Gobert,1.0,C,466.0,85.465
3655,Robert Covington,8.0,SF,8.0,50.695
3270,Draymond Green,6,PF,20.0,42.88
3469,Kawhi Leonard,-1,SF,0.0,34.15
3177,Anthony Davis,3,PF,139.0,30.67
3324,Andre Drummond,15T,C,1.0,29.92
3598,Victor Oladipo,15T,SG,1.0,24.62
3312,Dwight Howard,-1,C,0.0,21.385
3339,Luol Deng,-1.0,SF,0.0,20.025
3650,Joakim Noah,-1.0,C,0.0,18.215


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2019
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 68.20974971864953
R-squared (XGBoost): 0.8425791109540781


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
2825,Rudy Gobert,1.0,C,411.0,296.275
3114,Giannis Antetokounmpo,2.0,PF,280.0,212.59
2562,Myles Turner,5.0,C,15.0,132.675
2932,Paul George,3.0,SF,150.0,95.825
2671,Anthony Davis,-1,C,0.0,54.995
2963,Joel Embiid,4,C,26.0,43.38
3148,Andre Drummond,-1,C,0.0,29.85
2738,Brook Lopez,-1.0,C,0.0,24.005
3120,Hassan Whiteside,-1,C,0.0,23.225
2876,Draymond Green,6T,PF,6.0,22.355


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2020
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 138.42512732656516
R-squared (XGBoost): 0.6874159047284185


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
2127,Giannis Antetokounmpo,1.0,PF,432.0,195.515
2524,Rudy Gobert,3.0,C,187.0,119.6
2270,Anthony Davis,2,PF,200.0,72.73
2017,Hassan Whiteside,11,C,3.0,41.705
2466,Jonathan Isaac,-1.0,PF,0.0,29.355
2094,Brook Lopez,10.0,C,4.0,26.695
2340,Joel Embiid,-1,C,0.0,25.325
2259,LeBron James,-1,PG,0.0,16.57
2198,Kawhi Leonard,8T,SF,5.0,10.905
2324,Marc Gasol,-1,C,0.0,10.77


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2021
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 153.22553897763578
R-squared (XGBoost): 0.6839066304305965


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
1379,Rudy Gobert,1.0,C,464.0,375.26
1719,Myles Turner,9.0,C,3.0,55.205
1517,LeBron James,-1,PG,0.0,32.39
1554,Mike Conley,-1,PG,0.0,22.82
1422,Draymond Green,3,PF,76.0,22.425
1741,Matisse Thybulle,11T,SG,1.0,18.135
1716,Clint Capela,6.0,C,10.0,17.575
1610,Spencer Dinwiddie,-1.0,SG,0.0,15.64
1895,Giannis Antetokounmpo,5.0,PF,12.0,13.91
1399,Joel Embiid,7,C,7.0,13.055


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2022
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 175.93070639860142
R-squared (XGBoost): 0.18253727629454553


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
1022,Rudy Gobert,3.0,C,136.0,194.975
1184,Nikola Jokić,-1.0,C,0.0,95.11
933,Al Horford,9.0,C,3.0,65.4
1007,Joel Embiid,-1.0,C,0.0,55.525
1306,Bam Adebayo,4.0,C,128.0,47.975
1111,Giannis Antetokounmpo,6.0,PF,58.0,44.345
678,Chris Paul,-1.0,PG,0.0,42.82
627,Clint Capela,-1.0,C,0.0,38.005
713,Marcus Smart,1.0,PG,257.0,37.14
1123,Draymond Green,10.0,PF,2.0,36.72


---------------------------------------------------------------------
Selected Features Top  50  Test year :  2023
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 277.3736577586207
R-squared (XGBoost): 0.34814930274804434


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
323,Brook Lopez,2.0,C,309.0,80.785
59,Jaren Jackson Jr.,1.0,C,391.0,76.16
301,Anthony Davis,-1,C,0.0,66.05
497,Joel Embiid,9T,C,7.0,61.2
137,Evan Mobley,3.0,PF,101.0,54.35
176,Giannis Antetokounmpo,6.0,PF,14.0,46.155
158,Jrue Holiday,7T,PG,8.0,40.215
524,Bam Adebayo,5.0,C,18.0,37.925
392,Jacob Gilyard,-1.0,PG,0.0,33.455
206,Nic Claxton,9T,C,7.0,30.73


---------------------------------------------------------------------


Unnamed: 0,Rank_1994,Rank_1995,Rank_1996,Rank_1997,Rank_1998,Rank_1999,Rank_2000,Rank_2001,Rank_2002,Rank_2003,...,Rank_2014,Rank_2015,Rank_2016,Rank_2017,Rank_2018,Rank_2019,Rank_2020,Rank_2021,Rank_2022,Rank_2023
0,1.0,4,3,2,1,1,2,-1.0,1,1,...,1.0,4,2,2.0,1.0,1.0,1.0,1.0,3.0,2.0
1,2.0,1,4,5,4,2,5T,7,3T,4,...,7.0,1,1,1,8.0,2.0,3.0,9.0,-1.0,1.0
2,3.0,2,6,-1.0,2,3,1,1,-1,3,...,10.0,3.0,3,-1,6,5.0,2,-1,9.0,-1
3,-1.0,3,-1.0,4,5T,5T,7,1,2,-1,...,4.0,2,4.0,5T,-1,3.0,11,-1,-1.0,9T
4,-1.0,6T,2,3,3,4,3T,8T,-1,8,...,5,6.0,10,-1.0,3,-1,-1.0,3,4.0,3.0
5,-1.0,-1.0,5,1,5T,7T,-1,3,-1,11,...,13T,8,-1,-1,15T,4,10.0,11T,6.0,6.0
6,-1.0,5,7,-1,9T,-1,-1,-1,-1.0,-1,...,13T,-1,-1.0,-1.0,15T,-1,-1,6.0,-1.0,7T
7,-1.0,6T,1,-1,5T,7T,8T,-1.0,-1,11,...,3.0,-1.0,7.0,-1.0,-1,-1.0,-1,-1.0,-1.0,5.0
8,-1.0,-1,8T,6T,-1.0,-1,3T,5T,-1,13T,...,2.0,-1.0,-1.0,-1,-1.0,-1,8T,5.0,1.0,-1.0
9,-1.0,-1,8T,-1,11T,7T,5T,-1.0,3T,-1.0,...,6,15T,11T,-1.0,-1.0,6T,-1,7,10.0,9T


In [68]:
rank_data_10.to_csv('RDF+MI_All_rank_data_10.csv', index=False)
rank_data_20.to_csv('RDF+MI_All_rank_data_20.csv', index=False)
rank_data_30.to_csv('RDF+MI_All_rank_data_30.csv', index=False)
rank_data_40.to_csv('RDF+MI_All_rank_data_40.csv', index=False)
rank_data_50.to_csv('RDF+MI_All_rank_data_50.csv', index=False)