In [23]:
import pandas as pd

from xgboost import XGBRegressor

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import mean_squared_error, r2_score

import matplotlib.pyplot as plt

In [24]:
F_selection = pd.read_csv("F_Selection/top_N_features_SelectFromModel_XGBAll.csv")

# Extract top 10, 20, 30, and 50 features
top_10 = F_selection["Feature"].head(10).tolist()
top_20 = F_selection["Feature"].head(20).tolist()
top_30 = F_selection["Feature"].head(30).tolist()
top_40 = F_selection["Feature"].head(40).tolist()
top_50 = F_selection["Feature"].head(50).tolist()

In [25]:
def Data_Organizer(raw_Data,top):
    
    All_players_dataSet = raw_Data.copy()
    
    All_players_dataSet = All_players_dataSet.drop(columns=All_players_dataSet.columns[All_players_dataSet.columns.str.contains('Unnamed:')])
    All_players_dataSet.reset_index(drop=True, inplace=True)
    
    All_players_dataSet.fillna(0, inplace=True)
    All_players_dataSet.replace('', 0, inplace=True)
    All_players_dataSet.replace('--', 0, inplace=True)
    
    # check if any NaN,empty Strings exists in the dataframe
    any_missing_values = All_players_dataSet.isna().any().any()
    any_empty_values = (All_players_dataSet.applymap(lambda x: x == '')).any().any()
    
    if any_missing_values or any_empty_values:
        print("DataFrame contains missing values or empty strings/spaces.")
    else:
        print("DataFrame does not contain missing values or empty strings/spaces.")
        
    ## seperating our dataFrame
    Y = All_players_dataSet['Points_won']  # Target
    X = All_players_dataSet.drop('Points_won', axis=1)  # Features
    
    X = X.apply(pd.to_numeric, errors='coerce')

    ## keep the specified columns
    columns_to_keep = top
    
    ## keep only wanted columns
    X = X[columns_to_keep]

    return X, Y

In [26]:
all_players_w_add_sorted = pd.read_csv("all_players_w_add_sorted.csv")

  all_players_w_add_sorted = pd.read_csv("all_players_w_add_sorted.csv")


In [27]:
top = top_10

rank_data_10 = pd.DataFrame()

for year in range(1994,2024):
    print("Selected Features Top ",10," Test year : ", year)
    Test_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] == year]
    Train_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] < year]
    
    X_train, y_train = Data_Organizer(Train_Data,top)
    X_test , y_test = Data_Organizer(Test_Data,top)
    
    # Initialize and train a XGBoost Regressor model
    xgb_model = XGBRegressor(n_estimators=200,max_depth = 5, random_state=42, n_jobs=-1)
    xgb_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_xgb = xgb_model.predict(X_test)
    
    # Evaluate the model
    mse_xgb = mean_squared_error(y_test, y_pred_xgb)
    r2_xgb = r2_score(y_test, y_pred_xgb)
    
    # Mean Squared Error (MAE)
    print(f'Mean Squared Error (XGBoost): {mse_xgb}')
    print(f'R-squared (XGBoost): {r2_xgb}')
    
    Test_Data_copy = Test_Data.copy()
    Test_Data_copy.loc[:, 'Predicted_points'] = y_pred_xgb
    
    # Select only the desired columns
    Test_Data_concatenated = Test_Data_copy[['Player_name','Rank','Pos','Points_won', 'Predicted_points']]
    
    # Display the concatenated DataFrame sorted by 'Points_won'
    display(Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10))

    Test_Data_concatenated = Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10)
    
    if rank_data_10.empty:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_10 = pd.DataFrame(new_rank_column)
    else:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_10 = pd.concat([rank_data_10.reset_index(drop=True), new_rank_column.reset_index(drop=True)], axis=1)

    print("---------------------------------------------------------------------")

display(rank_data_10)

Selected Features Top  10  Test year :  1994
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 7.188681032209482
R-squared (XGBoost): -0.9461744891868107


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
15651,Hakeem Olajuwon,1,C,23.0,72.997528
15477,Scottie Pippen,4,SF,11.0,8.936706
15583,Mookie Blaylock,7T,PG,3.0,8.917365
15539,David Robinson,2,C,22.0,8.903131
15435,Otis Thorpe,-1.0,PF,0.0,1.894845
15531,LaPhonso Ellis,-1.0,PF,0.0,0.996227
15784,Gary Payton,5,PG,9.0,0.87992
15743,Dikembe Mutombo,3,C,19.0,0.76731
15739,Patrick Ewing,-1,C,0.0,0.651609
15649,Charles Oakley,9,PF,2.0,0.336138


---------------------------------------------------------------------
Selected Features Top  10  Test year :  1995
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 3.3158190297360655
R-squared (XGBoost): 0.4729714575298841


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
15026,Dikembe Mutombo,1.0,C,45.0,72.979698
14994,Hakeem Olajuwon,3.0,C,13.0,22.994846
15199,Scottie Pippen,2.0,SF,16.0,21.997505
15051,Patrick Ewing,-1.0,C,0.0,18.721085
15113,David Robinson,4.0,C,12.0,13.751002
15077,Mookie Blaylock,-1.0,PG,0.0,2.693089
15394,Karl Malone,-1.0,PF,0.0,2.044275
15174,Vlade Divac,-1.0,C,0.0,1.938754
15241,Rod Strickland,-1.0,PG,0.0,0.742767
15374,Toni Kukoč,-1.0,SF,0.0,0.569358


---------------------------------------------------------------------
Selected Features Top  10  Test year :  1996
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 3.1873954784472835
R-squared (XGBoost): 0.5751746604110433


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
14844,Gary Payton,1.0,PG,56.0,48.420063
14887,Dikembe Mutombo,3.0,C,11.0,44.880505
14797,Hakeem Olajuwon,5.0,C,8.0,12.983806
14897,David Robinson,4.0,C,9.0,7.941574
14672,Ervin Johnson,-1.0,C,0.0,6.845862
14949,Patrick Ewing,-1.0,C,0.0,6.406985
14619,Michael Jordan,6.0,SG,7.0,5.330116
14945,Otis Thorpe,-1.0,PF,0.0,5.233335
14749,Scottie Pippen,2.0,SF,15.0,5.002176
14770,Sam Perkins,-1.0,PF,0.0,4.47647


---------------------------------------------------------------------
Selected Features Top  10  Test year :  1997
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 8.840227082481755
R-squared (XGBoost): 0.00785468855367677


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
14149,Gary Payton,2,PG,25.0,71.836929
14489,Dikembe Mutombo,1,C,60.0,17.815456
14210,Hakeem Olajuwon,-1,C,0.0,14.324781
14164,Scottie Pippen,4,SF,4.0,10.616818
14184,Grant Hill,-1.0,SF,0.0,10.557968
14333,Mookie Blaylock,3,PG,18.0,9.371
14492,P.J. Brown,-1.0,PF,0.0,6.226767
14061,Christian Laettner,-1.0,PF,0.0,5.221625
13992,Shawn Kemp,6T,PF,1.0,4.580391
14386,Lindsey Hunter,-1.0,PG,0.0,2.221333


---------------------------------------------------------------------
Selected Features Top  10  Test year :  1998
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 1.2936162445882118
R-squared (XGBoost): 0.792163879710001


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13894,Dikembe Mutombo,1,C,39.0,25.482309
13764,Gary Payton,2,PG,37.0,21.051516
13740,Hakeem Olajuwon,-1,C,0.0,5.196792
13601,Kevin Garnett,11T,PF,1.0,2.224842
13605,David Robinson,3,C,10.0,1.826423
13956,Charles Oakley,-1,PF,0.0,1.721777
13889,Tim Duncan,5T,PF,4.0,1.4575
13836,Michael Jordan,4,SG,6.0,1.365962
13931,Eddie Jones,-1.0,SG,0.0,1.089939
13594,Ed Stokes,-1.0,C,0.0,1.063559


---------------------------------------------------------------------
Selected Features Top  10  Test year :  1999
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 9.991612783167824
R-squared (XGBoost): 0.41175614967141283


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
13182,Dikembe Mutombo,2,C,10.0,32.21278
13269,Alonzo Mourning,1,C,89.0,31.90781
13424,Gary Payton,3,PG,6.0,29.479237
13039,Hakeem Olajuwon,7T,C,1.0,21.344755
13434,David Robinson,4,C,3.0,2.054426
13438,Jason Kidd,-1.0,PG,0.0,1.463236
13488,Tim Duncan,5T,PF,2.0,0.647298
13404,Anfernee Hardaway,-1.0,SG,0.0,0.561247
13165,Patrick Ewing,-1,C,0.0,0.534652
13371,Darrell Armstrong,-1.0,PG,0.0,0.471588


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2000
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 4.135180249234092
R-squared (XGBoost): 0.5735157429783672


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12956,Alonzo Mourning,1,C,62.0,55.762207
12909,Dikembe Mutombo,3T,C,11.0,37.42997
12708,Gary Payton,5T,PG,4.0,31.918678
12987,David Robinson,-1,C,0.0,10.83035
12725,Shaquille O'Neal,2,C,21.0,8.443408
12830,Tim Duncan,-1,PF,0.0,3.197887
12750,Hakeem Olajuwon,-1,C,0.0,1.308552
12843,Marcus Camby,-1.0,C,0.0,1.025028
12841,Kevin Garnett,7,PF,2.0,0.695461
12904,Kobe Bryant,5T,SG,4.0,0.65672


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2001
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 9.238885147636998
R-squared (XGBoost): 0.1923654499247458


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12210,Alonzo Mourning,-1,C,0.0,28.3325
12427,Gary Payton,-1,PG,0.0,11.304347
12512,Dikembe Mutombo,1,C,48.0,10.874723
12510,Dikembe Mutombo,1,C,48.0,10.872789
12106,David Robinson,5T,C,6.0,6.600447
12418,Ben Wallace,5T,C,6.0,5.819444
12271,Tim Duncan,3,PF,14.0,5.521194
12391,Hakeem Olajuwon,-1,C,0.0,4.875531
12077,Chris Webber,-1.0,PF,0.0,3.233529
12274,Shawn Marion,11T,SF,1.0,2.56406


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2002
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 28.02562155628629
R-squared (XGBoost): 0.019307020123805585


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
12003,Dikembe Mutombo,3T,C,1.0,36.513016
12035,Alonzo Mourning,-1,C,0.0,29.563536
11950,Gary Payton,-1,PG,0.0,19.559023
11941,Ben Wallace,1,C,116.0,14.128008
11852,Tim Duncan,-1,PF,0.0,13.018267
11600,Rasheed Wallace,-1.0,PF,0.0,7.504123
11935,Elton Brand,-1.0,PF,0.0,3.419789
11927,Shaquille O'Neal,-1,C,0.0,3.343868
12041,Eddie Jones,-1,SG,0.0,3.318942
11936,Kevin Garnett,2,PF,2.0,2.656851


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2003
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 587.8479247309855
R-squared (XGBoost): 0.17825661424388417


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
11137,Ben Wallace,1.0,C,531.0,57.00045
11461,Dikembe Mutombo,-1.0,C,0.0,18.858419
11405,Tim Duncan,4.0,PF,90.0,13.392841
11559,Jason Kidd,-1.0,PG,0.0,13.058007
11505,Dirk Nowitzki,-1.0,PF,0.0,9.840309
11183,Kevin Garnett,3.0,PF,121.0,3.938565
11516,Jermaine O'Neal,-1.0,PF,0.0,2.099356
11219,Chris Webber,-1.0,PF,0.0,1.830449
11173,Elton Brand,-1.0,PF,0.0,1.765511
11495,Paul Pierce,-1.0,SG,0.0,1.711485


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2004
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 1015.3230661444687
R-squared (XGBoost): -0.46884611614950855


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
10663,Ben Wallace,2.0,C,325.0,528.510559
11127,Kevin Garnett,6.0,PF,36.0,525.108582
10871,Andrei Kirilenko,5.0,PF,67.0,80.550545
10721,Gary Payton,-1.0,PG,0.0,23.708796
10818,Tim Duncan,7.0,PF,8.0,16.501728
10722,Metta World Peace,1.0,SF,476.0,8.435342
10865,Alonzo Mourning,-1.0,C,0.0,5.736508
10992,Steve Francis,-1.0,PG,0.0,3.801142
10798,Allen Iverson,-1.0,SG,0.0,2.755883
10720,Tayshaun Prince,-1.0,SF,0.0,2.284139


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2005
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 403.28645364488847
R-squared (XGBoost): 0.03255548612089321


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
10592,Andrei Kirilenko,10,PF,25.0,51.23996
10343,Kevin Garnett,9,PF,30.0,28.999962
10326,Tim Duncan,4,PF,81.0,23.586596
10188,Andre Iguodala,14T,SG,4.0,21.175144
10355,LeBron James,-1.0,SF,0.0,16.579062
10151,Manu Ginóbili,18T,SG,2.0,16.537804
10396,Tony Parker,-1.0,PG,0.0,12.917602
10337,Allen Iverson,11,PG,10.0,10.833891
10593,Robert Horry,-1.0,PF,0.0,10.020588
10395,Gerald Wallace,-1.0,SF,0.0,8.251456


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2006
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 220.97069208853048
R-squared (XGBoost): 0.6217531124805351


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9841,Ben Wallace,1,C,420.0,317.187012
9704,Marcus Camby,5,C,55.0,105.699554
9951,Chris Paul,-1.0,PG,0.0,69.537193
9691,Andrei Kirilenko,3,SF,121.0,60.233204
10019,Shane Battier,10T,SF,3.0,44.110439
9745,Alonzo Mourning,8,C,29.0,34.463917
9607,Tony Parker,-1.0,PG,0.0,22.337772
9863,Shawn Marion,7,PF,33.0,20.979094
9610,Bruce Bowen,2,SF,308.0,20.082518
9731,Metta World Peace,4,SF,65.0,19.98843


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2007
DataFrame does not contain missing values or empty strings/spaces.
DataFrame does not contain missing values or empty strings/spaces.
Mean Squared Error (XGBoost): 624.7879315250538
R-squared (XGBoost): -0.1263185999654144


Unnamed: 0,Player_name,Rank,Pos,Points_won,Predicted_points
9414,Ben Wallace,6,C,42.0,338.406433
9418,Shawn Marion,4,SF,93.0,63.929554
9407,Marcus Camby,1,C,431.0,44.650879
9438,Tim Duncan,3,C,158.0,40.64201
9194,Josh Smith,21T,SF,3.0,38.993099
9566,Metta World Peace,8,SF,20.0,20.449396
9356,Kevin Garnett,13T,PF,7.0,17.81518
9203,Manu Ginóbili,-1,SG,0.0,16.118717
9489,LeBron James,-1.0,SF,0.0,11.372885
9344,Dikembe Mutombo,-1,C,0.0,9.246873


---------------------------------------------------------------------
Selected Features Top  10  Test year :  2008
DataFrame does not contain missing values or empty strings/spaces.


KeyboardInterrupt: 

In [None]:
top = top_20

rank_data_20 = pd.DataFrame()

for year in range(1994,2024):
    print("Selected Features Top ",20," Test year : ", year)
    Test_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] == year]
    Train_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] < year]
    
    X_train, y_train = Data_Organizer(Train_Data,top)
    X_test , y_test = Data_Organizer(Test_Data,top)
    
    # Initialize and train a XGBoost Regressor model
    xgb_model = XGBRegressor(n_estimators=200,max_depth = 5, random_state=42, n_jobs=-1)
    xgb_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_xgb = xgb_model.predict(X_test)
    
    # Evaluate the model
    mse_xgb = mean_squared_error(y_test, y_pred_xgb)
    r2_xgb = r2_score(y_test, y_pred_xgb)
    
    # Mean Squared Error (MAE)
    print(f'Mean Squared Error (XGBoost): {mse_xgb}')
    print(f'R-squared (XGBoost): {r2_xgb}')
    
    Test_Data_copy = Test_Data.copy()
    Test_Data_copy.loc[:, 'Predicted_points'] = y_pred_xgb
    
    # Select only the desired columns
    Test_Data_concatenated = Test_Data_copy[['Player_name','Rank','Pos','Points_won', 'Predicted_points']]
    
    # Display the concatenated DataFrame sorted by 'Points_won'
    display(Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10))

    Test_Data_concatenated = Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10)
    
    if rank_data_20.empty:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_20 = pd.DataFrame(new_rank_column)
    else:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_20 = pd.concat([rank_data_20.reset_index(drop=True), new_rank_column.reset_index(drop=True)], axis=1)

    print("---------------------------------------------------------------------")

display(rank_data_20)

In [None]:
top = top_30

rank_data_30 = pd.DataFrame()

for year in range(1994,2024):
    print("Selected Features Top ",30," Test year : ", year)
    Test_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] == year]
    Train_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] < year]
    
    X_train, y_train = Data_Organizer(Train_Data,top)
    X_test , y_test = Data_Organizer(Test_Data,top)
    
    # Initialize and train a XGBoost Regressor model
    xgb_model = XGBRegressor(n_estimators=200,max_depth = 5, random_state=42, n_jobs=-1)
    xgb_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_xgb = xgb_model.predict(X_test)
    
    # Evaluate the model
    mse_xgb = mean_squared_error(y_test, y_pred_xgb)
    r2_xgb = r2_score(y_test, y_pred_xgb)
    
    # Mean Squared Error (MAE)
    print(f'Mean Squared Error (XGBoost): {mse_xgb}')
    print(f'R-squared (XGBoost): {r2_xgb}')
    
    Test_Data_copy = Test_Data.copy()
    Test_Data_copy.loc[:, 'Predicted_points'] = y_pred_xgb
    
    # Select only the desired columns
    Test_Data_concatenated = Test_Data_copy[['Player_name','Rank','Pos','Points_won', 'Predicted_points']]
    
    # Display the concatenated DataFrame sorted by 'Points_won'
    display(Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10))

    Test_Data_concatenated = Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10)

    if rank_data_30.empty:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_30 = pd.DataFrame(new_rank_column)
    else:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_30 = pd.concat([rank_data_30.reset_index(drop=True), new_rank_column.reset_index(drop=True)], axis=1)

    print("---------------------------------------------------------------------")

display(rank_data_30)

In [None]:
top = top_40

rank_data_40 = pd.DataFrame()

for year in range(1994,2024):
    print("Selected Features Top ",40," Test year : ", year)
    Test_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] == year]
    Train_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] < year]
    
    X_train, y_train = Data_Organizer(Train_Data,top)
    X_test , y_test = Data_Organizer(Test_Data,top)
    
    # Initialize and train a XGBoost Regressor model
    xgb_model = XGBRegressor(n_estimators=200,max_depth = 5, random_state=42, n_jobs=-1)
    xgb_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_xgb = xgb_model.predict(X_test)
    
    # Evaluate the model
    mse_xgb = mean_squared_error(y_test, y_pred_xgb)
    r2_xgb = r2_score(y_test, y_pred_xgb)
    
    # Mean Squared Error (MAE)
    print(f'Mean Squared Error (XGBoost): {mse_xgb}')
    print(f'R-squared (XGBoost): {r2_xgb}')
    
    Test_Data_copy = Test_Data.copy()
    Test_Data_copy.loc[:, 'Predicted_points'] = y_pred_xgb
    
    # Select only the desired columns
    Test_Data_concatenated = Test_Data_copy[['Player_name','Rank','Pos','Points_won', 'Predicted_points']]
    
    # Display the concatenated DataFrame sorted by 'Points_won'
    display(Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10))

    Test_Data_concatenated = Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10)

    if rank_data_40.empty:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_40 = pd.DataFrame(new_rank_column)
    else:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_40 = pd.concat([rank_data_40.reset_index(drop=True), new_rank_column.reset_index(drop=True)], axis=1)

    print("---------------------------------------------------------------------")

display(rank_data_40)

In [None]:
top = top_50

rank_data_50 = pd.DataFrame()

for year in range(1994,2024):
    print("Selected Features Top ",50," Test year : ", year)
    Test_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] == year]
    Train_Data = all_players_w_add_sorted[all_players_w_add_sorted['Year'] < year]
    
    X_train, y_train = Data_Organizer(Train_Data,top)
    X_test , y_test = Data_Organizer(Test_Data,top)
    
    # Initialize and train a XGBoost Regressor model
    xgb_model = XGBRegressor(n_estimators=200,max_depth = 5, random_state=42, n_jobs=-1)
    xgb_model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_xgb = xgb_model.predict(X_test)
    
    # Evaluate the model
    mse_xgb = mean_squared_error(y_test, y_pred_xgb)
    r2_xgb = r2_score(y_test, y_pred_xgb)
    
    # Mean Squared Error (MAE)
    print(f'Mean Squared Error (XGBoost): {mse_xgb}')
    print(f'R-squared (XGBoost): {r2_xgb}')
    
    Test_Data_copy = Test_Data.copy()
    Test_Data_copy.loc[:, 'Predicted_points'] = y_pred_xgb
    
    # Select only the desired columns
    Test_Data_concatenated = Test_Data_copy[['Player_name','Rank','Pos','Points_won', 'Predicted_points']]
    
    # Display the concatenated DataFrame sorted by 'Points_won'
    display(Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10))

    Test_Data_concatenated = Test_Data_concatenated.sort_values(by='Predicted_points', ascending=False).head(10)

    if rank_data_50.empty:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_50 = pd.DataFrame(new_rank_column)
    else:
        new_rank_column = Test_Data_concatenated['Rank'].head(10).rename(f'Rank_{year}')
        rank_data_50 = pd.concat([rank_data_50.reset_index(drop=True), new_rank_column.reset_index(drop=True)], axis=1)

    print("---------------------------------------------------------------------")

display(rank_data_50)

In [None]:
rank_data_10.to_csv('XGB+XGB_All_rank_data_10.csv', index=False)
rank_data_20.to_csv('XGB+XGB_All_rank_data_20.csv', index=False)
rank_data_30.to_csv('XGB+XGB_All_rank_data_30.csv', index=False)
rank_data_40.to_csv('XGB+XGB_All_rank_data_40.csv', index=False)
rank_data_50.to_csv('XGB+XGB_All_rank_data_50.csv', index=False)