In [6]:
#import all required packages
import pandas as pd
import numpy as np
import os
import sklearn
import xgboost as xgb
import matplotlib.pyplot as plt
import joblib
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error , mean_squared_error, make_scorer
from sklearn import tree, metrics
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV,KFold
from sklearn.preprocessing import LabelEncoder

In [7]:
#mounting our google drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**LOADING THE FIFA 22 DATASET**


In [8]:
##importing our dataset
df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/AI/Midsem/players_21.csv')

## **Demonstrate the data preparation & feature extraction process [5]**

In [9]:
df.head()

Unnamed: 0,sofifa_id,player_url,short_name,long_name,player_positions,overall,potential,value_eur,wage_eur,age,...,lcb,cb,rcb,rb,gk,player_face_url,club_logo_url,club_flag_url,nation_logo_url,nation_flag_url
0,158023,https://sofifa.com/player/158023/lionel-messi/...,L. Messi,Lionel Andrés Messi Cuccittini,"RW, ST, CF",93,93,103500000.0,560000.0,33,...,52+3,52+3,52+3,62+3,19+3,https://cdn.sofifa.net/players/158/023/21_120.png,https://cdn.sofifa.net/teams/241/60.png,https://cdn.sofifa.net/flags/es.png,https://cdn.sofifa.net/teams/1369/60.png,https://cdn.sofifa.net/flags/ar.png
1,20801,https://sofifa.com/player/20801/c-ronaldo-dos-...,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,"ST, LW",92,92,63000000.0,220000.0,35,...,54+3,54+3,54+3,61+3,20+3,https://cdn.sofifa.net/players/020/801/21_120.png,https://cdn.sofifa.net/teams/45/60.png,https://cdn.sofifa.net/flags/it.png,https://cdn.sofifa.net/teams/1354/60.png,https://cdn.sofifa.net/flags/pt.png
2,188545,https://sofifa.com/player/188545/robert-lewand...,R. Lewandowski,Robert Lewandowski,ST,91,91,111000000.0,240000.0,31,...,60+3,60+3,60+3,61+3,19+3,https://cdn.sofifa.net/players/188/545/21_120.png,https://cdn.sofifa.net/teams/21/60.png,https://cdn.sofifa.net/flags/de.png,,https://cdn.sofifa.net/flags/pl.png
3,190871,https://sofifa.com/player/190871/neymar-da-sil...,Neymar Jr,Neymar da Silva Santos Júnior,"LW, CAM",91,91,132000000.0,270000.0,28,...,49+3,49+3,49+3,62+3,20+3,https://cdn.sofifa.net/players/190/871/21_120.png,https://cdn.sofifa.net/teams/73/60.png,https://cdn.sofifa.net/flags/fr.png,,https://cdn.sofifa.net/flags/br.png
4,192985,https://sofifa.com/player/192985/kevin-de-bruy...,K. De Bruyne,Kevin De Bruyne,"CAM, CM",91,91,129000000.0,370000.0,29,...,69+3,69+3,69+3,75+3,21+3,https://cdn.sofifa.net/players/192/985/21_120.png,https://cdn.sofifa.net/teams/10/60.png,https://cdn.sofifa.net/flags/gb-eng.png,https://cdn.sofifa.net/teams/1325/60.png,https://cdn.sofifa.net/flags/be.png


In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18944 entries, 0 to 18943
Columns: 110 entries, sofifa_id to nation_flag_url
dtypes: float64(16), int64(44), object(50)
memory usage: 15.9+ MB


HANDLING THE NULL NUMERICAL VALUES

In [11]:
#checking for missing values
numeric_columns = df.select_dtypes(include=['float64','int64'])
missing_values = numeric_columns.isnull().sum()
missing_values

sofifa_id                          0
overall                            0
potential                          0
value_eur                        237
wage_eur                         225
age                                0
height_cm                          0
weight_kg                          0
club_team_id                     225
league_level                     225
club_jersey_number               225
club_contract_valid_until        225
nationality_id                     0
nation_team_id                 17817
nation_jersey_number           17817
weak_foot                          0
skill_moves                        0
international_reputation           0
release_clause_eur               995
pace                            2083
shooting                        2083
passing                         2083
dribbling                       2083
defending                       2083
physic                          2083
attacking_crossing                 0
attacking_finishing                0
a

In [12]:
#replacing missing values with mean of its columns
df[numeric_columns.columns] = df[numeric_columns.columns].fillna(numeric_columns.mean())
df



Unnamed: 0,sofifa_id,player_url,short_name,long_name,player_positions,overall,potential,value_eur,wage_eur,age,...,lcb,cb,rcb,rb,gk,player_face_url,club_logo_url,club_flag_url,nation_logo_url,nation_flag_url
0,158023,https://sofifa.com/player/158023/lionel-messi/...,L. Messi,Lionel Andrés Messi Cuccittini,"RW, ST, CF",93,93,103500000.0,560000.0,33,...,52+3,52+3,52+3,62+3,19+3,https://cdn.sofifa.net/players/158/023/21_120.png,https://cdn.sofifa.net/teams/241/60.png,https://cdn.sofifa.net/flags/es.png,https://cdn.sofifa.net/teams/1369/60.png,https://cdn.sofifa.net/flags/ar.png
1,20801,https://sofifa.com/player/20801/c-ronaldo-dos-...,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,"ST, LW",92,92,63000000.0,220000.0,35,...,54+3,54+3,54+3,61+3,20+3,https://cdn.sofifa.net/players/020/801/21_120.png,https://cdn.sofifa.net/teams/45/60.png,https://cdn.sofifa.net/flags/it.png,https://cdn.sofifa.net/teams/1354/60.png,https://cdn.sofifa.net/flags/pt.png
2,188545,https://sofifa.com/player/188545/robert-lewand...,R. Lewandowski,Robert Lewandowski,ST,91,91,111000000.0,240000.0,31,...,60+3,60+3,60+3,61+3,19+3,https://cdn.sofifa.net/players/188/545/21_120.png,https://cdn.sofifa.net/teams/21/60.png,https://cdn.sofifa.net/flags/de.png,,https://cdn.sofifa.net/flags/pl.png
3,190871,https://sofifa.com/player/190871/neymar-da-sil...,Neymar Jr,Neymar da Silva Santos Júnior,"LW, CAM",91,91,132000000.0,270000.0,28,...,49+3,49+3,49+3,62+3,20+3,https://cdn.sofifa.net/players/190/871/21_120.png,https://cdn.sofifa.net/teams/73/60.png,https://cdn.sofifa.net/flags/fr.png,,https://cdn.sofifa.net/flags/br.png
4,192985,https://sofifa.com/player/192985/kevin-de-bruy...,K. De Bruyne,Kevin De Bruyne,"CAM, CM",91,91,129000000.0,370000.0,29,...,69+3,69+3,69+3,75+3,21+3,https://cdn.sofifa.net/players/192/985/21_120.png,https://cdn.sofifa.net/teams/10/60.png,https://cdn.sofifa.net/flags/gb-eng.png,https://cdn.sofifa.net/teams/1325/60.png,https://cdn.sofifa.net/flags/be.png
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18939,257710,https://sofifa.com/player/257710/mengxuan-zhan...,Zhang Mengxuan,张梦炫,CB,47,52,70000.0,1000.0,21,...,47+2,47+2,47+2,45+2,15+2,https://cdn.sofifa.net/players/257/710/21_120.png,https://cdn.sofifa.net/teams/112165/60.png,https://cdn.sofifa.net/flags/cn.png,,https://cdn.sofifa.net/flags/cn.png
18940,257933,https://sofifa.com/player/257933/wenzhuo-huang...,Huang Wenzhou,黄文卓,CM,47,53,70000.0,1000.0,21,...,46+2,46+2,46+2,47+2,15+2,https://cdn.sofifa.net/players/257/933/21_120.png,https://cdn.sofifa.net/teams/112540/60.png,https://cdn.sofifa.net/flags/cn.png,,https://cdn.sofifa.net/flags/cn.png
18941,257936,https://sofifa.com/player/257936/yue-song/210002,Song Yue,宋岳,CM,47,47,45000.0,2000.0,28,...,46+1,46+1,46+1,47,11+2,https://cdn.sofifa.net/players/257/936/21_120.png,https://cdn.sofifa.net/teams/111774/60.png,https://cdn.sofifa.net/flags/cn.png,,https://cdn.sofifa.net/flags/cn.png
18942,258736,https://sofifa.com/player/258736/vani-da-silva...,V. Da Silva,Ivanilson Loforte Tique Da Silva,ST,47,67,130000.0,500.0,17,...,26+2,26+2,26+2,33+2,14+2,https://cdn.sofifa.net/players/258/736/21_120.png,https://cdn.sofifa.net/teams/1920/60.png,https://cdn.sofifa.net/flags/gb-eng.png,,https://cdn.sofifa.net/flags/gb-eng.png


In [13]:
#columns of type object
objects_columns = df.select_dtypes(include=['object'])
objects_columns

Unnamed: 0,player_url,short_name,long_name,player_positions,dob,club_name,league_name,club_position,club_loaned_from,club_joined,...,lcb,cb,rcb,rb,gk,player_face_url,club_logo_url,club_flag_url,nation_logo_url,nation_flag_url
0,https://sofifa.com/player/158023/lionel-messi/...,L. Messi,Lionel Andrés Messi Cuccittini,"RW, ST, CF",1987-06-24,FC Barcelona,Spain Primera Division,CAM,,2004-07-01,...,52+3,52+3,52+3,62+3,19+3,https://cdn.sofifa.net/players/158/023/21_120.png,https://cdn.sofifa.net/teams/241/60.png,https://cdn.sofifa.net/flags/es.png,https://cdn.sofifa.net/teams/1369/60.png,https://cdn.sofifa.net/flags/ar.png
1,https://sofifa.com/player/20801/c-ronaldo-dos-...,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,"ST, LW",1985-02-05,Juventus,Italian Serie A,LS,,2018-07-10,...,54+3,54+3,54+3,61+3,20+3,https://cdn.sofifa.net/players/020/801/21_120.png,https://cdn.sofifa.net/teams/45/60.png,https://cdn.sofifa.net/flags/it.png,https://cdn.sofifa.net/teams/1354/60.png,https://cdn.sofifa.net/flags/pt.png
2,https://sofifa.com/player/188545/robert-lewand...,R. Lewandowski,Robert Lewandowski,ST,1988-08-21,FC Bayern München,German 1. Bundesliga,ST,,2014-07-01,...,60+3,60+3,60+3,61+3,19+3,https://cdn.sofifa.net/players/188/545/21_120.png,https://cdn.sofifa.net/teams/21/60.png,https://cdn.sofifa.net/flags/de.png,,https://cdn.sofifa.net/flags/pl.png
3,https://sofifa.com/player/190871/neymar-da-sil...,Neymar Jr,Neymar da Silva Santos Júnior,"LW, CAM",1992-02-05,Paris Saint-Germain,French Ligue 1,LW,,2017-08-03,...,49+3,49+3,49+3,62+3,20+3,https://cdn.sofifa.net/players/190/871/21_120.png,https://cdn.sofifa.net/teams/73/60.png,https://cdn.sofifa.net/flags/fr.png,,https://cdn.sofifa.net/flags/br.png
4,https://sofifa.com/player/192985/kevin-de-bruy...,K. De Bruyne,Kevin De Bruyne,"CAM, CM",1991-06-28,Manchester City,English Premier League,RCM,,2015-08-30,...,69+3,69+3,69+3,75+3,21+3,https://cdn.sofifa.net/players/192/985/21_120.png,https://cdn.sofifa.net/teams/10/60.png,https://cdn.sofifa.net/flags/gb-eng.png,https://cdn.sofifa.net/teams/1325/60.png,https://cdn.sofifa.net/flags/be.png
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18939,https://sofifa.com/player/257710/mengxuan-zhan...,Zhang Mengxuan,张梦炫,CB,1999-04-26,Chongqing Liangjiang Athletic,Chinese Super League,SUB,,2020-08-01,...,47+2,47+2,47+2,45+2,15+2,https://cdn.sofifa.net/players/257/710/21_120.png,https://cdn.sofifa.net/teams/112165/60.png,https://cdn.sofifa.net/flags/cn.png,,https://cdn.sofifa.net/flags/cn.png
18940,https://sofifa.com/player/257933/wenzhuo-huang...,Huang Wenzhou,黄文卓,CM,1999-01-07,Shanghai Port FC,Chinese Super League,RES,,2020-08-01,...,46+2,46+2,46+2,47+2,15+2,https://cdn.sofifa.net/players/257/933/21_120.png,https://cdn.sofifa.net/teams/112540/60.png,https://cdn.sofifa.net/flags/cn.png,,https://cdn.sofifa.net/flags/cn.png
18941,https://sofifa.com/player/257936/yue-song/210002,Song Yue,宋岳,CM,1991-11-20,Tianjin Jinmen Tiger FC,Chinese Super League,RES,,2020-08-01,...,46+1,46+1,46+1,47,11+2,https://cdn.sofifa.net/players/257/936/21_120.png,https://cdn.sofifa.net/teams/111774/60.png,https://cdn.sofifa.net/flags/cn.png,,https://cdn.sofifa.net/flags/cn.png
18942,https://sofifa.com/player/258736/vani-da-silva...,V. Da Silva,Ivanilson Loforte Tique Da Silva,ST,2003-03-30,Oldham Athletic,English League Two,SUB,,2020-08-01,...,26+2,26+2,26+2,33+2,14+2,https://cdn.sofifa.net/players/258/736/21_120.png,https://cdn.sofifa.net/teams/1920/60.png,https://cdn.sofifa.net/flags/gb-eng.png,,https://cdn.sofifa.net/flags/gb-eng.png


In [14]:
# dropping the follwing columns from the dataset
positions =  df[['ls', 'st', 'rs', 'lw', 'lf', 'cf', 'rf', 'rw', 'lam', 'cam', 'ram',
                  'lm', 'lcm', 'cm', 'rcm', 'rm', 'lwb', 'ldm', 'cdm', 'rdm', 'rwb',
                  'lb', 'lcb', 'cb', 'rcb', 'rb', 'gk']]

df = df.drop(columns=positions)


In [15]:
#encoding the categorical variables using labelecoding
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()

# Iterate over each column in the DataFrame and apply label encoding
positions_copy = positions.copy()

# Iterate over each column in the copy and apply label encoding
for column in positions_copy.columns:
    positions_copy[column] = label_encoder.fit_transform(positions_copy[column])


In [16]:
positions

Unnamed: 0,ls,st,rs,lw,lf,cf,rf,rw,lam,cam,...,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb,gk
0,89+3,89+3,89+3,92,93,93,93,92,93,93,...,65+3,65+3,65+3,66+3,62+3,52+3,52+3,52+3,62+3,19+3
1,91+1,91+1,91+1,89,91,91,91,89,88+3,88+3,...,61+3,61+3,61+3,65+3,61+3,54+3,54+3,54+3,61+3,20+3
2,89+2,89+2,89+2,85,87,87,87,85,85+3,85+3,...,65+3,65+3,65+3,64+3,61+3,60+3,60+3,60+3,61+3,19+3
3,84+3,84+3,84+3,90,89,89,89,90,90+1,90+1,...,62+3,62+3,62+3,67+3,62+3,49+3,49+3,49+3,62+3,20+3
4,83+3,83+3,83+3,88,88,88,88,88,89+2,89+2,...,80+3,80+3,80+3,79+3,75+3,69+3,69+3,69+3,75+3,21+3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18939,32+2,32+2,32+2,30,30,30,30,30,29+2,29+2,...,41+2,41+2,41+2,42+2,45+2,47+2,47+2,47+2,45+2,15+2
18940,41+2,41+2,41+2,45,44,44,44,45,46+2,46+2,...,48+2,48+2,48+2,47+2,47+2,46+2,46+2,46+2,47+2,15+2
18941,46+1,46+1,46+1,47,46,46,46,47,47,47,...,47,47,47,47,47,46+1,46+1,46+1,47,11+2
18942,47+2,47+2,47+2,49,49,49,49,49,49+2,49+2,...,32+2,32+2,32+2,35+2,33+2,26+2,26+2,26+2,33+2,14+2


In [17]:
#adding the encoded values to the dataset
df = pd.concat([df, positions_copy], axis=1)
df

Unnamed: 0,sofifa_id,player_url,short_name,long_name,player_positions,overall,potential,value_eur,wage_eur,age,...,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb,gk
0,158023,https://sofifa.com/player/158023/lionel-messi/...,L. Messi,Lionel Andrés Messi Cuccittini,"RW, ST, CF",93,93,103500000.0,560000.0,33,...,136,136,136,133,110,59,59,59,110,18
1,20801,https://sofifa.com/player/20801/c-ronaldo-dos-...,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,"ST, LW",92,92,63000000.0,220000.0,35,...,110,110,110,126,104,66,66,66,104,20
2,188545,https://sofifa.com/player/188545/robert-lewand...,R. Lewandowski,Robert Lewandowski,ST,91,91,111000000.0,240000.0,31,...,136,136,136,119,104,96,96,96,104,18
3,190871,https://sofifa.com/player/190871/neymar-da-sil...,Neymar Jr,Neymar da Silva Santos Júnior,"LW, CAM",91,91,132000000.0,270000.0,28,...,116,116,116,139,110,52,52,52,110,20
4,192985,https://sofifa.com/player/192985/kevin-de-bruy...,K. De Bruyne,Kevin De Bruyne,"CAM, CM",91,91,129000000.0,370000.0,29,...,232,232,232,204,185,158,158,158,185,22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18939,257710,https://sofifa.com/player/257710/mengxuan-zhan...,Zhang Mengxuan,张梦炫,CB,47,52,70000.0,1000.0,21,...,39,39,39,39,44,46,46,46,44,9
18940,257933,https://sofifa.com/player/257933/wenzhuo-huang...,Huang Wenzhou,黄文卓,CM,47,53,70000.0,1000.0,21,...,52,52,52,48,49,44,44,44,49,9
18941,257936,https://sofifa.com/player/257936/yue-song/210002,Song Yue,宋岳,CM,47,47,45000.0,2000.0,28,...,49,49,49,46,48,43,43,43,48,1
18942,258736,https://sofifa.com/player/258736/vani-da-silva...,V. Da Silva,Ivanilson Loforte Tique Da Silva,ST,47,67,130000.0,500.0,17,...,21,21,21,30,26,10,10,10,26,7


In [18]:
objects_columns = df.select_dtypes(include=['object'])
# dropping all unwanted categorica variables
df = df.select_dtypes(exclude=['object'])
df


Unnamed: 0,sofifa_id,overall,potential,value_eur,wage_eur,age,height_cm,weight_kg,club_team_id,league_level,...,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb,gk
0,158023,93,93,103500000.0,560000.0,33,170,72,241.0,1.0,...,136,136,136,133,110,59,59,59,110,18
1,20801,92,92,63000000.0,220000.0,35,187,83,45.0,1.0,...,110,110,110,126,104,66,66,66,104,20
2,188545,91,91,111000000.0,240000.0,31,184,80,21.0,1.0,...,136,136,136,119,104,96,96,96,104,18
3,190871,91,91,132000000.0,270000.0,28,175,68,73.0,1.0,...,116,116,116,139,110,52,52,52,110,20
4,192985,91,91,129000000.0,370000.0,29,181,70,10.0,1.0,...,232,232,232,204,185,158,158,158,185,22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18939,257710,47,52,70000.0,1000.0,21,177,70,112165.0,1.0,...,39,39,39,39,44,46,46,46,44,9
18940,257933,47,53,70000.0,1000.0,21,174,68,112540.0,1.0,...,52,52,52,48,49,44,44,44,49,9
18941,257936,47,47,45000.0,2000.0,28,185,79,111774.0,1.0,...,49,49,49,46,48,43,43,43,48,1
18942,258736,47,67,130000.0,500.0,17,171,58,1920.0,4.0,...,21,21,21,30,26,10,10,10,26,7


In [19]:
for column in df.columns:
    print(column)

sofifa_id
overall
potential
value_eur
wage_eur
age
height_cm
weight_kg
club_team_id
league_level
club_jersey_number
club_contract_valid_until
nationality_id
nation_team_id
nation_jersey_number
weak_foot
skill_moves
international_reputation
release_clause_eur
pace
shooting
passing
dribbling
defending
physic
attacking_crossing
attacking_finishing
attacking_heading_accuracy
attacking_short_passing
attacking_volleys
skill_dribbling
skill_curve
skill_fk_accuracy
skill_long_passing
skill_ball_control
movement_acceleration
movement_sprint_speed
movement_agility
movement_reactions
movement_balance
power_shot_power
power_jumping
power_stamina
power_strength
power_long_shots
mentality_aggression
mentality_interceptions
mentality_positioning
mentality_vision
mentality_penalties
mentality_composure
defending_marking_awareness
defending_standing_tackle
defending_sliding_tackle
goalkeeping_diving
goalkeeping_handling
goalkeeping_kicking
goalkeeping_positioning
goalkeeping_reflexes
goalkeeping_speed


### **Create feature subsets that show maximum correlation with the dependent variable**

In [20]:
#featured subsets using correlation
feature_corr = df.corr()
feature_corr = feature_corr['overall'].sort_values(ascending=False)
print(feature_corr)



overall                1.000000
movement_reactions     0.867234
mentality_composure    0.705252
passing                0.662090
rcm                    0.647694
                         ...   
nationality_id        -0.098959
club_jersey_number    -0.187180
club_team_id          -0.207319
league_level          -0.215148
sofifa_id             -0.486575
Name: overall, Length: 87, dtype: float64


In [21]:
#sorting the feature correlation in descending order
feature_corr= feature_corr.sort_values(ascending=False)
feature_corr

overall                1.000000
movement_reactions     0.867234
mentality_composure    0.705252
passing                0.662090
rcm                    0.647694
                         ...   
nationality_id        -0.098959
club_jersey_number    -0.187180
club_team_id          -0.207319
league_level          -0.215148
sofifa_id             -0.486575
Name: overall, Length: 87, dtype: float64

In [22]:
## now lets select the top eleven features with the highest absolute correlations
highest_correlations = feature_corr[1:11]
highest_correlations

movement_reactions     0.867234
mentality_composure    0.705252
passing                0.662090
rcm                    0.647694
cm                     0.647694
lcm                    0.647694
potential              0.636366
lm                     0.617624
rm                     0.617624
cam                    0.614777
Name: overall, dtype: float64

In [23]:
#Correlation does not imply causation
#Just because two variables are highly correlated doesn't mean that one directly causes the other
#It's possible that they are both influenced by a third variable, or the relationship is coincidental
#therefore after more testing and training we came up with these features for training
#features to be used for training
#features to be used for training
subsetx = [
    'movement_reactions',
    'skill_dribbling',
    'passing',
    'potential',
    'dribbling',
    'attacking_short_passing',
    'physic',
    'skill_long_passing',
    'movement_agility',
    'skill_moves',
    'shooting',
    'skill_ball_control',
    'mentality_vision',
    'weight_kg',
    'attacking_crossing'
]
feature_subsetx =df[subsetx]


In [24]:
# scaling our independent variables
scx = StandardScaler()
scaledx = scx.fit_transform(feature_subsetx)
feature_subsetx = pd.DataFrame(scaledx,columns = feature_subsetx.columns)
feature_subsetx




Unnamed: 0,movement_reactions,skill_dribbling,passing,potential,dribbling,attacking_short_passing,physic,skill_long_passing,movement_agility,skill_moves,shooting,skill_ball_control,mentality_vision,weight_kg,attacking_crossing
0,3.554438,2.153320,3.493625,3.586563,3.433013,2.216206,0.058838,2.522017,1.891773,2.135802,3.009606,2.260399,3.000047,-0.427506,1.949428
1,3.664174,1.727485,2.461857,3.422893,2.800094,1.598583,1.363898,1.601260,1.618265,3.440521,3.085367,2.019401,2.052781,1.131240,1.894339
2,3.444701,1.567797,2.152326,3.259222,2.378148,1.735833,1.907673,1.140882,0.934495,2.135802,2.933845,1.778402,1.834181,0.706127,1.178188
3,3.225227,2.100091,2.977741,3.259222,3.327526,1.941707,-0.593692,1.864334,2.233658,3.440521,2.479280,2.200150,2.635714,-0.994323,1.949428
4,3.225227,1.727485,3.699979,3.259222,2.694607,2.422080,1.472653,2.653554,1.002872,2.135802,2.555040,2.019401,2.927180,-0.710915,2.445225
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18939,-1.493455,-1.945342,-3.212867,-3.123941,-3.740069,-1.901280,-1.789997,-1.818694,-1.253569,-0.473636,-2.217897,-2.077577,-2.100615,-0.710915,-1.466065
18940,-1.273981,-0.561378,-0.839801,-2.960270,-1.630339,-0.254285,-1.789997,-0.174485,-0.911684,-0.473636,-1.536049,-0.812334,-0.060350,-0.994323,-0.639736
18941,-1.932402,-0.508149,-0.839801,-3.942295,-1.735826,-0.185661,-0.593692,0.220125,-0.501422,-0.473636,-1.157245,-0.932833,-0.716150,0.564423,-0.309205
18942,-0.944771,-0.135543,-1.768392,-0.668878,-0.997420,-0.940533,-2.660037,-1.029474,0.250725,-0.473636,-0.475396,-0.872583,-0.351817,-2.411365,-1.245711


In [None]:
# pickling the model #creating the scaler model
import pickle
pickle_out = open("scaler.pkl", "wb")
pickle.dump(scx, pickle_out)
pickle_out.close()

### **Create and train a suitable machine learning model with cross-validation that can predict a player's rating. [5]**

In [None]:
#defining our X and Y for training
Y = df['overall']
X = feature_subsetx

In [None]:
Xtrain,Xtest,Ytrain,Ytest=train_test_split(X,Y,test_size=0.2,random_state=42)
model_rms_scores=[]


**DecisionTreeRegressor**

### **Measure the model's performance and fine-tune it as a process of optimization. [5]**

In [None]:
regressor = DecisionTreeRegressor()
regressor.fit(Xtrain,Ytrain)
y_pred = regressor.predict(Xtest)
mae=mean_absolute_error(y_pred,Ytest)
print("Mean Absolute Error:", mae)

mse= mean_squared_error(y_pred,Ytest)
print("Mean Squared Error:", mse)

rmse1 = np.sqrt(mse)
model_rms_scores.append(rmse1)
print("Root Mean Squared Error:",rmse1)

Mean Absolute Error: 1.7901821060965954
Mean Squared Error: 6.876484560570071
Root Mean Squared Error: 2.6223051997374505


**XGBOOST**

In [None]:
xgbr = XGBRegressor(
  objective = "reg:squarederror",
  n_estimators =200,
  learning_rate = 0.1,
  max_depth = 50
)

xgbr.fit(Xtrain,Ytrain)
y_pred = xgbr.predict(Xtest)

In [None]:
mae=mean_absolute_error(y_pred,Ytest)
print("Mean Absolute Error:", mae)

mse= mean_squared_error(y_pred,Ytest)
print("Mean Squared Error:", mse)

rmse2 = np.sqrt(mse)
model_rms_scores.append(rmse2)
print("Root Mean Squared Error:",rmse2)

Mean Absolute Error: 1.3035046357723425
Mean Squared Error: 3.4637950649025893
Root Mean Squared Error: 1.8611273639658812


**GRADIENT** **BOOST**

In [None]:
gb_regressor = GradientBoostingRegressor(
  loss = "squared_error",
  n_estimators =200,
  learning_rate = 0.1,
  max_depth = 50
)

gb_regressor.fit(Xtrain,Ytrain)
y_pred = gb_regressor.predict(Xtest)

In [None]:
mae=mean_absolute_error(y_pred,Ytest)
print("Mean Absolute Error:", mae)

mse= mean_squared_error(y_pred,Ytest)
print("Mean Squared Error:", mse)

rmse3 = np.sqrt(mse)
model_rms_scores.append(rmse3)
print("Root Mean Squared Error:",rmse3)

Mean Absolute Error: 1.7342626389915874
Mean Squared Error: 6.296006866213739
Root Mean Squared Error: 2.509184502226518


**VOTING REGRESSOR**

In [None]:
linear_regressor_model = LinearRegression()
xgboost_regressor = XGBRegressor()
random_forest_regressor = RandomForestRegressor()
gradient_boosting_regressor = GradientBoostingRegressor()

In [None]:
voting_regressor = VotingRegressor(
    estimators=[
        ('Linear regressor',linear_regressor_model),
        ('xgboost', xgboost_regressor),
        ('random_forrest', random_forest_regressor),
        ('gradient_boosting', gradient_boosting_regressor)
    ],
    weights=[1, 1, 1,1],
    n_jobs=-1
)

voting_regressor.fit(Xtrain,Ytrain)
y_pred = voting_regressor.predict(Xtest)

In [None]:
mae=mean_absolute_error(y_pred,Ytest)
print("Mean Absolute Error:", mae)

mse= mean_squared_error(y_pred,Ytest)
print("Mean Squared Error:", mse)

rmse4 = np.sqrt(mse)
model_rms_scores.append(rmse4)
print("Root Mean Squared Error:",rmse4)

Mean Absolute Error: 1.3634032401402911
Mean Squared Error: 3.377820158740074
Root Mean Squared Error: 1.8378846968022977


**RANDOM FOREST REGRESSOR**

In [None]:
rf_regressor = RandomForestRegressor(n_estimators=300, max_depth=40, n_jobs= -1)
rf_regressor.fit(Xtrain, Ytrain)
y_pred = rf_regressor.predict(Xtest)

In [None]:
mae=mean_absolute_error(y_pred,Ytest)
print("Mean Absolute Error:", mae)

mse= mean_squared_error(y_pred,Ytest)
print("Mean Squared Error:", mse)

rmse5 = np.sqrt(mse)
model_rms_scores.append(rmse5)
print("Root Mean Squared Error:",rmse5)

Mean Absolute Error: 1.2689944576405383
Mean Squared Error: 3.2006200756576053
Root Mean Squared Error: 1.7890276900198065


**THE BEST MODEL**

In [None]:
model_names = ["Decision Tree", "XGBoost", "Gradient Boosting", "Voting", "Random Forest"]
best_model_index = model_rms_scores.index(min(model_rms_scores))
best_model_name = model_names[best_model_index]
best_rmse = model_rms_scores[best_model_index]

print(f"The best model is '{best_model_name}' with RMSE of {best_rmse:.2f}")


The best model is 'Random Forest' with RMSE of 1.79


**CROSS VALIDATION  FOR MOST ACCURATE MODEL**

In [None]:

##Since Random Forest gave us the best RMSE, we will finetune that and use crossvalidation on it with GridSearchCV

# GridSearchCV combines hyperparameter tuning and cross-validation in a single process.
# It systematically searches through hyperparameter combinations, training and evaluating the model on multiple subsets of the data to find the best configuration.
# This ensures that the model is optimized for generalization to unseen data while fine-tuning its hyperparameters.
# Define the hyperparameter grid for Random Forest
param_grid = {
    'n_estimators': [100, 200, 300],  # Adjust the values as needed
    'max_depth': [10, 20, 30]  # Adjust the values as needed
}

# Create the Random Forest model
rf_model = RandomForestRegressor()



# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, n_jobs=-1)

In [None]:
# Fit the grid search to find the best hyperparameters
grid_search.fit(X, Y)

# Get the best hyperparameters
best_params = grid_search.best_params_

#Train the final model with the best hyperparameters
best_model = RandomForestRegressor(**best_params)
best_model.fit(X, Y)

In [None]:
# Make predictions with the best model
Y_pred = best_model.predict(Xtest)

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(Ytest, Y_pred))

print("RMSE on the test set:", rmse)

RMSE on the test set: 0.6601987114512766


### **Use the data from another season(players_22) which was not used during the training to test how good is the model. [5]**

**FIFA 22**

In [None]:
testdf = pd.read_csv('/content/drive/My Drive/Colab Notebooks/AI/Midsem/players_22.csv')

  testdf = pd.read_csv('/content/drive/My Drive/Colab Notebooks/AI/Midsem/players_22.csv')


In [None]:
testdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19239 entries, 0 to 19238
Columns: 110 entries, sofifa_id to nation_flag_url
dtypes: float64(16), int64(44), object(50)
memory usage: 16.1+ MB


In [None]:
numeric_columns22 = testdf.select_dtypes(include=['float64','int64'])
missing_values22 = numeric_columns22.isnull().sum()
missing_values22

sofifa_id                          0
overall                            0
potential                          0
value_eur                         74
wage_eur                          61
age                                0
height_cm                          0
weight_kg                          0
club_team_id                      61
league_level                      61
club_jersey_number                61
club_contract_valid_until         61
nationality_id                     0
nation_team_id                 18480
nation_jersey_number           18480
weak_foot                          0
skill_moves                        0
international_reputation           0
release_clause_eur              1176
pace                            2132
shooting                        2132
passing                         2132
dribbling                       2132
defending                       2132
physic                          2132
attacking_crossing                 0
attacking_finishing                0
a

In [None]:
#replacing missing values with mean of its columns
testdf[numeric_columns22.columns] = testdf[numeric_columns22.columns].fillna(numeric_columns22.mean())
testdf


Unnamed: 0,sofifa_id,player_url,short_name,long_name,player_positions,overall,potential,value_eur,wage_eur,age,...,lcb,cb,rcb,rb,gk,player_face_url,club_logo_url,club_flag_url,nation_logo_url,nation_flag_url
0,158023,https://sofifa.com/player/158023/lionel-messi/...,L. Messi,Lionel Andrés Messi Cuccittini,"RW, ST, CF",93,93,78000000.0,320000.0,34,...,50+3,50+3,50+3,61+3,19+3,https://cdn.sofifa.net/players/158/023/22_120.png,https://cdn.sofifa.net/teams/73/60.png,https://cdn.sofifa.net/flags/fr.png,https://cdn.sofifa.net/teams/1369/60.png,https://cdn.sofifa.net/flags/ar.png
1,188545,https://sofifa.com/player/188545/robert-lewand...,R. Lewandowski,Robert Lewandowski,ST,92,92,119500000.0,270000.0,32,...,60+3,60+3,60+3,61+3,19+3,https://cdn.sofifa.net/players/188/545/22_120.png,https://cdn.sofifa.net/teams/21/60.png,https://cdn.sofifa.net/flags/de.png,https://cdn.sofifa.net/teams/1353/60.png,https://cdn.sofifa.net/flags/pl.png
2,20801,https://sofifa.com/player/20801/c-ronaldo-dos-...,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,"ST, LW",91,91,45000000.0,270000.0,36,...,53+3,53+3,53+3,60+3,20+3,https://cdn.sofifa.net/players/020/801/22_120.png,https://cdn.sofifa.net/teams/11/60.png,https://cdn.sofifa.net/flags/gb-eng.png,https://cdn.sofifa.net/teams/1354/60.png,https://cdn.sofifa.net/flags/pt.png
3,190871,https://sofifa.com/player/190871/neymar-da-sil...,Neymar Jr,Neymar da Silva Santos Júnior,"LW, CAM",91,91,129000000.0,270000.0,29,...,50+3,50+3,50+3,62+3,20+3,https://cdn.sofifa.net/players/190/871/22_120.png,https://cdn.sofifa.net/teams/73/60.png,https://cdn.sofifa.net/flags/fr.png,,https://cdn.sofifa.net/flags/br.png
4,192985,https://sofifa.com/player/192985/kevin-de-bruy...,K. De Bruyne,Kevin De Bruyne,"CM, CAM",91,91,125500000.0,350000.0,30,...,69+3,69+3,69+3,75+3,21+3,https://cdn.sofifa.net/players/192/985/22_120.png,https://cdn.sofifa.net/teams/10/60.png,https://cdn.sofifa.net/flags/gb-eng.png,https://cdn.sofifa.net/teams/1325/60.png,https://cdn.sofifa.net/flags/be.png
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19234,261962,https://sofifa.com/player/261962/defu-song/220002,Song Defu,宋德福,CDM,47,52,70000.0,1000.0,22,...,46+2,46+2,46+2,48+2,15+2,https://cdn.sofifa.net/players/261/962/22_120.png,https://cdn.sofifa.net/teams/112541/60.png,https://cdn.sofifa.net/flags/cn.png,,https://cdn.sofifa.net/flags/cn.png
19235,262040,https://sofifa.com/player/262040/caoimhin-port...,C. Porter,Caoimhin Porter,CM,47,59,110000.0,500.0,19,...,44+2,44+2,44+2,48+2,14+2,https://cdn.sofifa.net/players/262/040/22_120.png,https://cdn.sofifa.net/teams/445/60.png,https://cdn.sofifa.net/flags/ie.png,,https://cdn.sofifa.net/flags/ie.png
19236,262760,https://sofifa.com/player/262760/nathan-logue/...,N. Logue,Nathan Logue-Cunningham,CM,47,55,100000.0,500.0,21,...,45+2,45+2,45+2,47+2,12+2,https://cdn.sofifa.net/players/262/760/22_120.png,https://cdn.sofifa.net/teams/111131/60.png,https://cdn.sofifa.net/flags/ie.png,,https://cdn.sofifa.net/flags/ie.png
19237,262820,https://sofifa.com/player/262820/luke-rudden/2...,L. Rudden,Luke Rudden,ST,47,60,110000.0,500.0,19,...,26+2,26+2,26+2,32+2,15+2,https://cdn.sofifa.net/players/262/820/22_120.png,https://cdn.sofifa.net/teams/111131/60.png,https://cdn.sofifa.net/flags/ie.png,,https://cdn.sofifa.net/flags/ie.png


In [None]:
objects_columns22 = testdf.select_dtypes(include=['object'])

# dropping all unwanted categorica variables
testdf = testdf.select_dtypes(exclude=['object'])
testdf

Unnamed: 0,sofifa_id,overall,potential,value_eur,wage_eur,age,height_cm,weight_kg,club_team_id,league_level,...,mentality_composure,defending_marking_awareness,defending_standing_tackle,defending_sliding_tackle,goalkeeping_diving,goalkeeping_handling,goalkeeping_kicking,goalkeeping_positioning,goalkeeping_reflexes,goalkeeping_speed
0,158023,93,93,78000000.0,320000.0,34,170,72,73.0,1.0,...,96,20,35,24,6,11,15,14,8,36.439962
1,188545,92,92,119500000.0,270000.0,32,185,81,21.0,1.0,...,88,35,42,19,15,6,12,8,10,36.439962
2,20801,91,91,45000000.0,270000.0,36,187,83,11.0,1.0,...,95,24,32,24,7,11,15,14,11,36.439962
3,190871,91,91,129000000.0,270000.0,29,175,68,73.0,1.0,...,93,35,32,29,9,9,15,15,11,36.439962
4,192985,91,91,125500000.0,350000.0,30,181,70,10.0,1.0,...,89,68,65,53,15,13,5,10,13,36.439962
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19234,261962,47,52,70000.0,1000.0,22,180,64,112541.0,1.0,...,37,38,43,48,6,10,5,15,13,36.439962
19235,262040,47,59,110000.0,500.0,19,175,70,445.0,1.0,...,47,37,44,47,11,12,6,8,10,36.439962
19236,262760,47,55,100000.0,500.0,21,178,72,111131.0,1.0,...,36,38,44,48,8,6,7,10,6,36.439962
19237,262820,47,60,110000.0,500.0,19,173,66,111131.0,1.0,...,47,10,14,11,7,10,7,14,15,36.439962


In [None]:
feature_subsetx

Unnamed: 0,movement_reactions,skill_dribbling,passing,potential,dribbling,attacking_short_passing,physic,skill_long_passing,movement_agility,skill_moves,shooting,skill_ball_control,mentality_vision,weight_kg,attacking_crossing
0,3.554438,2.153320,3.493625,3.586563,3.433013,2.216206,0.058838,2.522017,1.891773,2.135802,3.009606,2.260399,3.000047,-0.427506,1.949428
1,3.664174,1.727485,2.461857,3.422893,2.800094,1.598583,1.363898,1.601260,1.618265,3.440521,3.085367,2.019401,2.052781,1.131240,1.894339
2,3.444701,1.567797,2.152326,3.259222,2.378148,1.735833,1.907673,1.140882,0.934495,2.135802,2.933845,1.778402,1.834181,0.706127,1.178188
3,3.225227,2.100091,2.977741,3.259222,3.327526,1.941707,-0.593692,1.864334,2.233658,3.440521,2.479280,2.200150,2.635714,-0.994323,1.949428
4,3.225227,1.727485,3.699979,3.259222,2.694607,2.422080,1.472653,2.653554,1.002872,2.135802,2.555040,2.019401,2.927180,-0.710915,2.445225
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18939,-1.493455,-1.945342,-3.212867,-3.123941,-3.740069,-1.901280,-1.789997,-1.818694,-1.253569,-0.473636,-2.217897,-2.077577,-2.100615,-0.710915,-1.466065
18940,-1.273981,-0.561378,-0.839801,-2.960270,-1.630339,-0.254285,-1.789997,-0.174485,-0.911684,-0.473636,-1.536049,-0.812334,-0.060350,-0.994323,-0.639736
18941,-1.932402,-0.508149,-0.839801,-3.942295,-1.735826,-0.185661,-0.593692,0.220125,-0.501422,-0.473636,-1.157245,-0.932833,-0.716150,0.564423,-0.309205
18942,-0.944771,-0.135543,-1.768392,-0.668878,-0.997420,-0.940533,-2.660037,-1.029474,0.250725,-0.473636,-0.475396,-0.872583,-0.351817,-2.411365,-1.245711


TESTING WITH FIFA 22

In [None]:
subsetx22 = [
    'movement_reactions',
    'skill_dribbling',
    'passing',
    'potential',
    'dribbling',
    'attacking_short_passing',
    'physic',
    'skill_long_passing',
    'movement_agility',
    'skill_moves',
    'shooting',
    'skill_ball_control',
    'mentality_vision',
    'weight_kg',
    'attacking_crossing'
]
feature_subset22 =testdf[subsetx22]
feature_subset22


Unnamed: 0,movement_reactions,skill_dribbling,passing,potential,dribbling,attacking_short_passing,physic,skill_long_passing,movement_agility,skill_moves,shooting,skill_ball_control,mentality_vision,weight_kg,attacking_crossing
0,94,96,91.0,93,95.0,91,65.0,91,91,4,92.0,96,95,72,85
1,93,85,79.0,92,86.0,85,82.0,70,77,4,92.0,88,81,81,71
2,94,88,80.0,91,88.0,80,75.0,77,86,5,94.0,88,76,83,87
3,89,95,86.0,91,94.0,86,63.0,81,96,5,83.0,95,90,68,85
4,91,88,93.0,91,88.0,94,78.0,93,79,4,86.0,91,94,70,94
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19234,53,45,46.0,52,48.0,50,49.0,48,55,2,35.0,49,43,64,46
19235,49,41,50.0,59,46.0,51,51.0,50,64,2,39.0,42,49,70,54
19236,46,47,45.0,55,49.0,49,52.0,49,58,2,37.0,49,46,72,39
19237,48,42,36.0,60,48.0,38,42.0,33,72,2,46.0,45,40,66,29


In [None]:
sc = StandardScaler()
scaled = sc.fit_transform(feature_subset22)
feature_subset22 = pd.DataFrame(scaled,columns = feature_subset22.columns)
feature_subset22

Unnamed: 0,movement_reactions,skill_dribbling,passing,potential,dribbling,attacking_short_passing,physic,skill_long_passing,movement_agility,skill_moves,shooting,skill_ball_control,mentality_vision,weight_kg,attacking_crossing
0,3.599846,2.147538,3.548135,3.601780,3.564477,2.217457,0.019139,2.524112,1.850364,2.146241,2.992852,2.252136,3.006228,-0.416315,1.964190
1,3.489252,1.561936,2.284233,3.437470,2.575529,1.803392,1.860333,1.126551,0.908358,2.146241,2.992852,1.772039,1.980597,0.856805,1.187887
2,3.599846,1.721646,2.389558,3.273160,2.795295,1.458338,1.102194,1.592404,1.513933,3.448937,3.143798,1.772039,1.614300,1.139720,2.075090
3,3.046874,2.094301,3.021509,3.273160,3.454594,1.872403,-0.197472,1.858606,2.186795,3.448937,2.313597,2.192124,2.639931,-0.982145,1.964190
4,3.268063,1.721646,3.758786,3.273160,2.795295,2.424490,1.427111,2.657213,1.042930,2.146241,2.540015,1.952075,2.932969,-0.699230,2.463242
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19234,-0.934526,-0.567524,-1.191498,-3.134932,-1.600026,-0.611986,-1.713750,-0.337561,-0.571938,-0.459150,-1.309098,-0.568436,-0.803260,-1.547976,-0.198369
19235,-1.376903,-0.780470,-0.770197,-1.984762,-1.819792,-0.542976,-1.497139,-0.204460,0.033638,-0.459150,-1.007207,-0.988521,-0.363704,-0.699230,0.245233
19236,-1.708687,-0.461051,-1.296823,-2.642002,-1.490143,-0.680997,-1.388833,-0.271010,-0.370079,-0.459150,-1.158153,-0.568436,-0.583482,-0.416315,-0.586520
19237,-1.487498,-0.727234,-2.244749,-1.820452,-1.600026,-1.440116,-2.471889,-1.335819,0.571927,-0.459150,-0.478897,-0.808484,-1.023038,-1.265061,-1.141023


In [None]:
X_test = feature_subset22
X_test

Unnamed: 0,movement_reactions,skill_dribbling,passing,potential,dribbling,attacking_short_passing,physic,skill_long_passing,movement_agility,skill_moves,shooting,skill_ball_control,mentality_vision,weight_kg,attacking_crossing
0,3.599846,2.147538,3.548135,3.601780,3.564477,2.217457,0.019139,2.524112,1.850364,2.146241,2.992852,2.252136,3.006228,-0.416315,1.964190
1,3.489252,1.561936,2.284233,3.437470,2.575529,1.803392,1.860333,1.126551,0.908358,2.146241,2.992852,1.772039,1.980597,0.856805,1.187887
2,3.599846,1.721646,2.389558,3.273160,2.795295,1.458338,1.102194,1.592404,1.513933,3.448937,3.143798,1.772039,1.614300,1.139720,2.075090
3,3.046874,2.094301,3.021509,3.273160,3.454594,1.872403,-0.197472,1.858606,2.186795,3.448937,2.313597,2.192124,2.639931,-0.982145,1.964190
4,3.268063,1.721646,3.758786,3.273160,2.795295,2.424490,1.427111,2.657213,1.042930,2.146241,2.540015,1.952075,2.932969,-0.699230,2.463242
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19234,-0.934526,-0.567524,-1.191498,-3.134932,-1.600026,-0.611986,-1.713750,-0.337561,-0.571938,-0.459150,-1.309098,-0.568436,-0.803260,-1.547976,-0.198369
19235,-1.376903,-0.780470,-0.770197,-1.984762,-1.819792,-0.542976,-1.497139,-0.204460,0.033638,-0.459150,-1.007207,-0.988521,-0.363704,-0.699230,0.245233
19236,-1.708687,-0.461051,-1.296823,-2.642002,-1.490143,-0.680997,-1.388833,-0.271010,-0.370079,-0.459150,-1.158153,-0.568436,-0.583482,-0.416315,-0.586520
19237,-1.487498,-0.727234,-2.244749,-1.820452,-1.600026,-1.440116,-2.471889,-1.335819,0.571927,-0.459150,-0.478897,-0.808484,-1.023038,-1.265061,-1.141023


In [None]:
##testing our model with fifa 22 dataset
prediction = best_model.predict(X_test)

true_rating = testdf['overall'].values
rmse = mean_squared_error(true_rating, prediction, squared = False)
print("RMSE on the test set:", rmse)

RMSE on the test set: 1.5943751677395743


In [None]:
# pickling the model
import pickle
pickle_out = open("bestmodel.pkl", "wb")
pickle.dump(best_model, pickle_out)
pickle_out.close()