In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the cleaned player dataset; every later cell derives from this frame.
df = pd.read_csv(filepath_or_buffer='../dataset/cleaned_cm_top5.csv')

In [12]:
# Frequency of each Best_Role label, most common first.
df.loc[:, 'Best_Role'].value_counts()

Best_Role
Central Midfielder         398
Ball Winning Midfielder    250
Mezzala                    221
Advanced Playmaker         157
Deep Lying Playmaker       124
Inverted Winger             77
Attacking Midfielder        48
Box To Box Midfielder       48
Defensive Midfielder        36
Winger                      23
Advanced Forward            23
Carrilero                   22
Shadow Striker              16
Deep Lying Forward          12
Central Defender            12
Pressing Forward            10
Poacher                     10
Full-Back                    8
Ball Playing Defender        7
Enganche                     6
Segundo Volante              6
Wing-Back                    6
Anchor                       5
Complete Wing-Back           4
Inside Forward               3
Complete Forward             3
Target Forward               3
False Nine                   2
No-Nonsense Centre-Back      2
Regista                      1
Half Back                    1
Defensive Winger             

In [3]:
# Ordinal encoding of the textual foot-strength labels (1 = weakest, 6 = strongest).
foot_strength = {
    'Very Weak': 1,
    'Weak': 2,
    'Reasonable': 3,
    'Fairly Strong': 4,
    'Strong': 5,
    'Very Strong': 6
}

# Series.map yields NaN for any label that is not a key of `foot_strength`.
df['Right_Foot_Strength'] = df['Right_Foot'].map(foot_strength)
df['Left_Foot_Strength'] = df['Left_Foot'].map(foot_strength)

# Fail fast instead of letting NaN leak into the summed feature below.
assert df[['Right_Foot_Strength', 'Left_Foot_Strength']].notna().all().all(), (
    "Right_Foot/Left_Foot contain a missing label or one absent from foot_strength"
)

# Combined two-footedness score (range 2-12 given the encoding above).
df['Foot_Total_Strength'] = df['Right_Foot_Strength'] + df['Left_Foot_Strength']

# Preview the derived columns next to the raw right-foot label.
df[['Right_Foot', 'Right_Foot_Strength', 'Left_Foot_Strength', 'Foot_Total_Strength']].head()

Unnamed: 0,Right_Foot,Right_Foot_Strength,Left_Foot_Strength,Foot_Total_Strength
0,Very Strong,6,4,10
1,Very Strong,6,3,9
2,Very Strong,6,4,10
3,Very Strong,6,3,9
4,Very Strong,6,5,11


In [5]:
# List every column currently on `df`, including the foot-strength columns derived above.
df.columns

Index(['UID', 'Inf', 'Name', 'DoB', 'Nat', 'Division', 'Club', 'Based',
       'Preferred_Foot', 'Right_Foot', 'Left_Foot', 'Position', 'Height',
       'Weight', 'Age', 'Transfer_Value', 'Wage', 'AT_Apps', 'AT_Gls', 'Team',
       'Caps', 'Yth_Apps', 'Style', 'Rc_Injury', 'Best_Role', 'Best_Duty',
       'Best_Pos', 'Acc', 'Aer', 'Agg', 'Agi', 'Ant', 'Bal', 'Bra', 'Cmd',
       'Com', 'Cmp', 'Cnt', 'Cor', 'Cro', 'Dec', 'Det', 'Dri', 'Ecc', 'Fin',
       'Fir', 'Fla', 'Fre', 'Han', 'Hea', 'Jum', 'Kic', 'Ldr', 'Lon', 'L_Th',
       'Mar', 'Nat_.1', 'OtB', '1v1', 'Pac', 'Pas', 'Pen', 'Pos', 'Pun', 'Ref',
       'TRO', 'Sta', 'Str', 'Tck', 'Tea', 'Tec', 'Thr', 'Vis', 'Wor',
       'Right_Foot_Strength', 'Left_Foot_Strength', 'Foot_Total_Strength'],
      dtype='object')

In [6]:
# Signed footedness scale: positive values are left-footed, negative values are
# right-footed, and the magnitude grows with how exclusive the preference is.
foot_preference_mapping = {
    "Left Only": 2, "Left": 1,
    "Either": 0,
    "Right": -1, "Right Only": -2,
}

# Labels that are not keys of the mapping become NaN in the new column.
df["Preferred_Foot_Num"] = df.loc[:, "Preferred_Foot"].map(foot_preference_mapping)

In [7]:
# Columns kept as model features; the order here is the column order of `feature_df`.
feature_cols = [
    # Physical profile (Height and Weight are still unit-suffixed strings here).
    'Height', 'Weight', 'Age',
    # Player attribute ratings.
    'Acc', 'Agg', 'Agi', 'Ant', 'Bal', 'Bra', 'Cmp', 'Cnt', 'Cor', 'Cro',
    'Dec', 'Det', 'Dri', 'Fin', 'Fir', 'Fla', 'Fre', 'Hea', 'Ldr', 'Lon',
    'L_Th', 'Mar', 'Nat_.1', 'OtB', 'Pac', 'Pas', 'Pen', 'Pos', 'Sta', 'Str',
    'Tck', 'Tea', 'Tec', 'Vis', 'Wor',
    # Features engineered in earlier cells.
    'Foot_Total_Strength', 'Preferred_Foot_Num',
]

# Independent copy so later edits to the features never touch `df`.
feature_df = df.loc[:, feature_cols].copy()
feature_df.head()

Unnamed: 0,Height,Weight,Age,Acc,Agg,Agi,Ant,Bal,Bra,Cmp,...,Pos,Sta,Str,Tck,Tea,Tec,Vis,Wor,Foot_Total_Strength,Preferred_Foot_Num
0,177 cm,75 kg,27,13,15,12,15,13,15,17,...,13,20,11,12,17,16,18,19,10,-1
1,189 cm,82 kg,27,12,14,11,17,15,16,16,...,13,17,17,15,17,15,15,18,9,-1
2,183 cm,70 kg,19,15,12,17,14,15,12,13,...,11,12,9,12,15,18,15,15,10,-1
3,186 cm,75 kg,18,15,17,15,14,14,18,15,...,10,16,11,13,15,16,13,17,9,-1
4,177 cm,73 kg,31,14,13,16,15,12,9,13,...,7,16,11,7,11,15,15,15,11,0


In [8]:
# Convert "177 cm" / "75 kg" style strings into plain floats.
# The values are parsed from the untouched `df` columns rather than from
# `feature_df` itself: once `feature_df["Height"]` holds floats, calling `.str`
# on it raises AttributeError, so reading from `df` keeps this cell re-runnable.
# Assignment aligns on the index that `feature_df` carries over from `df`.
feature_df["Height"] = df["Height"].str.replace(" cm", "", regex=False).astype(float)
feature_df["Weight"] = df["Weight"].str.replace(" kg", "", regex=False).astype(float)

# Confirm that every feature column is now numeric and non-null.
feature_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1545 entries, 0 to 1544
Data columns (total 40 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Height               1545 non-null   float64
 1   Weight               1545 non-null   float64
 2   Age                  1545 non-null   int64  
 3   Acc                  1545 non-null   int64  
 4   Agg                  1545 non-null   int64  
 5   Agi                  1545 non-null   int64  
 6   Ant                  1545 non-null   int64  
 7   Bal                  1545 non-null   int64  
 8   Bra                  1545 non-null   int64  
 9   Cmp                  1545 non-null   int64  
 10  Cnt                  1545 non-null   int64  
 11  Cor                  1545 non-null   int64  
 12  Cro                  1545 non-null   int64  
 13  Dec                  1545 non-null   int64  
 14  Det                  1545 non-null   int64  
 15  Dri                  1545 non-null   i

In [9]:
# Learn per-column mean and standard deviation from the feature matrix, then
# standardise that same matrix with the fitted scaler.
# NOTE(review): no cell visible here consumes `X_scaled` - confirm whether it is
# meant to be persisted or used downstream.
scaler = StandardScaler().fit(feature_df)
X_scaled = scaler.transform(feature_df)

In [10]:
# Persist the feature table without the index column.
# NOTE(review): this writes `feature_df`, not the standardised `X_scaled` -
# confirm that the unscaled values are what downstream consumers expect.
feature_df.to_csv(path_or_buf='../dataset/final.csv', index=False)