In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the FM24 CSV into a DataFrame and show the column labels as parsed.
df = pd.read_csv('../dataset/FM24.csv')
original_columns = list(df.columns)
print("Original columns:\n", original_columns)

Original columns:
 ['Name', 'Club', 'Nat', 'Height', 'Weight', 'Age', 'CA', 'PA', 'Acc', 'Agi', 'Bal', 'Jum', 'Nat.1', 'Pac', 'Sta', 'Str', 'Agg', 'Ant', 'Bra', 'Cmp', 'Cnt', 'Dec', 'Det', 'Fla', 'Ldr', 'OtB', 'Pos', 'Tea', 'Vis', 'Wor', 'Cor', 'Cro', 'Dri', 'Fin', 'Fir', 'Fre', 'Hea', 'Lon', 'L Th', 'Mar', 'Pas', 'Pen', 'Tck', 'Tec', 'Acc.1', 'Ant.1', 'Hea.1', 'Jum.1', 'Mar.1', 'Pac.1', 'Pos.1', 'Sta.1', 'Str.1', 'Tck.1', 'Cro.1', 'Dri.1', 'Fre.1', 'Fin.1', 'Fir.1', 'Fla.1', 'Lon.1', 'OtB.1', 'Pas.1', 'Vis.1']


In [3]:
# Keep only the first occurrence of each column label.
# NOTE(review): in the recorded run the column list printed after this step is
# identical to the one printed before it (repeated headers already carry a
# ".1" suffix), so nothing was dropped — confirm whether the ".1" columns
# need explicit handling.
is_repeated_label = df.columns.duplicated()
df = df.loc[:, ~is_repeated_label]
remaining_columns = df.columns.tolist()
print("\nColumns after removing duplicates:\n", remaining_columns)


Columns after removing duplicates:
 ['Name', 'Club', 'Nat', 'Height', 'Weight', 'Age', 'CA', 'PA', 'Acc', 'Agi', 'Bal', 'Jum', 'Nat.1', 'Pac', 'Sta', 'Str', 'Agg', 'Ant', 'Bra', 'Cmp', 'Cnt', 'Dec', 'Det', 'Fla', 'Ldr', 'OtB', 'Pos', 'Tea', 'Vis', 'Wor', 'Cor', 'Cro', 'Dri', 'Fin', 'Fir', 'Fre', 'Hea', 'Lon', 'L Th', 'Mar', 'Pas', 'Pen', 'Tck', 'Tec', 'Acc.1', 'Ant.1', 'Hea.1', 'Jum.1', 'Mar.1', 'Pac.1', 'Pos.1', 'Sta.1', 'Str.1', 'Tck.1', 'Cro.1', 'Dri.1', 'Fre.1', 'Fin.1', 'Fir.1', 'Fla.1', 'Lon.1', 'OtB.1', 'Pas.1', 'Vis.1']


In [4]:
# Each engineered feature is the row-wise mean (pandas default: NaNs skipped)
# of a group of attribute columns. The mapping keeps the column groups in one
# place; insertion order determines the order in which columns are added to df.
feature_groups = {
    "feat_PassingTechnique": ["Pas", "Tec", "Fir", "Cmp"],
    "feat_CreativityVision": ["Vis", "Fla", "Dec", "OtB"],
    "feat_BallRetention": ["Cnt", "Bal", "Tec", "Fir"],
    "feat_DefensiveWork": ["Tck", "Pos", "Wor", "Agg", "Sta"],
    "feat_PhysicalPower": ["Str", "Hea", "Jum"],
    "feat_Mobility": ["Pac", "Acc", "Sta", "Agi"],
    "feat_ProgressivePassing": ["Pas", "Lon", "Vis"],
    "feat_FirstContact": ["Fir", "Tec", "Bal"],
}
for feature_name, attribute_columns in feature_groups.items():
    df[feature_name] = df[attribute_columns].mean(axis=1)

In [5]:
# Names of the engineered columns to carry forward, in output column order.
engineered_features = [
    "feat_PassingTechnique", "feat_CreativityVision",
    "feat_BallRetention", "feat_DefensiveWork",
    "feat_PhysicalPower", "feat_Mobility",
    "feat_ProgressivePassing", "feat_FirstContact",
]

# Take an independent copy so later transformations do not modify df.
X_engineered = df.loc[:, engineered_features].copy()

In [8]:
# Step 1 - per-row standardisation: centre every row on its own mean and divide
# by its own standard deviation (pandas default, ddof=1), computed across the
# engineered feature columns of that row.
row_mean = X_engineered.mean(axis=1)
row_std = X_engineered.std(axis=1)

# A row whose features are all equal has zero spread; dividing by it would give
# 0/0 = NaN, which StandardScaler passes through silently into the output.
# Dividing by 1 instead turns such a row into all zeros (its numerator is 0).
safe_row_std = row_std.mask(row_std == 0, 1.0)

# Index-aligned row operations (axis=0) replace the manual reshape(-1, 1)
# broadcasting and yield the same values wherever the row std is non-zero.
X_norm = X_engineered.sub(row_mean, axis=0).div(safe_row_std, axis=0)

# Step 2 - per-column standardisation across all rows.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_norm)

In [9]:
# Wrap the scaled array in a DataFrame, reusing the engineered column labels.
feature_labels = X_engineered.columns
X_scaled_df = pd.DataFrame(data=X_scaled, columns=feature_labels)

# Preview the first five rows.
X_scaled_df.head(5)

Unnamed: 0,feat_PassingTechnique,feat_CreativityVision,feat_BallRetention,feat_DefensiveWork,feat_PhysicalPower,feat_Mobility,feat_ProgressivePassing,feat_FirstContact
0,0.833243,1.057974,0.075982,-2.044111,1.341256,-1.828282,1.37095,-0.392376
1,1.352185,1.512531,-0.352665,0.109783,-0.723748,-0.996659,-0.163631,-0.253686
2,0.969104,-0.058356,-0.270798,0.477081,-0.690986,-1.115637,1.381522,-0.427088
3,0.248117,0.602962,0.318124,-1.625923,-0.334989,0.252506,1.008431,0.358131
4,-0.531498,0.567313,-0.362647,0.940287,-0.980209,-0.262313,0.434623,0.044543


In [10]:
# Persist the scaled features; index=False leaves the row index out of the
# file, so rows are identified only by their position.
output_path = '../dataset/FM24_engineered_features.csv'
X_scaled_df.to_csv(output_path, index=False)