In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import joblib


In [26]:
df = pd.read_csv('fifa_players.csv')
df.head()


Unnamed: 0,name,full_name,birth_date,age,height_cm,weight_kgs,positions,nationality,overall_rating,potential,...,long_shots,aggression,interceptions,positioning,vision,penalties,composure,marking,standing_tackle,sliding_tackle
0,L. Messi,Lionel Andrés Messi Cuccittini,6/24/1987,31,170.18,72.1,"CF,RW,ST",Argentina,94,94,...,94,48,22,94,94,75,96,33,28,26
1,C. Eriksen,Christian Dannemann Eriksen,2/14/1992,27,154.94,76.2,"CAM,RM,CM",Denmark,88,89,...,89,46,56,84,91,67,88,59,57,22
2,P. Pogba,Paul Pogba,3/15/1993,25,190.5,83.9,"CM,CAM",France,88,91,...,82,78,64,82,88,82,87,63,67,67
3,L. Insigne,Lorenzo Insigne,6/4/1991,27,162.56,59.0,"LW,ST",Italy,88,88,...,84,34,26,83,87,61,83,51,24,22
4,K. Koulibaly,Kalidou Koulibaly,6/20/1991,27,187.96,88.9,CB,Senegal,88,91,...,15,87,88,24,49,33,80,91,88,87


In [32]:
# Keep relevant columns only
cols = ['age', 'height_cm', 'weight_kgs', 'overall_rating', 'potential',
        'sprint_speed', 'short_passing', 'long_passing', 'dribbling', 'strength',
        'positions']

df = df[cols].dropna()

# Take only the first position listed
df['main_position'] = df['positions'].apply(lambda x: x.split(',')[0].strip())
df.drop(columns=['positions'], inplace=True)
df.head()


Unnamed: 0,age,height_cm,weight_kgs,overall_rating,potential,sprint_speed,short_passing,long_passing,dribbling,strength,main_position
0,31,170.18,72.1,94,94,86,92,89,97,66,CF
1,27,154.94,76.2,88,89,73,91,89,84,58,CAM
2,25,190.5,83.9,88,91,79,86,90,87,87,CM
3,27,162.56,59.0,88,88,86,85,78,90,44,LW
4,27,187.96,88.9,88,91,75,68,60,69,94,CB


In [33]:
le = LabelEncoder()
df['position_encoded'] = le.fit_transform(df['main_position'])

# Save label encoder for future decoding
joblib.dump(le, "label_encoder.pkl")

['label_encoder.pkl']

In [34]:
# Split data
X = df.drop(columns=['main_position', 'position_encoded'])
y = df['position_encoded']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [35]:
# Train Random Forest model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Save
joblib.dump(clf, "player_position_model.pkl")


['player_position_model.pkl']

In [36]:
# Evaluate
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=le.classes_))


Accuracy: 0.5655806182121972
              precision    recall  f1-score   support

         CAM       0.35      0.32      0.33       210
          CB       0.81      0.86      0.84       651
         CDM       0.42      0.34      0.38       269
          CF       0.00      0.00      0.00        19
          CM       0.52      0.75      0.62       442
          GK       0.98      0.99      0.99       405
          LB       0.29      0.19      0.23       267
          LM       0.21      0.24      0.22       199
          LW       0.06      0.02      0.02        65
         LWB       0.00      0.00      0.00        10
          RB       0.28      0.24      0.26       262
          RM       0.17      0.14      0.15       188
          RW       0.14      0.03      0.05        73
         RWB       0.00      0.00      0.00        10
          ST       0.60      0.75      0.67       521

    accuracy                           0.57      3591
   macro avg       0.32      0.32      0.32      35

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [43]:
# Predict a new player
new_player = pd.DataFrame([{
    'age': 23,
    'height_cm': 180,
    'weight_kgs': 74,
    'overall_rating': 90,
    'potential': 95,
    'sprint_speed': 42,
    'short_passing': 75,
    'long_passing': 77,
    'dribbling': 90,
    'strength': 70
}])

model = joblib.load("player_position_model.pkl")
encoder = joblib.load("label_encoder.pkl")

prediction = model.predict(new_player)[0]
print("Predicted Position:", encoder.inverse_transform([prediction])[0])


Predicted Position: CM
