In [30]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import joblib

# Load the cleaned dataset of all players
df = pd.read_csv('all_players_cleaned.csv')

In [31]:
X = df.select_dtypes(include=['int64', 'float64']).drop(['ID'], axis=1)
y = df['BestPosition']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [32]:
# model creation and training
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

y_pred = rf_classifier.predict(X_test)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_rep)

Accuracy: 0.72

Classification Report:
              precision    recall  f1-score   support

         CAM       0.60      0.82      0.69       435
          CB       0.87      0.96      0.91       711
         CDM       0.74      0.76      0.75       313
          CF       0.00      0.00      0.00        16
          CM       0.65      0.67      0.66       214
          GK       1.00      1.00      1.00       391
          LB       0.40      0.35      0.38       178
          LM       0.19      0.04      0.06       168
          LW       0.00      0.00      0.00        48
         LWB       0.35      0.16      0.22        68
          RB       0.40      0.44      0.42       197
          RM       0.49      0.54      0.51       313
          RW       0.50      0.05      0.10        55
         RWB       0.33      0.13      0.19        77
          ST       0.86      0.95      0.90       506

    accuracy                           0.72      3690
   macro avg       0.49      0.46      0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [33]:
# Save the trained model
joblib.dump(rf_classifier, 'best_position_model.joblib')

['best_position_model.joblib']

In [34]:
# Load the trained model
model = joblib.load('best_position_model.joblib')

# Define a new player and predict its best postion 
new_player = {
    'ID': 999999,
    'Name': 'Zakariae Bahari',
    'FullName': 'Zakariae Bahari',
    'Age': 21,
    'Height': 184,
    'Weight': 70,
    'PhotoUrl': 'http://example.com/photo.jpg',
    'Nationality': 'Moroccan',
    'Overall': 80,
    'Potential': 85,
    'Growth': 5,
    'TotalStats': 1500,
    'BaseStats': 800,
    'Positions': 'CM',
    'BestPosition': '',
    'Club': 'Sample FC',
    'ValueEUR': 30000000,
    'WageEUR': 50000,
    'ReleaseClause': 70000000,
    'ClubPosition': 'CM',
    'ContractUntil': 2026,
    'ClubNumber': 10,
    'ClubJoined': 2022,
    'OnLoad': False,
    'NationalTeam': 'England',
    'PreferredFoot': 'Right',
    'IntReputation': 4,
    'WeakFoot': 4,
    'SkillMoves': 3,
    'AttackingWorkRate': 'Medium',
    'DefensiveWorkRate': 'High',
    'PaceTotal': 85,
    'ShootingTotal': 75,
    'PassingTotal': 80,
    'DribblingTotal': 82,
    'DefendingTotal': 60,
    'PhysicalityTotal': 75,
    'Crossing': 78,
    'Finishing': 70,
    'HeadingAccuracy': 65,
    'ShortPassing': 83,
    'Volleys': 68,
    'Dribbling': 85,
    'Curve': 75,
    'FKAccuracy': 70,
    'LongPassing': 80,
    'BallControl': 84,
    'Acceleration': 88,
    'SprintSpeed': 82,
    'Agility': 85,
    'Reactions': 80,
    'Balance': 78,
    'ShotPower': 75,
    'Jumping': 70,
    'Stamina': 80,
    'Strength': 70,
    'LongShots': 72,
    'Aggression': 68,
    'Interceptions': 60,
    'Positioning': 75,
    'Vision': 80,
    'Penalties': 65,
    'Composure': 80,
    'Marking': 60,
    'StandingTackle': 65,
    'SlidingTackle': 60,
    'GKDiving': 10,
    'GKHandling': 12,
    'GKKicking': 14,
    'GKPositioning': 15,
    'GKReflexes': 18,
    'STRating': 0,
    'LWRating': 0,
    'LFRating': 0,
    'CFRating': 0,
    'RFRating': 0,
    'RWRating': 0,
    'CAMRating': 0,
    'LMRating': 0,
    'CMRating': 0,
    'RMRating': 0,
    'LWBRating': 0,
    'CDMRating': 0,
    'RWBRating': 0,
    'LBRating': 0,
    'CBRating': 0,
    'RBRating': 0,
    'GKRating': 0
}

new_player_df = pd.DataFrame([new_player])
numeric_features_new = new_player_df.select_dtypes(include=['int64', 'float64']).drop(['ID'], axis=1)
predictions_new = model.predict(numeric_features_new)

predicted_df = pd.DataFrame({'PlayerID': new_player_df['ID'], 'PredictedBestPosition': predictions_new})
print(predicted_df)

   PlayerID PredictedBestPosition
0    999999                    ST
