In [1]:
import pandas as pd

# Load the dataset
data_path = '../data/processed/elements_processed.csv'
midfielders = pd.read_csv(data_path)

# Assume 'position' or a similar column exists that helps segment midfielders
# Example segmentation based on 'position' or any other metric
# You can also define custom criteria if 'position' is not detailed enough
midfielders['role'] = midfielders['position'].apply(lambda x: 'attacking' if x in ['Attacking Midfielder', 'Winger'] else 'defensive')

# Split into attacking and defensive midfielders
attacking_midfielders = midfielders[midfielders['role'] == 'attacking']
defensive_midfielders = midfielders[midfielders['role'] == 'defensive']

print(f"Number of attacking midfielders: {len(attacking_midfielders)}")
print(f"Number of defensive midfielders: {len(defensive_midfielders)}")


Number of attacking midfielders: 0
Number of defensive midfielders: 601


In [5]:
# ... existing code ...

# Import necessary libraries for ML
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler

# Assume we have features that are relevant for prediction
features = ['minutes', 'goals_scored', 'assists', 'clean_sheets', 'influence', 'creativity', 'threat']

# Prepare the data
X = midfielders[features]
y = midfielders['total_points']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a Random Forest model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Predict next week's performance for all midfielders
X_all_scaled = scaler.transform(X)
midfielders['predicted_points'] = model.predict(X_all_scaled)

# Select top 4 midfielders based on predicted points
top_4_midfielders = midfielders.sort_values('predicted_points', ascending=False).head(4)

print("\nTop 4 Midfielders based on ML predictions:")
print(top_4_midfielders[['web_name', 'team', 'predicted_points', 'role']])

# Optional: Split by role
top_2_attacking = attacking_midfielders.sort_values('predicted_points', ascending=False).head(2)
top_2_defensive = defensive_midfielders.sort_values('predicted_points', ascending=False).head(2)

print("\nTop 2 Attacking Midfielders:")
print(top_2_attacking[['web_name', 'team', 'predicted_points']])
print("\nTop 2 Defensive Midfielders:")
print(top_2_defensive[['web_name', 'team', 'predicted_points']])


Top 4 Midfielders based on ML predictions:
      web_name  team  predicted_points       role
523   Maddison    18             11.16  defensive
552  L.PaquetÃ¡    19             10.87  defensive
452    Murillo    16             10.60  defensive
282      Delap    10             10.43  defensive


KeyError: 'predicted_points'