In [8]:
import pandas as pd

# Compare the values in 'predictions_2016_youth.csv' against the values in
# 'dataset_2024_senior_players.csv': join the two tables, compute
# (actual - predicted) for each metric, save the joined table, and print the
# mean absolute difference per metric.

# Read both inputs.
predictions_2016 = pd.read_csv('predictions_2016_youth.csv')
actuals_2024 = pd.read_csv('dataset_2024_senior_players.csv')

# Force the metric columns to numeric dtypes. With errors='coerce', any value
# that cannot be parsed becomes NaN instead of raising.
_NUMERIC_COLUMNS = (
    (predictions_2016, ('appearances_pred', 'goals_pred', 'tier_pred')),
    (actuals_2024, ('appearances', 'goals', 'tier_quality')),
)
for _frame, _columns in _NUMERIC_COLUMNS:
    for _column in _columns:
        _frame[_column] = pd.to_numeric(_frame[_column], errors='coerce')

# Inner join (the pandas default, spelled out here): only rows whose
# 'name' and 'date_of_birth' appear in both tables are kept.
merged_df = predictions_2016.merge(
    actuals_2024,
    how='inner',
    on=['name', 'date_of_birth'],
)

# One "<metric>_difference" column per metric, defined as actual - predicted.
_DIFFERENCES = (
    ('appearances_difference', 'appearances', 'appearances_pred'),
    ('goals_difference', 'goals', 'goals_pred'),
    ('tier_difference', 'tier_quality', 'tier_pred'),
)
for _target, _actual, _predicted in _DIFFERENCES:
    merged_df[_target] = merged_df[_actual].sub(merged_df[_predicted])

# Persist the joined table, including the difference columns, without the
# DataFrame index.
comparison_file = 'comparison_results.csv'
merged_df.to_csv(comparison_file, index=False)

# Mean absolute error per metric. Series.mean() skips NaN by default, so rows
# where either side failed numeric coercion do not contribute.
mae_appearances = abs(merged_df['appearances_difference']).mean()
mae_goals = abs(merged_df['goals_difference']).mean()
mae_tier = abs(merged_df['tier_difference']).mean()

print(f"Mean Absolute Error in Appearances Prediction: {mae_appearances}")
print(f"Mean Absolute Error in Goals Prediction: {mae_goals}")
print(f"Mean Absolute Error in Tier Prediction: {mae_tier}")


Mean Absolute Error in Appearances Prediction: 119.09633027522936
Mean Absolute Error in Goals Prediction: 11.545871559633028
Mean Absolute Error in Tier Prediction: 2.055045871559633
