In [26]:
# Useful imports
import pandas as pd
import scipy.stats as stats
import numpy as np

# Setup

In [3]:
# Load the players table, reading only the columns listed below.
PLAYER_COLUMNS = [
    'player_id', 'fifa_version', 'fifa_update',
    'short_name', 'overall', 'age', 'club_team_id',
]
male_players_df = pd.read_csv('data/male_players.csv', usecols=PLAYER_COLUMNS)

In [12]:
# Load the teams table, reading only the columns listed below.
TEAM_COLUMNS = ['team_id', 'team_name', 'fifa_version', 'fifa_update']
male_teams_df = pd.read_csv('data/male_teams.csv', usecols=TEAM_COLUMNS)

In [13]:
# Preview the first rows of each table (players first, then teams).
for frame in (male_players_df, male_teams_df):
    display(frame.head())

# Report how many rows each table currently holds.
print("Male players length: ", len(male_players_df))
print("Male teams length:", len(male_teams_df))

Unnamed: 0,player_id,fifa_version,fifa_update,short_name,overall,age,club_team_id
0,158023,23,9,L. Messi,91,35,73.0
1,165153,23,9,K. Benzema,91,34,243.0
2,188545,23,9,R. Lewandowski,91,33,241.0
3,192985,23,9,K. De Bruyne,91,31,10.0
4,231747,23,9,K. Mbappé,91,23,73.0


Unnamed: 0,team_id,fifa_version,fifa_update,team_name
0,10,23,9,Manchester City
1,73,23,9,Paris Saint Germain
2,243,23,9,Real Madrid
3,1337,23,9,Germany
4,5,23,9,Chelsea


Male players length:  196933
Male teams length: 385055


In [15]:
# Sanitize the data: keep a single row per entity and FIFA version.
def _latest_update_per_version(frame, id_column):
    """Return ``frame`` with one row per (``id_column``, ``fifa_version``) pair.

    Rows are first ordered by ``fifa_version`` and ``fifa_update``, both
    descending; for every pair only the first row in that ordering is kept.
    """
    newest_first = frame.sort_values(by=['fifa_version', 'fifa_update'], ascending=False)
    return newest_first.drop_duplicates(subset=[id_column, 'fifa_version'], keep='first')


male_players_df = _latest_update_per_version(male_players_df, 'player_id')
male_teams_df = _latest_update_per_version(male_teams_df, 'team_id')

In [19]:
# Merge the dataframes: a player row is paired with the team row whose team_id
# equals the player's club_team_id within the same fifa_version. The merge uses
# pandas' default join type, so rows without a matching key pair are not kept.
RELEVANT_COLUMNS = ["player_id", "fifa_version", "short_name", "overall", "age", "team_id", "team_name"]

merged_df = (
    male_players_df
    .merge(
        male_teams_df,
        left_on=['club_team_id', 'fifa_version'],
        right_on=['team_id', 'fifa_version'],
    )
    # Keep only the relevant columns
    .loc[:, RELEVANT_COLUMNS]
)

# Show the merged dataframe
display(merged_df.head())
print("Merged dataframe length: ", len(merged_df))

Unnamed: 0,player_id,fifa_version,short_name,overall,age,team_id,team_name
0,158023,23,L. Messi,91,35,73,Paris Saint Germain
1,231747,23,K. Mbappé,91,23,73,Paris Saint Germain
2,190871,23,Neymar Jr,89,30,73,Paris Saint Germain
3,193041,23,K. Navas,88,35,73,Paris Saint Germain
4,207865,23,Marquinhos,88,28,73,Paris Saint Germain


Merged dataframe length:  193801


# First approach (naive)

In [20]:
# Identify rising stars, peak players, and falling stars.
# Each flag combines an age band with a comparison of the player's overall
# rating against the 90th percentile of overall ratings inside that same band.
TOP_DECILE = 0.90
age = merged_df['age']
overall = merged_df['overall']

is_young = age <= 23
is_prime = age.between(24, 29)
is_veteran = age >= 30

merged_df['rising_star'] = is_young & (overall >= overall[is_young].quantile(TOP_DECILE))
merged_df['peak_player'] = is_prime & (overall >= overall[is_prime].quantile(TOP_DECILE))
# NOTE(review): this flag keeps 30+ players *below* their band's 90th percentile,
# whereas the two flags above keep players at or above it -- confirm this is the
# intended definition of a "falling star".
merged_df['falling_star'] = is_veteran & (overall < overall[is_veteran].quantile(TOP_DECILE))


def _club_share(flag):
    """Return, per team_name, the mean of boolean column ``flag``, highest first."""
    return merged_df.groupby('team_name')[flag].mean().sort_values(ascending=False)


# Rank clubs by the share (mean of the boolean flag) of their player rows in each category.
club_rising_stars_rank = _club_share('rising_star')
club_peak_players_rank = _club_share('peak_player')
club_falling_stars_rank = _club_share('falling_star')

In [23]:
# Show the ten highest-ranked clubs for each category, in the order
# rising stars, peak players, falling stars.
for ranking in (club_rising_stars_rank, club_peak_players_rank, club_falling_stars_rank):
    display(ranking.head(10))

team_name
Salzburg               0.460177
Nice                   0.434783
Ajax                   0.390572
Avaí                   0.350000
PSV                    0.341463
Bayer 04 Leverkusen    0.339921
Lille                  0.338983
Milan                  0.338843
CSKA Moskva            0.333333
Fortaleza              0.329545
Name: rising_star, dtype: float64

team_name
Napoli                     0.449612
Sevilla FC                 0.421687
Benfica                    0.421488
Paris Saint Germain        0.406504
Atlético Madrid            0.397163
Sevilla                    0.376923
Porto                      0.375000
FC Bayern München          0.357692
Borussia Dortmund          0.337793
Athletic Club de Bilbao    0.337662
Name: peak_player, dtype: float64

team_name
Paraná                   0.750000
Ceará Sporting Club      0.700000
12 de Octubre            0.533333
Sport Club do Recife     0.524590
América Mineiro          0.500000
Cienciano                0.488889
Qingdao Hainiu           0.475410
América Futebol Clube    0.475000
Altay                    0.468750
Shanghai Shenhua         0.462121
Name: falling_star, dtype: float64

# Second approach (advanced)

In [25]:
# (lower percentile bound, label) pairs, ordered from the highest bound to the
# lowest so the first bound a percentile reaches is the tightest one.
# Built once at definition time instead of on every call.
_PLAYER_CLASS_THRESHOLDS = (
    (0.999, "world-top player"),
    (0.99, "top player"),
    (0.9, "great player"),
    (0.5, "above-average player"),
    (0.1, "below-average player"),
    (0, "bad player"),
)


def player_classifier(overall_score, mean, std):
    """Label a player from the normal-CDF percentile of their overall score.

    The percentile is ``stats.norm.cdf(overall_score, loc=mean, scale=std)``.
    The returned label belongs to the largest lower bound in
    ``_PLAYER_CLASS_THRESHOLDS`` that the percentile is greater than or equal to.

    Parameters
    ----------
    overall_score : scalar
        The player's overall rating.
    mean : scalar
        Mean of the reference rating distribution.
    std : scalar
        Standard deviation of the reference rating distribution.

    Returns
    -------
    str
        One of the labels in ``_PLAYER_CLASS_THRESHOLDS``.

    Raises
    ------
    ValueError
        If the percentile is NaN (for example a NaN score or mean, or a NaN,
        zero or negative ``std``), because NaN compares false against every bound.
    """
    percentile = stats.norm.cdf(overall_score, loc=mean, scale=std)

    for lower_bound, label in _PLAYER_CLASS_THRESHOLDS:
        if percentile >= lower_bound:
            return label

    # Only reachable when every comparison above was false, i.e. the percentile is NaN.
    raise ValueError(
        "cannot classify player: percentile is not a number "
        f"(overall_score={overall_score!r}, mean={mean!r}, std={std!r})"
    )

# Per-fifa_version mean and standard deviation of `overall`, computed from
# male_players_df and stored as {fifa_version: {'mean': ..., 'std': ...}}.
stats_dict = (
    male_players_df
    .groupby(['fifa_version'])['overall']
    .agg(['mean', 'std'])
    .to_dict(orient='index')
)

def map_player_classifier(row):
    """Classify one row using the mean/std stored for its ``fifa_version``.

    ``row`` must provide ``'fifa_version'`` and ``'overall'``; the version is
    looked up in the module-level ``stats_dict`` and the resulting mean and
    std are passed, with the row's overall score, to ``player_classifier``.
    """
    version_stats = stats_dict[row['fifa_version']]
    return player_classifier(row['overall'], version_stats['mean'], version_stats['std'])

In [42]:
# Apply the player classifier row by row and store the labels on a deep copy,
# so merged_df itself does not gain the player_class column.
player_classes = merged_df.apply(map_player_classifier, axis=1)
classified_player = merged_df.copy(deep=True).assign(player_class=player_classes)

# Show the classified players
display(classified_player.head())

Unnamed: 0,player_id,fifa_version,short_name,overall,age,team_id,team_name,rising_star,peak_player,falling_star,player_class
0,158023,23,L. Messi,91,35,73,Paris Saint Germain,False,False,False,world-top player
1,231747,23,K. Mbappé,91,23,73,Paris Saint Germain,True,False,False,world-top player
2,190871,23,Neymar Jr,89,30,73,Paris Saint Germain,False,False,False,world-top player
3,193041,23,K. Navas,88,35,73,Paris Saint Germain,False,False,False,world-top player
4,207865,23,Marquinhos,88,28,73,Paris Saint Germain,False,True,False,world-top player


In [43]:
# NOTE(review): these rankings are built from merged_df's age-band flags; the
# player_class column on classified_player is not used here -- confirm that
# this is intended for the "second approach".
def _top_clubs_by_flag(flag, limit=10):
    """Count rows where boolean column ``flag`` is True per team_name; return the ``limit`` largest counts."""
    flagged_rows = merged_df[merged_df[flag] == True]
    counts = flagged_rows.groupby('team_name')[flag].value_counts()
    return counts.sort_values(ascending=False).head(limit)


club_rising_stars_classified = _top_clubs_by_flag('rising_star')
club_peak_players_classified = _top_clubs_by_flag('peak_player')
club_falling_stars_classified = _top_clubs_by_flag('falling_star')

In [44]:
# Show the rankings (up to ten clubs each), in the order
# rising stars, peak players, falling stars.
for ranking in (
    club_rising_stars_classified,
    club_peak_players_classified,
    club_falling_stars_classified,
):
    display(ranking.head(10))

team_name            rising_star
Ajax                 True           116
PSV                  True            98
Olympique Lyonnais   True            87
Sporting CP          True            87
Bayer 04 Leverkusen  True            86
RB Leipzig           True            80
FC Barcelona         True            77
Arsenal              True            73
Tottenham Hotspur    True            71
Feyenoord            True            70
Name: rising_star, dtype: int64

team_name          peak_player
Liverpool          True           121
Napoli             True           116
Chelsea            True           108
Manchester United  True           108
Tottenham Hotspur  True           106
West Ham United    True           105
Manchester City    True           103
Arsenal            True           103
Borussia Dortmund  True           101
Everton            True           100
Name: peak_player, dtype: int64

team_name                 falling_star
Deportivo Pasto           True            90
River Plate               True            90
Universidad Católica      True            88
Antalyaspor               True            79
Perth Glory               True            74
Rosario Central           True            73
Western Sydney Wanderers  True            72
AIK                       True            71
Real Salt Lake            True            70
Newell's Old Boys         True            66
Name: falling_star, dtype: int64