In [1]:
# Useful imports
import pandas as pd
import scipy.stats as stats
import numpy as np

# Setup

In [2]:
# Read the players CSV, restricted to the columns this notebook uses
male_players_df = pd.read_csv(
    '../data/male_players.csv',
    usecols=[
        'player_id',
        'fifa_version',
        'fifa_update',
        'short_name',
        'overall',
        'age',
        'club_team_id',
        'player_face_url',
    ],
)

In [3]:
# Read the teams CSV, restricted to the columns this notebook uses
male_teams_df = pd.read_csv(
    '../data/male_teams.csv',
    usecols=[
        'team_id',
        'team_name',
        'fifa_version',
        'fifa_update',
    ],
)

In [4]:
# Preview the first rows of both raw frames, then report their row counts
display(male_players_df.head(), male_teams_df.head())

print("Male players length: ", male_players_df.shape[0])
print("Male teams length:", male_teams_df.shape[0])

Unnamed: 0,player_id,fifa_version,fifa_update,short_name,overall,age,club_team_id,player_face_url
0,158023,23,9,L. Messi,91,35,73.0,https://cdn.sofifa.net/players/158/023/23_120.png
1,165153,23,9,K. Benzema,91,34,243.0,https://cdn.sofifa.net/players/165/153/23_120.png
2,188545,23,9,R. Lewandowski,91,33,241.0,https://cdn.sofifa.net/players/188/545/23_120.png
3,192985,23,9,K. De Bruyne,91,31,10.0,https://cdn.sofifa.net/players/192/985/23_120.png
4,231747,23,9,K. Mbappé,91,23,73.0,https://cdn.sofifa.net/players/231/747/23_120.png


Unnamed: 0,team_id,fifa_version,fifa_update,team_name
0,10,23,9,Manchester City
1,73,23,9,Paris Saint Germain
2,243,23,9,Real Madrid
3,1337,23,9,Germany
4,5,23,9,Chelsea


Male players length:  10003590
Male teams length: 385055


In [5]:
# Sanitize the data: keep a single row per player (and per team) for each FIFA
# version. Sorting newest-first means keep='first' retains the row with the
# highest fifa_update within each version.
male_players_df = (
    male_players_df
    .sort_values(by=['fifa_version', 'fifa_update'], ascending=False)
    .drop_duplicates(subset=['player_id', 'fifa_version'], keep='first')
)

male_teams_df = (
    male_teams_df
    .sort_values(by=['fifa_version', 'fifa_update'], ascending=False)
    .drop_duplicates(subset=['team_id', 'fifa_version'], keep='first')
)

In [6]:
# Join every player row to its club's row for the same FIFA version.
# This is an inner join, so player rows without a matching team row are dropped.
merged_df = male_players_df.merge(
    male_teams_df,
    how='inner',
    left_on=['club_team_id', 'fifa_version'],
    right_on=['team_id', 'fifa_version'],
)

# Trim the result down to the columns used by the rest of the notebook
merged_df = merged_df.loc[
    :,
    [
        "player_id",
        "fifa_version",
        "short_name",
        "overall",
        "age",
        "team_id",
        "team_name",
        "player_face_url",
    ],
]

# Preview the merged frame and report its size
display(merged_df.head())
print("Merged dataframe length: ", len(merged_df))

Unnamed: 0,player_id,fifa_version,short_name,overall,age,team_id,team_name,player_face_url
0,158023,23,L. Messi,91,35,73,Paris Saint Germain,https://cdn.sofifa.net/players/158/023/23_120.png
1,231747,23,K. Mbappé,91,23,73,Paris Saint Germain,https://cdn.sofifa.net/players/231/747/23_120.png
2,190871,23,Neymar Jr,89,30,73,Paris Saint Germain,https://cdn.sofifa.net/players/190/871/23_120.png
3,193041,23,K. Navas,88,35,73,Paris Saint Germain,https://cdn.sofifa.net/players/193/041/23_120.png
4,207865,23,Marquinhos,88,28,73,Paris Saint Germain,https://cdn.sofifa.net/players/207/865/23_120.png


Merged dataframe length:  193801


# Naive approach

In [7]:
# Age bands used by the naive approach
young = merged_df['age'] <= 23
prime = merged_df['age'].between(24, 29)
veteran = merged_df['age'] >= 30
overall = merged_df['overall']

# Flag each row against the 90th percentile of `overall` within its own age band:
# rising stars and peak players sit at or above it, falling stars sit below it.
merged_df['rising_star'] = young & (overall >= overall[young].quantile(0.90))
merged_df['peak_player'] = prime & (overall >= overall[prime].quantile(0.90))
merged_df['falling_star'] = veteran & (overall < overall[veteran].quantile(0.90))

# Rank clubs by the share of their rows carrying each flag (mean of the boolean column)
by_team = merged_df.groupby('team_name')
club_rising_stars_rank = by_team['rising_star'].mean().sort_values(ascending=False)
club_peak_players_rank = by_team['peak_player'].mean().sort_values(ascending=False)
club_falling_stars_rank = by_team['falling_star'].mean().sort_values(ascending=False)

In [8]:
# Show the ten highest-ranked clubs for each category
display(
    club_rising_stars_rank.head(10),
    club_peak_players_rank.head(10),
    club_falling_stars_rank.head(10),
)

team_name
Salzburg               0.460177
Nice                   0.434783
Ajax                   0.390572
Avaí                   0.350000
PSV                    0.341463
Bayer 04 Leverkusen    0.339921
Lille                  0.338983
Milan                  0.338843
CSKA Moskva            0.333333
Fortaleza              0.329545
Name: rising_star, dtype: float64

team_name
Napoli                     0.449612
Sevilla FC                 0.421687
Benfica                    0.421488
Paris Saint Germain        0.406504
Atlético Madrid            0.397163
Sevilla                    0.376923
Porto                      0.375000
FC Bayern München          0.357692
Borussia Dortmund          0.337793
Athletic Club de Bilbao    0.337662
Name: peak_player, dtype: float64

team_name
Paraná                   0.750000
Ceará Sporting Club      0.700000
12 de Octubre            0.533333
Sport Club do Recife     0.524590
América Mineiro          0.500000
Cienciano                0.488889
Qingdao Hainiu           0.475410
América Futebol Clube    0.475000
Altay                    0.468750
Shanghai Shenhua         0.462121
Name: falling_star, dtype: float64

# Advanced approach

### Find the team rankings

In [9]:
# Lower CDF bound of every class, ordered from highest to lowest so that the
# first bound a value reaches identifies its class. Built once instead of on
# every call, because the classifier is applied row by row.
_PLAYER_CLASS_BOUNDS = (
    (0.999, "world-top player"),
    (0.99, "top player"),
    (0.9, "great player"),
    (0.5, "above-average player"),
    (0.1, "below-average player"),
    (0, "bad player"),
)


def player_classifier(overall_score, mean, std):
    """Label a rating by where it falls in a normal distribution.

    The rating is converted to a cumulative probability with
    ``stats.norm.cdf(overall_score, loc=mean, scale=std)`` and mapped to the
    class with the highest lower bound that the probability reaches.

    Parameters
    ----------
    overall_score : scalar
        The rating to classify.
    mean : scalar
        Mean of the reference normal distribution.
    std : scalar
        Standard deviation of the reference normal distribution.

    Returns
    -------
    str
        One of "world-top player", "top player", "great player",
        "above-average player", "below-average player" or "bad player".

    Raises
    ------
    ValueError
        If the cumulative probability is NaN (for example a NaN rating or a
        NaN standard deviation), so no class bound can be compared against it.
    """
    percentile = stats.norm.cdf(overall_score, loc=mean, scale=std)

    for lower_bound, label in _PLAYER_CLASS_BOUNDS:
        if percentile >= lower_bound:
            return label

    # Only reachable when every comparison is False, i.e. the CDF value is NaN.
    raise ValueError(
        f"Cannot classify overall_score={overall_score!r} with "
        f"mean={mean!r} and std={std!r}: CDF value is {percentile!r}"
    )

# Mean and standard deviation of `overall` for each FIFA version, as a nested
# dict of the form {fifa_version: {'mean': ..., 'std': ...}}
stats_dict = (
    male_players_df
    .groupby(['fifa_version'])['overall']
    .agg(['mean', 'std'])
    .to_dict(orient='index')
)

def map_player_classifier(row):
    """Classify one player row against the statistics of its FIFA version.

    Reads ``row['fifa_version']`` to pick the matching entry of ``stats_dict``
    and passes ``row['overall']`` with that entry's mean and standard
    deviation to ``player_classifier``, returning the resulting label.
    """
    version_stats = stats_dict[row['fifa_version']]
    return player_classifier(row['overall'], version_stats['mean'], version_stats['std'])

In [10]:
# Apply the player classifier to a copy, so merged_df itself gains no new column
classified_player = merged_df.copy(deep=True)
classified_player['player_class'] = classified_player.apply(map_player_classifier, axis=1)

# Keep only the players that have played for at least two teams.
# transform('nunique') broadcasts each player's distinct-team count back onto
# that player's rows, so the mask keeps the same rows, in the same order, as a
# per-group filter would, without calling a Python function once per player.
classified_player = classified_player.loc[
    lambda frame: frame.groupby('player_id')['team_id'].transform('nunique') >= 2
]

# Keep only the players that have been classified as at least two different
# classes (evaluated on the rows that survived the previous step)
classified_player = classified_player.loc[
    lambda frame: frame.groupby('player_id')['player_class'].transform('nunique') >= 2
]

# Show the classified players
display(classified_player.head())

Unnamed: 0,player_id,fifa_version,short_name,overall,age,team_id,team_name,player_face_url,rising_star,peak_player,falling_star,player_class
1,231747,23,K. Mbappé,91,23,73,Paris Saint Germain,https://cdn.sofifa.net/players/231/747/23_120.png,True,False,False,world-top player
3,193041,23,K. Navas,88,35,73,Paris Saint Germain,https://cdn.sofifa.net/players/193/041/23_120.png,False,False,False,world-top player
5,230621,23,G. Donnarumma,88,23,73,Paris Saint Germain,https://cdn.sofifa.net/players/230/621/23_120.png,True,False,False,world-top player
7,155862,23,Sergio Ramos,84,36,73,Paris Saint Germain,https://cdn.sofifa.net/players/155/862/23_120.png,False,False,False,top player
8,235212,23,A. Hakimi,84,23,73,Paris Saint Germain,https://cdn.sofifa.net/players/235/212/23_120.png,True,False,False,top player


In [11]:
# Count, per club, the rows flagged in each category and order clubs by that count.
# Each result is indexed by (team_name, flag value); only True rows are kept.
# NOTE(review): these counts use the naive flags stored on merged_df, not
# classified_player['player_class'] -- confirm that this is intended.
club_rising_stars_classified = (
    merged_df.loc[merged_df["rising_star"]]
    .groupby('team_name')['rising_star']
    .value_counts()
    .sort_values(ascending=False)
)
club_peak_players_classified = (
    merged_df.loc[merged_df["peak_player"]]
    .groupby('team_name')['peak_player']
    .value_counts()
    .sort_values(ascending=False)
)
club_falling_stars_classified = (
    merged_df.loc[merged_df["falling_star"]]
    .groupby('team_name')['falling_star']
    .value_counts()
    .sort_values(ascending=False)
)

In [12]:
# Show the ten clubs with the highest count in each category
display(
    club_rising_stars_classified.head(10),
    club_peak_players_classified.head(10),
    club_falling_stars_classified.head(10),
)

team_name            rising_star
Ajax                 True           116
PSV                  True            98
Olympique Lyonnais   True            87
Sporting CP          True            87
Bayer 04 Leverkusen  True            86
RB Leipzig           True            80
FC Barcelona         True            77
Arsenal              True            73
Tottenham Hotspur    True            71
Feyenoord            True            70
Name: rising_star, dtype: int64

team_name          peak_player
Liverpool          True           121
Napoli             True           116
Chelsea            True           108
Manchester United  True           108
Tottenham Hotspur  True           106
West Ham United    True           105
Manchester City    True           103
Arsenal            True           103
Borussia Dortmund  True           101
Everton            True           100
Name: peak_player, dtype: int64

team_name                 falling_star
Deportivo Pasto           True            90
River Plate               True            90
Universidad Católica      True            88
Antalyaspor               True            79
Perth Glory               True            74
Rosario Central           True            73
Western Sydney Wanderers  True            72
AIK                       True            71
Real Salt Lake            True            70
Newell's Old Boys         True            66
Name: falling_star, dtype: int64

### Find the top 3 most relevant players per team

In [13]:
def _top_players_per_team(players, team_names, flag_column, output_columns, top_n=3):
    """Collect the best-rated distinct players of each team for one category.

    Parameters
    ----------
    players : pd.DataFrame
        Frame with 'team_name', 'player_id', 'overall', the boolean
        ``flag_column`` and every column listed in ``output_columns``.
    team_names : list of str
        Teams to process; the result keeps this order.
    flag_column : str
        Name of the boolean column marking rows that belong to the category.
    output_columns : list of str
        Columns to keep in the result, in this order.
    top_n : int, default 3
        Maximum number of players returned per team.

    Returns
    -------
    pd.DataFrame
        Up to ``top_n`` rows per team with a fresh 0..n-1 index; an empty
        frame with ``output_columns`` when ``team_names`` is empty.
    """
    selections = []
    for name in team_names:
        candidates = players[(players['team_name'] == name) & players[flag_column]]
        selections.append(
            candidates
            .sort_values('overall', ascending=False)
            # The same player can be flagged in several FIFA versions; after the
            # sort, keep='first' retains that player's highest-rated row.
            .drop_duplicates('player_id', keep='first')
            .head(top_n)
        )

    if not selections:
        return pd.DataFrame(columns=output_columns)

    # Concatenate once instead of growing a frame inside the loop
    return pd.concat(selections, ignore_index=True).loc[:, output_columns]


# Names of the 7 clubs with the most rising-star rows. The variable is called
# team_ids but holds team names (the 'team_name' index level).
# NOTE(review): this club list is chosen from the rising-star ranking only and
# is reused for the peak-player and falling-star tables -- confirm intended.
team_ids = club_rising_stars_classified.head(7).index.get_level_values('team_name').tolist()

# Keep only the relevant columns
columns = ["team_name", "short_name", "overall", "age", "fifa_version", "player_face_url"]

# Top 3 players per selected club in each category
rising_stars_df = _top_players_per_team(merged_df, team_ids, 'rising_star', columns)
peak_players_df = _top_players_per_team(merged_df, team_ids, 'peak_player', columns)
falling_stars_df = _top_players_per_team(merged_df, team_ids, 'falling_star', columns)

# Show the results
print("Rising stars:")
display(rising_stars_df)

print("Peak players:")
display(peak_players_df)

print("Falling stars:")
display(falling_stars_df)

Rising stars:


Unnamed: 0,team_name,short_name,overall,age,fifa_version,player_face_url
0,Ajax,M. de Ligt,85,18,19,https://cdn.sofifa.net/players/235/243/19_120.png
1,Ajax,A. Onana,85,23,20,https://cdn.sofifa.net/players/226/753/20_120.png
2,Ajax,F. de Jong,84,21,19,https://cdn.sofifa.net/players/228/702/19_120.png
3,PSV,H. Lozano,82,22,19,https://cdn.sofifa.net/players/221/992/19_120.png
4,PSV,S. Bergwijn,81,20,19,https://cdn.sofifa.net/players/225/953/19_120.png
5,PSV,C. Gakpo,81,22,22,https://cdn.sofifa.net/players/242/516/22_120.png
6,Olympique Lyonnais,N. Fekir,84,23,18,https://cdn.sofifa.net/players/216/594/18_120.png
7,Olympique Lyonnais,A. Lacazette,82,23,15,https://cdn.sofifa.net/players/193/301/15_120.png
8,Olympique Lyonnais,M. Dembélé,82,22,20,https://cdn.sofifa.net/players/211/591/20_120.png
9,Sporting CP,Bruno Fernandes,85,23,19,https://cdn.sofifa.net/players/212/198/19_120.png


Peak players:


Unnamed: 0,team_name,short_name,overall,age,fifa_version,player_face_url
0,Ajax,H. Ziyech,86,26,20,https://cdn.sofifa.net/players/208/670/20_120.png
1,Ajax,D. Tadić,84,29,19,https://cdn.sofifa.net/players/199/434/19_120.png
2,Ajax,A. Onana,84,24,21,https://cdn.sofifa.net/players/226/753/21_120.png
3,PSV,L. de Jong,82,27,19,https://cdn.sofifa.net/players/189/805/19_120.png
4,PSV,M. Götze,81,29,22,https://cdn.sofifa.net/players/192/318/22_120.png
5,PSV,A. Guardado,81,28,16,https://cdn.sofifa.net/players/171/897/16_120.png
6,Olympique Lyonnais,A. Lopes,85,27,19,https://cdn.sofifa.net/players/199/482/19_120.png
7,Olympique Lyonnais,A. Lacazette,85,25,17,https://cdn.sofifa.net/players/193/301/17_120.png
8,Olympique Lyonnais,M. Depay,84,25,20,https://cdn.sofifa.net/players/202/556/20_120.png
9,Sporting CP,Rui Patrício,83,28,17,https://cdn.sofifa.net/players/178/005/17_120.png


Falling stars:


Unnamed: 0,team_name,short_name,overall,age,fifa_version,player_face_url
0,Ajax,M. Stekelenburg,75,38,22,https://cdn.sofifa.net/players/002/147/22_120.png
1,Ajax,K. Huntelaar,75,35,20,https://cdn.sofifa.net/players/148/803/20_120.png
2,Ajax,J. Heitinga,73,31,16,https://cdn.sofifa.net/players/049/425/16_120.png
3,PSV,André Ramalho,75,30,23,https://cdn.sofifa.net/players/210/007/23_120.png
4,PSV,K. Mitroglou,75,31,20,https://cdn.sofifa.net/players/183/483/20_120.png
5,PSV,I. Afellay,74,33,20,https://cdn.sofifa.net/players/158/372/20_120.png
6,Olympique Lyonnais,D. Da Silva,75,33,22,https://cdn.sofifa.net/players/179/789/22_120.png
7,Olympique Lyonnais,J. Morel,75,31,16,https://cdn.sofifa.net/players/139/860/16_120.png
8,Olympique Lyonnais,H. Bedimo,75,30,15,https://cdn.sofifa.net/players/150/565/15_120.png
9,Sporting CP,I. Slimani,75,33,22,https://cdn.sofifa.net/players/217/699/22_120.png


In [14]:
# Write each top-players table to its own CSV file, without the index column
for csv_path, table in (
    ("../generated/rising_stars_players.csv", rising_stars_df),
    ("../generated/peak_players_players.csv", peak_players_df),
    ("../generated/falling_stars_players.csv", falling_stars_df),
):
    table.to_csv(csv_path, index=False)