# Football Player Performance and Market Value Analysis

In [25]:
# Import required libraries
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime
from pathlib import Path

## Data Loading and Preprocessing

In [26]:
from football_data_loader import FootballDataLoader

football_data = FootballDataLoader('football-analytics/data')
print("\nAvailable datasets:")
for dataset in football_data.list_datasets():
    print(f"- {dataset}")

Loaded: game_lineups.csv
Loaded: competitions.csv
Loaded: appearances.csv
Loaded: player_valuations.csv
Loaded: game_events.csv
Loaded: players.csv
Loaded: games.csv
Loaded: club_games.csv
Loaded: clubs.csv

Available datasets:
- game_lineups
- competitions
- appearances
- player_valuations
- game_events
- players
- games
- club_games
- clubs


In [27]:
football_data.describe_datasets()


Dataset: game_lineups
Shape: (2191911, 10)
Columns:
- game_lineups_id: object
- date: object
- game_id: int64
- player_id: int64
- club_id: int64
- player_name: object
- type: object
- position: object
- number: object
- team_captain: int64

--------------------------------------------------

Dataset: competitions
Shape: (44, 11)
Columns:
- competition_id: object
- competition_code: object
- name: object
- sub_type: object
- type: object
- country_id: int64
- country_name: object
- domestic_league_code: object
- confederation: object
- url: object
- is_major_national_league: bool

--------------------------------------------------

Dataset: appearances
Shape: (1583632, 13)
Columns:
- appearance_id: object
- game_id: int64
- player_id: int64
- player_club_id: int64
- player_current_club_id: int64
- date: datetime64[ns]
- player_name: object
- competition_id: object
- yellow_cards: int64
- red_cards: int64
- goals: int64
- assists: int64
- minutes_played: int64

------------------------

In [28]:
# Access datasets
appearances = football_data.appearances
club_games = football_data.club_games
clubs = football_data.clubs
competitions = football_data.competitions
game_events = football_data.game_events
players = football_data.players
player_valuations = football_data.player_valuations

#game_lineups and games are unused

### Data cleaning & feature engineering

In [29]:
# # Adjustment of "PLAYERS" table

# Calculate mean height once
mean_height = players['height_in_cm'].mean()

# List of columns to drop
columns_to_drop = ['url', 'image_url', 'agent_name', 'last_season', 'current_club_id',
                   'player_code', 'country_of_birth', 'city_of_birth', 'current_club_name',
                   'current_club_domestic_competition_id', 'market_value_in_eur',
                   'highest_market_value_in_eur', 'contract_expiration_date',
                   'first_name', 'last_name']

# Apply all transformations in one go
players = (players
           .assign(height_in_cm=lambda x: pd.to_numeric(x['height_in_cm'].fillna(mean_height), downcast='integer'))
           .assign(foot=lambda x: x['foot'].fillna('both'))
           .drop(columns=columns_to_drop)
          )

print(players.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30518 entries, 0 to 30517
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   player_id               30518 non-null  int64         
 1   name                    30518 non-null  object        
 2   country_of_citizenship  29949 non-null  object        
 3   date_of_birth           30473 non-null  datetime64[ns]
 4   sub_position            30345 non-null  object        
 5   position                30518 non-null  object        
 6   foot                    30518 non-null  object        
 7   height_in_cm            30518 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int64(1), object(5)
memory usage: 1.9+ MB
None


In [30]:
# Join between "Players", "Appearances", "Clubs" and "Competitions"
players_appearances = (
    players.merge(
        appearances,
        on='player_id',
        how='inner',
        suffixes=('', '_appearance')
    )
    .drop(columns=[
        'player_current_club_id',
        'appearance_id',
        'name',
        'competition_id'
    ])
)

In [31]:
players_appearances.head(5)

Unnamed: 0,player_id,country_of_citizenship,date_of_birth,sub_position,position,foot,height_in_cm,game_id,player_club_id,date,player_name,yellow_cards,red_cards,goals,assists,minutes_played
0,10,Germany,1978-06-09,Centre-Forward,Attack,right,184.0,2254965,398,2012-08-23,Miroslav Klose,1,0,1,0,83
1,10,Germany,1978-06-09,Centre-Forward,Attack,right,184.0,2251263,398,2012-08-26,Miroslav Klose,1,0,0,0,90
2,10,Germany,1978-06-09,Centre-Forward,Attack,right,184.0,2251277,398,2012-09-02,Miroslav Klose,1,0,2,0,89
3,10,Germany,1978-06-09,Centre-Forward,Attack,right,184.0,2251283,398,2012-09-16,Miroslav Klose,0,0,1,0,90
4,10,Germany,1978-06-09,Centre-Forward,Attack,right,184.0,2262399,398,2012-09-20,Miroslav Klose,0,0,0,0,90


In [32]:
# Perform merge, drop unnecessary columns, and rename

players_appearances = pd.merge(
    players_appearances, clubs,
    left_on='player_club_id',
    right_on='club_id',
    how='inner')

players_appearances = players_appearances.drop(
    columns=[
    'player_club_id','club_code','squad_size',
    'average_age', 'foreigners_number',
    'foreigners_percentage','national_team_players',
    'stadium_name', 'stadium_seats',
    'net_transfer_record', 'last_season',
    'filename','url', 'coach_name', 
    'total_market_value'
])

players_appearances = players_appearances.rename(
    columns={
        "name": "club_name"
    })

In [33]:
players_appearances = pd.merge(
    players_appearances, competitions,
    left_on='domestic_competition_id',
    right_on='competition_id',
    how='left'
)

players_appearances = players_appearances.drop(
    columns=[
        'domestic_competition_id',
        'competition_code',
        'name', 'sub_type', 
        'type', 'country_id',
        'competition_id', 
        'domestic_league_code',
        'confederation', 'url'
    ])

In [34]:
players_appearances = players_appearances.rename(
    columns={
        "country_name": "club_national_league",
        "is_major_national_league": "is_top5_league"
    })

players_appearances = players_appearances[[
    'player_id', 'player_name', 'position',
    'sub_position', 'foot', 'height_in_cm', 
    'country_of_citizenship',
    'date_of_birth', 'club_id', 'club_name', 
    'club_national_league', 'is_top5_league', 
    'game_id', 'date', 'yellow_cards', 
    'red_cards', 'goals', 'assists', 
    'minutes_played'
]]

In [35]:
# Filter goal events and remove own-goals
goal_events = game_events[game_events['type'] == 'Goals'].drop(columns=['game_event_id', 'player_in_id', 'type', 'player_assist_id', 'minute'])
goal_events = goal_events[~goal_events['description'].str.contains('Own-goal', case=False)]

# Define different types of goals
kind_of_goals = ['Header', 'Right-footed', 'Left-footed', 'Penalty', 'Long distance kick',
                 'Free kick', 'Counter attack', 'Solo run', 'Tap-in']

# Create a dictionary for goal types and their corresponding column names
goal_type_columns = {
    'Header': 'header_goals',
    'Right-footed': 'right_footed_goals',
    'Left-footed': 'left_footed_goals',
    'Penalty': 'penalty_goals',
    'Long distance kick': 'long_distance_goals',
    'Free kick': 'direct_free_kick_goals',
    'Counter attack': 'counter_attack_goals',
    'Solo run': 'solo_run_goals',
    'Tap-in': 'tap-in_goals',
    'Other': 'other_kind_of_goals'
}

In [37]:
# Categorize goals and create new columns
for goal_type, column_name in goal_type_columns.items():
    goal_events[column_name] = np.where(goal_events['description'].str.contains(goal_type, case=False), 1, 0)

# Categorize remaining goals as 'Other'
goal_events.loc[goal_events[list(goal_type_columns.values())].sum(axis=1) == 0, 'other_kind_of_goals'] = 1

# Group by date, game_id, and player_id, and aggregate the data
grouped_goal_events = goal_events.groupby(['date', 'game_id', 'player_id']).agg({
    'club_id': 'first',
    **{col: 'sum' for col in goal_type_columns.values()}
}).reset_index()

# Rename columns
grouped_goal_events.columns = ['goal_' + col if i < 5 else col for i, col in enumerate(grouped_goal_events.columns)]

In [39]:
# Display the result
grouped_goal_events.head(5)

Unnamed: 0,goal_date,goal_game_id,goal_player_id,goal_club_id,goal_header_goals,right_footed_goals,left_footed_goals,penalty_goals,long_distance_goals,direct_free_kick_goals,counter_attack_goals,solo_run_goals,tap-in_goals,other_kind_of_goals
0,2012-07-03,2231978,21679,853,0,1,1,0,0,0,0,0,0,0
1,2012-07-03,2231978,38004,853,2,1,0,0,0,0,0,0,0,0
2,2012-07-03,2231978,82883,853,1,0,0,0,0,0,0,0,0,0
3,2012-07-03,2231978,119324,853,0,0,0,0,1,0,0,0,0,0
4,2012-07-03,2231982,1386,6335,1,2,1,0,0,0,0,0,0,0


## Player Valuations Processing

In [40]:
# Drop unnecessary columns and extract month and year
player_valuations = (player_valuations
    .drop(columns=['current_club_id', 'player_club_domestic_competition_id'])
    .assign(month=lambda x: x['date'].dt.month,
            year=lambda x: x['date'].dt.year))

In [41]:
# Group by player, year, and month, then calculate mean market value
player_valuations_monthly = (player_valuations
    .groupby(['player_id', 'year', 'month'])['market_value_in_eur']
    .mean()
    .reset_index())

In [42]:
# Function to generate date range for each player
def generate_date_range(group):
    start_date = group['date'].min().replace(day=1)
    end_date = group['date'].max()
    date_range = pd.date_range(start=start_date, end=end_date, freq='MS')
    return pd.DataFrame({'date': date_range})

In [43]:
# Generate continuous date range for each player
player_valuations_new = (player_valuations.groupby('player_id', group_keys=False)
                         .apply(lambda x: generate_date_range(x).assign(player_id=x.name))
                         .reset_index(drop=True))

In [44]:
# Extract year and month from the generated date range
player_valuations_new['year'] = player_valuations_new['date'].dt.year
player_valuations_new['month'] = player_valuations_new['date'].dt.month


In [45]:
# Merge the generated date range with grouped valuations
# Fill missing values and rename the market value column
player_valuations_new = (pd.merge(player_valuations_new, player_valuations_monthly,
                                  on=['player_id', 'year', 'month'], how='left')
    .assign(market_value_in_eur=lambda x: x.groupby('player_id')['market_value_in_eur'].fillna(method='ffill'))
    .rename(columns={"market_value_in_eur": "avg_monthly_market_value_in_eur"}))

In [46]:
player_valuations_new.tail(5)

Unnamed: 0,date,player_id,year,month,avg_monthly_market_value_in_eur
2815882,2024-04-01,1225269,2024,4,50000.0
2815883,2024-05-01,1225269,2024,5,50000.0
2815884,2024-05-01,1229924,2024,5,250000.0
2815885,2024-06-01,1240467,2024,6,50000.0
2815886,2024-06-01,1240762,2024,6,50000.0


In [49]:
# First important join: "Players_appearances" & "Grouped_Goal_events"
players_appearances_full = (
    players_appearances.merge(
        grouped_goal_events,
        left_on=['game_id', 'player_id'],
        right_on=['goal_game_id', 'goal_player_id'],
        how='left'
    )
    .drop(columns=['goal_date', 'goal_game_id', 'goal_player_id', 'goal_club_id'])
)

In [50]:
# Print shape to verify the merge result
print(f"Shape of players_appearances_full: {players_appearances_full.shape}")

Shape of players_appearances_full: (1576038, 29)


In [51]:
# Define the list of goal types
goal_types = ['right_footed_goals', 'left_footed_goals', 'long_distance_goals',
              'direct_free_kick_goals', 'penalty_goals', 'header_goals',
              'counter_attack_goals', 'solo_run_goals', 'tap-in_goals',
              'other_kind_of_goals']

In [52]:
# Set all goal types to 0 where total goals is 0
players_appearances_full.loc[players_appearances_full['goals'] == 0, goal_types] = 0

In [53]:
# Fill NaN values with 0 and convert to int for all goal types at once
players_appearances_full[goal_types] = players_appearances_full[goal_types].fillna(0).astype(int)

In [54]:
# Define the columns to keep
columns_to_keep = ['player_id', 'player_name', 'position', 'sub_position',
                   'foot', 'height_in_cm', 'country_of_citizenship',
                   'date_of_birth', 'club_id', 'club_name',
                   'club_national_league', 'is_top5_league', 'date',
                   'game_id', 'minutes_played', 'goals', 'assists',
                   'right_footed_goals', 'left_footed_goals',
                   'header_goals', 'direct_free_kick_goals',
                   'penalty_goals', 'long_distance_goals',
                   'counter_attack_goals', 'solo_run_goals',
                   'tap-in_goals', 'other_kind_of_goals', 'yellow_cards',
                   'red_cards']

# Select the columns
players_stats = players_appearances_full[columns_to_keep]

In [55]:
players_stats

Unnamed: 0,player_id,player_name,position,sub_position,foot,height_in_cm,country_of_citizenship,date_of_birth,club_id,club_name,...,header_goals,direct_free_kick_goals,penalty_goals,long_distance_goals,counter_attack_goals,solo_run_goals,tap-in_goals,other_kind_of_goals,yellow_cards,red_cards
0,10,Miroslav Klose,Attack,Centre-Forward,right,184.0,Germany,1978-06-09,398,Società Sportiva Lazio S.p.A.,...,0,0,0,0,0,0,0,0,1,0
1,10,Miroslav Klose,Attack,Centre-Forward,right,184.0,Germany,1978-06-09,398,Società Sportiva Lazio S.p.A.,...,0,0,0,0,0,0,0,0,1,0
2,10,Miroslav Klose,Attack,Centre-Forward,right,184.0,Germany,1978-06-09,398,Società Sportiva Lazio S.p.A.,...,0,0,0,0,0,0,0,0,1,0
3,10,Miroslav Klose,Attack,Centre-Forward,right,184.0,Germany,1978-06-09,398,Società Sportiva Lazio S.p.A.,...,0,0,0,0,0,0,1,0,0,0
4,10,Miroslav Klose,Attack,Centre-Forward,right,184.0,Germany,1978-06-09,398,Società Sportiva Lazio S.p.A.,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1576033,981197,Frederic Soelle Soelle,Attack,Centre-Forward,right,190.0,Belgium,2005-12-24,54189,Racing White Daring Molenbeek,...,0,0,0,0,0,0,0,0,0,0
1576034,1113528,Christ Makosso,Defender,Centre-Back,right,192.0,Congo,2004-05-09,54189,Racing White Daring Molenbeek,...,0,0,0,0,0,0,0,0,0,0
1576035,1113528,Christ Makosso,Defender,Centre-Back,right,192.0,Congo,2004-05-09,54189,Racing White Daring Molenbeek,...,0,0,0,0,0,0,0,0,0,0
1576036,1113528,Christ Makosso,Defender,Centre-Back,right,192.0,Congo,2004-05-09,54189,Racing White Daring Molenbeek,...,0,0,0,0,0,0,0,0,0,0


### Merging Player Statistics with Club Game Data

In [56]:
# Define columns to drop after the merge
columns_to_drop = ['own_goals', 'own_position', 'own_manager_name',
                   'opponent_id', 'opponent_position',
                   'opponent_manager_name', 'hosting']

# Second important join: "Player_Stats" & "Club_games"
player_club_goals = (
    pd.merge(
        players_stats,
        club_games.drop(columns=columns_to_drop),
        on=['game_id', 'club_id'],
        how='inner'
    )
)

# Print shape to verify the merge result
print(f"Shape of player_club_goals_stat: {player_club_goals.shape}")

Shape of player_club_goals_stat: (1576038, 31)


In [57]:
# Create new columns in a single operation
player_club_goals = player_club_goals.assign(
    # Set is_clean_sheet to 1 where opponent_goals is 0, otherwise 0
    is_clean_sheet=lambda df: (df['opponent_goals'] == 0).astype(int),
    
    # Extract year and month from date
    year=lambda df: df['date'].dt.year,
    month=lambda df: df['date'].dt.month,
    
    # Calculate age based on year of birth
    age=lambda df: df['year'] - df['date_of_birth'].dt.year
)

In [58]:
# Convert age to integer, filling NaN values with 0
player_club_goals['age'] = player_club_goals['age'].fillna(0).astype(int)

In [59]:
# Print the first few rows to verify the changes
print(player_club_goals[['opponent_goals', 'is_clean_sheet', 'year', 'month', 'age']].tail())

         opponent_goals  is_clean_sheet  year  month  age
1576033               4               0  2024      3   21
1576034               4               0  2024      3   22
1576035               4               0  2024      3   23
1576036               4               0  2024      3   21
1576037               4               0  2024      3   20


### Aggregating Monthly Player Statistics

In [60]:
# Define aggregation functions for different column types
agg_functions = {
    'first': ['player_name', 'position', 'sub_position', 'foot', 'height_in_cm', 
              'country_of_citizenship', 'date_of_birth', 'club_id', 'club_name', 
              'club_national_league', 'is_top5_league', 'date', 'age'],
    'count': ['game_id'],
    'sum': ['minutes_played', 'goals', 'assists', 'right_footed_goals', 
            'left_footed_goals', 'header_goals', 'direct_free_kick_goals', 
            'penalty_goals', 'long_distance_goals', 'counter_attack_goals', 
            'solo_run_goals', 'tap-in_goals', 'other_kind_of_goals', 
            'yellow_cards', 'red_cards', 'opponent_goals', 'is_win', 'is_clean_sheet']
}

In [61]:
# Create a dictionary comprehension for the aggregation
agg_dict = {col: op for op, cols in agg_functions.items() for col in cols}

# Perform groupby and aggregation
grouped_stats = (
    player_club_goals
    .groupby(['player_id', 'year', 'month'])
    .agg(agg_dict)
    .reset_index()
)

In [62]:
# Define columns to keep
columns_to_keep = ['player_id', 'date', 'year', 'month', 'player_name', 'position', 'sub_position',
                   'foot', 'height_in_cm', 'country_of_citizenship', 'date_of_birth', 'club_id', 
                   'club_name', 'club_national_league', 'is_top5_league', 'age', 'game_id', 
                   'minutes_played', 'is_win', 'goals', 'assists', 'right_footed_goals', 
                   'left_footed_goals', 'header_goals', 'direct_free_kick_goals', 'penalty_goals', 
                   'long_distance_goals', 'counter_attack_goals', 'solo_run_goals', 'tap-in_goals', 
                   'other_kind_of_goals', 'opponent_goals', 'is_clean_sheet', 'yellow_cards', 'red_cards']

# Select columns and rename in one step
monthly_player_stats = grouped_stats[columns_to_keep].rename(columns={
    "game_id": "games_played", 
    "is_win": "games_won", 
    "is_clean_sheet": "n_of_clean_sheets"
})

In [63]:
# Set clean sheets to 0 for non-goalkeepers
monthly_player_stats.loc[monthly_player_stats['position'] != "Goalkeeper", 'n_of_clean_sheets'] = 0

# Calculate new features efficiently
monthly_player_stats = monthly_player_stats.assign(
    goals_scored_per_match=(monthly_player_stats['goals'] / monthly_player_stats['games_played']).round(2),
    percentage_of_win=(monthly_player_stats['games_won'] / monthly_player_stats['games_played'] * 100).round(2),
    percentage_of_clean_sheets=(monthly_player_stats['n_of_clean_sheets'] / monthly_player_stats['games_played'] * 100).round(2),
    conceded_goals_per_match=(monthly_player_stats['opponent_goals'] / monthly_player_stats['games_played']).round(2)
)

# Drop rows with NaN values
monthly_player_stats = monthly_player_stats.dropna()

In [64]:
# Print shape and first few rows to verify the result
print(f"Shape of monthly_player_stats: {monthly_player_stats.shape}")
print(monthly_player_stats.head())

Shape of monthly_player_stats: (514499, 39)
   player_id       date  year  month     player_name position    sub_position   
0         10 2012-08-23  2012      8  Miroslav Klose   Attack  Centre-Forward  \
1         10 2012-09-02  2012      9  Miroslav Klose   Attack  Centre-Forward   
2         10 2012-10-07  2012     10  Miroslav Klose   Attack  Centre-Forward   
3         10 2012-11-08  2012     11  Miroslav Klose   Attack  Centre-Forward   
4         10 2012-12-02  2012     12  Miroslav Klose   Attack  Centre-Forward   

    foot  height_in_cm country_of_citizenship  ... tap-in_goals   
0  right         184.0                Germany  ...            0  \
1  right         184.0                Germany  ...            1   
2  right         184.0                Germany  ...            0   
3  right         184.0                Germany  ...            0   
4  right         184.0                Germany  ...            0   

   other_kind_of_goals opponent_goals n_of_clean_sheets  yellow_ca

### Merging Player Performance with Market Valuations

In [65]:
# Perform left join between stats and player_valuations_new
player_stats_with_valuations = pd.merge(
    monthly_player_stats, 
    player_valuations_new[['player_id', 'year', 'month', 'avg_monthly_market_value_in_eur']], 
    on=['player_id', 'year', 'month'], 
    how='left'
)

In [66]:
# Fill missing market values using forward and backward fill within each player group
player_stats_with_valuations['avg_monthly_market_value_in_eur'] = (
    player_stats_with_valuations.groupby('player_id')['avg_monthly_market_value_in_eur']
    .transform(lambda x: x.ffill().bfill())
)

# Remove rows with any remaining NaN values
player_stats_with_valuations = player_stats_with_valuations.dropna()

In [67]:
# Print shape and first few rows to verify the result
print(f"Shape of player_stats_with_valuations: {player_stats_with_valuations.shape}")
print(player_stats_with_valuations[['player_id', 'year', 'month', 'avg_monthly_market_value_in_eur']].head())

Shape of player_stats_with_valuations: (512644, 40)
   player_id  year  month  avg_monthly_market_value_in_eur
0         10  2012      8                        6000000.0
1         10  2012      9                        6000000.0
2         10  2012     10                        6000000.0
3         10  2012     11                        6000000.0
4         10  2012     12                        6000000.0


In [68]:
def plot_player_market_value(df, player_name):
    """
    Plot the market value evolution for a given player.
    
    Args:
    df (pd.DataFrame): DataFrame containing player stats and market values
    player_name (str): Name of the player to plot
    
    Returns:
    plotly.graph_objs._figure.Figure: Plotly figure object
    """
    # Filter data for the specified player
    player_values = df[df['player_name'] == player_name]
    
    # Create the line plot
    fig = px.line(player_values, 
                  x='date', 
                  y='avg_monthly_market_value_in_eur', 
                  title=f"{player_name}'s Market Value Trends Over Time")
    
    # Customize the plot
    fig.update_traces(line=dict(color='red', width=4))
    fig.update_layout(
        plot_bgcolor='white',
        paper_bgcolor='white',
        title_font=dict(size=24, family='Arial', color='black'),
        xaxis=dict(
            title='Years', 
            tickfont=dict(size=12, color='black'),
            showgrid=True, 
            dtick='M12', 
            gridcolor='grey'
        ),
        yaxis=dict(
            title='Value (EUR)', 
            tickfont=dict(size=12, color='black'),
            showgrid=True, 
            gridcolor='grey'
        )
    )
    
    return fig

In [69]:
# Example usage
name = 'Kylian Mbappé'
fig = plot_player_market_value(player_stats_with_valuations, name)
fig.show()

In [70]:
def plot_top_players_market_value(df, n_players=5):
    """
    Plot the market value evolution for the top n players by average market value.
    
    Args:
    df (pd.DataFrame): DataFrame containing player stats and market values
    n_players (int): Number of top players to plot
    
    Returns:
    plotly.graph_objs._figure.Figure: Plotly figure object
    """
    # Get the top n players by average market value
    top_players = df.groupby('player_name')['avg_monthly_market_value_in_eur'].mean().nlargest(n_players).index

    # Create the figure
    fig = go.Figure()

    # Add a line for each top player
    for player in top_players:
        player_data = df[df['player_name'] == player].sort_values('date')
        fig.add_trace(go.Scatter(
            x=player_data['date'],
            y=player_data['avg_monthly_market_value_in_eur'],
            mode='lines',
            name=player
        ))

    # Customize the layout
    fig.update_layout(
        title=f"Top {n_players} Players' Market Value Trends Over Time",
        xaxis_title="Date",
        yaxis_title="Market Value (EUR)",
        legend_title="Players",
        plot_bgcolor='white',
        paper_bgcolor='white',
        title_font=dict(size=24, family='Arial', color='black'),
        xaxis=dict(
            tickfont=dict(size=12, color='black'),
            showgrid=True,
            gridcolor='lightgrey'
        ),
        yaxis=dict(
            tickfont=dict(size=12, color='black'),
            showgrid=True,
            gridcolor='lightgrey'
        )
    )

    return fig

# Use the function to plot top 5 players
fig = plot_top_players_market_value(player_stats_with_valuations, n_players=5)
fig.show()