# Set Up

In [1]:
import pandas as pd
# Read the raw player table from disk; the following cells filter this frame
# and derive per-90 columns from it.
EPL_CSV_PATH = 'data/24_25_EPL.csv'
df = pd.read_csv(EPL_CSV_PATH)

In [2]:
# Minimum value of the '90s' column a player needs to be kept. '90s' is the
# denominator of every "per 90" rate below (presumably minutes played / 90 --
# confirm against the data source), so a floor keeps tiny samples from
# producing extreme rates.
MIN_90S = 3

# One combined mask: enough playing time AND not a goalkeeper.
eligible = (df['90s'] >= MIN_90S) & (df['Pos'] != 'GK')

# .copy() makes the filtered frame independent of the raw one, so the column
# assignments below write to a real frame rather than a slice
# (avoids pandas' SettingWithCopyWarning). The original row index is kept.
df = df.loc[eligible].copy()

# Per-90 rates: raw season totals divided by the '90s' column.
df['Gls per 90'] = df['Gls'] / df['90s']
df['Ast per 90'] = df['Ast'] / df['90s']
df['Cards per 90'] = (df['CrdY'] + df['CrdR']) / df['90s']  # yellow + red cards combined
df['Pro Carries per 90'] = df['PrgC'] / df['90s']
df['Pro Passes per 90'] = df['PrgP'] / df['90s']

# Narrow view used by the later cells: identity columns plus the five rates.
filtered = df[['Player','Squad','Gls per 90','Ast per 90','Cards per 90','Pro Carries per 90','Pro Passes per 90']]
filtered.head(2)

Unnamed: 0,Player,Squad,Gls per 90,Ast per 90,Cards per 90,Pro Carries per 90,Pro Passes per 90
2,Tyler Adams,Bournemouth,0.0,0.166667,0.388889,0.666667,3.388889
3,Tosin Adarabioyo,Chelsea,0.072993,0.072993,0.218978,0.364964,2.70073


In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Player-to-player similarity: standardize each per-90 stat, then compute the
# pairwise cosine similarity between the standardized player rows.
features = filtered[["Gls per 90", "Ast per 90", "Cards per 90", "Pro Carries per 90", "Pro Passes per 90"]]
scaler = StandardScaler()
normalized_features = scaler.fit_transform(features)
similarity_matrix = cosine_similarity(normalized_features)

# Label both axes from `filtered` -- the same frame the feature rows were
# taken from -- so the names cannot drift out of step with the matrix rows
# if `df` and `filtered` ever stop sharing the same rows.
player_names = filtered['Player']
similarity_df = pd.DataFrame(similarity_matrix, index=player_names, columns=player_names)
# similarity_df.to_csv("data/similarity_matrix.csv")
similarity_df.head(2)

Player,Tyler Adams,Tosin Adarabioyo,Simon Adingra,Emmanuel Agbadou,Ola Aina,Rayan Aït-Nouri,Kristoffer Ajer,Manuel Akanji,Nathan Aké,Carlos Alcaraz,...,Nathan Wood-Gordon,Luke Woolfenden,Yehor Yarmoliuk,Ryan Yates,Leny Yoro,Ashley Young,Illia Zabarnyi,Oleksandr Zinchenko,Joshua Zirkzee,Martin Ødegaard
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Tyler Adams,1.0,0.511849,-0.65246,0.269257,-0.466909,0.303583,0.220099,-0.152319,-0.305789,0.445392,...,0.520668,0.202495,0.535417,0.674669,0.51952,0.84521,0.058011,0.143441,-0.206671,-0.104613
Tosin Adarabioyo,0.511849,1.0,-0.152185,0.917814,0.320182,-0.306559,0.455552,-0.236931,-0.084492,-0.44467,...,0.895163,0.855271,0.510111,0.57043,0.247873,0.097632,0.592963,-0.388457,0.696286,-0.644642


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Per-90 columns that feed the radar chart.
stat_cols = ["Gls per 90", "Ast per 90", "Cards per 90", "Pro Carries per 90", "Pro Passes per 90"]

# Min-max scale each stat to [0, 1] for graphing.
scaler = MinMaxScaler()
unit_scaled = scaler.fit_transform(filtered[stat_cols])

# Squeeze the scale into 0.5-1.0 so that a player with the league-minimum
# value still gets a visible spoke (the graph would look weird with 0 values).
df_normalized = filtered.copy()
df_normalized[stat_cols] = 0.5 + 0.5 * unit_scaled

# Friendlier axis labels for the chart.
df_normalized = df_normalized.rename(columns={
    "Gls per 90": "Scoring",
    "Ast per 90": "Assists",
    "Cards per 90": "Fouls", # This is a bit misleading, but it's the best we can do with the data
    "Pro Carries per 90": "Dribbling", # Also a bit misleading
    "Pro Passes per 90": "Passing" # Also a bit misleading
})
# df_normalized.to_csv("data/normalized_stats.csv", index=False)
df_normalized.head(2)

Unnamed: 0,Player,Squad,Scoring,Assists,Fouls,Dribbling,Passing
2,Tyler Adams,Bournemouth,0.5,0.633333,0.643889,0.527462,0.647802
3,Tosin Adarabioyo,Chelsea,0.536496,0.558394,0.581022,0.515034,0.612093


In [10]:
import plotly.graph_objects as go

# Select the players
player1 = 'Tyler Adams'
player2 = 'Harvey Barnes'

# Metrics to plot
metrics = ["Scoring", "Assists", "Fouls", "Dribbling", "Passing"]


def player_row(name):
    """Return the first row of df_normalized whose 'Player' equals `name`.

    Raises:
        ValueError: if no row matches (e.g. a misspelt name, or a player
            removed by the earlier filtering), instead of the bare
            IndexError that `.iloc[0]` on an empty frame would give.
    """
    matches = df_normalized[df_normalized['Player'] == name]
    if matches.empty:
        raise ValueError(f"Player {name!r} not found in df_normalized['Player']")
    return matches.iloc[0]


def closed_loop(items):
    """Return `items` as a list with its first element repeated at the end,
    so the radar polygon is drawn as a closed shape."""
    items = list(items)
    return items + items[:1]


# Get their normalized values
row1 = player_row(player1)
row2 = player_row(player2)

values1 = closed_loop(row1[m] for m in metrics)
values2 = closed_loop(row2[m] for m in metrics)
metrics = closed_loop(metrics)  # Loop closure for labels

# Create the figure
fig = go.Figure()

# One trace per player: translucent fill (30% opacity) with a solid outline
# in the same colour. Player 1 is blue, player 2 is orange.
for name, values, rgb in [
    (player1, values1, '31, 119, 180'),
    (player2, values2, '255, 127, 14'),
]:
    fig.add_trace(go.Scatterpolar(
        r=values,
        theta=metrics,
        fill='toself',
        name=name,
        fillcolor=f'rgba({rgb}, 0.3)',
        line=dict(color=f'rgba({rgb}, 1)')
    ))

# Layout
fig.update_layout(
    polar=dict(
        radialaxis=dict(
            visible=True,
            range=[0, 1],
            showticklabels=False
        )),
    showlegend=True,
    title=f"{player1} vs {player2} - Attribute Comparison"
)

fig.show()
