In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Show every column when a DataFrame is rendered (None = no column limit).
pd.set_option('display.max_columns', None)
# NOTE: a bare `pd.option_context('mode.use_inf_as_na', True)` call used to
# sit here. `option_context` only changes options inside a `with` block, so
# the call had no effect (and the option itself is deprecated in recent
# pandas). Infinite values are converted to NaN explicitly with
# `DataFrame.replace` when each CSV is loaded.

In [3]:
# Male players: read the CSV, turn +/-inf into NaN, discard the two
# 'Unnamed' columns (presumably index columns written by earlier CSV
# exports -- TODO confirm) and tag every row with its gender.
df_male = (
    pd.read_csv('data/male_players.csv')
    .replace([np.inf, -np.inf], np.nan)
    .drop(columns=['Unnamed: 0.1', 'Unnamed: 0'])
    .assign(Gender='Male')
)

In [4]:
# Female players: read the CSV, turn +/-inf into NaN, discard the two
# 'Unnamed' columns (presumably index columns written by earlier CSV
# exports -- TODO confirm) and tag every row with its gender.
df_female = (
    pd.read_csv('data/female_players.csv')
    .replace([np.inf, -np.inf], np.nan)
    .drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])
    .assign(Gender='Female')
)

In [5]:
# Stack the male rows on top of the female rows.
# ignore_index=True rebuilds the index as 0..n-1. Without it the female rows
# keep their own 0-based labels, so labels are duplicated and a row's index
# label no longer matches its row position in arrays derived from this frame.
df_players = pd.concat([df_male, df_female], ignore_index=True)

In [6]:
# Goalkeeper attribute columns: every column whose name contains 'GK'.
gk_columns = [name for name in df_players.columns if 'GK' in name]

# Missing goalkeeper values become 0.0 so these columns can be scaled
# (presumably outfield players have no GK ratings -- TODO confirm).
df_players[gk_columns] = df_players[gk_columns].fillna(0.0)

# Numeric features: every int64 column except 'Rank' and 'Age'.
number_columns = df_players.select_dtypes(include=['int64']).columns.to_list()
for excluded_column in ('Rank', 'Age'):
    number_columns.remove(excluded_column)

# Final feature list: integer attributes first, goalkeeper attributes last.
embedding_columns = [*number_columns, *gk_columns]

In [10]:
# Fit a MinMaxScaler (default settings) on the selected feature columns, then
# apply it to the same columns; the scaled matrix is the embedding table.
scaler = MinMaxScaler().fit(df_players[embedding_columns])
embeddings = scaler.transform(df_players[embedding_columns])

In [11]:
def _row_position(players, name):
    """Return the 0-based row position of the first player called ``name``.

    The position (not the index label) is returned so the result stays valid
    for positional lookups even when ``players`` has a non-unique or
    non-sequential index.

    Raises:
        IndexError: if no row has ``Name == name``.
    """
    matches = np.flatnonzero((players['Name'] == name).to_numpy())
    if matches.size == 0:
        raise IndexError(f"no player named {name!r} in the table")
    return matches[0]


# Row positions of three reference players
# (presumably used to look up their rows in `embeddings` -- TODO confirm).
manuel_neuer_index = _row_position(df_players, 'Manuel Neuer')
jude_bellingham_index = _row_position(df_players, 'Jude Bellingham')
lamine_yamal_index = _row_position(df_players, 'Lamine Yamal')

In [12]:
# Write the scaled feature matrix as tab-separated values, one row per player.
np.savetxt('embeddings/fc25.embeddings.tsv', embeddings, delimiter='\t')

# Write the descriptive columns for the same rows (header row, no index
# column) to a companion TSV file.
metadata_columns = ["Name", "Position", "Preferred foot", "Nation", "League", "Team", "Gender"]
df_players[metadata_columns].to_csv("embeddings/fc25.metadata.tsv", sep="\t", index=False)

In [16]:
# Step 1: Compute the centroid (per-feature mean over all rows)
centroid = np.mean(embeddings, axis=0)

# Step 2: Center the data
centered_embeddings = embeddings - centroid

# Step 3: L2 normalize each row (sphereize)
norms = np.linalg.norm(centered_embeddings, axis=1, keepdims=True)
# A row that coincides exactly with the centroid has norm 0; dividing by it
# would write NaN into the output. Divide such rows by 1 instead so they stay
# all-zero; every other row is normalized exactly as before.
safe_norms = np.where(norms == 0, 1.0, norms)
sphereized_embeddings = centered_embeddings / safe_norms

# Save to TSV
np.savetxt("embeddings/fc25.sphereized.embeddings.tsv", sphereized_embeddings, delimiter="\t")