### Load data

In [1]:
import pandas as pd 
import utils


# Seed value for reproducibility. This line only defines the constant; the
# actual seeding (torch.manual_seed / np.random.seed) happens in the
# Dataloader cell further down.
SEED = 42

# Load the player data through the project helper. `df` is the table that gets
# standardized in the Scale section; `player_info` is handed to the similarity
# lookup at the end of the notebook.
# NOTE(review): presumably both are row-aligned so a positional index refers to
# the same player in each -- confirm in utils.load_player_statistics.
df, player_info = utils.load_player_statistics()

### Load feature space

### Scale

In [2]:
from sklearn.preprocessing import StandardScaler, Normalizer

# Standardize each feature column (StandardScaler removes the mean and scales
# to unit variance) before feeding the data to the autoencoder.
scaler = StandardScaler()
# By default fit_transform returns a NumPy array rather than a DataFrame, so
# column labels are not carried forward; the fitted `scaler` stays in scope.
df_scaled = scaler.fit_transform(df)
# Bare last expression: display (n_rows, n_features) as a sanity check.
df_scaled.shape

(4803, 137)

### Autoencoder
#### Dataloader

In [3]:
import torch
import numpy as np
from torch.utils.data import DataLoader, TensorDataset

# Seed NumPy's and PyTorch's global RNGs from the shared notebook seed
np.random.seed(SEED)
torch.manual_seed(SEED)

# Scaled feature matrix as a float32 tensor
data_tensor = torch.tensor(df_scaled, dtype=torch.float32)

# An autoencoder reconstructs its own input, so every sample is paired with
# itself as (input, target); batches are reshuffled on each pass
batch_size = 32
dataset = TensorDataset(data_tensor, data_tensor)
dataloader = DataLoader(dataset, shuffle=True, batch_size=batch_size)

# Network dimensions
encoding_dim = 50  # width of the bottleneck layer
input_dim = df_scaled.shape[1]  # one input unit per feature column

#### Model, Criterion and Optimizer

In [4]:
import autoencoder
import torch.nn as nn
import torch.optim as optim

# Reconstruction objective: mean squared error between output and input
criterion = nn.MSELoss()

# Autoencoder mapping input_dim features through an encoding_dim bottleneck
model = autoencoder.Autoencoder(input_dim, encoding_dim)

# Adam over all of the model's parameters
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

#### Training

In [5]:
# Train the autoencoder with num_epochs=50 using the dataloader, loss and
# optimizer built above. The helper's return value is rebound to `model`; the
# cell output shows the loss it reports every 10th epoch.
# NOTE(review): if train_model updates the passed-in model in place, re-running
# this cell continues training from the current weights instead of starting
# fresh -- re-run the model/optimizer cell first for a clean run.
model = autoencoder.train_model(
    model=model,
    dataloader=dataloader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=50
)

Epoch [10/50], Loss: 1.9141
Epoch [20/50], Loss: 1.7930
Epoch [30/50], Loss: 0.4700
Epoch [40/50], Loss: 0.3416
Epoch [50/50], Loss: 0.2704


#### Bottleneck Layer: Latent Vectors (Embeddings)

In [6]:
# Push the whole scaled dataset through the trained model in a single call to
# obtain one latent (bottleneck) vector per row.
# NOTE(review): confirm that get_embeddings switches the model to eval mode and
# disables gradient tracking; if it does not, wrap this call in
# model.eval() / torch.no_grad().
embeddings = model.get_embeddings(data_tensor)

### Top-k Similar Players - Cosine Similarity

In [7]:
# Rank players by similarity to the player at index 795, using the helper's
# default metric (the output column is `cosine_similarity`).
# As the rendered table shows, the query player is returned as the first row
# with similarity 1.0, so top_k=10 yields the query plus 9 neighbours.
top_k_similar_players = utils.get_top_k_similar_players(
    embeddings=embeddings,
    query_index=795,
    player_info=player_info,
    top_k=10
)

# Bare last expression so the result renders as a rich table.
top_k_similar_players

Unnamed: 0_level_0,Season,League,Team,Player,Nation,Pos,Global Pos,Age,Matches Played,Playing Time_Starts,Playing Time_Min,Playing Time_90s,cosine_similarity
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Christopher Nkunku,2022-2023,Bundesliga,RB Leipzig,Christopher Nkunku,FRA,"FW,MF",FW,24.0,25.0,20.0,1897.0,21.1,1.0
Ciro Immobile,2022-2023,SeriaA,Lazio,Ciro Immobile,ITA,FW,FW,32.0,31.0,27.0,2219.0,24.7,0.975201
Sadio Mané,2022-2023,Bundesliga,Bayern Munich,Sadio Mané,SEN,"FW,MF",FW,30.0,25.0,18.0,1425.0,15.8,0.969246
Dany Mota,2022-2023,SeriaA,Monza,Dany Mota,POR,"FW,MF",FW,24.0,29.0,21.0,1912.0,21.2,0.968277
Rafa Silva,2022-2023,PrimeiraLiga,Benfica,Rafa Silva,POR,"MF,FW",MF,29.0,28.0,26.0,2174.0,24.2,0.964922
Karim Benzema,2022-2023,LaLiga,Real Madrid,Karim Benzema,FRA,FW,FW,34.0,24.0,24.0,2038.0,22.6,0.964208
Breel Embolo,2022-2023,Ligue1,Monaco,Breel Embolo,SUI,FW,FW,25.0,32.0,19.0,1859.0,20.7,0.960094
Randal Kolo Muani,2022-2023,Bundesliga,Eint Frankfurt,Randal Kolo Muani,FRA,FW,FW,23.0,32.0,31.0,2631.0,29.2,0.959998
Tammy Abraham,2022-2023,SeriaA,Roma,Tammy Abraham,ENG,FW,FW,24.0,38.0,24.0,2189.0,24.3,0.959219
Folarin Balogun,2022-2023,Ligue1,Reims,Folarin Balogun,USA,FW,FW,21.0,37.0,34.0,2999.0,33.3,0.959179


### Top-k Similar Players - Euclidean Distance

In [8]:
# Same query (index 795) as the cosine lookup, but ranked by Euclidean distance
# in the embedding space (output column `euclidean_distance`, smaller = closer).
# The query player is again the first row, here with distance 0.0.
# Note: this rebinds `top_k_similar_players`, replacing the cosine result.
top_k_similar_players = utils.get_top_k_similar_players(
    embeddings=embeddings,
    query_index=795,
    player_info=player_info,
    top_k=10,
    distance_metric='euclidean'
)

# Bare last expression so the result renders as a rich table.
top_k_similar_players

Unnamed: 0_level_0,Season,League,Team,Player,Nation,Pos,Global Pos,Age,Matches Played,Playing Time_Starts,Playing Time_Min,Playing Time_90s,euclidean_distance
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Christopher Nkunku,2022-2023,Bundesliga,RB Leipzig,Christopher Nkunku,FRA,"FW,MF",FW,24.0,25.0,20.0,1897.0,21.1,0.0
Ciro Immobile,2022-2023,SeriaA,Lazio,Ciro Immobile,ITA,FW,FW,32.0,31.0,27.0,2219.0,24.7,7.938149
Sadio Mané,2022-2023,Bundesliga,Bayern Munich,Sadio Mané,SEN,"FW,MF",FW,30.0,25.0,18.0,1425.0,15.8,8.814219
Breel Embolo,2022-2023,Ligue1,Monaco,Breel Embolo,SUI,FW,FW,25.0,32.0,19.0,1859.0,20.7,9.147747
Karim Benzema,2022-2023,LaLiga,Real Madrid,Karim Benzema,FRA,FW,FW,34.0,24.0,24.0,2038.0,22.6,9.23471
Dany Mota,2022-2023,SeriaA,Monza,Dany Mota,POR,"FW,MF",FW,24.0,29.0,21.0,1912.0,21.2,9.266175
Ademola Lookman,2022-2023,SeriaA,Atalanta,Ademola Lookman,NGA,"FW,MF",FW,24.0,31.0,20.0,1729.0,19.2,9.696276
Jonathan Ikone,2022-2023,SeriaA,Fiorentina,Jonathan Ikone,FRA,FW,FW,24.0,33.0,24.0,2007.0,22.3,9.834916
Amin Sarr,2022-2023,EreDivisie,Heerenveen,Amin Sarr,SWE,FW,FW,21.0,19.0,18.0,1634.0,18.2,10.027
Grejohn Kyei,2022-2023,Ligue1,Clermont Foot,Grejohn Kyei,FRA,FW,FW,26.0,37.0,26.0,2232.0,24.8,10.14204
