In [85]:
# standard library
import warnings
from urllib.parse import quote

#data manipulation
import pandas as pd
import numpy as np

#data visualization
import seaborn as sns
import matplotlib.pyplot as plt
from yellowbrick.cluster import KElbowVisualizer
from yellowbrick.cluster import silhouette_visualizer

# database connection
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from sqlalchemy.orm import sessionmaker

#data process
from sklearn.preprocessing import MinMaxScaler
from umap import UMAP

# Models
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import silhouette_score
from sklearn.metrics import silhouette_samples

#other
import tqdm
# Notebook-wide settings: show every column when a frame is rendered,
# and silence all warnings for the rest of the session.
pd.options.display.max_columns = None
warnings.simplefilter('ignore')

In [86]:
# Load the database credentials from a plain-text settings file.
# The file is read positionally: the first non-blank line carries the user
# and the second the password, each written as `key=value`.
# Values must be stored raw (not percent-encoded); encoding happens below.
with open('src/sql_settings.txt', encoding='utf-8') as f:
    # Read as text and split on the first '=' only, so values containing '='
    # stay intact; blank lines are skipped so a trailing newline at the end
    # of the file cannot leak into the parsed values.
    settings = [line.strip().split('=', 1) for line in f if line.strip()]

user = settings[0][1].strip()
password = settings[1][1].strip()

# Percent-encode the credentials: characters such as '@', ':' or '/' would
# otherwise be misread as URL delimiters when the engine parses the string.
url_user = quote(user, safe='')
url_password = quote(password, safe='')

# Create the connection engine.
engine = create_engine(f'mysql+pymysql://{url_user}:{url_password}@localhost:3306/fifa23')

In [87]:
# Similarity pairs from players_similitude, enriched with display attributes
# for both sides of each pair.
#   p / pls / cp / p_country      -> the reference player (ps.id_player)
#   pc / plsc / cpc / pc_country  -> the compared player (ps.id_player_compare)
# Every lookup is a LEFT JOIN, so a pair is kept even when a lookup has no
# match (the unmatched columns come back as NULL).
# NOTE(review): the reference player's position is read from ps.pos while the
# compared player's comes from pc.best_position — confirm both mean the same.
query = '''SELECT ps.id_player as id_player, 
       p.full_name as player,
       ps.pos as pos,
       p_country.country as country,
       cp.club as club,
       p.age,
       pls.overall as overall,
       p.photo as photo,
       cp.logo as club_logo,
       ps.id_player_compare as id_player_compare,
       pc.full_name as player_compare,
       pc.best_position as player_compare_pos,
       pc_country.country as country_player_compare,
       cpc.club as club_player_compare,
       plsc.overall as player_compare_overall,
       pc.age as player_compare_Age,
       pc.photo as player_compare_photo,
       cpc.logo as player_compare_club_logo,
       ps.distance as distance

FROM fifa23.players_similitude as ps 
left join players as pc on pc.ID = ps.id_player_compare
left join players as p on p.ID = ps.id_player
left join players_stats as pls on p.ID = pls.ID
left join players_stats as plsc on pc.ID = plsc.ID
left join club as cp on p.club_id = cp.ID
left join club as cpc on pc.club_id = cpc.ID
left join country as p_country on p.country_id = p_country.ID
left join country as pc_country on pc.country_id = pc_country.ID 
 
; '''

In [88]:
# Execute the similarity query through the engine and load the full result set.
df = pd.read_sql(sql=query, con=engine)

In [89]:
# Preview the first rows of the query result.
df.head()

Unnamed: 0,id_player,player,pos,country,club,age,overall,photo,club_logo,id_player_compare,player_compare,player_compare_pos,country_player_compare,club_player_compare,player_compare_overall,player_compare_Age,player_compare_photo,player_compare_club_logo,distance
0,1179,Gianluigi Buffon,GK,Italy,Parma,44,79,https://cdn.sofifa.net/players/001/179/23_60.png,https://cdn.sofifa.net/teams/50/30.png,253169,Jung Ho Kim,GK,Korea Republic,Gangwon FC,57,24,https://cdn.sofifa.net/players/253/169/23_60.png,https://cdn.sofifa.net/teams/112115/30.png,0.017631
1,1179,Gianluigi Buffon,GK,Italy,Parma,44,79,https://cdn.sofifa.net/players/001/179/23_60.png,https://cdn.sofifa.net/teams/50/30.png,254513,Carljohan Eriksson,GK,Finland,Dundee United,64,27,https://cdn.sofifa.net/players/254/513/23_60.png,https://cdn.sofifa.net/teams/181/30.png,0.024298
2,1179,Gianluigi Buffon,GK,Italy,Parma,44,79,https://cdn.sofifa.net/players/001/179/23_60.png,https://cdn.sofifa.net/teams/50/30.png,193312,Sebastian Mielitz,GK,Germany,VfB Oldenburg,65,32,https://cdn.sofifa.net/players/193/312/23_60.png,https://cdn.sofifa.net/teams/110587/30.png,0.030863
3,1179,Gianluigi Buffon,GK,Italy,Parma,44,79,https://cdn.sofifa.net/players/001/179/23_60.png,https://cdn.sofifa.net/teams/50/30.png,204502,Matt Ingram,GK,England,Hull City,68,28,https://cdn.sofifa.net/players/204/502/23_60.png,https://cdn.sofifa.net/teams/1952/30.png,0.030882
4,2147,Maarten Stekelenburg,GK,Netherlands,Ajax,39,74,https://cdn.sofifa.net/players/002/147/23_60.png,https://cdn.sofifa.net/teams/245/30.png,161663,Sebastián Viera,GK,Uruguay,Junior FC,74,39,https://cdn.sofifa.net/players/161/663/23_60.png,https://cdn.sofifa.net/teams/101101/30.png,0.000215


In [90]:
# Add a surrogate row identifier: 0 .. len(df) - 1, following the current row order.
df['ID'] = list(range(len(df)))

In [91]:
# Inspect the column labels of the frame.
df.columns

Index(['id_player', 'player', 'pos', 'country', 'club', 'age', 'overall',
       'photo', 'club_logo', 'id_player_compare', 'player_compare',
       'player_compare_pos', 'country_player_compare', 'club_player_compare',
       'player_compare_overall', 'player_compare_Age', 'player_compare_photo',
       'player_compare_club_logo', 'distance', 'ID'],
      dtype='object')

In [92]:
# Re-select every column with 'ID' moved to the front; the remaining
# columns keep the order in which the query returned them.
df = df.loc[:, [
    'ID',
    'id_player',
    'player',
    'pos',
    'country',
    'club',
    'age',
    'overall',
    'photo',
    'club_logo',
    'id_player_compare',
    'player_compare',
    'player_compare_pos',
    'country_player_compare',
    'club_player_compare',
    'player_compare_overall',
    'player_compare_Age',
    'player_compare_photo',
    'player_compare_club_logo',
    'distance',
]]

In [93]:
# Preview the first rows again after the column re-selection.
df.head()

Unnamed: 0,ID,id_player,player,pos,country,club,age,overall,photo,club_logo,id_player_compare,player_compare,player_compare_pos,country_player_compare,club_player_compare,player_compare_overall,player_compare_Age,player_compare_photo,player_compare_club_logo,distance
0,0,1179,Gianluigi Buffon,GK,Italy,Parma,44,79,https://cdn.sofifa.net/players/001/179/23_60.png,https://cdn.sofifa.net/teams/50/30.png,253169,Jung Ho Kim,GK,Korea Republic,Gangwon FC,57,24,https://cdn.sofifa.net/players/253/169/23_60.png,https://cdn.sofifa.net/teams/112115/30.png,0.017631
1,1,1179,Gianluigi Buffon,GK,Italy,Parma,44,79,https://cdn.sofifa.net/players/001/179/23_60.png,https://cdn.sofifa.net/teams/50/30.png,254513,Carljohan Eriksson,GK,Finland,Dundee United,64,27,https://cdn.sofifa.net/players/254/513/23_60.png,https://cdn.sofifa.net/teams/181/30.png,0.024298
2,2,1179,Gianluigi Buffon,GK,Italy,Parma,44,79,https://cdn.sofifa.net/players/001/179/23_60.png,https://cdn.sofifa.net/teams/50/30.png,193312,Sebastian Mielitz,GK,Germany,VfB Oldenburg,65,32,https://cdn.sofifa.net/players/193/312/23_60.png,https://cdn.sofifa.net/teams/110587/30.png,0.030863
3,3,1179,Gianluigi Buffon,GK,Italy,Parma,44,79,https://cdn.sofifa.net/players/001/179/23_60.png,https://cdn.sofifa.net/teams/50/30.png,204502,Matt Ingram,GK,England,Hull City,68,28,https://cdn.sofifa.net/players/204/502/23_60.png,https://cdn.sofifa.net/teams/1952/30.png,0.030882
4,4,2147,Maarten Stekelenburg,GK,Netherlands,Ajax,39,74,https://cdn.sofifa.net/players/002/147/23_60.png,https://cdn.sofifa.net/teams/245/30.png,161663,Sebastián Viera,GK,Uruguay,Junior FC,74,39,https://cdn.sofifa.net/players/161/663/23_60.png,https://cdn.sofifa.net/teams/101101/30.png,0.000215


In [101]:
# Spot check: rows whose reference player belongs to Paris Saint-Germain,
# highest overall first, limited to the first 20 rows.
(
    df.loc[df['club'] == 'Paris Saint-Germain']
    .sort_values(by='overall', ascending=False)
    .head(20)
)

Unnamed: 0,ID,id_player,player,pos,country,club,age,overall,photo,club_logo,id_player_compare,player_compare,player_compare_pos,country_player_compare,club_player_compare,player_compare_overall,player_compare_Age,player_compare_photo,player_compare_club_logo,distance
34193,34193,231747,Kylian Mbappé,ST,France,Paris Saint-Germain,23,91,https://cdn.sofifa.net/players/231/747/23_60.png,https://cdn.sofifa.net/teams/73/30.png,246430,Dušan Vlahović,ST,Serbia,Juventus,84,22,https://cdn.sofifa.net/players/246/430/23_60.png,https://cdn.sofifa.net/teams/45/30.png,0.005941
34191,34191,231747,Kylian Mbappé,ST,France,Paris Saint-Germain,23,91,https://cdn.sofifa.net/players/231/747/23_60.png,https://cdn.sofifa.net/teams/73/30.png,231478,Lautaro Martínez,ST,Argentina,Inter,86,24,https://cdn.sofifa.net/players/231/478/23_60.png,https://cdn.sofifa.net/teams/44/30.png,0.003959
34190,34190,231747,Kylian Mbappé,ST,France,Paris Saint-Germain,23,91,https://cdn.sofifa.net/players/231/747/23_60.png,https://cdn.sofifa.net/teams/73/30.png,202126,Harry Kane,ST,England,Tottenham Hotspur,89,28,https://cdn.sofifa.net/players/202/126/23_60.png,https://cdn.sofifa.net/teams/18/30.png,0.000273
34189,34189,231747,Kylian Mbappé,ST,France,Paris Saint-Germain,23,91,https://cdn.sofifa.net/players/231/747/23_60.png,https://cdn.sofifa.net/teams/73/30.png,239085,Erling Haaland,ST,Norway,Manchester City,88,21,https://cdn.sofifa.net/players/239/085/23_60.png,https://cdn.sofifa.net/teams/10/30.png,5.8e-05
649,649,158023,Lionel Messi,CAM,Argentina,Paris Saint-Germain,35,91,https://cdn.sofifa.net/players/158/023/23_60.png,https://cdn.sofifa.net/teams/73/30.png,238067,Nicolò Zaniolo,CAM,Italy,Roma,81,22,https://cdn.sofifa.net/players/238/067/23_60.png,https://cdn.sofifa.net/teams/52/30.png,0.000481
650,650,158023,Lionel Messi,CAM,Argentina,Paris Saint-Germain,35,91,https://cdn.sofifa.net/players/158/023/23_60.png,https://cdn.sofifa.net/teams/73/30.png,228251,Lorenzo Pellegrini,CAM,Italy,Roma,84,26,https://cdn.sofifa.net/players/228/251/23_60.png,https://cdn.sofifa.net/teams/52/30.png,0.002051
651,651,158023,Lionel Messi,CAM,Argentina,Paris Saint-Germain,35,91,https://cdn.sofifa.net/players/158/023/23_60.png,https://cdn.sofifa.net/teams/73/30.png,256790,Jamal Musiala,CAM,Germany,FC Bayern München,81,19,https://cdn.sofifa.net/players/256/790/23_60.png,https://cdn.sofifa.net/teams/21/30.png,0.003304
652,652,158023,Lionel Messi,CAM,Argentina,Paris Saint-Germain,35,91,https://cdn.sofifa.net/players/158/023/23_60.png,https://cdn.sofifa.net/teams/73/30.png,189596,Thomas Müller,CAM,Germany,FC Bayern München,87,32,https://cdn.sofifa.net/players/189/596/23_60.png,https://cdn.sofifa.net/teams/21/30.png,0.007301
653,653,158023,Lionel Messi,CAM,Argentina,Paris Saint-Germain,35,91,https://cdn.sofifa.net/players/158/023/23_60.png,https://cdn.sofifa.net/teams/73/30.png,225663,Alexandr Golovin,CAM,Russia,AS Monaco,79,26,https://cdn.sofifa.net/players/225/663/23_60.png,https://cdn.sofifa.net/teams/69/30.png,0.010181
34192,34192,231747,Kylian Mbappé,ST,France,Paris Saint-Germain,23,91,https://cdn.sofifa.net/players/231/747/23_60.png,https://cdn.sofifa.net/teams/73/30.png,188545,Robert Lewandowski,ST,Poland,FC Barcelona,91,33,https://cdn.sofifa.net/players/188/545/23_60.png,https://cdn.sofifa.net/teams/241/30.png,0.00443


In [102]:
# Write the frame to CSV; the positional index is left out of the file.
df.to_csv(path_or_buf='../data/players_distance_vr_1.csv', index=False)

In [103]:
# Print dtypes and non-null counts per column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 91650 entries, 0 to 91649
Data columns (total 20 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   ID                        91650 non-null  int64  
 1   id_player                 91650 non-null  int64  
 2   player                    91650 non-null  object 
 3   pos                       91650 non-null  object 
 4   country                   91650 non-null  object 
 5   club                      91410 non-null  object 
 6   age                       91650 non-null  int64  
 7   overall                   91650 non-null  int64  
 8   photo                     91650 non-null  object 
 9   club_logo                 91410 non-null  object 
 10  id_player_compare         91650 non-null  int64  
 11  player_compare            91650 non-null  object 
 12  player_compare_pos        91650 non-null  object 
 13  country_player_compare    91650 non-null  object 
 14  club_p