# TP Réduction des dimensions : PCA et Clustering
Récupérer les données sur https://www.kaggle.com/sylvemel/data-visualisation-with-pca-and-clustering/data

In [None]:
import numpy as np
import pandas as pd
# Affichage complet des dataframes
pd.set_option('display.max_columns',100)
pd.set_option('display.max_rows',1000)

import itertools
import io

# Librairie graphique plotly
from plotly.offline import init_notebook_mode, plot,iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)

import plotly.tools as tls
import plotly.figure_factory as ff

## Data Preparation

In [None]:
# Load the player dataset from the local CSV file and preview the first rows.
df = pd.read_csv("data/fifa22.csv")
df.head(3)

In [None]:
# Position codes grouped by broad role; pos2() maps a code to its group label.
attaquants = ['CF', 'ST', 'LW', 'LF', 'RF', 'RW', 'RS', 'LS']
# 'LAM' added: the list already contained its right-side twin 'RAM', so left
# attacking midfielders were falling through to the default label.
milieux = ['LM', 'CAM', 'CDM', 'CM', 'RM', 'RCM', 'RDM', 'LCM', 'LDM', 'RAM', 'LAM']
defenseurs = ['LWB', 'LB', 'CB', 'RB', 'RWB', 'LCB', 'RCB']
gardiens = ['GK']


def pos2(player_positions):
    """Return the broad role label for a single position code.

    Parameters
    ----------
    player_positions
        Position code to classify. It is tested for membership in the
        module-level lists ``attaquants``, ``milieux``, ``defenseurs`` and
        ``gardiens``, in that order.

    Returns
    -------
    str
        'Attaquant', 'Milieu', 'Defenseur' or 'Gardien' when the code belongs
        to the matching list; 'Remplaçant' for anything else, which includes
        missing values (NaN is not a member of any list).
    """
    # The groups are resolved at call time so that later edits to the
    # module-level lists are picked up, as with the original if/elif chain.
    groupes = (
        ('Attaquant', attaquants),
        ('Milieu', milieux),
        ('Defenseur', defenseurs),
        ('Gardien', gardiens),
    )
    for libelle, codes in groupes:
        if player_positions in codes:
            return libelle
    return 'Remplaçant'
# Tag every player with a broad role derived from the club position,
# then show how many players fall into each role.
df["Position2"] = df["club_position"].apply(pos2)
df["Position2"].value_counts()

In [None]:
# Highest-rated player of each role.
# idxmax() returns index *labels*, so the rows have to be fetched with .loc;
# the former .iloc lookup only hit the same rows while the index was the
# default 0..n-1 range.
idx_best_overall = df.groupby('Position2')['overall'].idxmax()
best_players_per_position = df.loc[idx_best_overall, ['Position2', 'short_name', 'overall']]
best_players_per_position

## T-SNE

In [None]:
n_sne = 2000  # restrict the t-SNE input to the first 2000 players
# .iloc is end-exclusive and positional. The former label slice df.loc[:n_sne]
# is end-inclusive and therefore returned n_sne + 1 rows on a default index.
df_sne = df.iloc[:n_sne]

# Alternative column names kept for reference (presumably those of an older
# version of the dataset — TODO confirm):
# competences_ratings = ['Crossing', 'Finishing', 'HeadingAccuracy', 'ShortPassing',
#                   'Volleys', 'Dribbling', 'Curve', 'FKAccuracy', 'LongPassing',
#                   'BallControl', 'Acceleration', 'SprintSpeed', 'Agility', 'Reactions',
#                   'Balance', 'ShotPower', 'Jumping', 'Stamina', 'Strength', 'LongShots',
#                   'Aggression', 'Interceptions', 'Positioning', 'Vision', 'Penalties', 
#                   'Composure', 'Marking', 'StandingTackle', 'SlidingTackle',
#                   'GKDiving', 'GKHandling', 'GKKicking', 'GKPositioning', 'GKReflexes']

# Rating columns used as features.
competences_ratings = ['pace','shooting','passing','dribbling','defending','physic','attacking_crossing','attacking_finishing',
'attacking_heading_accuracy','attacking_short_passing','attacking_volleys','skill_dribbling','skill_curve',
'skill_fk_accuracy','skill_long_passing','skill_ball_control','movement_acceleration','movement_sprint_speed',
'movement_agility','movement_reactions','movement_balance','power_shot_power','power_jumping','power_stamina',
'power_strength','power_long_shots','mentality_aggression','mentality_interceptions','mentality_positioning',
'mentality_vision','mentality_penalties','mentality_composure','defending_marking_awareness','defending_standing_tackle',
'defending_sliding_tackle','goalkeeping_diving','goalkeeping_handling','goalkeeping_kicking','goalkeeping_positioning',
'goalkeeping_reflexes','goalkeeping_speed']

# 1.1 Build a dataframe of the ratings; missing ratings are replaced by 0.
X = df_sne[competences_ratings].fillna(0)
X.sample(10)

In [None]:
# 1.2 Remplacer les valeurs NaN par 0 puis exécuter une transformation t-SNE sur les données


In [None]:
# 1.3 Créer des filtres booléens par type de poste, puis pour les meilleurs joueurs (overall au-dessus de 90)


In [None]:
# Named colours (presumably for the scatter traces of steps 1.4 and 1.5).
palette = [
    'navy',
    'red',
    'maroon',
    'orange',
    'green',
    'pink',
]

# 1.4 Create 4 scatter plots, one each for forwards, midfielders, defenders and goalkeepers.

# 1.5 Add the names of the best players.



## ACP + Clustering

In [None]:
n_kmean = 18000

# Keep the first n_kmean players. .iloc is end-exclusive, whereas the former
# df.loc[:n_kmean] label slice also included row n_kmean on a default index.
df_kmean = df.iloc[:n_kmean]

# Remove goalkeepers and keep only players rated above 69.
# The position is read from "club_position", the column the rest of the
# notebook uses; the former "Position" lookup presumably targeted a column name
# from an older dataset version and fails when that column is absent.
# NOTE(review): keepers whose club_position is not 'GK' (e.g. bench players,
# presumably) are not removed by this test — confirm whether that matters.
df_kmean = df_kmean[(df_kmean["club_position"] != 'GK') & (df_kmean['overall'] > 69)]

# Rating columns used as features.
competences_ratings = ['pace','shooting','passing','dribbling','defending','physic','attacking_crossing','attacking_finishing',
'attacking_heading_accuracy','attacking_short_passing','attacking_volleys','skill_dribbling','skill_curve',
'skill_fk_accuracy','skill_long_passing','skill_ball_control','movement_acceleration','movement_sprint_speed',
'movement_agility','movement_reactions','movement_balance','power_shot_power','power_jumping','power_stamina',
'power_strength','power_long_shots','mentality_aggression','mentality_interceptions','mentality_positioning',
'mentality_vision','mentality_penalties','mentality_composure','defending_marking_awareness','defending_standing_tackle',
'defending_sliding_tackle','goalkeeping_diving','goalkeeping_handling','goalkeeping_kicking','goalkeeping_positioning',
'goalkeeping_reflexes','goalkeeping_speed']

# Synthetic "perfect player" (99 everywhere) added to see where it lands.
# "short_name" / "overall" are the columns the rest of the notebook reads, so
# the perfect player now has a name and passes "overall" filters. The
# "Name" / "Overall" keys are kept so cells that still refer to them keep working.
JoueurParfaitDict = {'short_name': 'MrParfait', 'overall': 99,
                     'Name': 'MrParfait', 'Overall': 99}
JoueurParfaitDict.update({competence: 99 for competence in competences_ratings})

# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
df_kmean = pd.concat([df_kmean, pd.DataFrame([JoueurParfaitDict])], ignore_index=True)

# Feature matrix: rating columns only, missing ratings replaced by 0.
df_competences = df_kmean[competences_ratings].fillna(0)
X = df_competences
X

In [None]:
#2.1 Créer une division en 5 clusters des joueurs


In [None]:
#2.2 Appliquer une PCA à 2 dimensions sur les joueurs

In [None]:
# 2.3 Show the clusters in colour on the 2-D PCA projection of the players.
# Add a text label for the best players (overall above 90).
# X_projected and clusters are not defined in this cell; they are expected to
# come from steps 2.1 (clustering) and 2.2 (PCA).

filtre_meilleurs = df_kmean["overall"] > 90

# One marker per player, coloured by cluster id.
kmeans_clusters = go.Scatter(
    x=X_projected[:, 0],
    y=X_projected[:, 1],
    mode='markers',
    marker=dict(
        size=5,
        # np.float was removed in NumPy 1.24; it was an alias of the builtin float.
        color=clusters.astype(float),
        colorscale='Portland',
        showscale=False,
    ),
)

# Text-only trace carrying the names of the best players.
acp_meilleurs = go.Scatter(
    x=X_projected[filtre_meilleurs, 0],
    y=X_projected[filtre_meilleurs, 1],
    name='Meilleurs joueurs',
    # Names are read from "short_name", the name column used elsewhere in the
    # notebook; "Name" is empty for the players loaded from the CSV.
    text=df_kmean.loc[filtre_meilleurs, 'short_name'],
    textfont=dict(family='sans serif', color='black', size=16),
    opacity=0.9,
    mode='text',
)
data = [kmeans_clusters, acp_meilleurs]

# `titlefont` is a deprecated layout attribute; the font now lives under
# title.font, and titles are given in their dict form.
layout = go.Layout(
    title=dict(text="ACP + Clustering ", font=dict(size=40)),
    xaxis=dict(title=dict(text='F1')),
    yaxis=dict(title=dict(text='F2')),
    autosize=False,
    width=1000,
    height=1000,
)

fig = go.Figure(data=data, layout=layout)

iplot(fig)

## 3. Qui va gagner la Ligue des champions ?