
# Exemple PCA et t-SNE

Analyse en Composantes Principales (ACP, ou PCA en anglais) et t-SNE appliquées au dataset des Iris.


In [None]:
import numpy as np
from time import time

from sklearn import decomposition
from sklearn import datasets
from sklearn.cluster import KMeans

#Eviter les warnings
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Load the iris dataset shipped with scikit-learn, then bind the feature
# matrix to X and the class labels to y.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Preview the first ten rows of the feature matrix.
X[:10]

In [None]:
# Fit a 2-component PCA on the iris measurements and time the whole operation.
t0 = time()
pca = decomposition.PCA(n_components=2)
# Always fit/transform from the raw measurements (iris.data) instead of from X:
# the original code did `X = pca.transform(X)`, so re-running the cell applied
# PCA to data that had already been projected. Reading iris.data makes the cell
# idempotent while still exposing the projection under the name X, which the
# plotting cell below reads.
pca.fit(iris.data)
X = pca.transform(iris.data)
print(f"Durée PCA : {round(time()-t0,4)} s.")
# Share of the total variance carried by each retained component.
print(pca.explained_variance_ratio_)
print(X[:10])

In [None]:
# Build one boolean mask per class label found in y (0, 1 and 2, bound in that
# order to the setosa, versicolor and virginica masks). They are used further
# down to select the rows of each species when plotting.
filtre_setosa, filtre_versicolor, filtre_virginica = (
    y == classe for classe in (0, 1, 2)
)
print(filtre_virginica)

In [None]:
import plotly.graph_objs as go  # Interactive plotting, used here instead of matplotlib


# One colour per species, in the same order as the species list below.
palette = ['navy','red','maroon']

# (legend label, boolean row mask) for each species.
especes = [
    ('Setosa', filtre_setosa),
    ('Versicolor', filtre_versicolor),
    ('Virginica', filtre_virginica),
]

# One scatter trace per species: x/y are the two columns of X restricted to
# that species' rows. Built in a single comprehension instead of three
# copy-pasted constructor calls.
data = [
    go.Scatter(
        x=X[filtre, 0],
        y=X[filtre, 1],
        name=nom,
        text=nom,
        opacity=0.9,
        marker=dict(color=couleur, size=5),
        mode='markers',
    )
    for (nom, filtre), couleur in zip(especes, palette)
]
# Keep the individual trace names available, as in the original cell.
acp_setosa, acp_versicolor, acp_virginica = data

# `titlefont` is a deprecated layout attribute (replaced by `title.font`), so
# the title is given as a nested dict, which works on old and new plotly
# versions. Axis titles are added so the figure can be read on its own.
layout = go.Layout(
    title=dict(text="PCA - Iris", font=dict(size=40)),
    xaxis=dict(title=dict(text="Composante principale 1")),
    yaxis=dict(title=dict(text="Composante principale 2")),
    autosize=False,
    width=1000,
    height=600,
)

fig = go.Figure(data=data, layout=layout)
fig.show()

## Exemple T-SNE
T-SNE appliqué au dataset des Iris.

In [None]:
from sklearn import manifold

# Compute a 2-D t-SNE embedding of the raw iris measurements and time it.
t0 = time()
# Reset X to the raw measurements: an earlier cell rebinds X to the PCA projection.
X = iris.data
# Changes compared with the original call:
# - `n_iter=1000` is dropped: the parameter was renamed `max_iter` in
#   scikit-learn 1.5 and the old name is being removed, while 1000 is the
#   default iteration count under either name. Omitting it keeps the same
#   behaviour on every version.
# - `random_state` is fixed so that the embedding (and the plot built from it)
#   is reproducible from one run to the next.
tsne = manifold.TSNE(n_components=2, verbose=0, perplexity=3, random_state=42)
X_tsne = tsne.fit_transform(X)
print(f"Durée T-SNE : {round(time()-t0,4)} s.")

In [None]:
import plotly.graph_objs as go  # Interactive plotting, used here instead of matplotlib

# One colour per species, in the same order as the species list below.
palette = ['navy','red','maroon']

# (legend label, boolean row mask) for each species.
especes = [
    ('Setosa', filtre_setosa),
    ('Versicolor', filtre_versicolor),
    ('Virginica', filtre_virginica),
]

# One scatter trace per species: x/y are the two columns of the t-SNE
# embedding restricted to that species' rows. Built in a single comprehension
# instead of three copy-pasted constructor calls.
data = [
    go.Scatter(
        x=X_tsne[filtre, 0],
        y=X_tsne[filtre, 1],
        name=nom,
        text=nom,
        opacity=0.9,
        marker=dict(color=couleur, size=5),
        mode='markers',
    )
    for (nom, filtre), couleur in zip(especes, palette)
]
# Keep the individual trace names available, as in the original cell
# (the `acp_` prefix is historical: these traces hold t-SNE coordinates).
acp_setosa, acp_versicolor, acp_virginica = data

# `titlefont` is a deprecated layout attribute (replaced by `title.font`), so
# the title is given as a nested dict, which works on old and new plotly
# versions. Axis titles are added so the figure can be read on its own.
layout = go.Layout(
    title=dict(text="T-SNE - Iris", font=dict(size=40)),
    xaxis=dict(title=dict(text="Dimension t-SNE 1")),
    yaxis=dict(title=dict(text="Dimension t-SNE 2")),
    autosize=False,
    width=1000,
    height=600,
)

fig = go.Figure(data=data, layout=layout)
fig.show()

## Exemple K-Means

In [None]:

# Elbow method: fit K-Means for several cluster counts and plot the resulting
# within-cluster inertia against the number of clusters.

# Candidate cluster counts (1 to 10). Defined once so that the fitting loop and
# the x axis of the plot cannot drift apart.
valeurs_k = list(range(1, 11))

# Inertia of each fitted model, in the same order as valeurs_k.
inertias = []

for k in valeurs_k:
    # n_init is set explicitly because its default differs between
    # scikit-learn versions; 10 restarts keeps the curve comparable everywhere.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    # Cluster the raw measurements directly rather than the global X, which
    # other cells rebind (PCA projection, then raw data again): the result no
    # longer depends on which cell was executed last.
    kmeans.fit(iris.data)
    inertias.append(kmeans.inertia_)

# Draw the elbow plot with Plotly.
fig = go.Figure()

fig.add_trace(go.Scatter(x=valeurs_k, y=inertias, mode='lines+markers',
                         line=dict(dash='dash', color='blue'), marker=dict(size=8)))

fig.update_layout(
    title="Elbow Plot pour déterminer le nombre optimal de clusters",
    xaxis_title="Nombre de clusters",
    yaxis_title="Inertie Intra",
    xaxis=dict(tickmode='linear'),  # one tick per integer cluster count
    template="plotly_white"
)

# Display the figure.
fig.show()