**Iris Dataset Visualization : PCA vs t-SNE**

In [1]:
%matplotlib inline
# `%matplotlib inline` is a magic command specific to Jupyter Notebook and JupyterLab.
# When it is included at the beginning of a notebook cell and the cell is run,
# it configures the notebook to display Matplotlib plots directly within the notebook interface,
# rather than opening them in a separate window.
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

import matplotlib.pyplot as plt
import seaborn as sns
# Python data visualization library based on Matplotlib.
# It provides a high-level interface for creating attractive statistical graphics.

import plotly.express as px
# Plotly Express is a high-level Python visualization library built on top of Plotly,
# a powerful interactive visualization library. It provides a simple
# and concise interface for creating a wide range of interactive plots with minimal code.
from matplotlib.colors import ListedColormap

In [3]:
# Apply seaborn's default theme together with a larger default figure size.
# One call is enough: each set_theme() call re-applies the whole theme, so a
# preceding bare set_theme() has no lasting effect.
sns.set_theme(rc={"figure.figsize": (10, 8)})

# Three-colour palette shared by the Matplotlib plots, plus a Matplotlib
# colormap built from the same colours.
PALETTE = sns.color_palette('deep', n_colors=3)
CMAP = ListedColormap(PALETTE.as_hex())

# Seed passed to the t-SNE runs further down.
RANDOM_STATE = 42

**Load and prepare the dataset**

In [None]:
# Load the Iris dataset that ships with scikit-learn.
dataset = load_iris()
print(dataset.feature_names)

# Column names used for the DataFrame throughout this notebook (instead of
# the names printed above), and the name of the column holding the target.
features = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
target = 'species'

# Feature matrix only, previewed before the target column is attached.
iris = pd.DataFrame(data=dataset.data, columns=features)
display(iris.head())

# Attach the dataset's target values as the 'species' column, then preview
# both ends of the completed frame.
iris[target] = dataset.target
display(iris.head(), iris.tail())

**Plotting functions**

In [7]:
def plot_iris_2d(x, y, title, xlabel="1st dimension", ylabel="2nd dimension"):
    """Scatter-plot the iris samples in two dimensions on the current axes.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the samples along the horizontal and vertical axes.
    title : str
        Plot title.
    xlabel, ylabel : str, optional
        Axis labels.

    Marker colours are taken from the ``species`` column of the global
    ``iris`` frame and mapped through the global ``CMAP`` colormap.
    """
    marker_style = dict(c=iris['species'], cmap=CMAP, s=70)
    plt.scatter(x, y, **marker_style)

    label_size = 16
    plt.title(title, fontsize=20, y=1.03)
    plt.xlabel(xlabel, fontsize=label_size)
    plt.ylabel(ylabel, fontsize=label_size)

def plot_iris_3d(x, y, z, title):
    """Draw a static 3D scatter plot of the iris samples.

    Parameters
    ----------
    x, y, z : array-like
        Coordinates of the samples along the three axes.
    title : str
        Plot title.

    Marker colours are taken from the ``species`` column of the global
    ``iris`` frame and mapped through the global ``CMAP`` colormap.
    """
    # Always start from a fresh figure. Requesting a fixed figure number would
    # return an already-open figure with that number (ignoring figsize) and
    # stack this 3D axes on top of whatever it already contains.
    fig = plt.figure(figsize=(8, 6))
    # 111 = a 1x1 subplot grid, selecting its first (only) cell.
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(x, y, z,
               c=iris['species'],
               cmap=CMAP,
               s=40)
    ax.set_title(title, fontsize=20, y=1.03)

    fsize = 14
    ax.set_xlabel("1st dimension", fontsize=fsize)
    ax.set_ylabel("2nd dimension", fontsize=fsize)
    ax.set_zlabel("3rd dimension", fontsize=fsize)

    # Hide the tick labels on all three axes.
    for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
        axis.set_ticklabels([])

def dynamic_plot_iris_3d(dataset, x, y, z, title):
    """Show an interactive Plotly 3D scatter plot of the iris samples.

    Parameters
    ----------
    dataset : DataFrame or array-like
        Passed to ``px.scatter_3d`` as its data argument.
    x, y, z : array-like
        Coordinates of the samples along the three axes.
    title : str
        Figure title.

    Both marker colour and marker symbol are driven by the ``species``
    column of the global ``iris`` frame.
    """
    # Cast the species values to strings -- presumably so Plotly treats them
    # as discrete categories rather than a continuous scale (TODO confirm).
    species_labels = iris['species'].astype(str)
    fig = px.scatter_3d(
        dataset, x=x, y=y, z=z,
        color=species_labels, symbol=species_labels,
        title=title,  # previously accepted by this function but never used
        labels=dict(x="1st dimension", y="2nd dimension", z="3rd dimension", symbol="class"),
    )
    # Anchor the legend near the top-left corner of the figure.
    fig.update_layout(legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01,
    ))
    fig.show()


**Exploring some two-dimensional (two-component) visualizations**

In [None]:
# Scatter two of the raw features against each other: sepal length vs. sepal width.
# NOTE(review): the original comment here was a truncated TODO ("visuali...").
plot_iris_2d(
    iris['sepal_length'],
    iris['sepal_width'],
    'Plotting first two components',
    xlabel='Sepal length',
    ylabel='Sepal width',
)

**PCA for dimensionality reduction: 2D plotting**

In [None]:
# Reduce the four feature columns to two PCA components and report the
# shape of the result.
pca = PCA(n_components=2)
points = pca.fit_transform(iris[features])
print(points.shape)

# First component on the horizontal axis, second on the vertical axis.
plot_iris_2d(points[:, 0], points[:, 1],
             title='Iris dataset visualized with PCA')



**2D Plotting with t-SNE**

In [None]:
# Embed the four feature columns into two dimensions with t-SNE, seeded
# with RANDOM_STATE.
tsne = TSNE(n_components=2, random_state=RANDOM_STATE, max_iter=1000)
points = tsne.fit_transform(iris[features])

plot_iris_2d(points[:, 0], points[:, 1],
             title='Iris dataset visualized with t-SNE')



**PCA: 3D Plotting**

In [None]:
# Reduce the four feature columns to three PCA components and plot them
# as a static 3D scatter.
pca = PCA(n_components=3)
points = pca.fit_transform(iris[features])

plot_iris_3d(points[:, 0], points[:, 1], points[:, 2],
             title="Iris dataset visualized with PCA")

In [None]:
# Interactive version of the 3D PCA plot.
# Relies on `points` as computed by the 3-component PCA cell just above.
dynamic_plot_iris_3d(
    points,
    points[:, 0],
    points[:, 1],
    points[:, 2],
    title="Dynamic 3d visualization with PCA",
)

**t-SNE: 3D Plotting**

In [None]:
# Embed the four feature columns into three dimensions with t-SNE, seeded
# with RANDOM_STATE, and plot them as a static 3D scatter.
tsne = TSNE(n_components=3, random_state=RANDOM_STATE, max_iter=5000)
points = tsne.fit_transform(iris[features])

plot_iris_3d(points[:, 0], points[:, 1], points[:, 2],
             title="Iris dataset visualized with t-SNE")

In [None]:
# Interactive version of the 3D t-SNE plot.
# Relies on `points` as computed by the 3-component t-SNE cell just above.
dynamic_plot_iris_3d(
    points,
    points[:, 0],
    points[:, 1],
    points[:, 2],
    title="Dynamic 3d visualization with t-SNE",
)