# Exploring the scikit-learn iris dataset

In [101]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt

# Load the iris dataset that ships with scikit-learn.
# The returned object exposes the attributes used in the cells below:
# .data, .target, .feature_names and .target_names
from sklearn import datasets
iris = datasets.load_iris()

In [102]:
# Feature matrix (one row per flower) and the class label belonging to each row
X, y = iris.data, iris.target

In [103]:
# Names of the four features, in the same order as the columns of X
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [104]:
# Names of the target classes (the iris species)
iris.target_names

array(['setosa', 'versicolor', 'virginica'], 
      dtype='<U10')

## Create a scatter plot

In [110]:
# Work with an explicit figure/axes pair rather than pyplot's implicit "current axes"
fig, ax = plt.subplots()

# Sepal length (column 0) against sepal width (column 1), one marker per flower
ax.scatter(X[:, 0], X[:, 1],
           s=100,             # marker size
           c=y,               # colour each point by its class label
           cmap=plt.cm.Set1)  # 'Set1' colourmap

# Label the axes.
# The trailing semicolon stops Jupyter echoing the return value of the last
# call, so the cell shows only the graph.
ax.set_xlabel('Sepal length')
ax.set_ylabel('Sepal width');

<IPython.core.display.Javascript object>

## Plotting a scatter matrix using Pandas

In [118]:
import pandas as pd

# Wrap the feature matrix in a DataFrame so that every column carries its
# feature name; scatter_matrix uses the column names to label the subplots.
X_panda = pd.DataFrame(data=X, columns=iris.feature_names)

# pd.scatter_matrix was deprecated in pandas 0.20 and has since been removed;
# the function now lives at pd.plotting.scatter_matrix.
# The result is assigned so that the returned array of axes is not echoed
# as the cell's output.
scatter = pd.plotting.scatter_matrix(
    X_panda,
    c=y,                     # 1-D array of class labels: one colour value per row
    marker='o',
    s=40,
    hist_kwds={'bins': 10},  # histograms on the diagonal use 10 bins
    figsize=(10, 10),
    cmap=plt.cm.Set1,        # same colourmap as the 2-D scatter plot
)

<IPython.core.display.Javascript object>

## A 3D plot of the first 3 features 

In [119]:
# Importing Axes3D registers the '3d' projection on older matplotlib releases;
# the name itself is not referenced again in this cell.
from mpl_toolkits.mplot3d import Axes3D

# Create the figure
fig = plt.figure()

# The '111' here means create a 1 x 1 grid of subplots and choose the first one
# i.e. we want just one subplot
ax = fig.add_subplot(111, projection='3d')

# One marker per flower, positioned by the first three feature columns of X
ax.scatter(X[:, 0],
           X[:, 1],
           X[:, 2],
           c=y,               # colour each point by its class label
           cmap=plt.cm.Set1,  # same colourmap as the 2-D plots, so colours match
           marker='o',
           s=100)

# Label each axis with the name of the feature plotted along it
ax.set_xlabel(iris.feature_names[0])
ax.set_ylabel(iris.feature_names[1])
ax.set_zlabel(iris.feature_names[2])

plt.show()

<IPython.core.display.Javascript object>

## References

http://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html

https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.scatter.html

https://matplotlib.org/examples/color/colormaps_reference.html

https://stackoverflow.com/questions/3584805/in-matplotlib-what-does-the-argument-mean-in-fig-add-subplot111

https://pandas.pydata.org/pandas-docs/stable/visualization.html#scatter-matrix-plot
