#### Loading and Visualizing the Iris Data Set
---

In [None]:
# Import the libraries used throughout the notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Display plots in-notebook
%matplotlib inline

# Larger default figure and font sizes make the plots easier to read.
plt.rcParams.update({
    'figure.figsize': (8, 6),
    'font.size': 14,
})


In [None]:
# Read the iris data into a DataFrame.
# NOTE(review): read_csv treats the first row as the header by default, and later
# cells access columns by name (species, petal_length, petal_width) — confirm that
# iris.data actually contains a header row.
iris = pd.read_csv('iris.data')

In [None]:
# Preview the first five rows.
iris.head(5)

In [None]:
# Distinct species labels present in the data.
iris['species'].unique()

In [None]:
# Number of observations per species.
iris['species'].value_counts()

#### Gather some basic information about the data.

In [None]:
# (rows, columns) — expected: 150 observations, 5 columns (the 4 features & response)
iris.shape

In [None]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
iris.info()

### The Classifier

* What does the rule-based classifier look like?
  * What is a rule?
* How will we build one?
* How will we measure its quality?

##### EDA on the data set to see if we think species depends on any of the four numeric attributes

In [None]:
# Summary statistics of the numeric columns, computed separately for each species.
iris.groupby('species').describe()

In [None]:
# Box plot of petal_length, grouped by species.
iris.boxplot(column='petal_length', by='species');

In [None]:
# Enlarge the default figure, then draw one box-plot panel per numeric column,
# each split by species.
plt.rcParams.update({'figure.figsize': (16, 10)})
iris.boxplot(rot=45, by='species');

In [None]:
# Encode each species label as an integer so that plots can be colored by species.
iris['species_num'] = iris['species'].map({
    'Iris-setosa': 0,
    'Iris-versicolor': 1,
    'Iris-virginica': 2,
})

In [None]:
# Derived feature: element-wise product of petal length and petal width,
# stored both as a standalone Series and as a new column.
prod = iris['petal_length'] * iris['petal_width']
iris['prod'] = prod

In [None]:
# Box plot of the petal_length * petal_width product, grouped by species.
iris.boxplot(column='prod', by='species');

In [None]:
# Restore the smaller default figure, then plot petal_length against petal_width
# with each point colored by its numeric species code.
plt.rcParams.update({'figure.figsize': (8, 6)})
iris.plot.scatter(x='petal_length', y='petal_width', c='species_num', colormap='brg');

In [None]:
# Pairwise scatter plots of every feature, colored by species.
# The numeric species code is dropped from the plotted columns and used only for color.
pd.plotting.scatter_matrix(
    iris.drop(columns='species_num'),
    c=iris['species_num'],
    figsize=(12, 10),
);