# Iris species classification

In [1]:
import pandas as pd
from sklearn import datasets

iris = datasets.load_iris()

X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = pd.Series(iris.target).map(lambda x: iris.target_names[x])

In [2]:
X.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [3]:
y.head()

0    setosa
1    setosa
2    setosa
3    setosa
4    setosa
dtype: object

We'll train a nearest neighbors classifier, and scale the data beforehand by using a pipeline.

In [4]:
from sklearn import model_selection
from sklearn import neighbors
from sklearn import pipeline
from sklearn import preprocessing

X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, shuffle=True, random_state=42)

model = pipeline.make_pipeline(
    preprocessing.StandardScaler(),
    neighbors.KNeighborsClassifier()
)
model.fit(X_train, y_train)

y_pred = model.predict_proba(X_test)
y_pred = pd.DataFrame(y_pred, columns=model.classes_)

y_pred.head()

Unnamed: 0,setosa,versicolor,virginica
0,0.0,0.8,0.2
1,1.0,0.0,0.0
2,0.0,0.0,1.0
3,0.0,1.0,0.0
4,0.0,1.0,0.0


As we are working on a classification task, we will use a `ClassificationExplainer`.

In [5]:
import ethik

explainer = ethik.ClassificationExplainer()

## Prediction plots

One variable, one class.

In [11]:
explainer.plot_bias(
    X_test=X_test['sepal length (cm)'],
    y_pred=y_pred['setosa'],
)

41it [00:00, 1141.36it/s]


Two variables, one class.

In [7]:
explainer.plot_bias(
    X_test=X_test[['sepal length (cm)', 'sepal width (cm)']],
    y_pred=y_pred['setosa']
)

82it [00:00, 1272.86it/s]


One variable, two classes.

In [8]:
explainer.plot_bias(
    X_test=X_test['sepal length (cm)'],
    y_pred=y_pred[['versicolor', 'virginica']]
)

41it [00:00, 1263.23it/s]
41it [00:00, 1259.27it/s]


Two variables, two classes.

In [9]:
explainer.plot_bias(
    X_test=X_test[['sepal length (cm)', 'sepal width (cm)']],
    y_pred=y_pred[['versicolor', 'virginica']]
)

82it [00:00, 1144.18it/s]
82it [00:00, 1182.72it/s]


All the variables, all the classes.

In [13]:
explainer.plot_bias(
    X_test=X_test,
    y_pred=y_pred,
    size=(None, 1000)
)

164it [00:00, 1243.36it/s]
164it [00:00, 991.03it/s] 
164it [00:00, 1195.98it/s]
