# This notebook exemplifies the usage of Jupyter notebooks and shows an interesting real-life consideration when implementing <em>Machine Learning</em> models

In [1]:
from sklearn.datasets import load_iris

In [2]:
# Load the Iris dataset: a set of measurements used to predict which kind of Iris flower was sampled.
iris = load_iris()

In [3]:
# Show the raw measurement matrix (one row per sample, one column per feature).
# print() is called as a function so the cell runs on both Python 2 and Python 3 kernels.
print(iris.data)

[[ 5.1  3.5  1.4  0.2]
 [ 4.9  3.   1.4  0.2]
 [ 4.7  3.2  1.3  0.2]
 [ 4.6  3.1  1.5  0.2]
 [ 5.   3.6  1.4  0.2]
 [ 5.4  3.9  1.7  0.4]
 [ 4.6  3.4  1.4  0.3]
 [ 5.   3.4  1.5  0.2]
 [ 4.4  2.9  1.4  0.2]
 [ 4.9  3.1  1.5  0.1]
 [ 5.4  3.7  1.5  0.2]
 [ 4.8  3.4  1.6  0.2]
 [ 4.8  3.   1.4  0.1]
 [ 4.3  3.   1.1  0.1]
 [ 5.8  4.   1.2  0.2]
 [ 5.7  4.4  1.5  0.4]
 [ 5.4  3.9  1.3  0.4]
 [ 5.1  3.5  1.4  0.3]
 [ 5.7  3.8  1.7  0.3]
 [ 5.1  3.8  1.5  0.3]
 [ 5.4  3.4  1.7  0.2]
 [ 5.1  3.7  1.5  0.4]
 [ 4.6  3.6  1.   0.2]
 [ 5.1  3.3  1.7  0.5]
 [ 4.8  3.4  1.9  0.2]
 [ 5.   3.   1.6  0.2]
 [ 5.   3.4  1.6  0.4]
 [ 5.2  3.5  1.5  0.2]
 [ 5.2  3.4  1.4  0.2]
 [ 4.7  3.2  1.6  0.2]
 [ 4.8  3.1  1.6  0.2]
 [ 5.4  3.4  1.5  0.4]
 [ 5.2  4.1  1.5  0.1]
 [ 5.5  4.2  1.4  0.2]
 [ 4.9  3.1  1.5  0.1]
 [ 5.   3.2  1.2  0.2]
 [ 5.5  3.5  1.3  0.2]
 [ 4.9  3.1  1.5  0.1]
 [ 4.4  3.   1.3  0.2]
 [ 5.1  3.4  1.5  0.2]
 [ 5.   3.5  1.3  0.3]
 [ 4.5  2.3  1.3  0.3]
 [ 4.4  3.2  1.3  0.2]
 [ 5.   3.5

In [4]:
# Each measurement in a row represents one of the following features.
# print() is called as a function so the cell runs on both Python 2 and Python 3 kernels.
print(iris.feature_names)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [5]:
# The modeled data has several samples for each of 3 types of Iris flower.
# iris.target holds one integer class label per sample; iris.target_names is
# indexed by that label to obtain the text name of the flower.
# print() is called as a function so the cell runs on both Python 2 and Python 3 kernels.
print('samples:')
print(iris.target)
print('text identifier for the Flower Iris name:')
print(iris.target_names)

samples:
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
text identifier for the Flower Iris name:
['setosa' 'versicolor' 'virginica']


In [6]:
# Bind the dataset pieces to short names in this scope:
#   X -> the set of measurements
#   y -> the resulting value recorded for each row of measurements
X, y = iris.data, iris.target

### Here's where the magic happens

In [7]:
# Build a k-nearest-neighbours classifier that consults only the single closest neighbour.
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=1)
# print() is called as a function so the cell runs on both Python 2 and Python 3 kernels.
print(knn)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=1, p=2,
           weights='uniform')


In [8]:
# Train the model on the original samples.
# X holds the measurements and y the label recorded for each row of X;
# these labels are the training targets, not predictions.
knn.fit(X,y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=1, p=2,
           weights='uniform')

In [9]:
# Ask the model to classify an arbitrary, made-up set of measurements.
import numpy as np
# The nested list yields a single-row 2-D array: one row per sample to predict.
knn.predict(np.array([[3, 5, 4, 2]]))

array([2])

In [10]:
# A second made-up sample, again passed as a single-row 2-D array.
knn.predict(np.array([[5, 4, 3, 2]]))

array([1])

## So, this tells us that the first sample was predicted to be a "virginica" and the second a "versicolor"

### Let's see what happens now with another ML approach

In [11]:
# Build a logistic regression classifier with its default settings.
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression()
# print() is called as a function so the cell runs on both Python 2 and Python 3 kernels.
print(logreg)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)


In [12]:
# Train the model on the original samples.
# X holds the measurements and y the label recorded for each row of X;
# these labels are the training targets, not predictions.
logreg.fit(X,y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [13]:
# Classify the first made-up sample with logistic regression (single-row 2-D array).
logreg.predict(np.array([[3, 5, 4, 2]]))

array([2])

In [14]:
# Classify the second made-up sample with logistic regression (single-row 2-D array).
logreg.predict(np.array([[5, 4, 3, 2]]))

array([0])

<h1 style="color: orange"> Aha! For the first sample both models predicted a "virginica", but for the second sample logistic regression predicted a "setosa" while knn told us it was a "versicolor"...</h1>

In [15]:
# Several samples can be classified in a single call: pass one row per sample.
# The nested list already produces a 2-row, 4-column array, so no reshape is needed,
# and building it once guarantees both models see exactly the same input.
new_samples = np.array([[3, 5, 4, 2], [5, 4, 3, 2]])

# Each predict() result holds one integer label per input row; indexing
# iris.target_names with a label gives the flower's text name.
# print() is called as a function so the cell runs on both Python 2 and Python 3 kernels.
_knn = knn.predict(new_samples)
print('knn says the given samples are: %s %s' % (iris.target_names[_knn[0]], iris.target_names[_knn[1]]))

_logreg = logreg.predict(new_samples)
print('logistic regression says the given samples are: %s %s' % (iris.target_names[_logreg[0]], iris.target_names[_logreg[1]]))


knn says the given samples are: virginica versicolor
logistic regression says the given samples are: virginica setosa
