## MICMoR Summer School 2019
Prof. Dr.-Ing. Alexandra Teynor

06.09.2019

### Short Demos for Lecture "Introduction to Machine Learning"

First, all necessary packages need to be loaded:

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap
from sklearn import datasets, neighbors
from sklearn.model_selection import train_test_split

### The famous Iris dataset

In [None]:
# Load the Iris dataset that ships with scikit-learn; the returned object is
# used below through its .data, .target, .target_names and .DESCR attributes.
iris = datasets.load_iris()

Show information about the dataset (uncomment one line and execute the cell to see its content)

In [None]:
#iris.DESCR
#iris.data
#iris.target
#iris.target_names

#### Prepare data: 
For better visualization, we only take two features per flower: either (sepal_length, sepal_width) or (petal_length, petal_width)

In [None]:
# Keep only two of the four features so the data can be drawn in a 2-D plot.
# The columns are always sliced from a freshly loaded copy of the dataset, so
# this cell can be re-run, or switched to the other feature pair, without
# slicing an array that has already been reduced to two columns.
iris.data = datasets.load_iris().data[:, :2]  # (sepal_length, sepal_width)
# or:
#iris.data = datasets.load_iris().data[:, 2:]  # (petal_length, petal_width)

# show data
iris.data

#### Visualize dataset:

In [None]:
# define colors (a bold and a light tone) for each class
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ['#FF0000', '#00FF00', '#0000FF']

# scatterplot: one point cloud per class. Samples are selected through the
# target vector (not through hard-coded row ranges) and labelled with the
# dataset's own class names, so the legend cannot drift from the data.
for class_idx, class_name in enumerate(iris.target_names):
    class_mask = iris.target == class_idx
    plt.scatter(iris.data[class_mask, 0], iris.data[class_mask, 1],
                c=cmap_bold[class_idx], label=str(class_name))
plt.xlabel('Sepal/Petal length')
plt.ylabel('Sepal/Petal width')
plt.legend()

plt.title("Iris Dataset")
plt.show()

#### Split data into training and test datasets:


In [None]:
# Hold out 20 % of the samples for testing; the fixed random_state keeps
# the split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=0,
)

## Classification

#### Create a classifier : Nearest Neighbour

In the first case, we use a nearest-neighbour classifier:

In [None]:
# number of neighbors to be considered
n_neighbors = 1

# nearest-neighbour classifier using that many neighbors
knnClass = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)


#### Train classifier

In [None]:
# Fit the classifier on the training samples and their class labels
knnClass.fit(X_train, y_train) 


#### Test classifier performance

In [None]:
# Classify the held-out test points only; the predicted labels are printed,
# no comparison with the expected labels happens here
classResult = knnClass.predict(X_test) 
print(classResult)

In [None]:
# Classify potential new data (new flowers found...): one row per flower,
# each row holding two feature values like the training samples
measurementsOfNewFlowersFound = np.array([
    [4.8, 3.4],
    [5.3, 3.2],
])
classResult = knnClass.predict(measurementsOfNewFlowersFound)
print(classResult)

In [None]:
# Classification and performance evaluation in a single step;
# the expected labels (y_test) are therefore needed as well
knnClass.score(X_test, y_test) 

#### Visualize classification boundaries

Idea:
 * generate a dense mesh of points to be classified
 * color each point according to the classification result
    

In [None]:
# Build the dense mesh of points to be classified (coordinates end up in xx, yy).
# It spans the value range of both features plus a margin of 0.5 on every side.
h = .02  # step size in the mesh
x_min = iris.data[:, 0].min() - .5
x_max = iris.data[:, 0].max() + .5
y_min = iris.data[:, 1].min() - .5
y_max = iris.data[:, 1].max() + .5
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, h),
    np.arange(y_min, y_max, h),
)


In [None]:
# classify every mesh point, then bring the labels back into the mesh's 2-D shape
classResult = knnClass.predict(np.c_[xx.ravel(), yy.ravel()])
classResult = classResult.reshape(xx.shape)

# plot the dense color mesh (light tones = predicted class of each mesh point)
plt.pcolormesh(xx, yy, classResult, cmap=cmap_light)

# plot the original data on top (bold tones): one point cloud per class,
# selected through the target vector and labelled with the dataset's class names
for class_idx, class_name in enumerate(iris.target_names):
    class_mask = iris.target == class_idx
    plt.scatter(iris.data[class_mask, 0], iris.data[class_mask, 1],
                c=cmap_bold[class_idx], label=str(class_name))
plt.xlabel('Sepal/Petal length')
plt.ylabel('Sepal/Petal width')
plt.legend()

plt.title("KNN-Classifier with k = %i" % n_neighbors)
plt.show()


### Linear classification
=> Just for demonstration purposes; for "real" linear classification, use logistic regression or support vector machines...



#### Prepare data: binary classification problem 

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=0
)

# Transform the data to a binary classification problem by relabelling
# y_train in place: class 0 becomes -1, classes 1 and 2 are merged into +1.
# The mask is computed once, before any label is overwritten.
is_class0 = y_train == 0
y_train[is_class0] = -1
y_train[~is_class0] = 1

#### Create classifier

Import, define and train the classifier 

In [None]:
# import
from sklearn import linear_model
# define: a plain linear regression model, used here as a linear classifier
regr = linear_model.LinearRegression()

# train: regress the -1/+1 labels on the two features
regr.fit(X_train, y_train)

#### Visualize the result

In [None]:
# Classify every mesh point: the signum of the regression output is the class (-1 or +1)
classResult = np.sign(regr.predict(np.c_[xx.ravel(), yy.ravel()]))
classResult = classResult.reshape(xx.shape)

plt.pcolormesh(xx, yy, classResult, cmap=cmap_light)

# Training points of both classes, selected with boolean masks so that plain
# 1-D coordinate arrays are handed to scatter.
positive = y_train == 1
plt.scatter(X_train[positive, 0], X_train[positive, 1],
            c=cmap_bold[2], label="versicolor / virginica (+1)")

negative = y_train == -1
plt.scatter(X_train[negative, 0], X_train[negative, 1],
            c=cmap_bold[0], label="setosa (-1)")

plt.xlabel('Sepal/Petal length')
plt.ylabel('Sepal/Petal width')
plt.legend()

plt.title("Linear classifier (sign of linear regression)")
plt.show()

### Bayes Classifier


#### Create classifier
Import, define and train the classifier

In [None]:
# import
from sklearn.naive_bayes import GaussianNB

# fresh split, so that y_train holds the original three class labels again
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=0,
)

# define
gnb = GaussianNB()

# train
gnb.fit(X_train, y_train)

# evaluate
gnb.score(X_test, y_test)

#### Visualize classification boundary

In [None]:
# classify every point of the dense mesh and restore the 2-D mesh layout
mesh_points = np.c_[xx.ravel(), yy.ravel()]
classResult = gnb.predict(mesh_points).reshape(xx.shape)


In [None]:
# decision regions (light tones = predicted class of each mesh point)
plt.pcolormesh(xx, yy, classResult, cmap=cmap_light)

# training points (bold tones): one point cloud per class, selected with a
# boolean mask on the training labels and labelled with the dataset's class names
for class_idx, class_name in enumerate(iris.target_names):
    class_mask = y_train == class_idx
    plt.scatter(X_train[class_mask, 0], X_train[class_mask, 1],
                c=cmap_bold[class_idx], label=str(class_name))

plt.xlabel('Sepal/Petal length')
plt.ylabel('Sepal/Petal width')
plt.legend()

plt.title("Gaussian Naive Bayes classifier")
plt.show()