# Classification algorithms
1. k-nearest neighbors (k-NN)
2. Naive Bayes classifier (NB)

# 0. Load iris dataset
- Fisher's iris dataset, one of the most famous benchmark datasets
- Wikipedia: https://en.wikipedia.org/wiki/Iris_flower_data_set
- UCI data repository: https://archive.ics.uci.edu/ml/datasets/Iris

In [None]:
# Read the iris data into a DataFrame.
import pandas as pd
# Fetch over HTTPS rather than plain HTTP (same host and path as before).
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
col_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
# header=None: the file is read as having no header row, so the column
# names are supplied explicitly through `names`.
iris = pd.read_csv(url, header=None, names=col_names)

In [None]:
# Preview the first five rows to check that the columns were read as intended.
iris.head(5)

In [None]:
# Dimensions of the DataFrame as a (rows, columns) tuple.
iris.shape

### Note: seaborn
- Homepage: https://seaborn.pydata.org/
- Seaborn is a Python visualization library based on matplotlib.
- It provides a high-level interface for drawing statistical graphs.
- You can install this package from the conda repository: just run `conda install seaborn`.

In [None]:
%matplotlib inline
import seaborn as sns
sns.pairplot(iris, hue="species", size = 3)

In [None]:
# Divide the data into X and y: X holds every column except the label.
X = iris.drop(labels='species', axis='columns')
# Report the container type and the dimensions, one per line.
print(type(X), X.shape, sep='\n')
X.head(5)

In [None]:
# The target vector is the species column on its own.
y = iris['species']
# Report the container type and the dimensions, one per line.
print(type(y), y.shape, sep='\n')
y.head(5)

In [None]:
# Split the data into training and test sets; random_state fixes the
# shuffle so the same split is produced on every run.
# `sklearn.cross_validation` was removed in scikit-learn 0.20;
# `train_test_split` now lives in `sklearn.model_selection` (since 0.18).
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

## 1. k-nearest neighbors 

In [None]:
# Make an instance of a k-NN classifier object.
# n_neighbors=1: each prediction uses only the single nearest training sample.
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=1)
# Last expression in the cell, so the notebook displays the estimator's class.
type(knn)

In [None]:
# Print the estimator's text representation to inspect how it is configured.
print(knn)

In [None]:
# Fit the classifier on the training features and labels.
knn.fit(X_train, y_train)

In [None]:
# Predict a label for every row of the held-out test features.
y_pred = knn.predict(X_test)

In [None]:
# Calculate classification accuracy (fraction of test labels predicted
# correctly) and the confusion matrix (rows: true class, columns: predicted class).
from sklearn import metrics
accuracy = metrics.accuracy_score(y_test, y_pred)
cm = metrics.confusion_matrix(y_test, y_pred)

In [None]:
# Show the accuracy followed by the confusion matrix, one per line.
print(accuracy, cm, sep='\n')

## 2. Naive Bayes classifier

In [None]:
# Because all variables in the iris dataset are numerical, we use Gaussian naive Bayes.
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
# Fit on the training split, then predict labels for the test features.
gnb.fit(X_train, y_train)
# Stored under a new name so the k-NN predictions in y_pred are not overwritten.
y_pred_2 = gnb.predict(X_test)

In [None]:
# Evaluate the naive Bayes predictions with accuracy and a confusion matrix.
# NOTE: this rebinds `accuracy` and `cm`, replacing any values computed earlier.
accuracy = metrics.accuracy_score(y_test, y_pred_2)
cm = metrics.confusion_matrix(y_test, y_pred_2)

In [None]:
# Show the accuracy followed by the confusion matrix, one per line.
print(accuracy, cm, sep='\n')