# Machine Learning - Classification Algorithms

## Import Libraries

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import svm
from sklearn import datasets
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

## Import Data

In [None]:
# Load the iris dataset shipped with scikit-learn and pull out the pieces
# the rest of the notebook refers to by name.
iris = datasets.load_iris()
X, y = iris.data, iris.target
feature_names = iris.feature_names
target_names = names = iris.target_names

# Per-class plotting style; the i-th colour/marker is paired with class i.
n_class = len(set(y))
colors = ['b', 'g', 'r']
symbols = ['o', '^', '*']

# Tabular view of the same data: numeric label in 'Class', readable label
# in 'species', and shorter hyphenated names for the four feature columns.
df = pd.DataFrame(X, columns=iris.feature_names)
df['Class'] = iris.target
df['species'] = df['Class'].replace({0: 'setosa', 1: 'versicolor', 2: 'virginica'})
df = df.rename(
    columns={
        'sepal length (cm)': 'sepal-length',
        'sepal width (cm)': 'sepal-width',
        'petal length (cm)': 'petal-length',
        'petal width (cm)': 'petal-width',
    }
)
df.head()

In [None]:
# Scatter the first two feature columns against each other, coloured by class label.
plt.scatter(X[:, 0], X[:, 1], c=df['Class'])

In [None]:
def plot_decision_boundary(X, y, clf, title=None, step=0.01):
    """Plot the decision regions of a classifier over two features.

    The classifier is evaluated on a dense grid that extends one unit
    beyond the range of each column of ``X``; the predicted classes are
    drawn as filled contours and the samples in ``X`` are overlaid, one
    colour/marker per class, with a legend naming each class.

    Relies on the notebook-level globals ``n_class``, ``colors``,
    ``symbols`` and ``target_names`` for the per-class styling.

    Parameters
    ----------
    X : array with two columns
        Samples to overlay; column 0 is drawn on the x axis and column 1
        on the y axis.
    y : array
        Integer class label of each row of ``X``.
    clf : fitted classifier
        Must provide ``predict`` and accept two-column input.
    title : str, optional
        Figure title; omitted when ``None``.
    step : float, optional
        Grid spacing used to sample the decision function. Smaller values
        give smoother boundaries at the cost of more ``predict`` calls.
    """
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))

    # Predict a class for every grid point, then fold the flat result back
    # into the grid's shape so it can be contoured.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.figure(figsize=(10, 8))
    plt.contourf(xx, yy, Z, alpha=0.4)

    for i, c, s in zip(range(n_class), colors, symbols):
        ix = y == i
        plt.scatter(X[:, 0][ix], X[:, 1][ix],
                    color=c, marker=s, s=60,
                    label=target_names[i])

    if title is not None:
        plt.title(title)

    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    # The scatter calls above set labels; without a legend they are never shown.
    plt.legend()
    plt.show()

## Model Training

In [None]:
# Keep only the first two feature columns so decision regions can be drawn in 2-D.
X = X[:, 0:2]

# Hold out 20% of the samples for testing. The fixed seed makes the split
# (and therefore every metric printed below) reproducible between runs, and
# stratifying on y keeps the class proportions the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42, stratify=y
)

# Learn the scaling from the training split only, then apply the same
# transform to the test split so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### KNN Classifier

In [None]:
# Fit a 5-nearest-neighbours classifier on the training split
# (fit() returns the estimator itself, so it can be chained).
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_predict = knn.predict(X_test)

# Confusion matrix first, then the per-class precision/recall report.
print(
    confusion_matrix(y_test, y_predict),
    classification_report(y_test, y_predict),
    sep='\n',
)

In [None]:
# Title the figure so it can be told apart from the other classifiers' plots.
plot_decision_boundary(X_train[:, 0:2], y_train, clf=knn, title='K-Nearest Neighbors (k=5)')

### Gaussian Naive Bayes Classifier

In [None]:
# Gaussian Naive Bayes with default settings; fit() hands back the same
# estimator, so `model` and `gnb` refer to one fitted object.
gnb = GaussianNB()
model = gnb.fit(X_train, y_train)
y_predict = model.predict(X_test)

# Confusion matrix first, then the per-class precision/recall report.
print(
    confusion_matrix(y_test, y_predict),
    classification_report(y_test, y_predict),
    sep='\n',
)

In [None]:
# Title the figure so it can be told apart from the other classifiers' plots.
plot_decision_boundary(X_train[:, 0:2], y_train, clf=gnb, title='Gaussian Naive Bayes')

### Random Forest Classifier

In [None]:
# Random forests bootstrap samples and subsample features at random; fixing
# the seed makes the fitted model and the metrics below reproducible.
rfc = RandomForestClassifier(random_state=42)

model = rfc.fit(X_train, y_train)

y_predict = rfc.predict(X_test)

# Confusion matrix first, then the per-class precision/recall report.
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))

In [None]:
# Title the figure so it can be told apart from the other classifiers' plots.
plot_decision_boundary(X_train[:, 0:2], y_train, clf=rfc, title='Random Forest')

### Support Vector Machine Classifier

In [None]:
svm = svm.SVC(kernel = 'linear')

model = svm.fit(X_train, y_train)

y_predict = svm.predict(X_test)

print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))

In [None]:
plot_decision_boundary(X_train[:, 0:2], y_train, clf=svm)

### Gradient Boosting Classifier

In [None]:
# Fix the seed so the fitted ensemble and the metrics below are
# reproducible from one run of the notebook to the next.
gbc = GradientBoostingClassifier(random_state=42)

model = gbc.fit(X_train, y_train)

y_predict = gbc.predict(X_test)

# Confusion matrix first, then the per-class precision/recall report.
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))

In [None]:
# Title the figure so it can be told apart from the other classifiers' plots.
plot_decision_boundary(X_train[:, 0:2], y_train, clf=gbc, title='Gradient Boosting')