In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import mean_squared_error, confusion_matrix, accuracy_score

## Linear Regression (Single-Variate)

#### Dataset
[Pizza Franchise Dataset](https://raw.githubusercontent.com/ss-is-master-chief/DSoAI-materials/master/Meetup_1/datasets/Pizza.Franchise.csv)
#### Features
* X = annual franchise fee (in thousands of dollars)

* Y = start-up cost (in thousands of dollars)

In [None]:
# Read the pizza franchise CSV; the path is relative to the notebook's working directory.
pizza_df = pd.read_csv("datasets/Pizza.Franchise.csv")
# Preview the first rows of the loaded frame.
pizza_df.head()

In [None]:
from sklearn.linear_model import LinearRegression

# The estimator expects 2-D inputs, so each column becomes an (n_samples, 1) array.
x = np.array(pizza_df['X']).reshape(-1, 1)
y = np.array(pizza_df['Y']).reshape(-1, 1)

# Fit Y against X, then predict on the same X values to draw the fitted line.
linear_r = LinearRegression().fit(x, y)
prediction = linear_r.predict(x)

# Observed points as cyan diamonds, fitted line in magenta.
plt.figure(figsize=(10, 8))
plt.scatter(x, y, marker="D", color="c")
plt.plot(x, prediction, color="m")
plt.xlabel("annual franchise fee ($1000)")
plt.ylabel("start up cost ($1000)")

## Logistic Regression

#### Dataset
[Iris Dataset](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data)

#### Features
* sepal length in cm
* sepal width in cm
* petal length in cm
* petal width in cm
* class: 
    * Iris Setosa
    * Iris Versicolor
    * Iris Virginica

In [None]:
# Download the iris data. Column names are passed explicitly through `names`,
# so pandas reads the first line of the file as data rather than as a header.
iris_df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", 
                      names = ['sepal_l', 'sepal_w', 'petal_l', 'petal_w', 'class'])
# Preview the first rows of the loaded frame.
iris_df.head()

In [None]:
# Show the distinct values present in the 'class' column.
iris_df['class'].unique()

In [None]:
# Integer code assigned to each species label in the 'class' column.
classes = {
    'Iris-setosa': 0,
    'Iris-versicolor': 1,
    'Iris-virginica': 2
}

# Encode the labels and cast to int explicitly instead of relying on
# DataFrame.replace to downcast the object column implicitly: that implicit
# downcast is deprecated in recent pandas, and object-dtype labels are not
# accepted as classification targets by the estimators fitted further down.
# Values that are not keys of `classes` pass through unchanged, so re-running
# this cell on already-encoded data is a no-op; any other unexpected label
# makes astype(int) raise instead of leaking a string into the targets.
class_codes = iris_df['class'].map(lambda label: classes.get(label, label)).astype(int)

# 'class' is a Python keyword, hence the dict-unpacking form of assign().
iris_df = iris_df.assign(**{'class': class_codes})
iris_df.head()

In [None]:
from sklearn.linear_model import LogisticRegression

# Columns 0-3 hold the measurements; column 4 holds the class label.
X = np.array(iris_df.iloc[:, :4])
Y = np.array(iris_df.iloc[:, 4])

logistic_r = LogisticRegression()
logistic_r.fit(X, Y)

# Predictions are made on the same rows the model was fitted on.
iris_class_pred = logistic_r.predict(X)

# Row indices grouped by predicted class code (0, 1, 2).
class_1, class_2, class_3 = (
    np.where(iris_class_pred == code) for code in (0, 1, 2)
)

# Sepal length vs. sepal width, one scatter series per predicted class.
plt.figure(figsize=(10, 8))
plt.title("Logistic Regression")
for members in (class_1, class_2, class_3):
    plt.scatter(X[members, 0], X[members, 1])
plt.xlabel("Sepal Length (cm)")
plt.ylabel("Sepal Width (cm)")
plt.legend(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])

## K Nearest Neighbors

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# 3-nearest-neighbour classifier fitted on the full feature matrix.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X, Y)

# Predictions are made on the same rows the model was fitted on.
knn_prediction = knn.predict(X)

# Row indices grouped by predicted class code (0, 1, 2).
knn_class_1, knn_class_2, knn_class_3 = (
    np.where(knn_prediction == code) for code in (0, 1, 2)
)

# Sepal length vs. sepal width, one scatter series per predicted class.
plt.figure(figsize=(10, 8))
plt.title("K Nearest Neighbors")
for members in (knn_class_1, knn_class_2, knn_class_3):
    plt.scatter(X[members, 0], X[members, 1])
plt.xlabel("Sepal Length (cm)")
plt.ylabel("Sepal Width (cm)")
plt.legend(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])

## Decision Trees

In [None]:
# sklearn.externals.six is no longer shipped with scikit-learn; the standard
# library's StringIO is the drop-in replacement for the in-memory DOT buffer.
from io import StringIO

from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz

from IPython.display import Image
import pydotplus

# A fixed random_state keeps the learned tree (and therefore the rendered
# graph and the accuracy reported later) identical across runs.
tree = DecisionTreeClassifier(random_state=42)
tree.fit(X, Y)

# Export the fitted tree as DOT text into memory, then render it to a PNG
# that the notebook displays inline.
dot_data = StringIO()
export_graphviz(tree, out_file=dot_data,
                filled=True, rounded=True,
                special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
Image(graph.create_png(), width=900, height=550)

In [None]:
# Predictions of the fitted decision tree on the rows it was trained on.
tree_prediction = tree.predict(X)

# Row indices grouped by predicted class code (0, 1, 2).
tree_class_1, tree_class_2, tree_class_3 = (
    np.where(tree_prediction == code) for code in (0, 1, 2)
)

# Sepal length vs. sepal width, one scatter series per predicted class.
plt.figure(figsize=(10, 8))
plt.title("Decision Trees")
for members in (tree_class_1, tree_class_2, tree_class_3):
    plt.scatter(X[members, 0], X[members, 1])
plt.xlabel("Sepal Length (cm)")
plt.ylabel("Sepal Width (cm)")
plt.legend(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])

## Random Forests

In [None]:
from sklearn.ensemble import RandomForestClassifier

# 100 shallow trees (depth <= 3). The forest is built with randomness, so
# random_state is fixed to make the predictions and the accuracy reported
# later reproducible from one run to the next.
random_f = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=42)
random_f.fit(X, Y)

# Predictions are made on the same rows the model was fitted on.
rf_prediction = random_f.predict(X)

# Row indices grouped by predicted class code (0, 1, 2).
rf_class_1 = np.where(rf_prediction == 0)
rf_class_2 = np.where(rf_prediction == 1)
rf_class_3 = np.where(rf_prediction == 2)

# Sepal length vs. sepal width, one scatter series per predicted class.
plt.figure(figsize=(10, 8))
plt.title("Random Forests Classification")
for members in (rf_class_1, rf_class_2, rf_class_3):
    plt.scatter(X[members, 0], X[members, 1])
plt.xlabel("Sepal Length (cm)")
plt.ylabel("Sepal Width (cm)")
plt.legend(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])

## Support Vector Machines (SVMs)

In [None]:
from sklearn import svm

# Support vector classifier fitted on the full feature matrix.
supvec = svm.SVC(gamma='scale')
supvec.fit(X, Y)

# Predictions are made on the same rows the model was fitted on.
svm_predict = supvec.predict(X)

# Row indices grouped by the SVM's predicted class code. These must be
# derived from svm_predict; using another model's predictions here would
# make this plot show that model's output under the SVM title.
svm_class_1 = np.where(svm_predict == 0)
svm_class_2 = np.where(svm_predict == 1)
svm_class_3 = np.where(svm_predict == 2)

# Sepal length vs. sepal width, one scatter series per predicted class.
plt.figure(figsize=(10, 8))
plt.title("Support Vector Machine")
for members in (svm_class_1, svm_class_2, svm_class_3):
    plt.scatter(X[members, 0], X[members, 1])
plt.xlabel("Sepal Length (cm)")
plt.ylabel("Sepal Width (cm)")
plt.legend(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])

## Comparing

1.   Logistic Regression
2.   K Nearest Neighbors
3.   Decision Trees
4.   Random Forests
5.   Support Vector Machines



In [None]:
# One entry per fitted classifier:
# (label used in the accuracy report, title of its panel, fitted estimator).
fitted_models = [
    ("Logistic Regression", "Logistic Regression", logistic_r),
    ("KNN", "K Nearest Neighbors", knn),
    ("Decision Tree", "Decision Trees", tree),
    ("Random Forests", "Random Forests Classification", random_f),
    ("SVM", "Support Vector Machine", supvec),
]
species_names = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

# Accuracy of each model on the data it was fitted on; labels are padded so
# the percentages line up in a single column.
print("Accuracy Scores:")
for report_label, panel_title, model in fitted_models:
    print("> {0:<20} {1:.2f}%".format(report_label + ":", model.score(X, Y) * 100))

# 3x2 grid with one panel per model (the sixth slot stays empty).
plt.figure(figsize=(19, 20))

for panel_no, (report_label, panel_title, model) in enumerate(fitted_models, start=1):
    # Predictions are recomputed from the estimator itself so that each panel
    # is guaranteed to show its own model's output, independent of index
    # variables left behind by earlier cells.
    predicted = model.predict(X)

    plt.subplot(3, 2, panel_no)
    plt.title(panel_title)
    # One scatter series per class code, in the same order as the legend.
    for class_code in range(len(species_names)):
        members = predicted == class_code
        plt.scatter(X[members, 0], X[members, 1])
    plt.xlabel("Sepal Length (cm)")
    plt.ylabel("Sepal Width (cm)")
    plt.legend(species_names)

plt.show()