## Include all the Libraries 

In [1]:
#Library to work with Data:
import pandas as pd

#Library to Describe in Graphs (Haven't used it in the tutorial but thought it would be useful to include):
#import matplotlib.pyplot as plt

#Bellow all are the Diffrent Models (All Supervised, Obviously!!)

#Simple Linear Models:
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
#Non-Linear Models:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

#Used When we will be selecting more than on model:
from sklearn import model_selection

#Used for splitting our data into traing and testing sets:
from sklearn import cross_validation

#Used for representing how well did your model do on the validation(Testing data):
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score



## Import Data

In [2]:
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
df = pd.read_csv(url,names=names)

print("Dataset has a shape:")
print(df.shape)

print("First 5 rows:")
df.head()

Dataset has a shape:
(150, 5)
First 5 rows:


Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
print("Statistical Summary:")
df.describe()

Statistical Summary:


Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [4]:
print("Class Distribution:")
df.groupby('class').size()

Class Distribution:


class
Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
dtype: int64

## Spliting our data into Training & Testing set

In [5]:
array = df.values
X = array[:,0:4]
y = array[:,4]
seed = 7
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2, random_state=seed)

## Impoerting all the models

In [6]:
model = []
model.append(('LR', LogisticRegression()))
model.append(('DTC', DecisionTreeClassifier()))
model.append(('KNC', KNeighborsClassifier()))
model.append(('LDA', LinearDiscriminantAnalysis()))
model.append(('GNB', GaussianNB()))
model.append(('SVC', SVC()))

## Evaluting each on our data

In [7]:
results = []
names = []
scoring = 'accuracy'
seed = 7

print("\nAccuracy by each model: \n")

for name, models in model:
    kfold = model_selection.KFold(n_splits=10, random_state=seed)
    cv_results = model_selection.cross_val_score(models, X_train, y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print("\t",msg)


Accuracy by each model: 

	 LR: 0.966667 (0.040825)
	 DTC: 0.966667 (0.040825)
	 KNC: 0.983333 (0.033333)
	 LDA: 0.975000 (0.038188)
	 GNB: 0.975000 (0.053359)
	 SVC: 0.991667 (0.025000)


## Prediction

In [8]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
predict = knn.predict(X_test)
print(accuracy_score(y_test, predict))
print(confusion_matrix(y_test, predict))
print(classification_report(y_test, predict))

0.9
[[ 7  0  0]
 [ 0 11  1]
 [ 0  2  9]]
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00         7
Iris-versicolor       0.85      0.92      0.88        12
 Iris-virginica       0.90      0.82      0.86        11

    avg / total       0.90      0.90      0.90        30

