In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Splitting Data
from sklearn.model_selection import train_test_split

# Modeling
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, recall_score
from sklearn.model_selection import cross_validate

import warnings
warnings.filterwarnings("ignore")

In [2]:
path=("https://archive.ics.uci.edu/ml/machine-learning-databases/iris/bezdekIris.data")

In [3]:
headernamers=["Sepal length(cm)", "sepal width(cm)", "petal length(cm)", "petal width(cm)", "class"]

In [4]:
dataset= pd.read_csv(path, names = headernamers)
dataset

Unnamed: 0,Sepal length(cm),sepal width(cm),petal length(cm),petal width(cm),class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [5]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Sepal length(cm)  150 non-null    float64
 1   sepal width(cm)   150 non-null    float64
 2   petal length(cm)  150 non-null    float64
 3   petal width(cm)   150 non-null    float64
 4   class             150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


# KNN

In [22]:
x = dataset.iloc[:,:-1].values
y = dataset.iloc[:, 4].values
X = scale.fit_transform(x) 

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30)

In [24]:
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [25]:
classifier = KNeighborsClassifier(n_neighbors = 6)
classifier.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=6)

In [26]:
y_pred = classifier.predict(X_test)
score=classifier.score(X_train, y_train)

In [27]:
result = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(result)
result1 = accuracy_score(y_test,y_pred)
print("Accuracy:",result1)
print("Test score: ",score)

Confusion Matrix:
[[15  0  0]
 [ 0 15  1]
 [ 0  1 13]]
Accuracy: 0.9555555555555556
Test score:  0.9428571428571428


# Desion Tree

In [28]:
feature_cols =  ["Sepal length(cm)", "sepal width(cm)", "petal length(cm)", "petal width(cm)"]
output_cols = ["class"]

In [29]:
x = dataset[feature_cols] # Features
y = dataset[output_cols] # Target variable
X = scale.fit_transform(x) 

In [30]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=7)

In [31]:
clf = DecisionTreeClassifier(max_depth = 10, criterion = 'entropy')
clf = clf.fit(X_train,y_train)

In [32]:
y_pred = clf.predict(X_test)
score=classifier.score(X_train, y_train)

In [33]:
result = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(result)
result1 = accuracy_score(y_test,y_pred)
print("Accuracy:",result1)
print("Test score: ",score)

Confusion Matrix:
[[12  0  0]
 [ 0 14  2]
 [ 0  5 12]]
Accuracy: 0.8444444444444444
Test score:  0.9714285714285714


# Cross Validation KNN For Decision Tree

In [34]:
metrics=["precision_micro", "accuracy", "balanced_accuracy"]


In [35]:
import warnings
warnings.filterwarnings("ignore")


best_score=0
best_clf=None
for i in range (10):
    clf = KNeighborsClassifier(n_neighbors = i)
    scores= cross_validate(clf, X_train, y_train, cv =5, scoring = metrics)
    
    scores=scores["test_accuracy"]
    avgscore = sum(scores)/len(scores)
    
    if(scores > avgscore).all:
        best_score = avgscore
        best_clf = clf
        
best_clf.fit(X_train, y_train)

score= best_clf.score(X_test,y_test)
train_score= best_clf.score(X_train,y_train)
print("Test cross validation: " , score)
print("Train cross validation: " , train_score)

Test cross validation:  0.9333333333333333
Train cross validation:  0.9714285714285714


In [36]:
import warnings
warnings.filterwarnings("ignore")


best_score=0
best_clf=None
for i in range (100):
    clf = DecisionTreeClassifier(max_depth = i, criterion = 'entropy')
    scores= cross_validate(clf, X_train, y_train, cv =5, scoring = metrics)
    
    scores=scores["test_accuracy"]
    avgscore = sum(scores)/len(scores)
    
    if(scores > avgscore).all:
        best_score = avgscore
        best_clf = clf
        
best_clf.fit(X_train, y_train)

score= best_clf.score(X_test,y_test)
train_score= best_clf.score(X_train,y_train)
print("Test cross validation: " , score)
print("Train cross validation: " , train_score)

Test cross validation:  0.8666666666666667
Train cross validation:  1.0
