In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Attach Google Drive to the Colab filesystem so the dataset can be read by path.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Location of the iris CSV on the mounted Drive.
filepath = '/content/drive/MyDrive/ICT/Data/iris.csv'

# Read the CSV into a DataFrame, then preview its first three rows.
df = pd.read_csv(filepath_or_buffer=filepath)
df.head(n=3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa


In [None]:
# The iris data is treated as already preprocessed, so no cleaning happens here;
# the only step is replacing the species names with integer codes.
encoder = LabelEncoder()
encoder.fit(df['species'])
df['species'] = encoder.transform(df['species'])

# Preview the first three rows to confirm the encoded column.
df.head(n=3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0


Model Building

In [None]:
# Separate the feature columns from the target column, then hold out 20% of
# the rows as a test set.
x = df.drop(columns='species')
y = df['species']
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=40,  # fixed seed keeps the split identical across re-runs
)


In [None]:
# Fit a logistic regression classifier (default settings) on the training split.
log_reg_model = LogisticRegression().fit(x_train, y_train)

# Predicted class labels for the held-out rows.
y_pred = log_reg_model.predict(x_test)

# Score the predictions. Precision and recall are averaged over the classes,
# weighted by how many true samples each class has.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
precision = precision_score(y_true=y_test, y_pred=y_pred, average='weighted')
recall = recall_score(y_true=y_test, y_pred=y_pred, average='weighted')
confusion_matrix_lr = confusion_matrix(y_true=y_test, y_pred=y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("Confusion Matrix:\n", confusion_matrix_lr)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
Confusion Matrix:
 [[ 8  0  0]
 [ 0 12  0]
 [ 0  0 10]]


In [None]:
# Threshold-based prediction, evaluated as a one-vs-rest problem.
#
# The target has three classes, so thresholding a single probability column can
# only answer a yes/no question: "does this sample belong to TARGET_CLASS?".
# Scoring those 0/1 flags against the original three-class labels would read
# flag 0 as class 0 and make class 2 impossible to predict, so the ground truth
# is binarised the same way before any metric is computed.
TARGET_CLASS = 1   # encoded label whose probability is thresholded
THRESHOLD = 0.33   # minimum probability needed to flag a sample as TARGET_CLASS

# Find the probability column through classes_ instead of assuming that the
# column position equals the encoded label.
target_col = list(log_reg_model.classes_).index(TARGET_CLASS)
target_proba = log_reg_model.predict_proba(x_test)[:, target_col]

# 1 = flagged as TARGET_CLASS, 0 = any other class.
y_pred_threshold = (target_proba > THRESHOLD).astype('int')
y_test_binary = (y_test == TARGET_CLASS).astype('int')

# Evaluate the binary decision. zero_division=0 keeps the score at 0 (without a
# warning) if the threshold happens to flag no samples at all.
accuracy_thresh = accuracy_score(y_test_binary, y_pred_threshold)
precision_thresh = precision_score(y_test_binary, y_pred_threshold, average='binary', zero_division=0)
recall_thresh = recall_score(y_test_binary, y_pred_threshold, average='binary', zero_division=0)
# labels=[0, 1] guarantees a 2x2 matrix even if one outcome never occurs.
confusion_matrix_thresh = confusion_matrix(y_test_binary, y_pred_threshold, labels=[0, 1])

print(f'Threshold value used={THRESHOLD}')
print("Accuracy:",accuracy_thresh)
print("Precision:",precision_thresh)
print("Recall:",recall_thresh)
print("Confusion Matrix:\n",confusion_matrix_thresh)

Threshold value used=0.33
Accuracy: 0.6666666666666666
Precision: 0.47619047619047616
Recall: 0.6666666666666666
Confusion Matrix:
 [[ 8  0  0]
 [ 0 12  0]
 [ 8  2  0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


k-NN (K-Nearest Neighbors)

In [None]:
# Fit a 3-nearest-neighbours classifier on the training split and predict the
# labels of the held-out rows.
knn = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)
y_pred_knn = knn.predict(x_test)

# Score the predictions; precision and recall are support-weighted averages
# over the classes.
accuracy_knn = accuracy_score(y_true=y_test, y_pred=y_pred_knn)
precision_knn = precision_score(y_true=y_test, y_pred=y_pred_knn, average='weighted')
recall_knn = recall_score(y_true=y_test, y_pred=y_pred_knn, average='weighted')
confusion_matrix_knn = confusion_matrix(y_true=y_test, y_pred=y_pred_knn)

print("Accuracy:", accuracy_knn)
print("Precision:", precision_knn)
print("Recall:", recall_knn)
print("Confusion Matrix:\n", confusion_matrix_knn)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
Confusion Matrix:
 [[ 8  0  0]
 [ 0 12  0]
 [ 0  0 10]]


Naive-Bayes classification

In [None]:
# Fit a Gaussian Naive Bayes classifier on the training split and predict the
# labels of the held-out rows.
nb_model = GaussianNB().fit(x_train, y_train)
y_pred_nb = nb_model.predict(x_test)

# Score the predictions; precision and recall are support-weighted averages
# over the classes.
accuracy_nb = accuracy_score(y_true=y_test, y_pred=y_pred_nb)
precision_nb = precision_score(y_true=y_test, y_pred=y_pred_nb, average='weighted')
recall_nb = recall_score(y_true=y_test, y_pred=y_pred_nb, average='weighted')
confusion_matrix_nb = confusion_matrix(y_true=y_test, y_pred=y_pred_nb)

print("Accuracy:", accuracy_nb)
print("Precision:", precision_nb)
print("Recall:", recall_nb)
print("Confusion Matrix:\n", confusion_matrix_nb)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
Confusion Matrix:
 [[ 8  0  0]
 [ 0 12  0]
 [ 0  0 10]]


SVM classifier

In [None]:
# Fit a support vector classifier (default settings) on the training split and
# predict the labels of the held-out rows.
svm_model = SVC().fit(x_train, y_train)
y_pred_svm = svm_model.predict(x_test)

# Score the predictions; precision and recall are support-weighted averages
# over the classes.
accuracy_svm = accuracy_score(y_true=y_test, y_pred=y_pred_svm)
precision_svm = precision_score(y_true=y_test, y_pred=y_pred_svm, average='weighted')
recall_svm = recall_score(y_true=y_test, y_pred=y_pred_svm, average='weighted')
confusion_matrix_svm = confusion_matrix(y_true=y_test, y_pred=y_pred_svm)

print("Accuracy:", accuracy_svm)
print("Precision:", precision_svm)
print("Recall:", recall_svm)
print("Confusion Matrix:\n", confusion_matrix_svm)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
Confusion Matrix:
 [[ 8  0  0]
 [ 0 12  0]
 [ 0  0 10]]


Decision Tree

In [None]:
# Fit a decision tree on the training split and predict the labels of the
# held-out rows. The estimator is randomised internally, so random_state is
# fixed (same seed as the train/test split) to make re-runs build the same tree.
dt = DecisionTreeClassifier(random_state=40)
dt.fit(x_train, y_train)
y_pred_dt = dt.predict(x_test)

# Score the predictions; precision and recall are support-weighted averages
# over the classes.
accuracy_dt = accuracy_score(y_test, y_pred_dt)
precision_dt = precision_score(y_test, y_pred_dt, average='weighted')
recall_dt = recall_score(y_test, y_pred_dt, average='weighted')
confusion_matrix_dt = confusion_matrix(y_test, y_pred_dt)

# Report the decision tree's own metrics (not another model's).
print("Accuracy:", accuracy_dt)
print("Precision:", precision_dt)
print("Recall:", recall_dt)
print("Confusion Matrix:\n", confusion_matrix_dt)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
Confusion Matrix:
 [[ 8  0  0]
 [ 0 12  0]
 [ 0  0 10]]


Random Forest

In [None]:
# Fit a random forest on the training split and predict the labels of the
# held-out rows. The ensemble is built with randomness, so random_state is fixed
# (same seed as the train/test split) to make the reported scores reproducible.
rf = RandomForestClassifier(random_state=40)
rf.fit(x_train, y_train)
y_pred_rf = rf.predict(x_test)

# Score the predictions; precision and recall are support-weighted averages
# over the classes.
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf, average='weighted')
recall_rf = recall_score(y_test, y_pred_rf, average='weighted')
confusion_matrix_rf = confusion_matrix(y_test, y_pred_rf)

print("Accuracy:", accuracy_rf)
print("Precision:", precision_rf)
print("Recall:", recall_rf)
print("Confusion Matrix: \n", confusion_matrix_rf)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
Confusion Matrix: 
 [[ 8  0  0]
 [ 0 12  0]
 [ 0  0 10]]


Comparison of Models

In [None]:
# Collect the test-set metrics of every model trained in this notebook into one
# table, one row per model, so they can be compared side by side.
model_comparison = pd.DataFrame({
    'Model': ['Logistic Regression', 'K-NN', 'Naive Bayes', 'SVM', 'Decision Tree', 'Random Forest'],
    'Accuracy': [accuracy, accuracy_knn, accuracy_nb, accuracy_svm, accuracy_dt, accuracy_rf],
    'Precision': [precision, precision_knn, precision_nb, precision_svm, precision_dt, precision_rf],
    'Recall': [recall, recall_knn, recall_nb, recall_svm, recall_dt, recall_rf]
})
model_comparison

Unnamed: 0,Model,Accuracy,Precision,Recall
0,Logistic Regression,1.0,1.0,1.0
1,K-NN,1.0,1.0,1.0
2,Naive Bayes,1.0,1.0,1.0
3,SVM,1.0,1.0,1.0
4,Decision Tree,1.0,1.0,1.0
