<a href="https://colab.research.google.com/github/ananya253/Python/blob/main/classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Libraries

In [26]:
import pandas as pd
from google.colab import drive
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

from sklearn.neighbors import KNeighborsClassifier #for KNN

from sklearn.naive_bayes import GaussianNB # for Naive Bayes
from sklearn.svm import SVC #for SVM classifier

from sklearn.tree import DecisionTreeClassifier #for Decision Tree
from sklearn.ensemble import RandomForestClassifier #for Random Forest



# Loading the dataset from google drive

In [11]:
#mounting the drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
df_iris = pd.read_csv('/content/drive/MyDrive/Data Science/Data/iris.csv')
df_iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


# Encoding

In [13]:
encoder = LabelEncoder()
df_iris['species'] = encoder.fit_transform(df_iris['species'])
df_iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


# Model Building

In [14]:
# split data into train and test
X = df_iris.drop('species', axis=1)
y = df_iris['species']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

##Logistic Regression

In [20]:
# building logistic regression model

log_reg_model = LogisticRegression()
log_reg_model.fit(X_train, y_train)  #training the model

y_pred = log_reg_model.predict(X_test)  #predicting the test data

# evaluate the model

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
confusion = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Confusion Matrix:\n", confusion)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0 11  0]
 [ 0  0  9]]


In [21]:
# prediction using threshold

y_pred_threshold = log_reg_model.predict_proba(X_test)[:, 1]

# for class 1
y_pred_threshold = (y_pred_threshold > 0.5).astype(int)

#evaluate the model

accuracy_thresh = accuracy_score(y_test, y_pred_threshold)
precision_thresh = precision_score(y_test, y_pred_threshold, average='weighted')
recall_thresh = recall_score(y_test, y_pred_threshold, average='weighted')
confusion_thresh = confusion_matrix(y_test, y_pred_threshold)

print('Threshold value used: 0.33')
print("Accuracy: ", accuracy_thresh)
print("Precision: ", precision_thresh)
print("Recall: ", recall_thresh)
print("Confusion Matrix:\n", confusion_thresh)

Threshold value used: 0.33
Accuracy:  0.7
Precision:  0.5421052631578948
Recall:  0.7
Confusion Matrix:
 [[10  0  0]
 [ 0 11  0]
 [ 9  0  0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


##KNN - K nearest neighbour

In [22]:
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train, y_train)  # training the model

y_pred_knn = knn_model.predict(X_test) #prediction

# evaluate the model

accuracy_knn = accuracy_score(y_test, y_pred_knn)
precision_knn = precision_score(y_test, y_pred_knn, average='weighted')
recall_knn = recall_score(y_test, y_pred_knn, average='weighted')
confusion_knn = confusion_matrix(y_test, y_pred_knn)

print("Accuracy:", accuracy_knn)
print("Precision:", precision_knn)
print("Recall:", recall_knn)
print("Confusion Matrix:\n", confusion_knn)

Accuracy: 0.9666666666666667
Precision: 0.9700000000000001
Recall: 0.9666666666666667
Confusion Matrix:
 [[10  0  0]
 [ 0 10  1]
 [ 0  0  9]]


#Naive Bayes Classification


In [23]:
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)  #training the model

y_pred_nb = nb_model.predict(X_test) #prediction

#evaluate the model

accuracy_nb = accuracy_score(y_test, y_pred_nb)
precision_nb = precision_score(y_test, y_pred_nb, average='weighted')
recall_nb = recall_score(y_test, y_pred_nb, average='weighted')
confusion_nb = confusion_matrix(y_test, y_pred_nb)

print("Accuracy:", accuracy_nb)
print("Precision:", precision_nb)
print("Recall:", recall_nb)
print("Confusion Matrix:\n", confusion_nb)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0 11  0]
 [ 0  0  9]]


# SVM  Classifier

In [24]:

svm_model = SVC()
svm_model.fit(X_train, y_train)  #training the model

y_pred_svm = svm_model.predict(X_test) #prediction

# evaluate the model

accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm, average='weighted')
recall_svm = recall_score(y_test, y_pred_svm, average='weighted')
confusion_svm = confusion_matrix(y_test, y_pred_svm)

print("Accuracy:", accuracy_svm)
print("Precision:", precision_svm)
print("Recall:", recall_svm)
print("Confusion Matrix:\n", confusion_svm)

Accuracy: 0.9666666666666667
Precision: 0.9700000000000001
Recall: 0.9666666666666667
Confusion Matrix:
 [[10  0  0]
 [ 0 10  1]
 [ 0  0  9]]


#Decision Tree

In [29]:
decision_tree_model = DecisionTreeClassifier(criterion='gini')
decision_tree_model.fit(X_train, y_train)  #training the model

y_pred_dt = decision_tree_model.predict(X_test) #prediction

#evaluate the model

accuracy_dt = accuracy_score(y_test, y_pred_dt)
precision_dt = precision_score(y_test, y_pred_dt, average='weighted')
recall_dt = recall_score(y_test, y_pred_dt, average='weighted')
confusion_dt = confusion_matrix(y_test, y_pred_dt)

print("Accuracy:", accuracy_dt)
print("Precision:", precision_dt)
print("Recall:", recall_dt)
print("Confusion Matrix:\n", confusion_dt)


Accuracy: 1.0
Precision: 1.0
Recall: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0 11  0]
 [ 0  0  9]]


#Random Forest

In [28]:
random_forest_model = RandomForestClassifier()
random_forest_model.fit(X_train, y_train)  #training the model

y_pred_rf = random_forest_model.predict(X_test) #prediction

#evaluate the model

accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf, average='weighted')
recall_rf = recall_score(y_test, y_pred_rf, average='weighted')
confusion_rf = confusion_matrix(y_test, y_pred_rf)

print("Accuracy:", accuracy_rf)
print("Precision:", precision_rf)
print("Recall:", recall_rf)
print("Confusion Matrix:\n", confusion_rf)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0 11  0]
 [ 0  0  9]]


#Comparison of Models

In [30]:
#building a dataframe
model_comparison = pd.DataFrame({
    'Model': ['Logistic Regression', 'KNN', 'Naive Bayes', 'SVM', 'Decision Tree', 'Random Forest'],
    'Accuracy': [accuracy, accuracy_knn, accuracy_nb, accuracy_svm, accuracy_dt, accuracy_rf],
    'Precision': [precision, precision_knn, precision_nb, precision_svm, precision_dt, precision_rf],
    'Recall': [recall, recall_knn, recall_nb, recall_svm, recall_dt, recall_rf]
})
model_comparison

Unnamed: 0,Model,Accuracy,Precision,Recall
0,Logistic Regression,1.0,1.0,1.0
1,KNN,0.966667,0.97,0.966667
2,Naive Bayes,1.0,1.0,1.0
3,SVM,0.966667,0.97,0.966667
4,Decision Tree,1.0,1.0,1.0
5,Random Forest,1.0,1.0,1.0
