<a href="https://colab.research.google.com/github/ishambyk/sample01/blob/main/Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## LIBRARIES

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder#1
from sklearn.model_selection import train_test_split#2
from sklearn.linear_model import LogisticRegression #3
from sklearn.neighbors import KNeighborsClassifier #5 for k-nn
from sklearn.naive_bayes import GaussianNB #6 for naive bayes
from sklearn.svm import SVC #7 for svm classifier
from sklearn.tree import DecisionTreeClassifier #8 for decision tree
from sklearn.ensemble import RandomForestClassifier #9 for random forest


from sklearn.metrics import accuracy_score, precision_score, recall_score,confusion_matrix # Classification evaluation metrics

## Loading Data From Drive

In [None]:
# Mount Google Drive so the dataset stored there is reachable under /content/drive.
# NOTE: google.colab is only available inside a Colab runtime.
from google.colab import drive

drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Location of the iris CSV inside the mounted Drive
filepath = "/content/drive/MyDrive/DSA/iris/iris.csv"

# Read the CSV into a DataFrame and preview the first three rows
df_iris = pd.read_csv(filepath_or_buffer=filepath)
df_iris.head(n=3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa


In [None]:
# List the distinct class labels present in the target column
print(df_iris.species.unique())

['setosa' 'versicolor' 'virginica']


In [None]:
# The iris dataset is treated as already preprocessed,
# so the only preparation step is encoding the string labels as integers.
#1
encoder = LabelEncoder().fit(df_iris['species'])  # learn the label -> integer mapping
df_iris['species'] = encoder.transform(df_iris['species'])  # overwrite the column with integer codes
df_iris.head(n=3)


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0


## Model Building

# Preparing the dataset

In [None]:
#2
# Separate features from the target, then hold out 20% of the rows for testing
y = df_iris['species']
X = df_iris.drop('species', axis=1)

# random_state is fixed so the same split is produced on every run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


### Building the Logistic Regression model

In [None]:
#3
# Logistic regression: fit on the training split, then score on the held-out split
log_reg_model = LogisticRegression().fit(X_train, y_train)  # fit() returns the fitted estimator

y_pred_log = log_reg_model.predict(X_test)  # predicted class labels for the test rows

# Evaluation; 'weighted' averages the per-class scores weighted by class support
accuracy_log = accuracy_score(y_true=y_test, y_pred=y_pred_log)
precision_log = precision_score(y_true=y_test, y_pred=y_pred_log, average='weighted')
recall_log = recall_score(y_true=y_test, y_pred=y_pred_log, average='weighted')

print("Accuracy:", accuracy_log)
print("Precision:", precision_log)
print("Recall:", recall_log)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0


In [None]:
#4
# Prediction using a probability threshold (one-vs-rest for a single class).
#
# Thresholding the probability of one class yields a *binary* prediction
# (1 = "is that class", 0 = "is not"). It must therefore be scored against a
# binary version of the ground truth, not against the original 3-class labels;
# comparing 0/1 predictions with labels 0/1/2 gives meaningless metrics and
# triggers an "ill-defined precision" warning because class 2 is never predicted.
threshold = 0.33
positive_class = 1  # encoded label of the class being thresholded

# predict_proba columns follow the order of log_reg_model.classes_,
# so look the column up instead of assuming its position.
positive_col = list(log_reg_model.classes_).index(positive_class)
proba_positive = log_reg_model.predict_proba(X_test)[:, positive_col]

# 1 where the model is confident enough that the row belongs to positive_class
y_pred_threshold = (proba_positive > threshold).astype(int)
# Binary ground truth for the same one-vs-rest question
y_test_binary = (y_test == positive_class).astype(int)

#evaluate the model (binary metrics, positive label = 1)
accuracy_thresh = accuracy_score(y_test_binary, y_pred_threshold)
precision_thresh = precision_score(y_test_binary, y_pred_threshold, zero_division=0)
recall_thresh = recall_score(y_test_binary, y_pred_threshold, zero_division=0)

print(f'threshold value = {threshold}')
print("Accuracy:", accuracy_thresh)
print("Precision:", precision_thresh)
print("Recall:", recall_thresh)

threshold value = 0.33
Accuracy: 0.6333333333333333
Precision: 0.43666666666666665
Recall: 0.6333333333333333


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### k-NN (k-Nearest Neighbours)

In [None]:
#5
# k-nearest neighbours with k=3: fit on the training split
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)  # predicted class labels for the test rows

# Evaluation; 'weighted' averages the per-class scores weighted by class support
accuracy_knn = accuracy_score(y_true=y_test, y_pred=y_pred_knn)
precision_knn = precision_score(y_true=y_test, y_pred=y_pred_knn, average='weighted')
recall_knn = recall_score(y_true=y_test, y_pred=y_pred_knn, average='weighted')

print("Accuracy:", accuracy_knn)
print("Precision:", precision_knn)
print("Recall:", recall_knn)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0


### Naive bayes classification

Uses Bayes' theorem together with the "naive" assumption that the features are conditionally independent given the class.

In [None]:
#6
# Gaussian naive Bayes: fit on the training split
nb_model = GaussianNB().fit(X_train, y_train)
y_pred_nb = nb_model.predict(X_test)  # predicted class labels for the test rows

# Evaluation; 'weighted' averages the per-class scores weighted by class support
accuracy_nb = accuracy_score(y_true=y_test, y_pred=y_pred_nb)
precision_nb = precision_score(y_true=y_test, y_pred=y_pred_nb, average='weighted')
recall_nb = recall_score(y_true=y_test, y_pred=y_pred_nb, average='weighted')

print("Accuracy:", accuracy_nb)
print("Precision:", precision_nb)
print("Recall:", recall_nb)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0


## SVM Classifier

In [None]:
#7
# Support vector classifier with default settings: fit on the training split
svc_model = SVC().fit(X_train, y_train)
y_pred_svc = svc_model.predict(X_test)  # predicted class labels for the test rows

# Evaluation; 'weighted' averages the per-class scores weighted by class support
accuracy_svc = accuracy_score(y_true=y_test, y_pred=y_pred_svc)
precision_svc = precision_score(y_true=y_test, y_pred=y_pred_svc, average='weighted')
recall_svc = recall_score(y_true=y_test, y_pred=y_pred_svc, average='weighted')
# 3x3 matrix: one row/column per species ('setosa', 'versicolor', 'virginica')
confusion_matrix_svc = confusion_matrix(y_true=y_test, y_pred=y_pred_svc)

print("Accuracy:", accuracy_svc)
print("Precision:", precision_svc)
print("Recall:", recall_svc)
print("Confusion Matrix:\n", confusion_matrix_svc)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


# Decision Tree

In [None]:
#8
# Decision tree classifier.
# random_state is fixed (same seed as the train/test split) so that the fitted
# tree, and therefore the reported metrics, are reproducible across
# "Restart & Run All"; without it tie-breaking between equally good splits
# can differ from run to run.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train) #training the model
y_pred_dt = dt_model.predict(X_test) #prediction

# evaluate the model ('weighted' averages per-class scores by class support)
accuracy_dt = accuracy_score(y_test, y_pred_dt)
precision_dt = precision_score(y_test, y_pred_dt, average='weighted')
recall_dt = recall_score(y_test, y_pred_dt, average='weighted')
# 3x3 matrix: one row/column per species ('setosa', 'versicolor', 'virginica')
confusion_matrix_dt = confusion_matrix(y_test, y_pred_dt)


print("Accuracy:", accuracy_dt)
print("Precision:", precision_dt)
print("Recall:", recall_dt)
print("Confusion Matrix:\n", confusion_matrix_dt)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


# Random Forest

In [None]:
#9
# Random forest classifier.
# A random forest is stochastic (bootstrap sampling and random feature
# selection), so random_state is fixed (same seed as the train/test split)
# to make the reported metrics reproducible across "Restart & Run All".
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train) #training the model
y_pred_rf = rf_model.predict(X_test) #prediction

#evaluate the model ('weighted' averages per-class scores by class support)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf, average='weighted')
recall_rf = recall_score(y_test, y_pred_rf, average='weighted')
# 3x3 matrix: one row/column per species ('setosa', 'versicolor', 'virginica')
confusion_matrix_rf = confusion_matrix(y_test, y_pred_rf)

print("Accuracy:", accuracy_rf)
print("Precision:", precision_rf)
print("Recall:", recall_rf)
print("Confusion Matrix:\n", confusion_matrix_rf)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


## Comparison of Models


In [None]:
# Side-by-side comparison of the test-set metrics of every model trained above.
# The mapping is deliberately NOT named `dict`: binding that name would shadow
# the builtin `dict` type for every cell executed afterwards in this kernel.
# The 'Model' column holds the metric names; each remaining column is one model.
model_scores = {
    'Model': ['Accuracy', 'Precision', 'Recall'],
    'Logistic regression': [accuracy_log, precision_log, recall_log],
    'k-NN (k-Nearest Neighbours)': [accuracy_knn, precision_knn, recall_knn],
    'Naive bayes': [accuracy_nb, precision_nb, recall_nb],
    'Svm Classifier': [accuracy_svc, precision_svc, recall_svc],
    'Decision Tree': [accuracy_dt, precision_dt, recall_dt],
    'random forest': [accuracy_rf, precision_rf, recall_rf],
}

df_models = pd.DataFrame(model_scores)
df_models

Unnamed: 0,Model,Logistic regression,k-NN (k-Nearest Neighbours),Naive bayes,Svm Classifier,Decision Tree,random forest
0,Accuracy,1.0,1.0,1.0,1.0,1.0,1.0
1,Precision,1.0,1.0,1.0,1.0,1.0,1.0
2,Recall,1.0,1.0,1.0,1.0,1.0,1.0
