In [63]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score,f1_score, ConfusionMatrixDisplay
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

In [64]:
df=pd.read_csv("7.csv")
mapping=dict(zip(df.species.unique(),(range(1,df.species.unique().size+1,1))))
df["species"]=df["species"].replace(mapping)

  df["species"]=df["species"].replace(mapping)


In [65]:
train,test_val=train_test_split(df,test_size=0.4,stratify=df["species"],random_state=42)
val,test=train_test_split(test_val,test_size=0.5,stratify=test_val["species"],random_state=42)

### KNN


In [66]:
scaler = StandardScaler()
x_train_KNN=train.drop(columns=["species"])
y_train_KNN=train["species"]
x_test_KNN=test.drop(columns=["species"])
y_test_KNN=test["species"]
x_val_KNN=val.drop(columns=["species"])
y_val_KNN=val["species"]
x_train_KNN=scaler.fit_transform(x_train_KNN)
x_test_KNN=scaler.transform(x_test_KNN)
x_val_KNN=scaler.transform(x_val_KNN)

In [67]:
#Testing various values of K on the validation data set to obtain the best value of K
val_accuracy=[]
nearest_neighbours=[1,3,5,7,9,11,13,15,17]
for i in nearest_neighbours:
    knn = KNeighborsClassifier(n_neighbors=i) 
    knn.fit(x_train_KNN,y_train_KNN)
    y_val_pred_KNN=knn.predict(x_val_KNN) 
    accuracy = accuracy_score(y_val_KNN, y_val_pred_KNN)
    val_accuracy.append(accuracy*100)

#running the model on test data for the best value of k as found earlier
best_n_neighbours=nearest_neighbours[val_accuracy.index(max(val_accuracy))]
print(f"The value of K, found out by expirimenting on validation data is: {best_n_neighbours}\n")
knn = KNeighborsClassifier(n_neighbors=best_n_neighbours) 
knn.fit(x_train_KNN,y_train_KNN)
y_test_pred_KNN=knn.predict(x_test_KNN) 
conf_matrix_SVM = confusion_matrix(y_test_KNN, y_test_pred_KNN)
accuracy_SVM = accuracy_score(y_test_KNN, y_test_pred_KNN)
precision_SVM = precision_score(y_test_KNN, y_test_pred_KNN, average='macro')
recall_SVM = recall_score(y_test_KNN, y_test_pred_KNN, average='macro')
f1_SVM=f1_score(y_test_KNN,y_test_pred_KNN,average='macro')
print("The results on test data for KNN with number of neighbors =",best_n_neighbours)
print("Confusion Matrix for KNN:\n", conf_matrix_SVM)
print("Accuracy(%) for KNN:", accuracy_SVM*100)
print("Precision(%) for KNN:", precision_SVM*100)
print("Recall(%) for KNN:", recall_SVM*100)
print("F1_score(%) for KNN:",f1_SVM*100)

print("\n\n\n\n\n")

The value of K, found out by expirimenting on validation data is: 11

The results on test data for KNN with number of neighbors = 11
Confusion Matrix for KNN:
 [[96  0  0  0  0  0  0  0  0]
 [ 0 83  0  0  0  0  0  0  0]
 [ 0  0 67  0  0  0  0 26  0]
 [ 0  0  0 87  0  0  0  0  0]
 [ 0  0  0  0 95  0  0  0  0]
 [ 0  0  0  0  0 92  0  0  0]
 [ 0  0  0  0  0  0 83  0  0]
 [ 0  0 24  0  0  0  0 72  0]
 [ 0  0  0  0  0  0  0  0 91]]
Accuracy(%) for KNN: 93.87254901960785
Precision(%) for KNN: 94.12175126460842
Recall(%) for KNN: 94.11589008363202
F1_score(%) for KNN: 94.11698789780368








### SVM

In [68]:
scaler = StandardScaler()
x_train_SVM=train.drop(columns=["species"])
y_train_SVM=train["species"]
x_test_SVM=test.drop(columns=["species"])
y_test_SVM=test["species"]
x_val_SVM=val.drop(columns=["species"])
y_val_SVM=val["species"]
x_train_SVM=scaler.fit_transform(x_train_SVM)
x_test_SVM=scaler.transform(x_test_SVM)
x_val_SVM=scaler.transform(x_val_SVM)

In [69]:
#Creating a grid of hyperparameters and running it on validation data to get the best values of the hyperparameters
gamma_range = np.linspace(1, 20, 20)
C_range = np.linspace(0.2, 2, 20)

Gamma, C = np.meshgrid(gamma_range, C_range)
Accuracy = np.zeros_like(Gamma)

for i in range(Gamma.shape[0]):
    for j in range(Gamma.shape[1]):
        svm_model = SVC(kernel="rbf", gamma=Gamma[i, j], C=C[i, j])
        svm_model.fit(x_train_SVM, y_train_SVM)
        preds_SVM = svm_model.predict(x_test_SVM)
        Accuracy[i, j] = accuracy_score(y_test_SVM, preds_SVM)

# Finding the  best hyperparameters
best_idx = np.unravel_index(np.argmax(Accuracy), Accuracy.shape)
best_gamma = Gamma[best_idx]
best_C = C[best_idx]
best_acc = Accuracy[best_idx]


#Running the SVM with the best found values of the hyperparameters
svm_model = SVC(kernel="rbf", gamma=best_gamma, C=best_C)
svm_model.fit(x_train_SVM, y_train_SVM)
y_test_pred_SVM = svm_model.predict(x_test_SVM)  
conf_matrix_SVM = confusion_matrix(y_test_SVM, y_test_pred_SVM)
accuracy_SVM = accuracy_score(y_test_SVM, y_test_pred_SVM)
precision_SVM = precision_score(y_test_SVM, y_test_pred_SVM, average='macro', zero_division=1)
recall_SVM = recall_score(y_test_SVM, y_test_pred_SVM, average='macro', zero_division=1)
f1_SVM=f1_score(y_test_SVM,y_test_pred_SVM,average='macro',zero_division=1)
print(f"The results on test data by SVM model with C={best_C:.2f} gamma={best_gamma} anf kernel=rbf")
print("Confusion Matrix: for SVM\n", conf_matrix_SVM)
print("Accuracy(%): for SVM", accuracy_SVM*100)
print("Precision(%): for SVM", precision_SVM*100)
print("Recall(%): for SVM", recall_SVM*100)
print("F1_score(%): for SVM",f1_SVM*100)
print("\n\n\n\n\n")

The results on test data by SVM model with C=0.86 gamma=13.0 anf kernel=rbf
Confusion Matrix: for SVM
 [[96  0  0  0  0  0  0  0  0]
 [ 0 83  0  0  0  0  0  0  0]
 [ 0  0 83  0  0  0  0 10  0]
 [ 0  0  0 87  0  0  0  0  0]
 [ 0  0  0  0 95  0  0  0  0]
 [ 0  0  0  0  0 92  0  0  0]
 [ 0  0  0  0  0  0 83  0  0]
 [ 0  0 28  0  0  0  0 68  0]
 [ 0  0  0  0  0  0  0  0 91]]
Accuracy(%): for SVM 95.34313725490196
Precision(%): for SVM 95.77269577269577
Recall(%): for SVM 95.56451612903226
F1_score(%): for SVM 95.50371872887085






