# Testing my models against Iris Dataset and comapre With Sklearn models


## Imports

In [1]:
import numpy as np
from sklearn.datasets import load_iris

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from evaluator.model_evaluator import ModelEvaluator

from models.knn import Conformal,NearestNeighbours
from models.decision_tree import DecisionTree

from evaluator.model_evaluator import ModelEvaluator


#### Splitting and loading iris dataset

In [2]:
# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

### SKLEARN KNN

In [3]:
knn_classifier = KNeighborsClassifier(n_neighbors=1)
knn_classifier.fit(X_train, y_train)

# Make predictions
y_pred = knn_classifier.predict(X_test)
# Evaluate the model
metrics = ModelEvaluator.calculate_metrics(y_test, y_pred)
print(f"Metrics for sklearn KNN: {metrics}")

Metrics for sklearn KNN: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1_score': 1.0, 'f2_score': 1.0}


### MY KNN model

In [4]:
knn_model = NearestNeighbours(neighbours=1)
knn_model.fit(X_train, y_train)

# Make predictions
y_pred = knn_classifier.predict(X_test)
# Evaluate the model
metrics = ModelEvaluator.calculate_metrics(y_test, y_pred)
print(f"Metrics for sklearn KNN: {metrics}")

Metrics for sklearn KNN: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1_score': 1.0, 'f2_score': 1.0}


### Model Validation Summary

The custom KNN model was validated against scikit-learn's KNN implementation on the Iris dataset. Both models produced identical metrics:

- **Accuracy**: 1.0  
- **Precision**: 1.0  
- **Recall**: 1.0  
- **F1-Score**: 1.0  
- **F2-Score**: 1.0  

This confirms the correctness of my KNN model implementation.


### Sklearn DT model

In [5]:
# Scikit-learn Decision Tree
sklearn_dt = DecisionTreeClassifier(criterion="gini",max_depth=10,min_samples_split=2,random_state=42)
sklearn_dt.fit(X_train, y_train)
y_dt_pred_sklearn = sklearn_dt.predict(X_test)
metrics = ModelEvaluator.calculate_metrics(y_test, y_dt_pred_sklearn)
print(f"Metrics for sklearn DT: {metrics}")

# Scikit-learn Decision Tree
sklearn_dt = DecisionTreeClassifier(criterion="entropy",max_depth=10,min_samples_split=2,random_state=42)
sklearn_dt.fit(X_train, y_train)
y_dt_pred_sklearn = sklearn_dt.predict(X_test)
metrics = ModelEvaluator.calculate_metrics(y_test, y_dt_pred_sklearn)
print(f"Metrics for sklearn DT: {metrics}")



Metrics for sklearn DT: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1_score': 1.0, 'f2_score': 1.0}
Metrics for sklearn DT: {'accuracy': 0.9777777777777777, 'precision': 0.9285714285714286, 'recall': 1.0, 'f1_score': 0.962962962962963, 'f2_score': 0.9848484848484849}


### My Implmentation of Decision Tree

In [6]:
# Custom Decision Tree
custom_dt = DecisionTree(uniformity_measure="gini", max_depth=None, min_samples_split=2)
custom_dt.fit(X_train, y_train)
y_dt_pred_custom = custom_dt.predict(X_test)
metrics_custom = ModelEvaluator.calculate_metrics(y_test, y_dt_pred_custom)
print(f"Metrics for custom Decision Tree: {metrics_custom}")

# Custom Decision Tree
custom_dt = DecisionTree(uniformity_measure="entropy", max_depth=None, min_samples_split=2)
custom_dt.fit(X_train, y_train)
y_dt_pred_custom = custom_dt.predict(X_test)
metrics_custom = ModelEvaluator.calculate_metrics(y_test, y_dt_pred_custom)
print(f"Metrics for custom Decision Tree: {metrics_custom}")


Metrics for custom Decision Tree: {'accuracy': 0.9555555555555556, 'precision': 1.0, 'recall': 0.8461538461538461, 'f1_score': 0.9166666666666666, 'f2_score': 0.8730158730158731}
Metrics for custom Decision Tree: {'accuracy': 0.9555555555555556, 'precision': 1.0, 'recall': 0.8461538461538461, 'f1_score': 0.9166666666666666, 'f2_score': 0.8730158730158731}


**Observation:**  
My custom decision tree implementation still produces valid predictions. However scikit-learnâ€™s DecisionTreeClassifier achieves higher training accuracy due to optimised threshold selection, and other internal refinements. Sklearn DT model was rigorously tested and improved and plan to investigate further where differences lie.
