In [1]:
from collections import Counter

import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier as SklearnOneVsRestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from models.multiclass import OneVsRestClassifier
from models.linear_model import LogisticRegression as MyLogisticRegression
from models.tree import DecisionTreeClassifier as MyDecisionTreeClassifier
from models.svm import SVC as MySVC
from models.neighbors import KNeighborsClassifier as MyKNeighborsClassifier
from models.cluster import KMeansClassifier as MyKMeansClassifier

from models.evaluate import evaluate_model_and_print

In [2]:
# Load the Iris dataset and hold out 20% of it for testing. Stratifying on the
# labels keeps the class proportions the same in both splits; the fixed seed
# makes the split repeatable.
data = load_iris()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [3]:
# scikit-learn baseline: one-vs-rest logistic regression.
#
# `LogisticRegression(multi_class="ovr")` is deprecated since scikit-learn 1.5
# and scheduled for removal; wrapping the estimator in scikit-learn's own
# OneVsRestClassifier is the documented replacement. It is imported under the
# alias `SklearnOneVsRestClassifier` because the bare name
# `OneVsRestClassifier` in this notebook refers to the project's implementation.
model = SklearnOneVsRestClassifier(LogisticRegression())
model.fit(X_train, y_train)

evaluate_model_and_print(model, "Logistic Regression", X_train, y_train, X_test, y_test, is_classification_task=True)

Training model
{'Accuracy': 0.95, 'Precision': 0.9507101086048454, 'Recall': 0.9499999999999998, 'F1': 0.9499687304565354}
Testing model
{'Accuracy': 0.9333333333333333, 'Precision': 0.9444444444444445, 'Recall': 0.9333333333333332, 'F1': 0.9326599326599326}




In [3]:
# Project implementation: the MyLogisticRegression class itself (not an
# instance) is handed to the project's one-vs-rest wrapper, which is then
# fitted on the training split.
ovr_model = OneVsRestClassifier(MyLogisticRegression)
ovr_model.fit(X_train, y_train)

evaluate_model_and_print(
    ovr_model,
    "Logistic Regression",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 0.9083333333333333, 'Precision': 0.9266666666666667, 'Recall': 0.9105691056910569, 'F1': 0.907158305639078}
Testing model
{'Accuracy': 0.8666666666666667, 'Precision': 0.9111111111111111, 'Recall': 0.8518518518518517, 'F1': 0.8534798534798536}


In [3]:
# scikit-learn baseline: support vector classifier with default
# hyperparameters. `fit` returns the fitted estimator, so construction and
# training are chained.
model = SVC().fit(X_train, y_train)

evaluate_model_and_print(
    model,
    "Support Vector Classifier",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 0.975, 'Precision': 0.975, 'Recall': 0.9751928288513655, 'F1': 0.9749960931395529}
Testing model
{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0}


In [4]:
# Project implementation: the MySVC class is passed to the project's
# one-vs-rest wrapper and trained on the training split.
# NOTE(review): the recorded run below scores a macro recall of exactly 2/3,
# which may mean one class is never predicted -- worth confirming.
ovr_model = OneVsRestClassifier(MySVC)
ovr_model.fit(X_train, y_train)

evaluate_model_and_print(
    ovr_model,
    "Support Vector Classifier",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 0.6583333333333333, 'Precision': 0.4696296296296296, 'Recall': 0.6666666666666666, 'F1': 0.541795665634675}
Testing model
{'Accuracy': 0.7, 'Precision': 0.5166666666666667, 'Recall': 0.6666666666666666, 'F1': 0.5698924731182796}


In [4]:
# scikit-learn baseline: decision tree using the Gini impurity criterion.
# scikit-learn permutes the candidate features at every split, so the tree can
# differ between runs unless `random_state` is fixed; 42 matches the seed used
# for the train/test split.
model = DecisionTreeClassifier(criterion="gini", random_state=42)
model.fit(X_train, y_train)

evaluate_model_and_print(model, "Decision Tree", X_train, y_train, X_test, y_test, is_classification_task=True)

Training model
{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0}
Testing model
{'Accuracy': 0.9666666666666667, 'Precision': 0.9696969696969697, 'Recall': 0.9666666666666667, 'F1': 0.9665831244778612}


In [5]:
# Project implementation: decision tree limited to a depth of 6, every other
# setting left at the class's defaults.
model = MyDecisionTreeClassifier(max_depth=6)
model.fit(X_train, y_train)

# Model label corrected from the misspelled "Decision Treee".
evaluate_model_and_print(model, "Decision Tree", X_train, y_train, X_test, y_test, is_classification_task=True)

Training model
{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0}
Testing model
{'Accuracy': 0.9333333333333333, 'Precision': 0.9333333333333332, 'Recall': 0.9333333333333332, 'F1': 0.9333333333333332}


In [3]:
# scikit-learn baseline: decision tree using the entropy (information gain)
# criterion. `random_state` is fixed because scikit-learn permutes the
# candidate features at every split, which otherwise makes the fitted tree
# vary between runs; 42 matches the seed used for the train/test split.
model = DecisionTreeClassifier(criterion="entropy", random_state=42)
model.fit(X_train, y_train)

evaluate_model_and_print(model, "Decision Tree", X_train, y_train, X_test, y_test, is_classification_task=True)

Training model
{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0}
Testing model
{'Accuracy': 0.9666666666666667, 'Precision': 0.9696969696969697, 'Recall': 0.9666666666666667, 'F1': 0.9665831244778612}


In [3]:
# Project implementation: entropy-based decision tree, at most 10 levels deep,
# with `min_samples_split=10`.
# NOTE(review): the recorded run below reaches only ~0.41 training accuracy,
# while the max_depth=6 configuration of the same class reaches 1.0. That
# points at a problem in the entropy criterion or the min_samples_split
# handling of MyDecisionTreeClassifier -- investigate before trusting this cell.
model = MyDecisionTreeClassifier(max_depth=10, min_samples_split=10, criterion="entropy")
model.fit(X_train, y_train)

# Model label corrected from the misspelled "Decision Treee".
evaluate_model_and_print(model, "Decision Tree", X_train, y_train, X_test, y_test, is_classification_task=True)

Training model
{'Accuracy': 0.4083333333333333, 'Precision': 0.7867867867867867, 'Recall': 0.4083333333333334, 'F1': 0.30763725338897946}
Testing model
{'Accuracy': 0.4666666666666667, 'Precision': 0.4615384615384615, 'Recall': 0.4666666666666666, 'F1': 0.37566137566137564}


In [5]:
# scikit-learn baseline: k-nearest-neighbours classifier with default
# hyperparameters. `fit` returns the fitted estimator, so construction and
# training are chained.
model = KNeighborsClassifier().fit(X_train, y_train)

evaluate_model_and_print(
    model,
    "KNN",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 0.9666666666666667, 'Precision': 0.9670627475505524, 'Recall': 0.9670627475505524, 'F1': 0.9666666666666667}
Testing model
{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0}


In [6]:
# Project implementation: k-nearest-neighbours classifier built with the
# class's default arguments.
model = MyKNeighborsClassifier()
model.fit(X_train, y_train)

evaluate_model_and_print(
    model,
    "KNN",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 0.95, 'Precision': 0.9499687304565354, 'Recall': 0.9499687304565354, 'F1': 0.9499687304565354}
Testing model
{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0}


In [8]:
def assign_cluster_labels(clusters, true_labels, n_clusters):
    """Label every sample with the majority true class of its cluster.

    Parameters
    ----------
    clusters : array-like of int
        Cluster id assigned to each sample.
    true_labels : array-like of int
        Ground-truth class of each sample, aligned element-wise with
        ``clusters``.
    n_clusters : int
        Only cluster ids in ``range(n_clusters)`` are considered.

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype of ``clusters``. Each sample holds the
        most frequent true label found in its cluster. Samples whose cluster
        id lies outside ``range(n_clusters)`` hold ``-1``.
    """
    # Accept plain Python sequences as well as arrays: the element-wise
    # comparison and boolean indexing below require ndarrays.
    clusters = np.asarray(clusters)
    true_labels = np.asarray(true_labels)

    # Start from -1 ("unassigned") instead of 0, so a sample that never
    # receives a majority label cannot be mistaken for a class-0 prediction.
    predicted_labels = np.full_like(clusters, -1)

    for cluster_id in range(n_clusters):
        mask = clusters == cluster_id
        if not mask.any():
            # Empty cluster: there is no sample to label.
            continue
        # On a tie, Counter returns the label that was encountered first.
        members = true_labels[mask].tolist()
        predicted_labels[mask] = Counter(members).most_common(1)[0][0]

    return predicted_labels

In [9]:
# scikit-learn baseline: unsupervised K-Means, scored as a classifier by
# mapping every cluster to the majority class of its members.
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X_train)

train_clusters = kmeans.predict(X_train)

train_labels = assign_cluster_labels(train_clusters, y_train, n_clusters=3)

# Every sample of a cluster carries the same label in `train_labels`, so
# pairing cluster ids with those labels gives the cluster -> class mapping
# learned from the training split.
cluster_to_label = dict(zip(train_clusters.tolist(), train_labels.tolist()))

print("=== Train Evaluation ===")
print("Accuracy:", accuracy_score(y_train, train_labels))
print("Precision:", precision_score(y_train, train_labels, average='macro'))
print("Recall:", recall_score(y_train, train_labels, average='macro'))
print("F1 Score:", f1_score(y_train, train_labels, average='macro'))

test_clusters = kmeans.predict(X_test)

# Apply the mapping learned on the training split. Deriving it from `y_test`
# would leak the test labels into the test predictions and inflate the scores.
# A cluster that was never seen during training falls back to -1.
test_labels = np.array([cluster_to_label.get(cluster_id, -1) for cluster_id in test_clusters.tolist()])

print("\n=== Test Evaluation ===")
print("Accuracy:", accuracy_score(y_test, test_labels))
print("Precision:", precision_score(y_test, test_labels, average='macro'))
print("Recall:", recall_score(y_test, test_labels, average='macro'))
print("F1 Score:", f1_score(y_test, test_labels, average='macro'))

=== Train Evaluation ===
Accuracy: 0.8916666666666667
Precision: 0.9100743745774172
Recall: 0.8916666666666666
F1 Score: 0.8895790200138026

=== Test Evaluation ===
Accuracy: 0.9
Precision: 0.9023569023569024
Recall: 0.9
F1 Score: 0.899749373433584


In [3]:
# Project implementation: K-Means used as a classifier, with one cluster per
# distinct class label in the full label vector `y`.
# NOTE(review): the recorded run below scores a macro recall of exactly 2/3,
# which may mean one class is never predicted -- worth confirming.
n_clusters = np.unique(y).size
model = MyKMeansClassifier(n_clusters=n_clusters)
model.fit(X_train, y_train)

evaluate_model_and_print(
    model,
    "KMeans",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 0.6666666666666666, 'Precision': 0.4939796233405372, 'Recall': 0.6666666666666666, 'F1': 0.5533077428502265}
Testing model
{'Accuracy': 0.6666666666666666, 'Precision': 0.46296296296296297, 'Recall': 0.6666666666666666, 'F1': 0.5411255411255411}
