In [4]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import numpy as np
from collections import Counter

from models.multiclass import OneVsRestClassifier, OneVsOneClassifier, SoftmaxClassifier
from models.linear_model import LogisticRegression as MyLogisticRegression
from models.tree import DecisionTreeClassifier as MyDecisionTreeClassifier
from models.svm import SVC as MySVC
from models.neighbors import KNeighborsClassifier as MyKNeighborsClassifier
from models.cluster import KMeansClassifier as MyKMeansClassifier

from models.evaluate import evaluate_model_and_print

In [5]:
# Load the Iris dataset and pull out the feature matrix and target vector.
data = load_iris()
X, y = data.data, data.target

# Hold out 20% of the samples for testing. The split is stratified on `y`
# and seeded so that it is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [3]:
# Baseline: scikit-learn logistic regression with the one-vs-rest scheme.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases —
# confirm against the installed version before upgrading.
model = LogisticRegression(multi_class="ovr").fit(X_train, y_train)

evaluate_model_and_print(
    model,
    "Logistic Regression",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 0.95, 'Precision': 0.9507101086048454, 'Recall': 0.9499999999999998, 'F1': 0.9499687304565354, 'Runtime (s)': 0.0019915103912353516}
Testing model
{'Accuracy': 0.9333333333333333, 'Precision': 0.9444444444444445, 'Recall': 0.9333333333333332, 'F1': 0.9326599326599326, 'Runtime (s)': 0.0}




In [4]:
# One-vs-rest wrapper from `models.multiclass` around the project's own
# logistic regression; the estimator class (not an instance) is passed in.
ovr_model = OneVsRestClassifier(MyLogisticRegression)
ovr_model.fit(X_train, y_train)

evaluate_model_and_print(
    ovr_model,
    "Logistic Regression",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 0.875, 'Precision': 0.9090909090909092, 'Recall': 0.875, 'F1': 0.8704453441295547, 'Runtime (s)': 0.0}
Testing model
{'Accuracy': 0.8666666666666667, 'Precision': 0.9047619047619048, 'Recall': 0.8666666666666667, 'F1': 0.8611111111111112, 'Runtime (s)': 0.0}


In [4]:
# One-vs-one wrapper from `models.multiclass` around the project's own
# logistic regression; the estimator class (not an instance) is passed in.
ovo_model = OneVsOneClassifier(MyLogisticRegression)
ovo_model.fit(X_train, y_train)

evaluate_model_and_print(
    ovo_model,
    "Logistic Regression",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 0.975, 'Precision': 0.9767441860465116, 'Recall': 0.975, 'F1': 0.9749647942419027, 'Runtime (s)': 0.0}
Testing model
{'Accuracy': 0.9666666666666667, 'Precision': 0.9696969696969697, 'Recall': 0.9666666666666667, 'F1': 0.9665831244778612, 'Runtime (s)': 0.0}


In [4]:
# Project softmax classifier, built with its default settings.
sm_model = SoftmaxClassifier()
sm_model.fit(X_train, y_train)

evaluate_model_and_print(
    sm_model,
    "Softmax Classifier",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 0.9666666666666667, 'Precision': 0.9696969696969697, 'Recall': 0.9666666666666667, 'F1': 0.9665831244778612, 'Runtime (s)': 0.0}
Testing model
{'Accuracy': 0.9666666666666667, 'Precision': 0.9696969696969697, 'Recall': 0.9666666666666667, 'F1': 0.9665831244778612, 'Runtime (s)': 0.0}


In [5]:
# Baseline: scikit-learn support vector classifier with default settings.
model = SVC().fit(X_train, y_train)

evaluate_model_and_print(
    model,
    "Support Vector Classifier",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 0.9833333333333333, 'Precision': 0.9833333333333334, 'Recall': 0.9833333333333334, 'F1': 0.9833333333333334, 'Runtime (s)': 0.0010027885437011719}
Testing model
{'Accuracy': 0.9666666666666667, 'Precision': 0.9696969696969697, 'Recall': 0.9666666666666667, 'F1': 0.9665831244778612, 'Runtime (s)': 0.0015053749084472656}


In [3]:
# One-vs-rest wrapper around the project's own SVC; the estimator class
# (not an instance) is passed to the wrapper.
ovr_model = OneVsRestClassifier(MySVC)
ovr_model.fit(X_train, y_train)

evaluate_model_and_print(
    ovr_model,
    "Support Vector Classifier",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 0.8166666666666667, 'Precision': 0.8497891733575721, 'Recall': 0.8166666666666668, 'F1': 0.8083847736625515, 'Runtime (s)': 0.0}
Testing model
{'Accuracy': 0.7666666666666667, 'Precision': 0.7676767676767676, 'Recall': 0.7666666666666666, 'F1': 0.7660818713450291, 'Runtime (s)': 0.0}


In [6]:
# One-vs-one wrapper around the project's own SVC; the estimator class
# (not an instance) is passed to the wrapper.
ovo_model = OneVsOneClassifier(MySVC)
ovo_model.fit(X_train, y_train)

evaluate_model_and_print(
    ovo_model,
    "Support Vector Classifier",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 0.9083333333333333, 'Precision': 0.9281045751633986, 'Recall': 0.9083333333333333, 'F1': 0.9065668630886021, 'Runtime (s)': 0.0009903907775878906}
Testing model
{'Accuracy': 0.9333333333333333, 'Precision': 0.9444444444444445, 'Recall': 0.9333333333333332, 'F1': 0.9326599326599326, 'Runtime (s)': 0.0009963512420654297}


In [7]:
# Baseline: scikit-learn decision tree using Gini impurity.
# The seed matches the one used for the train/test split and K-Means so the
# fitted tree (and therefore the reported metrics) is reproducible; without
# it, ties between equally good splits may be broken differently per run.
model = DecisionTreeClassifier(criterion="gini", random_state=42)
model.fit(X_train, y_train)

# Label fixed: was misspelled "Decision Treee".
evaluate_model_and_print(model, "Decision Tree", X_train, y_train, X_test, y_test, is_classification_task=True)

Training model
{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0, 'Runtime (s)': 0.0}
Testing model
{'Accuracy': 0.9666666666666667, 'Precision': 0.9696969696969697, 'Recall': 0.9666666666666667, 'F1': 0.9665831244778612, 'Runtime (s)': 0.0}


In [8]:
# Project decision tree, limited to a depth of 6.
model = MyDecisionTreeClassifier(max_depth=6)
model.fit(X_train, y_train)

# Label fixed: was misspelled "Decision Treee".
evaluate_model_and_print(model, "Decision Tree", X_train, y_train, X_test, y_test, is_classification_task=True)

Training model
{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0, 'Runtime (s)': 0.0}
Testing model
{'Accuracy': 0.9333333333333333, 'Precision': 0.9333333333333332, 'Recall': 0.9333333333333332, 'F1': 0.9333333333333332, 'Runtime (s)': 0.0}


In [9]:
# Baseline: scikit-learn decision tree using the entropy criterion.
# The seed matches the one used for the train/test split and K-Means so the
# fitted tree (and therefore the reported metrics) is reproducible; without
# it, ties between equally good splits may be broken differently per run.
model = DecisionTreeClassifier(criterion="entropy", random_state=42)
model.fit(X_train, y_train)

# Label fixed: was misspelled "Decision Treee".
evaluate_model_and_print(model, "Decision Tree", X_train, y_train, X_test, y_test, is_classification_task=True)

Training model
{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0, 'Runtime (s)': 0.0}
Testing model
{'Accuracy': 0.9666666666666667, 'Precision': 0.9696969696969697, 'Recall': 0.9666666666666667, 'F1': 0.9665831244778612, 'Runtime (s)': 0.0}


In [10]:
# Project decision tree with the entropy criterion, a depth cap of 10 and a
# minimum of 10 samples required to split a node.
model = MyDecisionTreeClassifier(max_depth=10, min_samples_split=10, criterion="entropy")
model.fit(X_train, y_train)

# Label fixed: was misspelled "Decision Treee".
evaluate_model_and_print(model, "Decision Tree", X_train, y_train, X_test, y_test, is_classification_task=True)

[0]
[0 2 1 0 1 2 1 2 2 2 2 1 1 1 1 0 0 2 2 0 1 0 2 0 1 2 2 0 2 0 0 1 1 0 2 2 1
 1 2 1 0 1 0 2 0 0 2 0 0 0 0 1 2 1 0 2 1 2 0 2 0 1 2 0 1 1 2 1 1 2 0 0 0 2
 1 2 1 2 2 1 0 2 1 0 2 0 2 1 1 0 1 2 0 2 2 2 1 2 0 2 1 2 2 0 1 1 1 1 1 0 2
 1 1 0 0 0 0 1 0]
[0 2 1 0 1 2 1 2 2 2 2 1 1 1 1 0 0 2 2 0 1 0 2 0 1 2 2 0 2 0 0 1 1 0 2 2 1
 1 2 1 0 1 0 2 0 0 2 0 0 0 0 1 2 1 0 2 1 2 0 2 0 1 2 0 1 1 2 1 1 2 0 0 0 2
 1 2 1 2 2 1 0 2 1 0 2 0 2 1 1 0 1 2 0 0 2 2 2 1 2 0 2 1 2 2 0 1 1 1 1 1 0
 2 1 1 0 0 0 0 1 0]
[0 0]
[2 1 0 1 2 1 2 2 2 2 1 1 1 1 0 0 2 2 0 1 0 2 0 1 2 2 0 2 0 0 1 1 0 2 2 1 1
 2 1 0 1 0 2 0 0 2 0 0 0 0 1 2 1 0 2 1 2 0 2 0 1 2 0 1 1 2 1 1 2 0 0 0 2 1
 2 1 2 2 1 0 2 1 0 2 0 2 1 1 0 1 2 0 2 2 2 1 2 0 2 1 2 2 0 1 1 1 1 1 0 2 1
 1 0 0 0 0 1 0]
[0 2 1 0 1 2 1 2 2 2 2 1 1 1 1 0 0 2 2 0 1 0 2 0 1 2 2 0 2 0 0 1 1 0 2 2 1
 1 2 1 0 1 0 2 0 0 2 0 0 0 0 1 2 1 0 2 1 2 0 2 0 1 2 0 1 1 2 1 1 2 0 0 0 2
 1 2 1 2 2 1 0 2 1 0 2 0 2 1 1 0 1 2 0 0 2 2 2 1 2 0 2 1 2 2 0 1 1 1 1 1 0
 2 1 1 0 0 0 0 1 0]
[0 0 0]
[2 1 0 1

In [11]:
# Baseline: scikit-learn k-nearest-neighbours with default settings.
model = KNeighborsClassifier().fit(X_train, y_train)

evaluate_model_and_print(
    model,
    "KNN",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 0.9666666666666667, 'Precision': 0.9666666666666667, 'Recall': 0.9666666666666667, 'F1': 0.9666666666666667, 'Runtime (s)': 0.010025739669799805}
Testing model
{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0, 'Runtime (s)': 0.0010061264038085938}


In [8]:
# Baseline: scikit-learn k-nearest-neighbours with distance-weighted votes.
model = KNeighborsClassifier(weights="distance").fit(X_train, y_train)

evaluate_model_and_print(
    model,
    "KNN",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0, 'Runtime (s)': 0.003506183624267578}
Testing model
{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0, 'Runtime (s)': 0.0010006427764892578}


In [12]:
# Project k-nearest-neighbours classifier, built with its default settings.
model = MyKNeighborsClassifier()
model.fit(X_train, y_train)

evaluate_model_and_print(
    model,
    "KNN",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 0.9583333333333334, 'Precision': 0.9585157390035439, 'Recall': 0.9583333333333334, 'F1': 0.958326821899255, 'Runtime (s)': 0.0030012130737304688}
Testing model
{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0, 'Runtime (s)': 0.0009999275207519531}


In [3]:
# Project k-nearest-neighbours classifier with weights="distance".
model = MyKNeighborsClassifier(weights="distance")
model.fit(X_train, y_train)

evaluate_model_and_print(
    model,
    "KNN",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

Training model
{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0, 'Runtime (s)': 0.00852346420288086}
Testing model
{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0, 'Runtime (s)': 0.0010159015655517578}


In [18]:
# Fit a seeded 3-cluster K-Means model on the training features only (no
# labels are passed), then look up the cluster index of each training sample.
kmeans = KMeans(n_clusters=3, random_state=42).fit(X_train)

train_clusters = kmeans.predict(X_train)

def assign_cluster_labels(clusters, true_labels, n_clusters):
    """Label every sample with the majority true class of its cluster.

    Parameters
    ----------
    clusters : array-like of int, shape (n_samples,)
        Cluster index assigned to each sample.
    true_labels : array-like, shape (n_samples,)
        Ground-truth class of each sample, aligned with ``clusters``.
    n_clusters : int
        Cluster indices ``0 .. n_clusters - 1`` are examined.

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype of ``clusters``. Each entry holds the
        most frequent true label found in that sample's cluster, or ``-1``
        when the sample's cluster index lies outside ``range(n_clusters)``.
    """
    # Accept plain sequences as well as arrays; the boolean masks below
    # require element-wise comparison.
    clusters = np.asarray(clusters)
    true_labels = np.asarray(true_labels)

    # Start from the "unassigned" sentinel. A zero-initialised array would
    # silently report class 0 for any sample that no iteration touches.
    predicted_labels = np.full_like(clusters, -1)

    for cluster_id in range(n_clusters):
        # Boolean mask: which samples belong to this cluster.
        mask = clusters == cluster_id
        if not mask.any():
            # Empty cluster: there is nothing to label.
            continue

        # True labels of the samples in this cluster.
        cluster_points = true_labels[mask]

        # Majority vote; on a tie the label encountered first wins.
        most_common = Counter(cluster_points.tolist()).most_common(1)[0][0]
        predicted_labels[mask] = most_common

    return predicted_labels



# Training split: each cluster is named after the majority true class of the
# training samples it contains.
train_labels = assign_cluster_labels(train_clusters, y_train, n_clusters=3)

print("=== Train Evaluation ===")
print("Accuracy:", accuracy_score(y_train, train_labels))
print("Precision:", precision_score(y_train, train_labels, average='macro'))
print("Recall:", recall_score(y_train, train_labels, average='macro'))
print("F1 Score:", f1_score(y_train, train_labels, average='macro'))

# Cluster -> class mapping learned from the TRAINING split only. Deriving the
# mapping from y_test would leak the test labels into the predictions and
# inflate every test metric.
cluster_to_label = {
    int(cluster_id): Counter(y_train[train_clusters == cluster_id].tolist()).most_common(1)[0][0]
    for cluster_id in np.unique(train_clusters)
}

test_clusters = kmeans.predict(X_test)

# Apply the training-time mapping; a cluster that held no training sample
# has no known class and is reported as -1.
test_labels = np.array([cluster_to_label.get(int(cluster_id), -1) for cluster_id in test_clusters])

print("\n=== Test Evaluation ===")
print("Accuracy:", accuracy_score(y_test, test_labels))
print("Precision:", precision_score(y_test, test_labels, average='macro'))
print("Recall:", recall_score(y_test, test_labels, average='macro'))
print("F1 Score:", f1_score(y_test, test_labels, average='macro'))

=== Train Evaluation ===
Accuracy: 0.8916666666666667
Precision: 0.9100743745774172


KeyboardInterrupt: 

In [15]:
# Use one cluster per distinct class found in the full label vector `y`.
n_clusters = np.unique(y).size
model = MyKMeansClassifier(n_clusters=n_clusters)
model.fit(X_train, y_train)

evaluate_model_and_print(
    model,
    "KMeans",
    X_train,
    y_train,
    X_test,
    y_test,
    is_classification_task=True,
)

[0 2 2 0 2 2 2 2 2 2 2 2 2 2 2 0 1 2 2 1 2 1 2 0 2 2 2 1 2 1 1 2 2 1 2 2 2
 2 2 2 0 2 1 2 0 1 2 0 0 0 0 2 2 2 1 2 2 2 0 2 1 2 2 0 2 2 2 2 2 2 1 1 1 2
 2 2 2 2 2 2 0 2 2 1 2 1 2 0 2 0 2 2 1 0 2 2 2 2 2 1 2 2 2 2 1 2 2 2 2 2 1
 2 2 2 0 1 0 1 2 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[2 1 1 2 1 2 2 2 2 1 1 1 1 2 2 1 2 1 2 2 2 1 1 2 2 1 1 2 1 1 2 2 1 2 1 2 1
 2 2 1 2 1 1 2 1 1 2 2 1 2 1 2 2 1 2 1 2 2 1 1 2 2 2 2 1 2 2 1 2 2 1 1 1 1
 1 2 1 1 1]
Training model
{'Accuracy': 0.6666666666666666, 'Precision': 0.4939796233405372, 'Recall': 0.6666666666666666, 'F1': 0.5533077428502265, 'Runtime (s)': 0.00099945068359375}
Testing model
{'Accuracy': 0.6666666666666666, 'Precision': 0.46296296296296297, 'Recall': 0.6666666666666666, 'F1': 0.5411255411255411, 'Runtime (s)': 0.0}
