In [1]:
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from models.linear_model import LogisticRegression as MyLogisticRegression
from models.neighbors import KNeighborsClassifier as MyKNeighborsClassifier
from models.svm import SVC as MySVC
from models.tree import DecisionTreeClassifier as MyDecisionTreeClassifier
from models.multiclass import OneVsRestClassifier

In [2]:
# Read the pre-split housing train and test CSVs (train first, then test).
train_df, test_df = map(
    pd.read_csv,
    ("./dataset/housing_train.csv", "./dataset/housing_test.csv"),
)

In [3]:
# Separate the "LivingLevel" target column from the remaining feature columns
# of each split. drop() returns a new frame, so train_df / test_df keep the
# target column.
X_train, y_train = train_df.drop(columns=["LivingLevel"]), train_df["LivingLevel"]
X_test, y_test = test_df.drop(columns=["LivingLevel"]), test_df["LivingLevel"]

In [15]:
# Reference run: fit scikit-learn's own classifiers on the same split so the
# custom implementations evaluated in the following cells have a baseline.
# Keys are the labels printed in each report header.
models = dict(
    [
        ("Logistic Regression", LogisticRegression(max_iter=1_000_000)),
        ("KNN", KNeighborsClassifier(n_neighbors=5)),
        ("SVM", SVC()),
    ]
)
# NOTE(review): display names are matched to the sorted class labels by
# position — assumes the encoded label order is Low < Medium < High; confirm.
target_names = ["Low", "Medium", "High"]

for name, model in models.items():
    print(f"=== {name} ===")
    # scikit-learn's fit() returns the estimator itself, so predict() can be chained.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    # One print call with a newline separator emits the report followed by the
    # 40-dash divider, exactly like two consecutive print() calls.
    print(
        classification_report(y_test, y_pred, target_names=target_names),
        "-" * 40,
        sep="\n",
    )

=== Logistic Regression ===
              precision    recall  f1-score   support

         Low       0.99      0.96      0.97      1063
      Medium       0.96      0.98      0.97      1983
        High       0.97      0.96      0.97      1082

    accuracy                           0.97      4128
   macro avg       0.97      0.97      0.97      4128
weighted avg       0.97      0.97      0.97      4128

----------------------------------------
=== KNN ===
              precision    recall  f1-score   support

         Low       0.97      0.96      0.96      1063
      Medium       0.95      0.96      0.95      1983
        High       0.95      0.95      0.95      1082

    accuracy                           0.95      4128
   macro avg       0.96      0.95      0.96      4128
weighted avg       0.95      0.95      0.95      4128

----------------------------------------
=== SVM ===
              precision    recall  f1-score   support

         Low       0.99      0.98      0.99      

In [14]:
# Custom logistic regression evaluated through the project's one-vs-rest
# wrapper. lr and max_iter are given to the wrapper as keyword arguments
# (presumably forwarded to each MyLogisticRegression instance — confirm in
# models.multiclass).
target_names = ["Low", "Medium", "High"]
model = OneVsRestClassifier(
    base_model_class=MyLogisticRegression,
    lr=0.1,
    max_iter=1_000_000,
)

print("=== Logistic Regression ===")
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred, target_names=target_names))
print(40 * "-")
# 13m49s  (presumably the wall-clock time this cell took to run — confirm)


=== Logistic Regression ===
              precision    recall  f1-score   support

         Low       0.99      0.98      0.98      1063
      Medium       0.99      0.98      0.99      1983
        High       0.98      0.99      0.99      1082

    accuracy                           0.99      4128
   macro avg       0.99      0.99      0.99      4128
weighted avg       0.99      0.99      0.99      4128

----------------------------------------


In [26]:
# Custom decision tree (depth capped at 10), trained and scored on plain
# NumPy arrays rather than on the pandas objects.
target_names = ["Low", "Medium", "High"]
model = MyDecisionTreeClassifier(max_depth=10)

print("=== DecisionTreeClassifier ===")
model.fit(X_train.to_numpy(), y_train.to_numpy())
y_pred = model.predict(X_test.to_numpy())
print(classification_report(y_true=y_test, y_pred=y_pred, target_names=target_names))
print(40 * "-")


=== DecisionTreeClassifier ===
              precision    recall  f1-score   support

         Low       0.94      0.92      0.93      1063
      Medium       0.90      0.93      0.92      1983
        High       0.93      0.90      0.91      1082

    accuracy                           0.92      4128
   macro avg       0.93      0.92      0.92      4128
weighted avg       0.92      0.92      0.92      4128

----------------------------------------


In [27]:
# Custom k-nearest-neighbours classifier, constructed with no arguments and
# fed the underlying arrays of the train/test frames.
target_names = ["Low", "Medium", "High"]
model = MyKNeighborsClassifier()

print("=== KNeighborsClassifier ===")
model.fit(X_train.values, y_train.values)
y_pred = model.predict(X_test.values)
# A single print with a newline separator produces the report followed by the
# 40-dash divider, same as two separate print() calls.
print(
    classification_report(y_test, y_pred, target_names=target_names),
    "-" * 40,
    sep="\n",
)

=== KNeighborsClassifier ===
              precision    recall  f1-score   support

         Low       0.97      0.96      0.97      1063
      Medium       0.96      0.95      0.95      1983
        High       0.94      0.95      0.95      1082

    accuracy                           0.96      4128
   macro avg       0.96      0.96      0.96      4128
weighted avg       0.96      0.96      0.96      4128

----------------------------------------


In [24]:
# Custom SVC, constructed with no arguments, trained and scored on the
# underlying arrays of the train/test frames.
#
# NOTE(review): in the recorded run this model predicts "Medium" for every
# test sample (recall 1.00 for Medium, 0.00 for Low and High, accuracy 0.48),
# so the result is degenerate. Possible causes to check: unscaled features, or
# MySVC being a binary classifier that needs the OneVsRestClassifier wrapper
# used for MyLogisticRegression — confirm against models.svm.
model = MySVC()

target_names = ["Low", "Medium", "High"]

print("=== SVC ===")
model.fit(X_train.values, y_train.values)
y_pred = model.predict(X_test.values)
# Precision is undefined for a class that is never predicted. zero_division=0
# reports it as 0.0 explicitly instead of letting scikit-learn emit an
# UndefinedMetricWarning per averaged metric.
print(
    classification_report(
        y_test,
        y_pred,
        target_names=target_names,
        zero_division=0,
    )
)
print("-" * 40)
# 13m49s
# NOTE(review): this timing note is identical to the one on the one-vs-rest
# logistic-regression cell — possibly copy-pasted; re-measure.


=== SVC ===
              precision    recall  f1-score   support

         Low       0.00      0.00      0.00      1063
      Medium       0.48      1.00      0.65      1983
        High       0.00      0.00      0.00      1082

    accuracy                           0.48      4128
   macro avg       0.16      0.33      0.22      4128
weighted avg       0.23      0.48      0.31      4128

----------------------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
