In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import folium 
import requests
import json
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression

# Load the classification dataset from CSV into a DataFrame.
# NOTE(review): the file name is spelled "hbd_..." while the variable is
# "hdb_..." -- confirm the name matches the file on disk.
hdb_class_data = pd.read_csv(
    filepath_or_buffer="hbd_model_data_classification.csv"
)

In [None]:
# Preview the first five rows to sanity-check the loaded columns.
hdb_class_data.head(n=5)

### Check distribution of classes

In [None]:
# Group the rows by class label and count the non-null entries of every
# remaining column within each class.
rows_by_class = hdb_class_data.groupby(by="over_under_classification")
rows_by_class.count()

In [None]:
# Bar chart of how many rows fall into each classification label.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_title("Distribution of Valuation Classifications")

# Draw on the axes created above explicitly rather than relying on
# matplotlib's implicit "current axes" state.
sns.countplot(data=hdb_class_data, x="over_under_classification", ax=ax)

### Define Independent and Target Variables

In [None]:
# Separate the features from the target.
# X: every column except the classification label.
X = hdb_class_data.drop("over_under_classification", axis="columns")
# y: the label column, kept as a single-column DataFrame (2-D, not a Series).
y = hdb_class_data.loc[:, ["over_under_classification"]]

In [None]:
from sklearn.preprocessing import StandardScaler

# initiatie standard scalers for x variables
sc_x = StandardScaler()

# scale x
X = sc_x.fit_transform(X)

### Split Data into Training and Test Sets

In [None]:
#Split the dataset into training and testing data
from sklearn.model_selection import train_test_split

# split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

### Train Logistic Regression Model

In [None]:
# Train a logistic regression classifier on the training split.
logreg_clf = LogisticRegression(random_state=42)

# y_train is a single-column DataFrame (shape (n_samples, 1)); scikit-learn
# expects a 1-D target and otherwise emits a DataConversionWarning, so flatten
# it to shape (n_samples,) before fitting. np.ravel also accepts a 1-D input.
logreg_clf.fit(X_train, np.ravel(y_train))

In [None]:
# Show the coefficient array learned by the fitted logistic regression.
fitted_coefficients = logreg_clf.coef_
print(fitted_coefficients)

In [None]:
# Show the intercept (bias) term learned by the fitted logistic regression.
fitted_intercept = logreg_clf.intercept_
print(fitted_intercept)

### Model Evaluation

In [None]:
# Predict a class label for every row of the held-out test features.
y_pred = logreg_clf.predict(X=X_test)

In [None]:
from sklearn.metrics import confusion_matrix

# Cross-tabulate the true test labels (rows) against the logistic regression
# predictions (columns).
cnf = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix: \n", cnf)

In [None]:
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
)

# Score the test-set predictions against the true labels.
# NOTE(review): f1/precision/recall are called with their default averaging,
# which presumably requires a two-class target whose positive label is 1 --
# confirm this against the label values in the data.
asr = accuracy_score(y_true=y_test, y_pred=y_pred)
f1 = f1_score(y_true=y_test, y_pred=y_pred)
precision = precision_score(y_true=y_test, y_pred=y_pred)
recall = recall_score(y_true=y_test, y_pred=y_pred)

# Print one "<label><value>" line per metric.
metric_report = [
    ("Accuracy: ", asr),
    ("F1: ", f1),
    ("Precision: ", precision),
    ("Recall: ", recall),
]
print("\n".join(prefix + str(value) for prefix, value in metric_report))