# Binary Classification 

In [None]:
import os 
import json
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from utils import *
from img_utils import *

# Loading Data 

In [None]:
# Load the scraped listings from the scraper's output folder
# (max_houses=1000 presumably caps how many listings are read - confirm in utils).
path = "../nybolig-scrape/output"
houses_df = data_to_DF(path, max_houses=1000)

# Keep only rows of type 'ejerlejlighed' whose postal code lies in the
# inclusive range 1000-2920. Both conditions are evaluated on the unfiltered
# frame and applied in a single selection.
in_postal_range = (houses_df['postal_code'] >= 1000) & (houses_df['postal_code'] <= 2920)
is_flat = houses_df['type'] == 'ejerlejlighed'
houses_df = houses_df[in_postal_range & is_flat]

print("Number of datapoints of type 'Ejerlejlighed': ", len(houses_df))
display(houses_df)

# Setting up splits 

In [None]:
from sklearn.model_selection import train_test_split

# Split the data into train, validation and test sets with a 60-20-20 ratio.
# The test set takes 20% of all rows; the validation set then takes 25% of the
# remaining 80%, which is 20% of the full dataset (0.20 here would only give
# a 64-16-20 split).
train_df, test_df = train_test_split(houses_df, test_size=0.2, random_state=0)
train_df, valid_df = train_test_split(train_df, test_size=0.25, random_state=0)

# Most of the original images are 2100x1400; every floorplan is resized to a
# fixed 672x672 (3 * 224) input.
target_width = 224*3
target_height = 224*3

# Preprocessing switches, passed positionally to preprocess_images below.
resize = True
gray_scale = False
threshhold = True


def _floorplan_images(df):
    """Run preprocess_images on the 'image_floorplan' column of *df* with the settings above."""
    return preprocess_images(df, "image_floorplan", target_width, target_height, resize, gray_scale, threshhold)


train_images = _floorplan_images(train_df)
validation_images = _floorplan_images(valid_df)
test_images = _floorplan_images(test_df)


def price_to_label(price, threshold):
    """Return 0 when *price* is above *threshold*, otherwise 1."""
    return 0 if price > threshold else 1


# The class boundary is the median price of the TRAINING split only, so that
# validation and test prices do not influence how the labels are defined.
median = train_df["price"].median()
train_labels = train_df["price"].apply(price_to_label, threshold=median)
valid_labels = valid_df["price"].apply(price_to_label, threshold=median)
test_labels = test_df["price"].apply(price_to_label, threshold=median)

## Model Fitting

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization
from tensorflow.keras.layers import Dropout, Flatten, Dense

# The network input shape is taken from the first preprocessed training image.
input_shape = train_images[0].shape


def _conv_stage(filters, **first_conv_kwargs):
    """Build one convolutional stage as a list of layers.

    The stage is two 3x3 ReLU convolutions, each followed by batch
    normalization, then max pooling with stride 2 and 25% dropout.
    Extra keyword arguments are forwarded to the first convolution only
    (used to pass input_shape to the very first layer of the model).
    """
    return [
        Conv2D(filters=filters, kernel_size=(3, 3), activation='relu', **first_conv_kwargs),
        BatchNormalization(),
        Conv2D(filters=filters, kernel_size=(3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(strides=(2, 2)),
        Dropout(0.25),
    ]


# Two convolutional stages (16 then 32 filters), followed by a dense head
# with dropout and a single sigmoid unit for the binary prediction.
binary_model = Sequential(
    _conv_stage(16, input_shape=input_shape)
    + _conv_stage(32)
    + [
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.25),
        Dense(1024, activation='relu'),
        Dropout(0.4),
        Dense(1, activation='sigmoid'),
    ]
)

# Binary cross-entropy matches the single sigmoid output.
binary_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 8 epochs, reporting validation metrics after each one, then
# persist the trained model under the name "binary_model".
binary_model.fit(
    train_images,
    train_labels,
    validation_data=(validation_images, valid_labels),
    epochs=8,
    batch_size=32,
)
binary_model.save("binary_model")

## Binary Model: Evaluation

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import seaborn as sns

# The model ends in a single sigmoid unit, so predict() returns one
# probability per image with shape (n, 1). The sklearn classification metrics
# need hard 0/1 labels in a flat array, so threshold at 0.5 and flatten.
predicted_probabilities = binary_model.predict(test_images)
predicted_test_labels = (predicted_probabilities > 0.5).astype(int).ravel()

# Reuse the labels built together with the test images so that evaluation
# applies exactly the same rule as training (0 = price above the median,
# 1 = price at or below it) and is a flat array of n labels.
actual_test_labels = np.asarray(test_labels)

# Fixed label order keeps the report and the matrix 2x2 even if one class
# happens to be absent from the predictions.
class_ids = [0, 1]
class_names = ["above median", "at/below median"]

# Print the accuracy
print(f"Accuracy: {accuracy_score(actual_test_labels, predicted_test_labels):.2f}")

# Print the classification report
print(classification_report(actual_test_labels, predicted_test_labels, labels=class_ids, target_names=class_names))

# Plot the confusion matrix (rows: actual class, columns: predicted class)
cm = confusion_matrix(actual_test_labels, predicted_test_labels, labels=class_ids)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()