In [1]:
import os
import pickle
import numpy as np
import datetime
import json
from multiprocessing import Process
from multiprocess import Process

import keras
from tensorflow.keras.utils import plot_model
from tensorflow.keras.applications import VGG16, MobileNetV2, MobileNetV3Small

from models import *
from utils import regression_stats
from img_utils import data_to_df, preprocess_images, set_gpu, set_cpu

import matplotlib.pyplot as plt
%reload_ext autoreload
%autoreload 2

2024-04-30 07:39:04.809017: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [2]:
# Original author's note: re-import here to try reloading the module.
from img_utils import data_to_df

# Width / height handed to preprocess_images for every split.
IMAGE_WIDTH: int = 224
IMAGE_HEIGHT: int = 224

# Locations of the four dataset splits on disk.
train_1_path: str = "output/train/train_1"
train_2_path: str = "output/train/train_2"
valid_path: str = "output/valid"
test_path: str = "output/test"

# One DataFrame per path, unpacked in the same order as the paths are listed.
train1_df, train2_df, valid_df, test_df = data_to_df(
    [train_1_path, train_2_path, valid_path, test_path], preprocess=True
)

display(train1_df.head())

# Report how many rows each split contains.
print(
    "Length of datasets:",
    f"Train 1: {len(train1_df)}",
    f"Train 2: {len(train2_df)}",
    f"Valid: {len(valid_df)}",
    f"Test: {len(test_df)}",
    sep="\n",
)

# TODO: Would be better with this format
# train_images: np.array = preprocess_images(
#     train1_df["image_floorplan"], IMAGE_WIDTH, IMAGE_HEIGHT, True, False, False
# )


def _split_dataset(frame):
    """Split one dataset frame into (tabular features, images, prices).

    The features are every column except "image_floorplan" and "price", the
    images are whatever preprocess_images returns for the "image_floorplan"
    column, and the prices are the raw values of the "price" column.
    """
    features = frame.drop(columns=["image_floorplan", "price"])
    # NOTE(review): the three trailing booleans are forwarded positionally;
    # their meaning is defined by img_utils.preprocess_images.
    images = preprocess_images(
        frame, "image_floorplan", IMAGE_WIDTH, IMAGE_HEIGHT, True, False, False
    )
    return features, images, frame["price"].values


#### Train Set 1 ####
train1_features, train1_images, train1_prices = _split_dataset(train1_df)

#### Train Set 2 ####
train2_features, train2_images, train2_prices = _split_dataset(train2_df)

#### Validation Set ####
valid_features, valid_images, valid_prices = _split_dataset(valid_df)

#### Test Set ####
test_features, test_images, test_prices = _split_dataset(test_df)

Processing output/train/train_1: 100%|██████████| 311/311 [00:00<00:00, 462071.75it/s]
Processing output/train/train_2: 100%|██████████| 312/312 [00:00<00:00, 569213.94it/s]
Processing output/valid: 100%|██████████| 89/89 [00:00<00:00, 352828.98it/s]
Processing output/test: 100%|██████████| 178/178 [00:00<00:00, 511710.84it/s]
Preprocessing: 100%|██████████| 4/4 [00:00<00:00, 37.37it/s]


Unnamed: 0,postal_code,type,price,size,basement_size,rooms,year_built,year_rebuilt,energy_label,postal_avg_sqm_price,lat,lng,image_floorplan
0,64,0,1750000,40,0,2.0,1944.0,1944.0,4,32687.5,55.736966,12.513117,"[[[255, 255, 255], [255, 255, 255], [255, 255,..."
1,37,0,8500000,138,0,4.0,2005.0,2005.0,4,55737.75,55.698085,12.59447,"[[[255, 255, 255], [255, 255, 255], [255, 255,..."
2,37,0,13495000,176,0,5.0,1907.0,1907.0,5,55737.75,55.693852,12.587047,"[[[255, 255, 255], [255, 255, 255], [255, 255,..."
3,40,0,5995000,139,0,5.0,1935.0,2009.0,4,44946.75,55.654361,12.601795,"[[[255, 255, 255], [255, 255, 255], [255, 255,..."
4,36,0,4495000,76,0,3.0,1932.0,1932.0,4,50502.0,55.691502,12.529448,"[[[255, 255, 255], [255, 255, 255], [255, 255,..."


Length of datasets:
Train 1: 292
Train 2: 290
Valid: 84
Test: 163


In [17]:
def save_expected_predicted(test_prices, test_predictions, img_dir):
    """Save a scatter plot of expected vs. predicted prices.

    Writes ``{img_dir}/expected_vs_predicted.png`` and closes the figure. A red
    identity line spanning the range of ``test_prices`` is added on a
    best-effort basis: if it cannot be drawn the reason is printed and the
    scatter plot is still saved.

    Args:
        test_prices: Expected prices (x axis).
        test_predictions: Predicted prices (y axis).
        img_dir: Directory the PNG is written to.
    """
    # Axis limits are left to matplotlib; uncomment to pin both axes instead.
    # plt.xlim(0, 9999999)
    # plt.ylim(0, 9999999)
    plt.scatter(test_prices, test_predictions)
    plt.xlabel("Expected Price")
    plt.ylabel("Predicted Price")
    plt.title("Expected vs Predicted Price")
    try:
        lowest, highest = min(test_prices), max(test_prices)
        plt.plot([lowest, highest], [lowest, highest], color='red')
    except Exception as err:
        # Still best-effort, but no longer silent, and KeyboardInterrupt /
        # SystemExit are no longer swallowed as they were by a bare except.
        print(f"Could not draw identity line: {err}")
    plt.savefig(f"{img_dir}/expected_vs_predicted.png")
    plt.close()

def save_residuals(test_prices, test_predictions, img_dir):
    """Save a residual plot (residual vs. predicted price).

    The residual is ``test_prices - test_predictions``. Predictions are
    flattened first, so an (N, 1) model output is accepted. Writes
    ``{img_dir}/residuals.png`` and closes the figure.

    Args:
        test_prices: Expected prices, one per sample.
        test_predictions: Predicted prices; flattened to 1-D.
        img_dir: Directory the PNG is written to.
    """
    predictions = np.asarray(test_predictions).reshape(-1)
    residuals = test_prices - predictions
    plt.scatter(predictions, residuals)
    try:
        # The x axis carries the predictions, so the zero line must span the
        # predicted range rather than the range of the expected prices.
        plt.hlines(y=0, xmin=predictions.min(), xmax=predictions.max(), colors="r")
    except Exception as err:
        # Best-effort decoration (e.g. empty input); report instead of hiding.
        print(f"Could not draw zero-residual line: {err}")
    # Label matches what is actually plotted on the x axis.
    plt.xlabel("Predicted Price")
    plt.ylabel("Residuals")
    plt.title("Residuals")
    plt.savefig(f"{img_dir}/residuals.png")
    plt.close()

def get_saliency_map(model, image):
    """Compute a min-max normalised gradient saliency map for one image.

    The image is given a leading batch axis, divided by 255 and cast to
    float32. The gradient of the model output with respect to that input is
    reduced with a maximum over the last axis and rescaled to [0, 1].

    Args:
        model: Callable taking the batched float32 tensor; it must be
            differentiable with respect to its input.
        image: A single image without batch axis.

    Returns:
        numpy array with the last axis of the input removed. When the gradient
        map is constant, an all-zero map is returned instead of NaNs.

    Raises:
        ValueError: If the model output does not depend on the input.
    """
    batch = np.expand_dims(image, axis=0)
    batch = (batch / 255.0).astype(np.float32)
    batch = tf.convert_to_tensor(batch)
    with tf.GradientTape() as tape:
        # The input is not a variable, so it has to be watched explicitly.
        tape.watch(batch)
        prediction = model(batch)
    gradients = tape.gradient(prediction, batch)
    if gradients is None:
        raise ValueError("Model output is not connected to the input image; no gradient available")
    saliency = tf.reduce_max(tf.squeeze(gradients), axis=-1).numpy()
    lowest = np.min(saliency)
    value_range = np.max(saliency) - lowest
    if value_range == 0:
        # Constant gradients: avoid 0/0, which would fill the map with NaN.
        return np.zeros_like(saliency)
    return (saliency - lowest) / value_range

def _save_prediction_examples(model, indices, label, predictions, residuals, test_prices, test_images, img_dir):
    """Save the annotated image and its saliency map for every index in ``indices``."""
    for rank, idx in enumerate(indices):
        image = test_images[idx]
        plt.imshow(image)
        textstr = '\n'.join((
            f"Price: {test_prices[idx]}",
            f"Predicted Price: {predictions[idx]}",
            f"Residual: {residuals[idx]}"
        ))
        plt.text(0.01, 0.99, textstr, fontsize=10, transform=plt.gcf().transFigure, verticalalignment='top')
        plt.axis("off")
        plt.savefig(f"{img_dir}/{label}_{rank}.png")
        plt.close()

        saliency_map = get_saliency_map(model, image)
        plt.imshow(saliency_map, cmap="hot")
        plt.axis("off")
        plt.savefig(f"{img_dir}/{label}_saliency_map_{rank}.png")
        plt.close()


def save_worst_best_predictions(model, test_predictions, test_prices, test_images, img_dir):
    """Save the 8 worst and 8 best predicted test images with saliency maps.

    Samples are ranked by absolute error. For each selected sample two files
    are written to ``img_dir``: ``{worst|best}_{i}.png`` (the image annotated
    with price, prediction and residual) and ``{worst|best}_saliency_map_{i}.png``.
    With fewer than 16 samples the two selections overlap.

    Args:
        model: Model handed to get_saliency_map.
        test_predictions: Predicted prices; flattened to 1-D so the annotation
            shows a scalar rather than a one-element array.
        test_prices: Expected prices.
        test_images: Test images, indexable by sample position.
        img_dir: Directory the PNGs are written to.
    """
    predictions = np.asarray(test_predictions).reshape(-1)
    residuals = test_prices - predictions
    # Sort once; the tail holds the largest errors, the head the smallest.
    order = np.argsort(np.abs(residuals))
    test_images = np.array(test_images)
    _save_prediction_examples(model, order[-8:], "worst", predictions, residuals, test_prices, test_images, img_dir)
    _save_prediction_examples(model, order[:8], "best", predictions, residuals, test_prices, test_images, img_dir)

def save_features_importance(feature_importance, img_dir):
    """Save a bar chart of feature importances, highest first.

    Writes ``{img_dir}/feature_importance.png`` and closes the figure.

    Args:
        feature_importance: Mapping of feature name to importance, or None
            when the model exposes no importances (the plot is then skipped).
        img_dir: Directory the PNG is written to.
    """
    if feature_importance is None:
        # save_model_and_evaluate passes None when the model has no
        # feature_importances_ attribute; there is nothing to plot then.
        print("No feature importance available; skipping plot")
        return
    # Order the bars by importance, largest first.
    ranked = dict(sorted(feature_importance.items(), key=lambda item: item[1], reverse=True))
    plt.bar(list(ranked.keys()), list(ranked.values()))
    plt.title('Feature Importance')
    # No y-axis label.
    plt.ylabel('')
    plt.xticks(rotation=90)
    # Leave room at the bottom so the rotated tick labels stay visible.
    plt.subplots_adjust(bottom=0.4)
    plt.savefig(f"{img_dir}/feature_importance.png")
    plt.close()

def save_worst_best(test_predictions, test_prices, test_features, model_dir):
    """Write LaTeX tables of the 8 worst and 8 best predictions.

    The test features are extended with the columns "Price",
    "Predicted Price", "Residual" (price minus prediction) and
    "Absolute Distances", then sorted by absolute distance, largest first.
    The first 8 rows go to ``{model_dir}/worst_predictions.txt`` and the last
    8 rows to ``{model_dir}/best_predictions.txt``.

    Args:
        test_predictions: Predicted prices; flattened to 1-D so an (N, 1)
            model output fits into a single column.
        test_prices: Expected prices.
        test_features: Tabular test features (anything pd.DataFrame accepts).
        model_dir: Directory the two text files are written to.
    """
    predictions = np.asarray(test_predictions).reshape(-1)
    residuals = test_prices - predictions
    distances = np.abs(residuals)

    # Work on a copy so the caller's frame is not modified.
    ranked = pd.DataFrame(test_features).copy()
    ranked["Price"] = test_prices
    ranked["Predicted Price"] = predictions
    ranked["Residual"] = residuals
    ranked['Absolute Distances'] = distances
    ranked = ranked.sort_values(by="Absolute Distances", ascending=False)

    # Largest errors are at the top after the descending sort.
    ranked.head(8).to_latex(f"{model_dir}/worst_predictions.txt")
    ranked.tail(8).to_latex(f"{model_dir}/best_predictions.txt")
    
   


def _save_reconstruction_figures(AE, indices, label, test_predictions, test_prices, test_images, model_dir):
    """Save one original-vs-reconstruction figure per index in ``indices``."""
    for rank, idx in enumerate(indices):
        image = test_images[idx]
        decoded_img = np.squeeze(AE.decode(AE.encode(np.expand_dims(image, axis=0))))
        # Clip before the integer cast so imshow receives values inside its
        # valid 0..255 integer range and no longer emits a clipping warning.
        decoded_img = np.clip(decoded_img, 0, 255).astype(int)
        fig, axes = plt.subplots(1, 2, figsize=(15, 5))
        axes[0].imshow(image)
        axes[0].set_title("Original Image")
        axes[1].imshow(decoded_img)
        axes[1].set_title("Reconstructed Image")
        # Annotate the figure with the expected and the predicted price.
        textstr = '\n'.join((
            f"Price: {test_prices[idx]}",
            f"Predicted Price: {test_predictions[idx]}"
        ))
        plt.text(0.01, 0.99, textstr, fontsize=10, transform=fig.transFigure, verticalalignment='top')
        plt.savefig(f"{model_dir}/{label}_reconstruction_{rank}.png")
        plt.close(fig)


def save_reconstuctions(AE, test_predictions, test_prices, test_images, model_dir):
    """Save side-by-side reconstructions for the lowest- and highest-scored images.

    Images are ranked by ``AE.calculate_ssim(test_images)``. The (up to) 10
    lowest scores are saved as ``best_reconstruction_{i}.png`` and the (up to)
    10 highest as ``worst_reconstruction_{i}.png`` inside ``model_dir``. Both
    index lists are printed.

    NOTE(review): the scores are treated as errors (lower = better). If
    calculate_ssim returns a similarity (higher = better) the "best" and
    "worst" labels are swapped; confirm against the autoencoder.

    Args:
        AE: Object providing calculate_ssim, encode and decode.
        test_predictions: Predicted prices, indexable by sample position.
        test_prices: Expected prices, indexable by sample position.
        test_images: Test images, indexable by sample position.
        model_dir: Directory the PNGs are written to.
    """
    n = 10
    reconstruction_errors = AE.calculate_ssim(test_images)
    order = np.argsort(reconstruction_errors)
    best5 = order[:n]
    worst5 = order[::-1][:n]
    print(best5)
    print(worst5)
    # Iterating the index arrays directly also works with fewer than n images.
    _save_reconstruction_figures(AE, best5, "best", test_predictions, test_prices, test_images, model_dir)
    _save_reconstruction_figures(AE, worst5, "worst", test_predictions, test_prices, test_images, model_dir)


def save_model_and_evaluate(
    model: object,
    fit_history: object,
    test_images: np.array,
    test_features: np.array,
    test_prices: np.array,
    model_dir: str,
    model_type:str
):
    """Persist a trained model, evaluate it on the test split and save reports.

    Steps:
      1. Save the model under ``{model_dir}/model`` (``model.save`` for "CNN",
         pickle otherwise); for "CNN" also pickle the training history and
         render the architecture diagram.
      2. Predict on the test split and compute regression_stats.
      3. Append the metrics to ``{model_dir}/evaluation.json`` and rewrite
         ``{model_dir}/median_evaluation.json`` with the medians over all runs
         recorded so far.
      4. Save the diagnostic plots to ``{model_dir}/images`` and the
         worst/best tables to ``model_dir``.

    Args:
        model: Trained model; the prediction call depends on ``model_type``.
        fit_history: Object with a ``history`` attribute; only read for "CNN".
        test_images: Test images ("CNN", "CNN_RF", "CNN_AE_RF").
        test_features: Tabular test features ("RF", "CNN_RF", "CNN_AE_RF").
        test_prices: Expected prices.
        model_dir: Output directory; created if missing.
        model_type: One of "RF", "CNN", "CNN_RF", "CNN_AE_RF".

    Raises:
        ValueError: If ``model_type`` is not one of the supported values.
    """
    supported_types = ("RF", "CNN", "CNN_RF", "CNN_AE_RF")
    if model_type not in supported_types:
        # Without this check an unknown type fell through every branch and
        # failed later with an unbound ``test_predictions``.
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected one of {supported_types}"
        )

    print("Saving Model...")
    os.makedirs(model_dir, exist_ok=True)
    if model_type == "CNN":
        model.save(f"{model_dir}/model")
        # Save Training History
        with open(f"{model_dir}/history", "wb") as file_pi:
            pickle.dump(fit_history.history, file_pi)
        test_predictions = model.predict(test_images)
        # Save Model Architecture
        plot_model(model, to_file=f"{model_dir}/architecture.png", show_shapes=True, show_layer_names=True, show_dtype=True, rankdir="TB", expand_nested=False, dpi=96)
    else:
        with open(f"{model_dir}/model", "wb") as file_pi:
            pickle.dump(model, file_pi)
        if model_type == "RF":
            test_predictions = model.predict(test_features)
        else:
            # "CNN_RF" / "CNN_AE_RF" models take images and features together.
            test_predictions = model.predict(test_images, test_features)

    # Evaluate Model
    print("Evaluating Model...")
    r2, mae, percentage_error, mse = regression_stats(test_prices, test_predictions)

    try:
        feature_importance = model.feature_importances_
        if model_type == "RF":
            # Pair the importances with the feature column names.
            feature_importance = dict(zip(test_features.columns, feature_importance))
    except AttributeError:
        print("Cant find feature_importance")
        feature_importance = None

    # Load existing evaluation data
    evaluation_file_path = f"{model_dir}/evaluation.json"
    evaluation_data = {}
    if os.path.exists(evaluation_file_path):
        with open(evaluation_file_path, "r") as json_file:
            evaluation_data = json.load(json_file)

    # Add new evaluation data
    new_evaluation = {
        "Timestamp": str(datetime.datetime.now()),
        "R2": r2,
        "MAE": mae,
        "Percentage Error": percentage_error,
        "MSE": mse,
        "Feature Importances": feature_importance,
    }
    # Keys loaded from JSON are strings, so store the new run under a string
    # key as well; the file written below is the same as with an int key.
    evaluation_data[str(len(evaluation_data))] = new_evaluation

    # Save updated evaluation data
    with open(evaluation_file_path, "w") as json_file:
        json.dump(evaluation_data, json_file, indent=4)

    # Compute median evaluation values from all instances
    recorded_runs = list(evaluation_data.values())
    median_evaluation_data = {
        metric: np.median([run[metric] for run in recorded_runs])
        for metric in ("R2", "MAE", "Percentage Error", "MSE")
    }

    with open(f"{model_dir}/median_evaluation.json", "w") as json_file:
        json.dump(median_evaluation_data, json_file, indent=4)

    print("\nModel Evaluation:")
    print(new_evaluation)
    print("\nMedian Evaluation:")
    print(median_evaluation_data)
    print("Feauter Importance...")
    print(feature_importance)

    # Images (Create or open existing folder)
    img_dir = f"{model_dir}/images"
    os.makedirs(img_dir, exist_ok=True)

    save_expected_predicted(test_prices, test_predictions, img_dir)
    save_residuals(test_prices, test_predictions, img_dir)

    if model_type == 'CNN':
        print("\nSaving Best and Worst Image Predictions")
        save_worst_best_predictions(model, test_predictions, test_prices, test_images, img_dir)
    elif feature_importance is None:
        # The model exposed no feature_importances_; nothing to plot.
        print("\nNo feature importance available; skipping plot")
    else:
        print("\nSaving Feature Importance")
        save_features_importance(feature_importance, img_dir)

    if model_type == 'CNN_AE_RF':
        print("\nSaving Reconstructions")
        save_reconstuctions(model.autoEncoder_, test_predictions, test_prices, test_images, img_dir)

    save_worst_best(test_predictions, test_prices, test_features, model_dir)
    print("\nDone!")


def train_save_model(
    model_func: object,
    args: tuple,
    test_images: np.array,
    test_features: np.array,
    test_prices: np.array,
    model_dir: str,
    use_gpu: bool,
    model_type:str
):
    """Train one model and hand it to save_model_and_evaluate.

    Args:
        model_func: Training function, called as ``model_func(*args)``. For
            "CNN" it must return ``(model, fit_history)``; for every other
            supported type it returns the model only.
        args: Positional arguments for ``model_func``.
        test_images: Test images, forwarded to the evaluation.
        test_features: Tabular test features, forwarded to the evaluation.
        test_prices: Expected prices, forwarded to the evaluation.
        model_dir: Output directory, forwarded to the evaluation.
        use_gpu: True calls set_gpu(), False calls set_cpu() before training.
        model_type: One of "CNN", "RF", "CNN_RF", "CNN_AE_RF".

    Raises:
        ValueError: If ``model_type`` is not one of the supported values.
    """
    supported_types = ("CNN", "RF", "CNN_RF", "CNN_AE_RF")
    if model_type not in supported_types:
        # Fail before any device setup or training; previously an unknown
        # type only surfaced afterwards as an unbound ``model``.
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected one of {supported_types}"
        )

    if use_gpu:
        set_gpu()
    else:
        set_cpu()

    if model_type == "CNN":
        model, fit_history = model_func(*args)
    else:
        # The remaining model types produce no training history.
        model = model_func(*args)
        fit_history = None
    save_model_and_evaluate(model, fit_history, test_images, test_features, test_prices, model_dir, model_type)


def train_save_models(
    model_func: object,
    args: tuple,
    test_images: np.array,
    test_prices: np.array,
    model_dir: str,
    use_gpu: bool,
    test_features=None,
    model_type: str = "CNN",
):
    """Train several models at once and evaluate each one separately.

    ``model_func(*args)`` must return ``(models, fit_histories)``; model ``i``
    is saved and evaluated under ``{model_dir}_{i}``.

    Args:
        model_func: Training function returning two parallel sequences.
        args: Positional arguments for ``model_func``.
        test_images: Test images, forwarded to the evaluation.
        test_prices: Expected prices, forwarded to the evaluation.
        model_dir: Prefix of the per-model output directories.
        use_gpu: True calls set_gpu(), False calls set_cpu() before training.
        test_features: Tabular test features forwarded to the evaluation.
            Optional so existing six-argument calls keep working.
        model_type: Model type forwarded to the evaluation. Defaults to "CNN"
            because ``model_func`` returns fit histories alongside the models.
    """
    if use_gpu:
        set_gpu()
    else:
        set_cpu()

    models, fit_histories = model_func(*args)
    for model_idx, (model, fit_history) in enumerate(zip(models, fit_histories)):
        # save_model_and_evaluate needs all seven arguments in this order; the
        # earlier five-argument call always raised TypeError.
        save_model_and_evaluate(
            model,
            fit_history,
            test_images,
            test_features,
            test_prices,
            f"{model_dir}_{model_idx}",
            model_type,
        )

# Running on CPU

In [4]:
# Root directory for experiment artefacts; each run cell writes to f"{MODELS_PATH}/{MODEL_NAME}".
MODELS_PATH: str = "./models"
# Forwarded as `use_gpu` to train_save_model: True calls set_gpu(), False calls set_cpu().
USE_GPU: bool = False

### RF

In [5]:
# Put the tabular columns of train2_features into this explicit order.
# NOTE(review): an earlier comment said postal_avg_sqm_price becomes the last column, but the
# list below keeps it ahead of 'lat' and 'lng' — confirm which order the models expect.
train2_features = train2_features[['postal_code', 'type', 'size', 'basement_size', 'rooms', 'year_built','year_rebuilt', 'energy_label', 'postal_avg_sqm_price', 'lat', 'lng' ]]

In [6]:
# Show the first row of each feature frame so their column layouts can be compared.
display(
    train2_features.head(1),
    train1_features.head(1),
    test_features.head(1),
)

Unnamed: 0,postal_code,type,size,basement_size,rooms,year_built,year_rebuilt,energy_label,postal_avg_sqm_price,lat,lng
0,49,0,111,0,3.0,2020.0,2020.0,0,0.0,55.545454,12.234008


Unnamed: 0,postal_code,type,size,basement_size,rooms,year_built,year_rebuilt,energy_label,postal_avg_sqm_price,lat,lng
0,64,0,40,0,2.0,1944.0,1944.0,4,32687.5,55.736966,12.513117


Unnamed: 0,postal_code,type,size,basement_size,rooms,year_built,year_rebuilt,energy_label,postal_avg_sqm_price,lat,lng
0,12,0,56,0,2.0,1897,1897.0,4,57450.25,55.667935,12.547432


In [7]:
# Experiment: call RF with the train-2 tabular features and prices, then save and evaluate
# the result on the test split under f"{MODELS_PATH}/RF".
from models import RF
# Model type string that selects the save / predict branch inside save_model_and_evaluate.
TYPE = "RF"
# Name of the output sub-folder below MODELS_PATH.
MODEL_NAME: str = "RF"
FUNCTION: object = RF
# Positional arguments forwarded as FUNCTION(*ARGS).
ARGS: tuple = (
    train2_features,
    train2_prices,
)
train_save_model(FUNCTION, ARGS, test_images, test_features, test_prices, f"{MODELS_PATH}/{MODEL_NAME}", USE_GPU, TYPE)

Setting CPU
Saving Model...
Evaluating Model...

Model Evaluation:
{'Timestamp': '2024-04-30 07:40:55.466485', 'R2': 0.8443016550932964, 'MAE': 652490.8588957055, 'Percentage Error': 14.57778872142314, 'MSE': 1010007370065.6442, 'Feature Importances': {'postal_code': 0.046806393235473774, 'type': 0.0, 'size': 0.7396949007138165, 'basement_size': 0.0, 'rooms': 0.010049919376703194, 'year_built': 0.02538667049740317, 'year_rebuilt': 0.03623272120740501, 'energy_label': 0.013073504497568486, 'postal_avg_sqm_price': 0.07807195163271148, 'lat': 0.016639173988125255, 'lng': 0.034044764850793205}}

Median Evaluation:
{'R2': 0.8303841292303515, 'MAE': 992721.2216782337, 'Percentage Error': 27.43139309991981, 'MSE': 1967624224234.4958}
Feauter Importance...
{'postal_code': 0.046806393235473774, 'type': 0.0, 'size': 0.7396949007138165, 'basement_size': 0.0, 'rooms': 0.010049919376703194, 'year_built': 0.02538667049740317, 'year_rebuilt': 0.03623272120740501, 'energy_label': 0.013073504497568486,

### CNN

In [None]:
# Experiment: call CNN_model1 with MobileNetV3Small, the train-1 images/prices, the validation
# images/prices and a list of head layers, then save and evaluate on the test split.
# NOTE(review): Flatten, Dense, regularizers and layers are not imported by name in this
# notebook — presumably they arrive through `from models import *`; confirm.
from models import CNN_model1
# 'CNN' makes train_save_model expect a (model, fit_history) pair from FUNCTION.
TYPE = 'CNN'
# Name of the output sub-folder below MODELS_PATH.
MODEL_NAME: str = "MobileNetV3_Small_1"
FUNCTION: object = CNN_model1
# Positional arguments forwarded as FUNCTION(*ARGS).
ARGS: tuple = (
    MobileNetV3Small,
    train1_images,
    train1_prices,
    valid_images,
    valid_prices,
    [
    #Type 1
    Flatten(),
    Dense(512, activation="relu", kernel_regularizer=regularizers.l1(0.2)),
    # BatchNormalization(),
    layers.Dropout(0.2),
    Dense(256, activation="relu", kernel_regularizer=regularizers.l1(0.1)),
    layers.Dropout(0.1),
    Dense(128, activation="relu", kernel_regularizer=regularizers.l1(0.1)),
    layers.Dropout(0.1),
    Dense(64, activation="relu"),
    # Single linear output unit.
    Dense(1, activation="linear"),
    ],
)
train_save_model(FUNCTION, ARGS, test_images, test_features, test_prices, f"{MODELS_PATH}/{MODEL_NAME}", USE_GPU, TYPE)

### CNN RF

In [8]:
# Load a previously saved Keras model; it is passed as the first argument of the CNN_RF and
# CNN_AE_RF experiments further down.
# NOTE(review): no cell in this notebook writes to "{MODELS_PATH}/MobileNetV2" (the CNN cell
# saves under "MobileNetV3_Small_1"), so this artefact must already exist on disk.
img_model =  keras.models.load_model(f"{MODELS_PATH}/MobileNetV2/model")

In [18]:
# Experiment: call CNN_RF_model with the loaded image model plus the train-2 images, features
# and prices, then save and evaluate on the test split.
from models import CNN_RF_model
# 'CNN_RF' models are pickled and predict from (test_images, test_features).
TYPE = 'CNN_RF'
# Name of the output sub-folder below MODELS_PATH.
MODEL_NAME: str = "MobileNetV2_RF"
FUNCTION: object = CNN_RF_model
# Positional arguments forwarded as FUNCTION(*ARGS).
ARGS: tuple = (
    img_model, # keras.models.load_model(f"{MODELS_PATH}/MobileNetV2/model")
    train2_images,
    train2_features,
    train2_prices,
)
train_save_model(FUNCTION, ARGS, test_images, test_features,  test_prices, f"{MODELS_PATH}/{MODEL_NAME}", USE_GPU, TYPE)

Setting CPU
Saving Model...
Evaluating Model...

Model Evaluation:
{'Timestamp': '2024-04-30 08:56:52.974259', 'R2': 0.8544921508959845, 'MAE': 656923.5582822086, 'Percentage Error': 14.830990102535429, 'MSE': 943902133869.9386, 'Feature Importances': {'image_predictions': 0.015636355668632005, 'postal_code': 0.043718193072296706, 'type': 0.0, 'size': 0.7359901207398921, 'basement_size': 0.0, 'rooms': 0.00805295724476705, 'year_built': 0.03273396136580977, 'year_rebuilt': 0.02477864542461593, 'energy_label': 0.018258429424106534, 'postal_avg_sqm_price': 0.07480951252204446, 'lat': 0.019344562045315837, 'lng': 0.02667726249251954}}

Median Evaluation:
{'R2': 0.8329630754490587, 'MAE': 981157.2788288998, 'Percentage Error': 27.45060372348743, 'MSE': 1937707229852.4219}
Feauter Importance...
{'image_predictions': 0.015636355668632005, 'postal_code': 0.043718193072296706, 'type': 0.0, 'size': 0.7359901207398921, 'basement_size': 0.0, 'rooms': 0.00805295724476705, 'year_built': 0.0327339613




Saving Feature Importance

Done!


### CNN_AE_RF

In [19]:
# Build the autoencoder from the train-2 images with a latent dimension of 128. The captured
# output below shows 30 training epochs being run by this call.
from models import autoEncoder
AE = autoEncoder(train2_images, latent_dim=128)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [20]:
# Experiment: call CNN_AE_RF_model with the loaded image model, the autoencoder built above and
# the train-2 images, features and prices, then save and evaluate on the test split.
# Requires `img_model` from the earlier load_model cell to exist in the kernel.
from models import CNN_AE_RF_model
#img_model =  keras.models.load_model(f"{MODELS_PATH}/MobileNetV2/model")
# 'CNN_AE_RF' additionally triggers the reconstruction plots in save_model_and_evaluate.
TYPE = 'CNN_AE_RF'
# Name of the output sub-folder below MODELS_PATH.
MODEL_NAME: str = "MobileNetV2_AE_RF"
FUNCTION: object = CNN_AE_RF_model
# Positional arguments forwarded as FUNCTION(*ARGS).
ARGS: tuple = (
    img_model, #keras.models.load_model(f"{MODELS_PATH}/MobileNetV2/model")
    AE, #Or none for training it from scratch
    train2_images,
    train2_features,
    train2_prices,
)
train_save_model(FUNCTION, ARGS, test_images, test_features,  test_prices, f"{MODELS_PATH}/{MODEL_NAME}", USE_GPU, TYPE)

Setting CPU
Saving Model...
Evaluating Model...

Model Evaluation:
{'Timestamp': '2024-04-30 09:12:16.515559', 'R2': 0.8487168225509731, 'MAE': 667752.2085889571, 'Percentage Error': 15.332724598655286, 'MSE': 981366399765.0306, 'Feature Importances': {'image_predictions': 0.012469668380009177, 'reconstruction_error': 0.017205214861906053, 'postal_code': 0.04139172099648684, 'type': 0.0, 'size': 0.7347626649593174, 'basement_size': 0.0, 'rooms': 0.005898332643423903, 'year_built': 0.03389545760217403, 'year_rebuilt': 0.023105984610404806, 'energy_label': 0.014480188279061475, 'postal_avg_sqm_price': 0.07435613766932983, 'lat': 0.015460870634686649, 'lng': 0.026973759363199953}}

Median Evaluation:
{'R2': 0.8283218744225571, 'MAE': 890553.6065378301, 'Percentage Error': 21.67496515639211, 'MSE': 1494262171665.9766}
Feauter Importance...
{'image_predictions': 0.012469668380009177, 'reconstruction_error': 0.017205214861906053, 'postal_code': 0.04139172099648684, 'type': 0.0, 'size': 0.734




Saving Feature Importance

Saving Reconstructions
[ 67 120 153  25 124  96  49  18  29  78]
[ 51  21 146  99  16 119 150  85 107  65]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).




Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).



Done!


### N-CNN

In [None]:
# Experiment: call N_CNN_RF_model on train-1 and train-2 combined (images, features and prices
# concatenated in that order), then save and evaluate on the test split as a 'CNN_RF' model.
# NOTE(review): `pd` is not imported by name in this notebook — presumably it arrives through
# `from models import *`; confirm.
from models import N_CNN_RF_model
# NOTE(review): the first cell imports MobileNetV3Small from tensorflow.keras.applications;
# this line rebinds the name from keras.applications — confirm both resolve to the same class.
from keras.applications import MobileNetV3Small
TYPE = 'CNN_RF'
# NOTE(review): the folder name says MobileNetV2 while MobileNetV3Small is passed below.
MODEL_NAME: str = "N_CNN_MobileNetV2_RF"
FUNCTION: object = N_CNN_RF_model
# Positional arguments forwarded as FUNCTION(*ARGS).
ARGS: tuple = (
    # NOTE(review): meaning of this leading 4 is defined by N_CNN_RF_model — presumably the
    # number of CNNs; confirm.
    4,
    MobileNetV3Small,
    np.concatenate((train1_images, train2_images), axis=0),
    # The row index is not reset here, so index labels from both frames are kept as-is.
    pd.concat((train1_features, train2_features), axis=0),

    np.concatenate((train1_prices, train2_prices), axis=0),
)
train_save_model(FUNCTION, ARGS, test_images, test_features,  test_prices, f"{MODELS_PATH}/{MODEL_NAME}", USE_GPU, TYPE)

# Legacy

<!-- decoded_images = AE.decode(AE.encode(test_images))
from skimage.metrics import structural_similarity as ssim

img_shape = train2_images.shape
print(img_shape)
def recon_err1(images, encoded_images):
  return tf.square(images - encoded_images)

def recon_err2(images, encoded_images): 
  return tf.reduce_mean(tf.square(images - encoded_images), axis=(1,2,3))

def recon_err3(images, encoded_images):
  normalized_images = images / 255.0
  normalized_encoded_images = encoded_images / 255.0 
  def calc_ssim(normalized_img, normalized_encoded_img):
    return ssim(normalized_img, normalized_encoded_img, channel_axis=2, data_range=1)
  return [calc_ssim(img, encoded_img) for img, encoded_img in zip(normalized_images, normalized_encoded_images)]

#recon1 = recon_err1(test_images, decoded_images)
#recon2 = recon_err2(test_images, decoded_images)
recon3 = recon_err3(test_images, decoded_images)
print(recon3)
reconstructions_errors = recon3

#Get the reconstructions of the best and worst predictions
n = 10
best5 = np.argsort(reconstructions_errors)[:n]
worst5 = np.argsort(reconstructions_errors)[::-1][:n]
print(best5)
print(worst5)
for i in range(n):
    idx = best5[i]
    image = test_images[idx]
    print("Reconstruction Error", reconstructions_errors[idx])
    encoded_img = AE.encode(np.expand_dims(image, axis=0))
    decoded_img = AE.decode(encoded_img)
    encoded_img = np.squeeze(encoded_img)
    decoded_img = np.squeeze(decoded_img)
    #Turn decoded_img into intergers
    decoded_img = decoded_img.astype(int)
    fix, ax = plt.subplots(1, 2, figsize=(15, 5))
    ax[0].imshow(image)
    ax[0].set_title("Original Image")
    ax[1].imshow(decoded_img)
    ax[1].set_title("Reconstructed Image")
    plt.show()

print("\n\n\n\n")
for i in range(n):
    idx = worst5[i]
    image = test_images[idx]
    print("Reconstruction Error", reconstructions_errors[idx])
    encoded_img = AE.encode(np.expand_dims(image, axis=0))
    decoded_img = AE.decode(encoded_img)
    encoded_img = np.squeeze(encoded_img)
    decoded_img = np.squeeze(decoded_img)
    #Turn decoded_img into intergers
    decoded_img = decoded_img.astype(int)
    fix, ax = plt.subplots(1, 2, figsize=(15, 5))
    ax[0].imshow(image)
    ax[0].set_title("Original Image")
    ax[1].imshow(decoded_img)
    ax[1].set_title("Reconstructed Image")
    plt.show() -->