In [None]:
import os
import warnings

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import xgboost as xgb
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization, RandomFlip, RandomRotation
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [None]:
dir_images = "satellite_images" 
image_map = {}
if os.path.exists(dir_images):
    for f in os.listdir(dir_images):
        if f.endswith(".png"): image_map[f.replace(".png", "")] = os.path.join(dir_images, f)

df = pd.read_excel("processed_housing_dataset.xlsx")
df["id_str"] = df["id"].astype(str)
df = df[df["id_str"].isin(image_map.keys())].copy().sort_values("id").reset_index(drop=True)

all_paths = np.array([image_map[pid] for pid in df["id_str"]])
y_log = np.log1p(df["price"].values)

# Create Tabular Features
cols_drop = ["id", "id_str", "date", "price"]
X_tab = df.drop(columns=[c for c in cols_drop if c in df.columns]).select_dtypes(include=[np.number]).fillna(0).values


In [None]:
class ImageGenerator(tf.keras.utils.Sequence):
    def __init__(self, paths, labels, batch_size=32, shuffle=True):
        self.paths = paths
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indices = np.arange(len(paths))
        if self.shuffle: np.random.shuffle(self.indices)
    
    def __len__(self): return int(np.ceil(len(self.paths) / self.batch_size))
    
    def __getitem__(self, index):
        batch_idx = self.indices[index*self.batch_size : (index+1)*self.batch_size]
        batch_paths = self.paths[batch_idx]
        batch_lbls = self.labels[batch_idx]
        
        images = []
        for p in batch_paths:
            img = cv2.imread(p)
            if img is None: img = np.zeros((224, 224, 3))
            else:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = cv2.resize(img, (224, 224))
                img = preprocess_input(img)
            images.append(img)
        return np.array(images), np.array(batch_lbls)
    
    def on_epoch_end(self): 
        if self.shuffle: np.random.shuffle(self.indices)

train_gen = ImageGenerator(all_paths, residuals, batch_size=32, shuffle=True)

def build_cnn():
    aug = tf.keras.Sequential([RandomFlip("horizontal_and_vertical"), RandomRotation(0.2)])
    base = EfficientNetB0(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
    
    # Unfreeze top 50 layers for fine-tuning
    for layer in base.layers[:-50]: layer.trainable = False
    
    inputs = tf.keras.Input(shape=(224, 224, 3))
    x = aug(inputs)
    x = base(x, training=True)
    x = GlobalAveragePooling2D()(x)
    x = BatchNormalization()(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(1, activation='linear')(x) # Predicts residual
    
    model = Model(inputs, outputs)
    model.compile(optimizer=Adam(1e-4), loss='mse')
    return model

cnn = build_cnn()
cnn.fit(train_gen, epochs=6, verbose=1)