In [7]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input, Concatenate, Dropout, BatchNormalization, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.losses import Huber
from google.colab import drive
import pickle

# Mount Google Drive so files under /content/drive are reachable from this runtime.
# `drive` is imported from google.colab, so this cell presumably only works inside Colab.
drive.mount('/content/drive')

# Load the training table into train_df. Later cells read the columns
# pothole_id, pothole_area_mm2, pothole_length, pothole_width,
# mm_to_pixel_ratio and bags_used_rounded from it.
train_df = pd.read_csv('/content/drive/MyDrive/Modelling/train_df.csv')  # Change to your path

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Load and preprocess image data
def load_and_preprocess_image(image_path, target_size=(224, 224)):
    """Read one image from disk and return it as an array scaled by 1/255.

    Args:
        image_path: Location of the image file to open.
        target_size: Size passed to ``load_img`` as ``target_size``;
            defaults to ``(224, 224)``.

    Returns:
        The array produced by ``img_to_array`` with every value divided
        by 255.0.
    """
    keras_image = tf.keras.preprocessing.image
    pixels = keras_image.img_to_array(
        keras_image.load_img(image_path, target_size=target_size)
    )
    # Rescale so pixel intensities fall in [0, 1]
    return pixels / 255.0

# Prepare image data and scalar features
image_dir = '/content/drive/MyDrive/Modelling/valid_images'
image_paths = []
scalar_features = []
labels = []

# Keep only the rows whose image file exists, so that image_paths,
# scalar_features and labels stay aligned index-for-index.
for _, row in tqdm(train_df.iterrows(), total=len(train_df)):
    pothole_id = row['pothole_id']
    image_path = os.path.join(image_dir, f'{pothole_id}.jpg')

    if os.path.exists(image_path):
        image_paths.append(image_path)
        scalar_features.append([
            row['pothole_area_mm2'],
            row['pothole_length'],
            row['pothole_width'],
            row['mm_to_pixel_ratio']
        ])
        labels.append(row['bags_used_rounded'])

# Report dropped rows instead of discarding them silently, and fail early
# with a clear message when nothing at all could be matched.
n_missing = len(train_df) - len(image_paths)
if n_missing:
    print(f'Skipped {n_missing} of {len(train_df)} rows: image not found in {image_dir}')
if not image_paths:
    raise FileNotFoundError(f'No images referenced by train_df were found in {image_dir}')

# Convert lists to numpy arrays (explicit float32 avoids object-dtype arrays
# when the row values come out of iterrows() as Python objects).
images = np.array([load_and_preprocess_image(path) for path in image_paths])
scalar_features = np.array(scalar_features, dtype=np.float32)
labels = np.array(labels, dtype=np.float32)

# Split the data into training and validation sets BEFORE fitting the scaler,
# so validation rows do not influence the scaling statistics.
X_train_images, X_val_images, X_train_scalar, X_val_scalar, y_train, y_val = train_test_split(
    images, scalar_features, labels, test_size=0.2, random_state=42
)

# Scale scalar features: fit on the training rows only, then apply the same
# transform to the validation rows and to the full array.
scaler = StandardScaler()
X_train_scalar = scaler.fit_transform(X_train_scalar)
X_val_scalar = scaler.transform(X_val_scalar)
scalar_features = scaler.transform(scalar_features)

# Save the scaler (it now carries training-set statistics only)
scaler_path = '/content/drive/MyDrive/Modelling/scaler.pkl'
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)

# Image augmentation with scale-preserving transformations
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
    # No random zoom to preserve the scale
])

874it [00:00, 1775.78it/s]


In [8]:
def r2_score(y_true, y_pred):
    """Coefficient of determination (R^2) computed over the given tensors.

    Both inputs are flattened to 1-D before any arithmetic. Without this,
    targets of shape ``(batch,)`` and predictions of shape ``(batch, 1)``
    would broadcast to a ``(batch, batch)`` matrix in ``y_true - y_pred``
    and the residual sum would be wrong.

    Args:
        y_true: Ground-truth targets; any shape holding one value per sample.
        y_pred: Predictions with the same number of elements as ``y_true``.

    Returns:
        Scalar tensor ``1 - SS_res / (SS_tot + epsilon)``. The epsilon
        guards against division by zero when all targets are identical.

    Note:
        When passed to ``model.compile(metrics=...)`` the function only
        sees the tensors it is called with; if that is one batch at a
        time, the logged value is a per-batch R^2 rather than an R^2 over
        the whole dataset.
    """
    # Flatten and align dtypes so the subtraction is strictly element-wise.
    y_pred = tf.reshape(tf.convert_to_tensor(y_pred), [-1])
    y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), [-1])

    ss_res = tf.reduce_sum(tf.square(y_true - y_pred))
    ss_tot = tf.reduce_sum(tf.square(y_true - tf.reduce_mean(y_true)))
    return 1 - ss_res / (ss_tot + tf.keras.backend.epsilon())

def create_model(input_shape_image, input_shape_scalar):
    """Assemble the two-input regression network (image + scalar features).

    The image input goes through the module-level ``data_augmentation``
    pipeline, three Conv2D/MaxPooling2D stages (32, 64, 128 filters),
    Flatten, and a 128-unit dense block. The scalar input goes through a
    64-unit dense block. Both branches are concatenated, passed through
    another 64-unit dense block, and reduced to a single ReLU output.

    Args:
        input_shape_image: Shape passed to the image ``Input`` layer.
        input_shape_scalar: Shape passed to the scalar ``Input`` layer.

    Returns:
        An uncompiled ``Model`` taking ``[image_input, scalar_input]`` and
        producing one value per sample.
    """

    def dense_block(tensor, units):
        # Dense(relu) -> Dropout(0.5) -> BatchNormalization, in that order
        tensor = Dense(units, activation='relu')(tensor)
        tensor = Dropout(0.5)(tensor)
        return BatchNormalization()(tensor)

    # Image branch
    image_input = Input(shape=input_shape_image)
    image_branch = data_augmentation(image_input)
    for n_filters in (32, 64, 128):
        image_branch = Conv2D(n_filters, (3, 3), activation='relu')(image_branch)
        image_branch = MaxPooling2D((2, 2))(image_branch)
    image_branch = dense_block(Flatten()(image_branch), 128)

    # Scalar-feature branch
    scalar_input = Input(shape=input_shape_scalar)
    scalar_branch = dense_block(scalar_input, 64)

    # Fuse both branches
    merged = dense_block(Concatenate()([image_branch, scalar_branch]), 64)

    # ReLU on the output keeps predictions non-negative
    prediction = Dense(1, activation='relu')(merged)

    return Model(inputs=[image_input, scalar_input], outputs=prediction)

# Build the network: fixed image shape, scalar width taken from the feature array
input_shape_image = (224, 224, 3)
input_shape_scalar = (scalar_features.shape[1],)
model = create_model(
    input_shape_image=input_shape_image,
    input_shape_scalar=input_shape_scalar,
)

# Huber loss for robustness; MSE and the custom R^2 are tracked as metrics
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss=Huber(),
    metrics=['mse', r2_score],
)

# Stop after 10 epochs without val_loss improvement and roll back to the best weights
early_stopping_callback = EarlyStopping(
    restore_best_weights=True,
    patience=10,
    monitor='val_loss',
)

# Multiply the learning rate by 0.2 after 5 stagnant epochs, never going below 1e-7
reduce_lr_callback = ReduceLROnPlateau(
    verbose=1,
    min_lr=1e-7,
    patience=5,
    factor=0.2,
    monitor='val_loss',
)

# Train on the (image, scalar) input pair, validating on the held-out split
history = model.fit(
    x=[X_train_images, X_train_scalar],
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_data=([X_val_images, X_val_scalar], y_val),
    callbacks=[early_stopping_callback, reduce_lr_callback],
    verbose=1,
)

# Persist the trained model to Drive
model.save('/content/drive/MyDrive/Modelling/own_CNN.keras')

Epoch 1/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 176ms/step - loss: 0.4584 - mse: 1.8848 - r2_score: -75.8476 - val_loss: 0.3288 - val_mse: 1.2247 - val_r2_score: -45.9996 - learning_rate: 1.0000e-04
Epoch 2/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 87ms/step - loss: 0.4738 - mse: 1.9592 - r2_score: -83.8022 - val_loss: 0.2570 - val_mse: 0.9832 - val_r2_score: -36.1951 - learning_rate: 1.0000e-04
Epoch 3/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 90ms/step - loss: 0.6094 - mse: 3.2712 - r2_score: -67.6413 - val_loss: 0.2384 - val_mse: 0.9088 - val_r2_score: -33.8293 - learning_rate: 1.0000e-04
Epoch 4/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 77ms/step - loss: 0.4865 - mse: 1.5286 - r2_score: -84.9013 - val_loss: 0.2258 - val_mse: 0.8644 - val_r2_score: -32.2760 - learning_rate: 1.0000e-04
Epoch 5/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 75ms/step - loss:

In [None]:
# Target path on Drive for an additional ".h5" copy of the trained model.
model_save_path_h5 = '/content/drive/MyDrive/Modelling/own_CNN.h5'

# Save the model in HDF5 format
# NOTE(review): the training cell already saves this model as own_CNN.keras;
# this writes a second copy — presumably for tooling that expects .h5, confirm.
model.save(model_save_path_h5)

# Echo the destination so the notebook output records where the file went.
print(f"Model saved to {model_save_path_h5}")