In [1]:
from google.colab import drive
drive.mount('/content/drive')

import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Concatenate, Dropout, BatchNormalization
from tensorflow.keras.applications import MobileNetV2, ResNet50
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import os

Mounted at /content/drive


In [4]:
images_path = '/content/drive/MyDrive/Contrastive Learning/contrastive_learning_images'
labels_path = '/content/drive/MyDrive/Contrastive Learning/contrastive_learning_labels.csv'

# Load the labels
labels_df = pd.read_csv(labels_path)

# Split into training and validation sets
train_df, val_df = train_test_split(labels_df, test_size=0.2, random_state=42)

# Standardize the scalar features
scaler = StandardScaler()
train_df[['pothole_area_mm2', 'mm_to_pixel_ratio']] = scaler.fit_transform(train_df[['pothole_area_mm2', 'mm_to_pixel_ratio']])
val_df[['pothole_area_mm2', 'mm_to_pixel_ratio']] = scaler.transform(val_df[['pothole_area_mm2', 'mm_to_pixel_ratio']])

# Function to preprocess and load the image
def preprocess_image(image_path):
    img = load_img(image_path, target_size=(224, 224))
    img_array = img_to_array(img)
    img_array = img_array / 255.0  # Normalize to [0, 1]
    return img_array

# Image data generator with augmentation (excluding scale transformations)
datagen = ImageDataGenerator(
    rotation_range=10,  # Less aggressive rotation
    width_shift_range=0.02,  # Smaller shifts
    height_shift_range=0.02,  # Smaller shifts
    shear_range=0.05,  # Reduced shear
    horizontal_flip=True,  # Flipping is okay
    fill_mode='nearest'
)

# Custom R² metric
def r2_metric(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    SS_res = tf.reduce_sum(tf.square(y_true - y_pred))
    SS_tot = tf.reduce_sum(tf.square(y_true - tf.reduce_mean(y_true)))
    return 1 - SS_res / (SS_tot + tf.keras.backend.epsilon())

def logcosh_loss(y_true, y_pred):
    return tf.keras.backend.mean(tf.math.log(tf.math.cosh(y_pred - y_true)), axis=-1)

# Build a simplified model using MobileNetV2
def build_simple_model():
    base_model = tf.keras.applications.EfficientNetB0(include_top=False, input_shape=(224, 224, 3))

    # Unfreeze some layers for fine-tuning
    for layer in base_model.layers[-10:]:
        layer.trainable = True

    image_input = Input(shape=(224, 224, 3), name='image_input')
    scalar_input = Input(shape=(2,), name='scalar_input')

    # Process the image through the CNN
    x = base_model(image_input)
    x = GlobalAveragePooling2D()(x)

    # Concatenate the scalar input
    x = Concatenate()([x, scalar_input])

    # Add even more simplified fully connected layers
    x = Dense(64, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)
    x = Dense(32, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)
    output = Dense(1, activation='relu')(x)  # Output layer for regression

    model = Model(inputs=[image_input, scalar_input], outputs=output)

    # Compile the model with a lower learning rate
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              loss=logcosh_loss,
              metrics=[r2_metric])


    return model

# Custom data generator
def data_generator(df, images_path, batch_size, augment=False):
    num_samples = len(df)
    while True:  # Loop forever so the generator never terminates
        df = df.sample(frac=1).reset_index(drop=True)  # Shuffle the dataframe each epoch
        for offset in range(0, num_samples, batch_size):
            batch_df = df[offset:offset+batch_size]

            X_images = []
            X_scalars = []
            y = []

            for _, row in batch_df.iterrows():
                image_path = os.path.join(images_path, f"{row['pothole_id']}.jpg")
                image = preprocess_image(image_path)
                scalar_features = np.array([row['pothole_area_mm2'], row['mm_to_pixel_ratio']])

                X_images.append(image)
                X_scalars.append(scalar_features)
                y.append(row['bags_used'])

            X_images = np.array(X_images)
            X_scalars = np.array(X_scalars)
            y = np.array(y)

            if augment:
                X_images, y = next(datagen.flow(X_images, y, batch_size=batch_size, shuffle=False))

            yield ({'image_input': X_images, 'scalar_input': X_scalars}, y)

# Instantiate the simplified model
model = build_simple_model()

# Prepare data generators for training and validation
batch_size = 16
train_gen = data_generator(train_df, images_path, batch_size=batch_size, augment=True)
val_gen = data_generator(val_df, images_path, batch_size=batch_size, augment=False)

# Train the model
history = model.fit(
    train_gen,
    steps_per_epoch=len(train_df) // batch_size,
    validation_data=val_gen,
    validation_steps=len(val_df) // batch_size,
    epochs=500,  # Adjust as needed
    verbose=1,
    callbacks=[
        EarlyStopping(monitor='val_r2_metric', patience=100, restore_best_weights=True, mode='max', verbose=1),
        ModelCheckpoint('/content/drive/MyDrive/Contrastive Learning/bags_used_predictor_simple_huber.keras', monitor='val_loss', save_best_only=True, verbose=1)
    ]
)

# Save the final model
model.save('/content/drive/MyDrive/Contrastive Learning/bags_used_predictor_simple_final_huber.keras')

Epoch 1/500
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - loss: 0.6740 - r2_metric: -107.4085 
Epoch 1: val_loss improved from inf to 0.40503, saving model to /content/drive/MyDrive/Contrastive Learning/bags_used_predictor_simple_huber.keras
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m722s[0m 15s/step - loss: 0.6744 - r2_metric: -107.2201 - val_loss: 0.4050 - val_r2_metric: -50.4564
Epoch 2/500
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 308ms/step - loss: 0.5971 - r2_metric: -105.6718
Epoch 2: val_loss improved from 0.40503 to 0.39307, saving model to /content/drive/MyDrive/Contrastive Learning/bags_used_predictor_simple_huber.keras
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 545ms/step - loss: 0.5975 - r2_metric: -105.6053 - val_loss: 0.3931 - val_r2_metric: -41.1537
Epoch 3/500
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 335ms/step - loss: 0.5797 - r2_metric: -71.0753
Epoch 3: val

In [4]:
model.save('/content/drive/MyDrive/Contrastive Learning/bags_used_predictor_final.keras')