In [4]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input, Concatenate, Dropout, BatchNormalization, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.losses import Huber
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import random
import pickle
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Load your train_df
train_df = pd.read_csv('/content/drive/MyDrive/Modelling/train_df.csv')  # Change to your path


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
def load_and_preprocess_image(image_path, target_size=(224, 224)):
    image = tf.keras.preprocessing.image.load_img(image_path, target_size=target_size)
    image = tf.keras.preprocessing.image.img_to_array(image)
    image = image / 255.0  # Normalize to [0, 1]
    return image

# Prepare image data and scalar features
image_dir = '/content/drive/MyDrive/Modelling/valid_images'
image_paths = []
scalar_features = []
labels = []

for index, row in train_df.iterrows():
    pothole_id = row['pothole_id']
    image_path = os.path.join(image_dir, f'{pothole_id}.jpg')

    if os.path.exists(image_path):
        image_paths.append(image_path)
        scalar_features.append([row['pothole_area_mm2']])
        labels.append(row['bags_used'])

# Convert lists to numpy arrays
images = np.array([load_and_preprocess_image(path) for path in image_paths])
scalar_features = np.array(scalar_features)
labels = np.array(labels)

# Scale the scalar features
scaler = StandardScaler()
scalar_features = scaler.fit_transform(scalar_features)

# Save the scaler for later use
scaler_path = '/content/drive/MyDrive/Modelling/scaler.pkl'
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)

In [7]:
# Assuming `images_train` and `images_val` are predefined
def augment_and_pair(image_1, image_2, label):
    # Augment both images in the pair
    augmented_1 = tf.image.random_flip_left_right(image_1)
    augmented_1 = tf.image.random_contrast(augmented_1, lower=0.8, upper=1.2)
    augmented_1 = tf.image.random_brightness(augmented_1, max_delta=0.2)

    augmented_2 = tf.image.random_flip_left_right(image_2)
    augmented_2 = tf.image.random_contrast(augmented_2, lower=0.8, upper=1.2)
    augmented_2 = tf.image.random_brightness(augmented_2, max_delta=0.2)

    return (augmented_1, augmented_2), label

# Create a dataset with both similar and dissimilar pairs
def create_pairs(images):
    pairs = []
    labels = []
    for i in range(len(images)):
        # Create a positive pair (similar)
        j = random.choice(range(len(images)))  # Pick a random index for a similar pair
        pairs.append((images[i], images[j]))
        labels.append(1)

        # Create a negative pair (dissimilar)
        k = random.choice(range(len(images)))  # Pick a random index for a dissimilar pair
        while k == i:
            k = random.choice(range(len(images)))
        pairs.append((images[i], images[k]))
        labels.append(0)

    return pairs, labels

# Generate pairs for training and validation
pairs_train, labels_train = create_pairs(images_train)
pairs_val, labels_val = create_pairs(images_val)

def simclr_dataset(pairs, labels, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((pairs, labels))
    dataset = dataset.map(lambda x, y: augment_and_pair(x[0], x[1], y), num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset

batch_size = 32
simclr_train_ds = simclr_dataset(pairs_train, labels_train, batch_size)
simclr_val_ds = simclr_dataset(pairs_val, labels_val, batch_size)

In [None]:
# Improved Complex CNN Model
def create_complex_cnn_model(input_shape):
    model = models.Sequential([
        layers.Input(shape=input_shape),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(512, (3, 3), activation='relu', padding='same'),
        layers.Conv2D(512, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),

        layers.GlobalAveragePooling2D(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),  # Add dropout to reduce overfitting
        layers.Dense(256, activation='relu'),
    ])
    return model

def create_contrastive_model(input_shape):
    base_model = create_complex_cnn_model(input_shape)

    input_1 = layers.Input(shape=input_shape)
    input_2 = layers.Input(shape=input_shape)

    encoded_1 = base_model(input_1)
    encoded_2 = base_model(input_2)

    # Calculate the Euclidean distance between the two encodings
    distance = layers.Lambda(lambda tensors: tf.reduce_sum(tf.square(tensors[0] - tensors[1]), axis=1, keepdims=True))([encoded_1, encoded_2])

    # Convert the distance to a similarity score
    similarity = layers.Dense(1, activation='sigmoid')(distance)

    model = models.Model(inputs=[input_1, input_2], outputs=similarity)
    return model

input_shape = (224, 224, 3)
contrastive_model = create_contrastive_model(input_shape)

# Compile the model
contrastive_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks for early stopping and saving the best model
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1
)

model_checkpoint = callbacks.ModelCheckpoint(
    filepath='/content/drive/MyDrive/Modelling/Checkpoints/best_contrastive_model.keras',
    monitor='val_loss',
    save_best_only=True,
    verbose=1
)

# Train the model
contrastive_model.fit(
    simclr_train_ds,
    epochs=50,
    validation_data=simclr_val_ds,
    callbacks=[early_stopping, model_checkpoint],
    verbose=1
)

Epoch 1/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 692ms/step - accuracy: 0.4972 - loss: 0.6933
Epoch 1: val_loss improved from inf to 0.69319, saving model to /content/drive/MyDrive/Modelling/Checkpoints/best_contrastive_model.keras
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 912ms/step - accuracy: 0.4971 - loss: 0.6933 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 2/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 641ms/step - accuracy: 0.5093 - loss: 0.6931
Epoch 2: val_loss did not improve from 0.69319
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 685ms/step - accuracy: 0.5091 - loss: 0.6931 - val_accuracy: 0.5000 - val_loss: 0.6933
Epoch 3/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 634ms/step - accuracy: 0.4942 - loss: 0.6933
Epoch 3: val_loss did not improve from 0.69319
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 686ms/step - accuracy: 0.4944 - loss: 0