In [10]:
import os
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input, Concatenate, Dropout, BatchNormalization
from tensorflow.keras.losses import Huber
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm

# Attach Google Drive to the Colab runtime so files under /content/drive are readable
drive.mount('/content/drive')

# Read the training table from Drive into a DataFrame (edit the path to match your Drive layout)
train_df = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/Modelling/train_df.csv',
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
# Load and preprocess image data
def load_and_preprocess_image(image_path, target_size=(224, 224), rescale=False):
    """Load one image from disk and return it as a float array for the model.

    Args:
        image_path: Path of the image file to read.
        target_size: (height, width) the image is resized to while loading.
        rescale: If True, divide pixel values by 255 so they lie in [0, 1].
            Defaults to False: the Keras EfficientNet applications carry their
            own rescaling layer and expect raw pixel values in [0, 255], so
            dividing here would feed the frozen backbone inputs that are
            255x too small.

    Returns:
        The image as an array, as produced by ``img_to_array``.
    """
    image = tf.keras.preprocessing.image.load_img(image_path, target_size=target_size)
    image = tf.keras.preprocessing.image.img_to_array(image)
    if rescale:
        image = image / 255.0  # Only for backbones that expect [0, 1] inputs
    return image

# Prepare image data and scalar features
image_dir = '/content/drive/MyDrive/Modelling/valid_images'
image_paths = []
scalar_features = []
labels = []

# Scalar columns fed to the model next to each image, in model-input order
scalar_columns = ['pothole_area_mm2', 'pothole_length', 'pothole_width', 'mm_to_pixel_ratio']

# Keep only rows whose image file exists on Drive. iterrows() is retained so
# pothole_id is formatted into the filename exactly as before; total= gives tqdm
# a known length so it can render a real progress bar instead of a bare counter.
for index, row in tqdm(train_df.iterrows(), total=len(train_df)):
    pothole_id = row['pothole_id']
    image_path = os.path.join(image_dir, f'{pothole_id}.jpg')

    if os.path.exists(image_path):
        image_paths.append(image_path)
        scalar_features.append([row[column] for column in scalar_columns])
        labels.append(row['bags_used_rounded'])

# Fail early with a clear message instead of an obscure scaler/split error later
if not image_paths:
    raise FileNotFoundError(f"No image matching a row of train_df was found in {image_dir}")

# Convert lists to numpy arrays
images = np.array([load_and_preprocess_image(path) for path in image_paths])
scalar_features_raw = np.array(scalar_features)
labels = np.array(labels)

# Split BEFORE scaling so the validation rows never influence the scaler statistics.
# The split itself depends only on the sample count and random_state, so the same
# rows land in train/validation as when scaling was done first.
(
    X_train_images, X_val_images,
    X_train_scalar_raw, X_val_scalar_raw,
    y_train, y_val,
) = train_test_split(
    images, scalar_features_raw, labels, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it everywhere
scaler = StandardScaler()
X_train_scalar = scaler.fit_transform(X_train_scalar_raw)
X_val_scalar = scaler.transform(X_val_scalar_raw)
# Scaled copy of every row, kept under the original name for later cells
scalar_features = scaler.transform(scalar_features_raw)

# Save the fitted scaler so the same transform can be reapplied later
scaler_path = '/content/drive/MyDrive/Modelling/scaler.pkl'
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)

print(f"Scaler saved to {scaler_path}")

# Image augmentation
# NOTE(review): datagen is defined here but the visible model.fit calls train on the
# raw arrays, so no augmentation is applied unless another cell uses it — confirm.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

874it [00:00, 2623.28it/s]


Scaler saved to /content/drive/MyDrive/Modelling/scaler.pkl


In [15]:
import pickle

# Write the fitted scaler to Drive again (same destination as the data-preparation cell)
scaler_path = '/content/drive/MyDrive/Modelling/scaler.pkl'
with open(scaler_path, mode='wb') as scaler_file:
    scaler_file.write(pickle.dumps(scaler))

print(f"Scaler saved to {scaler_path}")

Scaler saved to /content/drive/MyDrive/Modelling/scaler.pkl


In [None]:
# Model creation
def create_model(input_shape_image, input_shape_scalar, dropout_rate=0.5, output_activation='relu'):
    """Build the two-input regression model (image branch + scalar branch).

    The image branch runs a frozen, ImageNet-pretrained EfficientNetB7 followed by
    global average pooling and a 512-unit dense block. The scalar branch is a
    128-unit dense block. Both are concatenated, passed through another 128-unit
    dense block and reduced to a single output unit.

    Args:
        input_shape_image: Shape of one image, e.g. (224, 224, 3).
        input_shape_scalar: Shape of one scalar-feature vector, e.g. (4,).
        dropout_rate: Dropout rate used in every dense block (default 0.5,
            the value that was previously hard-coded).
        output_activation: Activation of the final 1-unit layer (default 'relu',
            the value that was previously hard-coded).
            NOTE(review): a ReLU output can emit exactly 0 and then passes no
            gradient; 'softplus' keeps predictions strictly positive — consider
            it if training stalls.

    Returns:
        An uncompiled ``Model`` taking ``[image_input, scalar_input]``.
    """
    def dense_block(tensor, units):
        # Dense -> Dropout -> BatchNorm, created in this order so that layer
        # creation order (and therefore auto-generated layer names and weight
        # order in saved checkpoints) matches the earlier hand-written version.
        tensor = Dense(units, activation='relu')(tensor)
        tensor = Dropout(dropout_rate)(tensor)
        return BatchNormalization()(tensor)

    # Base model: EfficientNetB7, frozen so only the new heads are trained
    base_model = EfficientNetB7(weights='imagenet', include_top=False, input_shape=input_shape_image)
    base_model.trainable = False

    # Image branch; training=False keeps the backbone in inference mode
    image_input = Input(shape=input_shape_image)
    image_features = base_model(image_input, training=False)
    image_features = GlobalAveragePooling2D()(image_features)
    image_features = dense_block(image_features, 512)

    # Scalar-feature branch
    scalar_input = Input(shape=input_shape_scalar)
    scalar_branch = dense_block(scalar_input, 128)

    # Fuse both branches
    combined = Concatenate()([image_features, scalar_branch])
    combined = dense_block(combined, 128)

    # Single-value regression output
    output = Dense(1, activation=output_activation)(combined)

    return Model(inputs=[image_input, scalar_input], outputs=output)

# Seed Python, NumPy and TensorFlow before the model is built so weight
# initialisation, dropout and batch shuffling are repeatable across runs
tf.keras.utils.set_random_seed(42)

input_shape_image = (224, 224, 3)
input_shape_scalar = (scalar_features.shape[1],)
model = create_model(input_shape_image, input_shape_scalar)

# Huber loss object kept available as an alternative objective; it is NOT the
# loss used below. Pass loss=huber_loss to model.compile to switch to it.
delta = 1.0  # You can adjust this value
huber_loss = Huber(delta=delta)

# Compile the model with Poisson loss; MSE is tracked as an additional metric
model.compile(optimizer=Adam(learning_rate=1e-4), loss='poisson', metrics=['mse'])

# Define callbacks
checkpoint_path = '/content/drive/MyDrive/Modelling/checkpoints/model_checkpoint.weights.h5'
# Make sure the checkpoint folder exists before the first save is attempted
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Keep only the weights of the epoch with the lowest validation loss
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    mode='min',
    verbose=1
)

# Stop after 50 epochs without val_loss improvement and roll back to the best epoch
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=50,
    restore_best_weights=True
)

# Fit the model
history = model.fit(
    [X_train_images, X_train_scalar], y_train,
    validation_data=([X_val_images, X_val_scalar], y_val),
    epochs=200,
    batch_size=32,
    callbacks=[checkpoint_callback, early_stopping_callback],
    verbose=1
)

Epoch 1/200


In [14]:
# Save the current in-memory model to Drive as a single .h5 file
model.save(
    filepath='/content/drive/MyDrive/Modelling/pothole_bags_model.h5',
)



In [None]:
# Rebuild the architecture and restore the weights stored in the checkpoint file
model = create_model(input_shape_image, input_shape_scalar)

# Load the saved weights
checkpoint_path = '/content/drive/MyDrive/Modelling/checkpoints/model_checkpoint.weights.h5'
model.load_weights(checkpoint_path)

# A freshly built model has no optimizer or loss attached, so it must be compiled
# before it can be evaluated or trained further.
# NOTE(review): this stage optimises 'mse', whereas the initial training cell
# compiled with 'poisson' — confirm the change of objective is intentional.
model.compile(optimizer=Adam(learning_rate=1e-4), loss='mse', metrics=['mse'])

# Score the restored weights first. A new ModelCheckpoint starts with no "best"
# value, so without this threshold the very first epoch would overwrite the
# checkpoint file even when it is worse than the weights just loaded.
resume_val_loss = model.evaluate(
    [X_val_images, X_val_scalar], y_val,
    batch_size=32,
    verbose=0,
    return_dict=True
)['loss']
print(f"Validation loss of restored checkpoint: {resume_val_loss:.4f}")

# Only overwrite the checkpoint when an epoch beats the restored weights
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    mode='min',
    initial_value_threshold=resume_val_loss,
    verbose=1
)

# Stop after 10 epochs without val_loss improvement and roll back to the best epoch
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

# Continue training
history = model.fit(
    [X_train_images, X_train_scalar], y_train,
    validation_data=([X_val_images, X_val_scalar], y_val),
    epochs=50,  # or the number of additional epochs you want to run
    batch_size=32,
    callbacks=[checkpoint_callback, early_stopping_callback],
    verbose=1
)

Epoch 1/50
