In [1]:
pip install shap grad-cam

Collecting grad-cam
  Downloading grad-cam-1.5.4.tar.gz (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m53.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting ttach (from grad-cam)
  Downloading ttach-0.0.3-py3-none-any.whl.metadata (5.2 kB)
Downloading ttach-0.0.3-py3-none-any.whl (9.8 kB)
Building wheels for collected packages: grad-cam
  Building wheel for grad-cam (pyproject.toml) ... [?25l[?25hdone
  Created wheel for grad-cam: filename=grad_cam-1.5.4-py3-none-any.whl size=39680 sha256=c3a9e6f0e66703e3b2ea43cb561e92dde0fb7455d14dc90d0b98607913e6a499
  Stored in directory: /root/.cache/pip/wheels/50/b0/82/1f97b5348c7fe9f0ce0ba18497202cafa5dec4562bd5292680
Successfully built grad-cam
Installing collected packages: ttach, grad-cam
Successfully installed g

In [2]:
pip install tf-explain

Collecting tf-explain
  Downloading tf_explain-0.3.1-py3-none-any.whl.metadata (9.3 kB)
Downloading tf_explain-0.3.1-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tf-explain
Successfully installed tf-explain-0.3.1
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install keras-tuner

Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Concatenate, Flatten, Dropout, BatchNormalization, Conv2D, MaxPooling2D, Add, LayerNormalization, Multiply
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from scipy.sparse import csr_matrix
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.optimizers.schedules import CosineDecay
from keras_tuner.tuners import RandomSearch  # Updated import
from tensorflow.keras.regularizers import l2
import joblib
import os

# Create output directory
output_dir = '/kaggle/working/models'
os.makedirs(output_dir, exist_ok=True)

# Load tabular data
tabular_data_path = '/kaggle/input/asifazmain/tabulardata1.csv'
tabular_data = pd.read_csv(tabular_data_path)

# Preprocessing Tabular Data
tabular_features = tabular_data.drop(columns=["faceImage"])
tabular_labels = tabular_data["Age(years)"]

# Normalize age labels
label_scaler = StandardScaler()
y_tabular_scaled = label_scaler.fit_transform(tabular_labels.values.reshape(-1, 1)).flatten()

# Save label scaler
joblib.dump(label_scaler, os.path.join(output_dir, 'label_scaler.pkl'))

# Extract features and labels
X_tabular = tabular_features.drop(columns=["Age(years)"])
y_tabular = y_tabular_scaled

# Handle categorical and numerical features
categorical_features = ["Blood Pressure (s/d)"]
numerical_features = [col for col in X_tabular.columns if col not in categorical_features]

# Preprocessing pipelines
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

# Combine transformers
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numerical_transformer, numerical_features),
        ("cat", categorical_transformer, categorical_features)
    ],
    sparse_threshold=0
)

# Preprocess tabular data
X_tabular_preprocessed = preprocessor.fit_transform(X_tabular)

# Save preprocessor
joblib.dump(preprocessor, os.path.join(output_dir, 'preprocessor.pkl'))

# Convert sparse to dense if needed
if isinstance(X_tabular_preprocessed, csr_matrix):
    X_tabular_preprocessed = X_tabular_preprocessed.toarray()

# Image IDs
image_ids = tabular_data["faceImage"]

# Split data
X_tabular_train, X_tabular_test, y_train, y_test, image_ids_train, image_ids_test = train_test_split(
    X_tabular_preprocessed, y_tabular, image_ids, test_size=0.2, random_state=42
)

# Image preprocessing
image_data_path = '/kaggle/input/asifazmain/imagedata/imagedata/'
image_size = (128, 128)
batch_size = 4

train_image_datagen = ImageDataGenerator(
    rescale=1.0/255.0,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest'
)

test_image_datagen = ImageDataGenerator(rescale=1.0/255.0)

train_image_generator = train_image_datagen.flow_from_dataframe(
    pd.DataFrame({'filename': image_ids_train.apply(lambda x: f'{x}.jpg')}),
    directory=image_data_path,
    x_col='filename',
    y_col=None,
    target_size=image_size,
    class_mode=None,
    batch_size=batch_size,
    shuffle=False
)

test_image_generator = test_image_datagen.flow_from_dataframe(
    pd.DataFrame({'filename': image_ids_test.apply(lambda x: f'{x}.jpg')}),
    directory=image_data_path,
    x_col='filename',
    y_col=None,
    target_size=image_size,
    class_mode=None,
    batch_size=batch_size,
    shuffle=False
)

# Align images and tabular data
def create_tf_dataset(image_gen, tabular_data, labels, batch_size):
    images = []
    valid_indices = []
    for i in range(len(image_gen)):
        batch = image_gen[i]
        batch_size_actual = batch.shape[0]
        start_idx = i * image_gen.batch_size
        end_idx = start_idx + batch_size_actual
        if end_idx > len(tabular_data):
            batch = batch[:len(tabular_data) - start_idx]
            images.append(batch)
            valid_indices.extend(range(start_idx, start_idx + batch.shape[0]))
            break
        images.append(batch)
        valid_indices.extend(range(start_idx, end_idx))
    
    images = np.concatenate(images, axis=0)
    tabular_data = tabular_data[valid_indices]
    labels = labels[valid_indices]
    
    dataset = tf.data.Dataset.from_tensor_slices((
        {
            'image_input': images,
            'tabular_input': tabular_data.astype(np.float32)
        },
        labels.astype(np.float32)
    ))
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Create datasets
train_dataset = create_tf_dataset(train_image_generator, X_tabular_train, y_train, batch_size)
test_dataset = create_tf_dataset(test_image_generator, X_tabular_test, y_test, batch_size)

# CNN model
def build_model(hp):
    image_input = Input(shape=(*image_size, 3), name="image_input")
    x = Conv2D(
        filters=hp.Int('conv1_filters', min_value=32, max_value=96, step=32),
        kernel_size=3,
        activation="relu",
        padding="same"
    )(image_input)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=2)(x)
    
    shortcut = x
    x = Conv2D(
        filters=hp.Int('conv2_filters', min_value=64, max_value=128, step=32),
        kernel_size=3,
        activation="relu",
        padding="same"
    )(x)
    x = BatchNormalization()(x)
    shortcut = Conv2D(
        filters=hp.Int('conv2_filters', min_value=64, max_value=128, step=32),
        kernel_size=1,
        padding="same"
    )(shortcut)
    x = Add()([x, shortcut])
    
    x = Conv2D(
        filters=hp.Int('conv3_filters', min_value=96, max_value=192, step=32),
        kernel_size=3,
        activation="relu",
        padding="same"
    )(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=2)(x)
    
    shortcut = x
    x = Conv2D(
        filters=hp.Int('conv4_filters', min_value=128, max_value=256, step=64),
        kernel_size=3,
        activation="relu",
        padding="same"
    )(x)
    x = BatchNormalization()(x)
    shortcut = Conv2D(
        filters=hp.Int('conv4_filters', min_value=128, max_value=256, step=64),
        kernel_size=1,
        padding="same"
    )(shortcut)
    x = Add()([x, shortcut])
    
    x = Conv2D(
        filters=hp.Int('conv5_filters', min_value=192, max_value=384, step=64),
        kernel_size=3,
        dilation_rate=2,
        activation="relu",
        padding="same"
    )(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=2)(x)
    
    x = Conv2D(
        filters=hp.Int('conv6_filters', min_value=256, max_value=512, step=64),
        kernel_size=3,
        activation="relu",
        padding="same"
    )(x)
    x = BatchNormalization()(x)
    
    x = Flatten()(x)
    x = Dense(
        units=hp.Int('image_dense_units', min_value=128, max_value=512, step=128),
        activation="relu",
        kernel_regularizer=l2(hp.Float('l2_reg', min_value=1e-4, max_value=5e-2, sampling='log'))
    )(x)
    x = Dropout(hp.Float('dropout_image', min_value=0.3, max_value=0.6))(x)
    x = BatchNormalization()(x)
    image_output = Dense(64, activation="relu", name="image_output")(x)

    tabular_input = Input(shape=(X_tabular_preprocessed.shape[1],), name="tabular_input")
    y = Dense(
        units=hp.Int('tabular_units_1', min_value=128, max_value=512, step=128),
        activation="relu",
        kernel_regularizer=l2(hp.Float('l2_reg', min_value=1e-4, max_value=5e-2, sampling='log'))
    )(tabular_input)
    y = LayerNormalization()(y)
    y = Dropout(hp.Float('dropout_tabular_1', min_value=0.3, max_value=0.6))(y)
    y = Dense(
        units=hp.Int('tabular_units_2', min_value=64, max_value=256, step=64),
        activation="relu",
        kernel_regularizer=l2(hp.Float('l2_reg', min_value=1e-4, max_value=5e-2, sampling='log'))
    )(y)
    y = LayerNormalization()(y)
    tabular_output = Dense(64, activation="relu", name="tabular_output")(y)

    concatenated = Concatenate()([image_output, tabular_output])
    fused = Multiply()([image_output, tabular_output])
    fused = Dense(64, activation="relu")(fused)
    combined = Concatenate()([concatenated, fused])
    
    z = Dense(
        units=hp.Int('concat_units', min_value=128, max_value=512, step=128),
        activation="relu",
        kernel_regularizer=l2(hp.Float('l2_reg', min_value=1e-4, max_value=5e-2, sampling='log'))
    )(combined)
    z = Dropout(hp.Float('dropout_concat', min_value=0.3, max_value=0.6))(z)
    z = BatchNormalization()(z)
    final_output = Dense(1, activation="linear", name="final_output")(z)

    lr_schedule = CosineDecay(
        initial_learning_rate=hp.Float('learning_rate', min_value=1e-5, max_value=1e-3, sampling='log'),
        decay_steps=10000
    )
    model = Model(inputs=[image_input, tabular_input], outputs=final_output)
    model.compile(
        optimizer=AdamW(learning_rate=lr_schedule, weight_decay=1e-4, clipnorm=1.0),
        loss="mse",
        metrics=["mae"]
    )
    return model

# Hyperparameter tuning
tuner = RandomSearch(
    build_model,
    objective='val_mae',
    max_trials=15,
    executions_per_trial=2,
    directory='tuner_results',
    project_name='cnn_6layers_low_mae'
)

# Callbacks
early_stopping = EarlyStopping(monitor='val_mae', patience=50, restore_best_weights=True, mode='min')
checkpoint = ModelCheckpoint(
    os.path.join(output_dir, 'best_model.keras'),  # Changed to .keras
    monitor='val_mae',
    save_best_only=True,
    mode='min',
    verbose=1
)

# Perform tuning
tuner.search(
    train_dataset,
    validation_data=test_dataset,
    epochs=100,
    callbacks=[early_stopping, checkpoint]
)

# Get best model
best_model = tuner.get_best_models(num_models=1)[0]

# Save the final model explicitly
best_model.save(os.path.join(output_dir, 'final_model.keras'))  # Changed to .keras

# Evaluate with additional metrics
loss, mae_scaled = best_model.evaluate(test_dataset)
print(f"Test Loss (Scaled): {loss}, Test MAE (Scaled): {mae_scaled}")

# Convert MAE back to original scale
mae_original = label_scaler.inverse_transform([[mae_scaled]])[0][0] - label_scaler.inverse_transform([[0]])[0][0]
print(f"Test MAE (Original Scale): {mae_original}")

# Get predictions for the test dataset
y_pred_scaled = []
y_true_scaled = []
for batch in test_dataset:
    inputs, labels = batch
    preds = best_model.predict(inputs, verbose=0)
    y_pred_scaled.extend(preds.flatten())
    y_true_scaled.extend(labels.numpy().flatten())

y_pred_scaled = np.array(y_pred_scaled)
y_true_scaled = np.array(y_true_scaled)

# Inverse-transform predictions and true labels to original scale
y_pred_original = label_scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
y_true_original = label_scaler.inverse_transform(y_true_scaled.reshape(-1, 1)).flatten()

# Calculate overall accuracy (±0.5 years in original scale)
accuracy_0_5 = np.mean(np.abs(y_pred_original - y_true_original) <= 0.5) * 100
print(f"Overall Accuracy (±0.5 years): {accuracy_0_5:.2f}%")

# Calculate R-squared in original scale
r2 = r2_score(y_true_original, y_pred_original)
print(f"R² Score (Original Scale): {r2:.4f}")

# Optional: Accuracy with rounded ages
accuracy_rounded = np.mean(np.round(y_pred_original) == np.round(y_true_original)) * 100
print(f"Accuracy (Rounded to Nearest Integer): {accuracy_rounded:.2f}%")



Trial 15 Complete [00h 01m 37s]
val_mae: 0.5935455858707428

Best val_mae So Far: 0.46179117262363434
Total elapsed time: 00h 35m 09s


  saveable.load_own_variables(weights_store.get(inner_path))


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 142ms/step - loss: 1.7586 - mae: 0.3869
Test Loss (Scaled): 1.8496322631835938, Test MAE (Scaled): 0.4345267117023468
Test MAE (Original Scale): 7.485883473334514
Overall Accuracy (±0.5 years): 9.52%


NameError: name 'r2_score' is not defined