In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, LSTM, Reshape, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, cohen_kappa_score, confusion_matrix
import os

In [None]:
# Load metadata for LSTM input
# Every column of this frame is later treated as a model feature
# (`metadata.shape[1]` sizes the LSTM input and `metadata.values` is fed to
# the network), so presumably all columns are numeric and no id/label
# columns are present — TODO confirm against the CSV schema.
metadata = pd.read_csv('/mnt/data/df_train.csv')

# Define CNN model for feature extraction using ResNet50
def build_cnn(input_shape):
    """Build the image branch of the network.

    A ResNet50 backbone (ImageNet weights, classification head removed) is
    followed by a Flatten layer and a 128-unit ReLU Dense layer.

    Args:
        input_shape: Per-sample image shape handed to ResNet50.

    Returns:
        A Keras ``Model`` mapping the backbone input to the 128-unit
        feature vector.
    """
    backbone = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)
    flattened = Flatten()(backbone.output)
    image_features = Dense(128, activation='relu')(flattened)
    return Model(backbone.input, image_features)

# Define LSTM model for semantic feature extraction
def build_lstm(input_shape):
    """Build the metadata branch: an input followed by a 64-unit LSTM.

    Keras ``LSTM`` needs a per-sample shape of ``(timesteps, features)``.
    The notebook passes a flat ``(n_features,)`` shape, which would make the
    layer fail at construction time, so a 1-D shape is promoted to a single
    timestep ``(1, n_features)`` via ``Reshape``. The model input itself
    keeps the shape the caller asked for, so flat 2-D metadata arrays can
    still be fed directly. Shapes that already have two dimensions are used
    unchanged.

    Args:
        input_shape: Per-sample shape, either ``(n_features,)`` (or a bare
            int) for flat metadata, or ``(timesteps, features)``.

    Returns:
        A Keras ``Model`` mapping the input to the LSTM's final 64-unit
        output (``return_sequences=False``).
    """
    if isinstance(input_shape, int):
        input_shape = (input_shape,)
    input_shape = tuple(input_shape)

    inputs = Input(shape=input_shape)
    x = inputs
    if len(input_shape) == 1:
        # Flat feature vector: present it to the LSTM as a one-step sequence.
        x = Reshape((1, input_shape[0]))(x)
    x = LSTM(64, return_sequences=False)(x)
    lstm_model = Model(inputs, x)
    return lstm_model

# Combined model
def build_combined_model(cnn_input_shape, lstm_input_shape, num_classes=3):
    """Build the two-branch classifier that fuses image and metadata features.

    The outputs of ``build_cnn`` and ``build_lstm`` are concatenated and
    passed through Dense(128, relu) -> Dropout(0.3) -> Dense(softmax).

    Args:
        cnn_input_shape: Per-sample image shape forwarded to ``build_cnn``.
        lstm_input_shape: Per-sample metadata shape forwarded to
            ``build_lstm``.
        num_classes: Number of output classes for the softmax head.
            Defaults to 3, the value that was previously hard-coded.

    Returns:
        A Keras ``Model`` taking ``[image_input, metadata_input]`` and
        returning class probabilities of width ``num_classes``.
    """
    cnn_model = build_cnn(cnn_input_shape)
    lstm_model = build_lstm(lstm_input_shape)

    # Fuse both feature vectors along the last axis before the classifier head.
    combined_input = tf.keras.layers.Concatenate()([cnn_model.output, lstm_model.output])
    x = Dense(128, activation='relu')(combined_input)
    x = Dropout(0.3)(x)
    x = Dense(num_classes, activation='softmax')(x)

    model = Model([cnn_model.input, lstm_model.input], x)
    return model

# Per-sample input shapes for the two branches
cnn_input_shape = (224, 224, 3)  # 224x224 RGB images for the ResNet50 branch
lstm_input_shape = metadata.shape[1:]  # (n_columns,): one feature per metadata column

# Assemble the two-branch network and configure it for one-hot multi-class training
model = build_combined_model(cnn_input_shape, lstm_input_shape)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Persist the best weights (lowest val_loss) so an interrupted run can resume
# NOTE(review): Keras 3 requires weights-only checkpoints to end in
# `.weights.h5` — confirm against the installed Keras version.
checkpoint_path = "model_checkpoint.h5"
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Resume from a previous run when its checkpoint file is present
if os.path.exists(checkpoint_path):
    model.load_weights(checkpoint_path)
    print("Loaded weights from checkpoint")

# Data preparation
# 'images' is a NumPy array of shape (num_samples, 224, 224, 3) and 'labels'
# a NumPy array of shape (num_samples,). Both must have exactly one entry per
# metadata row, otherwise train_test_split rejects the inputs as having
# inconsistent lengths — so the placeholders are sized from the CSV instead
# of a hard-coded 100.
# NOTE(review): the placeholder image array is float64 and grows with the
# CSV; swap in real image loading before running on a large metadata file.
num_samples = len(metadata)
images = np.random.rand(num_samples, *cnn_input_shape)  # Replace with actual image data
labels = np.random.randint(0, 3, size=(num_samples,))  # Example labels: 0 - Normal, 1 - Benign, 2 - Malignant

# One-hot encode the labels
labels = to_categorical(labels, num_classes=3)

# Split images, metadata and labels with the same row permutation
# NOTE(review): metadata.values must be purely numeric for Keras — confirm.
X_train_img, X_val_img, X_train_meta, X_val_meta, y_train, y_val = train_test_split(
    images, metadata.values, labels, test_size=0.2, random_state=42
)

# Data augmentation for training images
datagen = ImageDataGenerator(
    rescale=1.0/255,  # Scale pixel values by 1/255
    rotation_range=90,
    zoom_range=0.5,
    horizontal_flip=True
)

# Validation images must not be randomly rotated/zoomed/flipped, otherwise
# validation metrics are measured on distorted data; only apply the rescale.
val_datagen = ImageDataGenerator(rescale=1.0/255)

# Train the model
batch_size = 8
epochs = 10

train_img_generator = datagen.flow(X_train_img, y_train, batch_size=batch_size)
# shuffle=False keeps validation batches in a fixed, reproducible order
val_img_generator = val_datagen.flow(X_val_img, y_val, batch_size=batch_size, shuffle=False)
# NOTE(review): neither generator is passed to the model.fit call that follows
# in this cell (it trains on the raw arrays), so augmentation and rescaling
# are currently not applied during training.

# Fit on the raw image/metadata arrays; the held-out split is scored after
# every epoch and drives the checkpoint callback's val_loss monitor
history = model.fit(
    x=[X_train_img, X_train_meta],
    y=y_train,
    validation_data=([X_val_img, X_val_meta], y_val),
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[checkpoint_callback],
)

# Sub-model that stops three layers from the end of the trained network.
# Assumes the tail of `model.layers` is Dense(128) -> Dropout -> Dense(softmax)
# as built by build_combined_model, so layers[-3] is the fused Dense(128).
feature_extractor = Model(inputs=model.input, outputs=model.layers[-3].output)

# Fused feature vectors for the training split
combined_features = feature_extractor.predict(
    [X_train_img, X_train_meta],
    batch_size=batch_size,
)

# Classical ensemble members are trained on integer class ids, not one-hot rows
labels = np.argmax(y_train, axis=1)

# Random Forest on the fused features
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(combined_features, labels)

# Gradient-boosted trees on the same features
xgb_classifier = XGBClassifier(
    use_label_encoder=False,
    eval_metric='mlogloss',
).fit(combined_features, labels)
# Ensemble prediction (averaging the probabilities)
def ensemble_predict(feature_set):
    """Predict class ids by soft-voting the two fitted classifiers.

    Calls ``predict_proba`` on the module-level ``rf_classifier`` and
    ``xgb_classifier``, averages the two probability matrices with equal
    weight, and picks the highest-scoring column per row.

    Args:
        feature_set: 2-D array of feature vectors, one row per sample.

    Returns:
        1-D array with the index of the winning class for each row.
    """
    per_model = [clf.predict_proba(feature_set) for clf in (rf_classifier, xgb_classifier)]
    averaged = (per_model[0] + per_model[1]) / 2
    return np.argmax(averaged, axis=1)

# Example prediction on random feature vectors (smoke test of the ensemble)
example_features = np.random.rand(5, combined_features.shape[1])  # Example feature set for prediction
predictions = ensemble_predict(example_features)
print("Predictions:", predictions)

# Evaluate the ensemble on the validation split
# Validation features are extracted the same way as the training features
combined_val_features = feature_extractor.predict([X_val_img, X_val_meta], batch_size=batch_size)

# Hard class predictions and integer ground truth
val_predictions = ensemble_predict(combined_val_features)
val_labels = np.argmax(y_val, axis=1)

# ROC AUC is a ranking metric and needs scores, not hard labels: use the same
# equal-weight average of class probabilities that ensemble_predict argmaxes.
val_probabilities = (
    rf_classifier.predict_proba(combined_val_features)
    + xgb_classifier.predict_proba(combined_val_features)
) / 2

# Calculate evaluation metrics
accuracy = accuracy_score(val_labels, val_predictions)
precision = precision_score(val_labels, val_predictions, average='weighted')
recall = recall_score(val_labels, val_predictions, average='weighted')
f1 = f1_score(val_labels, val_predictions, average='weighted')
roc_auc = roc_auc_score(to_categorical(val_labels, num_classes=3),
                        val_probabilities,
                        multi_class='ovr')
kappa = cohen_kappa_score(val_labels, val_predictions)

# Confusion matrix for specificity calculation.
# labels=[0, 1, 2] pins the matrix to 3x3 so row/column 0 is always class 0
# even when a class is missing from this split.
conf_matrix = confusion_matrix(val_labels, val_predictions, labels=[0, 1, 2])
# Specificity with class 0 (Normal) as the negative class:
# correctly predicted class-0 samples / all true class-0 samples.
normal_total = conf_matrix[0].sum()
specificity = conf_matrix[0, 0] / normal_total if normal_total > 0 else 0.0

# NOTE(review): "Training Accuracy" is the Keras model's last-epoch accuracy,
# while every other figure below scores the RF+XGBoost ensemble.
print(f"Training Accuracy: {history.history['accuracy'][-1]}")
print(f"Validation Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall (Sensitivity): {recall}")
print(f"F1 Score: {f1}")
print(f"ROC AUC Score: {roc_auc}")
print(f"Specificity: {specificity}")
print(f"Cohen's Kappa: {kappa}")