### Using CNN to test percentage of cases with Malignant vs Benign

In [9]:
from keras.models import load_model

# Path to the saved model
model_path = '../OUTPUT/classifier_model.h5'
model = load_model(model_path)


In [16]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 30, 30, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 15, 15, 32)       0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 7200)              0         
                                                                 
 dense (Dense)               (None, 128)               921728    
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 922,753
Trainable params: 922,753
Non-trainable params: 0
__________________________________________________

In [33]:
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator

# Load metadata
metadata_path = '../DATA/test_cnn/metadata.csv'
metadata = pd.read_csv(metadata_path)
metadata['filename'] = metadata['isic_id'] + '.jpg'  # Ensure 'isic_id' matches your actual ID column

# Split the data randomly into train and test sets
train_metadata, test_metadata = train_test_split(metadata, test_size=0.2, random_state=42)  # 20% data as test set

# Define paths
image_dir = '../DATA/test_cnn/'

# Initialize the data generator
datagen = ImageDataGenerator(rescale=1./255)

# Create generators using flow_from_dataframe
image_generator = datagen.flow_from_dataframe(
    dataframe=train_metadata,
    directory=image_dir,
    x_col='filename',
    y_col='benign_malignant',
    target_size=(32, 32),  # Adjusted to the calculated size
    batch_size=32,
    class_mode='binary'
)



Found 400 validated image filenames belonging to 2 classes.


In [35]:
# Predict using the model
preds = model.predict(image_generator)

# Convert predictions to labels
pred_labels = (preds > 0.5).astype(int)




In [37]:
import numpy as np
from statsmodels.stats.proportion import proportions_ztest

# Count number of malignant cases
count_malignant = np.sum(pred_labels == 1)
nobs = len(pred_labels)  # Number of observations
print(count_malignant, nobs)
# Perform the test
stat, pval = proportions_ztest(count_malignant, nobs, value=0.01, alternative='larger')

print(f'Z-statistic: {stat}, P-value: {pval}')


11 400
Z-statistic: 2.1402124812336347, P-value: 0.01616879947706368
