## Importing necessary libraries

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers , models
import tensorflow_hub as hub
import os
import shutil
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.applications import NASNetLarge, NASNetMobile
import numpy



## Dataset preparation 

### Reading all the images

In [2]:
csv_file = pd.read_csv('ISIC_2019_Training_GroundTruth (2).csv')
image_path=[]
benign_malign = csv_file.iloc[:,1:2]
image_name = csv_file.iloc[:,0]
dir = "ISIC_2019_Training_Input"
for names in image_name:
    fpath = os.path.join(dir,names +".jpg")
    image_path.append(fpath)
benign=[]
mel=[]
index = 0
for  values in benign_malign.iloc[:,0]:
    if values == 0.0:
        benign.append(image_path[index])
    else:
        mel.append(image_path[index])
    index+=1
print("Length benign: ", len(benign))
print("Length malign: ", len(mel))

Length benign:  20809
Length malign:  4522


### Dividing the images in train , test , valid.

In [3]:
#For benign set of images
ben_train_paths, ben_test_paths = train_test_split(benign, test_size=0.2, random_state=42)

ben_train_paths, ben_val_paths = train_test_split(ben_train_paths, test_size=0.2, random_state=42)

# Print the sizes of each set
print("Benign dataset")
print("Train set size:", len(ben_train_paths))
print("Validation set size:", len(ben_val_paths))
print("Test set size:", len(ben_test_paths))

#For images with melenoma
mel_train_paths, mel_test_paths = train_test_split(mel, test_size=0.2, random_state=42)

mel_train_paths, mel_val_paths = train_test_split(mel_train_paths, test_size=0.2, random_state=42)

# Print the sizes of each set
print("Melanoma Dataset")
print("Train set size:", len(mel_train_paths))
print("Validation set size:", len(mel_val_paths))
print("Test set size:", len(mel_test_paths))

Benign dataset
Train set size: 13317
Validation set size: 3330
Test set size: 4162
Melanoma Dataset
Train set size: 2893
Validation set size: 724
Test set size: 905


## Folder Segregation


In [4]:
os.makedirs("test", exist_ok=True)
os.makedirs("train", exist_ok=True)
os.makedirs("validation", exist_ok=True)

os.makedirs("test/"+ "benign", exist_ok=True)
os.makedirs("test/"+ "mel", exist_ok=True)
os.makedirs("train/"+ "benign", exist_ok=True)
os.makedirs("train/"+ "mel", exist_ok=True)
os.makedirs("validation/"+ "benign", exist_ok=True)
os.makedirs("validation/"+ "mel", exist_ok=True)

In [5]:
for file_path in ben_train_paths:
    shutil.copy(file_path, "train/benign/")
for file_path in ben_test_paths:
    shutil.copy(file_path, "test/benign/")
for file_path in ben_val_paths:
    shutil.copy(file_path, "validation/benign/")
for file_path in mel_train_paths:
    shutil.copy(file_path, "train/mel/")
for file_path in mel_test_paths:
    shutil.copy(file_path, "test/mel/")
for file_path in mel_val_paths:
    shutil.copy(file_path, "validation/mel/")

In [8]:
# NasNet
# Load the pre-trained model (include_top=False for features)
'''
base_model = NASNetMobile(weights="imagenet", include_top=False, input_shape=(331, 331, 3))

# Access the output tensor of the pre-trained model
x = base_model.output

# Add Dense layers for classification

x = tf.keras.layers.Dense(units=1, activation='sigmoid')(x)

# Create a new functional model with the combined layers
model = models.Model(inputs=base_model.input, outputs=x)
'''

'\nbase_model = NASNetMobile(weights="imagenet", include_top=False, input_shape=(331, 331, 3))\n\n# Access the output tensor of the pre-trained model\nx = base_model.output\n\n# Add Dense layers for classification\n\nx = tf.keras.layers.Dense(units=1, activation=\'sigmoid\')(x)\n\n# Create a new functional model with the combined layers\nmodel = models.Model(inputs=base_model.input, outputs=x)\n'

In [12]:
model_url = "https://tfhub.dev/google/imagenet/nasnet_mobile/feature_vector/4"
NASNet_mobile = tf.keras.applications.MobileNetV2(weights="imagenet", include_top=False, input_shape=(224, 224, 3))  # Download weights explicitly

# Create your classification head
model = tf.keras.Sequential([
    NASNet_mobile,
    layers.Flatten(),
    layers.Dense(128, activation="relu"),  # Optional hidden layer
    layers.Dense(1, activation="sigmoid")  # Output layer for binary classification
])

## CNN Training

In [13]:
img_height = img_width = 224
batch_size = 16
'''
# Define the CNN model architecture
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Sigmoid for binary classification (benign/malignant)
'''
# Compile the model
METRICS = [
    keras.metrics.BinaryAccuracy(name="accuracy"),
    keras.metrics.Precision(name="precision"),
    keras.metrics.Recall(name="recall"),
    keras.metrics.AUC(name="auc"),
]

model.compile(
    optimizer=keras.optimizers.Adam(lr=3e-4),
    loss=[keras.losses.BinaryCrossentropy(from_logits=False)],
    metrics=METRICS,
)

# Define data generators for training and validation sets (recommended for large datasets)
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

# Create training and validation data generators
train_generator = train_datagen.flow_from_directory(
    'train',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    shuffle = True,
    seed = 123
)

val_generator = val_datagen.flow_from_directory(
    'validation', 
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary'
    
    
)

# Train the model
model.fit(train_generator,
          epochs=1,  # Adjust number of epochs based on your dataset size
          validation_data=val_generator)

# Save the model for later use
model.save('skin_cancer_detection.h5')

Found 16210 images belonging to 2 classes.
Found 4054 images belonging to 2 classes.


## Testing of the model

In [14]:


# Load test data generator (similar to training and validation)
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    'test',  # Replace with path to your test data folder
    target_size=(img_height, img_width),
    batch_size=32,
    class_mode='binary'
)

# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(test_generator)

# Print test accuracy
print('Test accuracy:', test_acc)


Found 5067 images belonging to 2 classes.


ValueError: too many values to unpack (expected 2)