# Skin Disease classification using CNN Algorithm

# Import Libraries

In [1]:
import numpy as np
from tensorflow.keras.optimizers import Adam
from keras.layers import Activation, Dropout, Convolution2D, GlobalAveragePooling2D
from keras.models import Sequential
import tensorflow as tf
import os

In [2]:
IMG_SAVE_PATH = r'train' 

In [3]:
Str_to_Int = {
    'Actinic keratosis': 0,
    'Atopic Dermatitis': 1,
    'Benign keratosis': 2,
    'Dermatofibroma': 3,
    'Melanocytic nevus': 4,
    'Melanoma':5,
    'Squamous cell carcinoma': 6,
    'Tinea Ringworm Candidiasis':7,
    'Vascular lesion': 8
}

NUM_CLASSES = 9


def str_to_Int_mapper(val):
    return Str_to_Int[val]

# Data Preprocessing


In [4]:
import PIL
import cv2 
from PIL import Image
dataset = []
for directory in os.listdir(IMG_SAVE_PATH):
    path = os.path.join(IMG_SAVE_PATH, directory)
    for image in os.listdir(path):
        new_path = os.path.join(path, image)
        try:
            imgpath=PIL.Image.open(new_path)
            #if imgpath.shape!=(240,240,3):
            imgpath=imgpath.convert('RGB')
            img = np.asarray(imgpath)
            img = cv2.resize(img, (240,240))
            img=img/255.
            dataset.append([img, directory]) 
        except FileNotFoundError:
            print('Image file not found. Skipping...')

In [5]:
data, labels = zip(*dataset)
temp = list(map(str_to_Int_mapper, labels))

In [6]:
len(data)

845

In [7]:
#data[0]

In [8]:
import keras
labels = keras.utils.to_categorical(temp)

In [9]:
count=0
for i in data:
    if i.shape!=(240, 240, 3):
        count=count+1
print(count)      

0


# CNN

In [10]:
def cnn():
    model=Sequential()

    model.add(Conv2D(8, kernel_size=(3,3), activation='relu', input_shape=(240,240,3)))
    model.add(Conv2D(16, kernel_size=(3,3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(9, activation='softmax'))
    
    return model

In [11]:
def split_data(X, y, train_size=0.7, val_size=0.15):
    total_size = len(X)
    
    train_end = int(train_size * total_size)
    val_end = train_end + int(val_size * total_size)
    
    X_train = X[:train_end]
    y_train = y[:train_end]
    
    X_val = X[train_end:val_end]
    y_val = y[train_end:val_end]
    
    X_test = X[val_end:]
    y_test = y[val_end:]
    
    return X_train, y_train, X_val, y_val, X_test, y_test

X_train, y_train, X_val, y_val, X_test, y_test = split_data(np.array(data), np.array(labels))

In [None]:
import optuna
from keras.layers import Dense , Dropout , Conv2D , MaxPooling2D, Flatten

def objective(trial):
    model = Sequential()
    
    kernel_size = trial.suggest_int('kernel_size', 3, 8)    
    
    model.add(Conv2D(trial.suggest_int('conv1_units', 4, 32), kernel_size=(kernel_size, kernel_size), activation='relu', input_shape=(240, 240, 3)))
    model.add(Conv2D(trial.suggest_int('conv2_units', 4, 32), kernel_size=(kernel_size, kernel_size), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(trial.suggest_uniform('dropout1', 0.5, 1)))
              
    model.add(Conv2D(trial.suggest_int('conv3_units', 4, 32), kernel_size=(kernel_size, kernel_size), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(trial.suggest_int('conv4_units', 4, 32), kernel_size=(kernel_size, kernel_size), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(trial.suggest_uniform('dropout2', 0.5, 1)))
              
    model.add(Flatten())
    model.add(Dense(trial.suggest_int('dense_units', 32, 128), activation='relu'))
    model.add(Dropout(trial.suggest_uniform('dropout3', 0.5, 1)))
    model.add(Dense(9, activation='softmax'))
    
    learning_rate = trial.suggest_float('learning_rate', 0.01, 0.2)
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    
    history = model.fit(X_train, y_train, epochs=5, validation_data=(X_val, y_val), batch_size=64)
    
    return history.history['val_accuracy'][-1]

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=3)

best_params = study.best_params

  from .autonotebook import tqdm as notebook_tqdm
[I 2023-11-03 20:39:30,807] A new study created in memory with name: no-name-35aac7a6-a54b-4397-a9e6-3bdd51c5c019
  model.add(Dropout(trial.suggest_uniform('dropout1', 0.5, 1)))
  model.add(Dropout(trial.suggest_uniform('dropout2', 0.5, 1)))
  model.add(Dropout(trial.suggest_uniform('dropout3', 0.5, 1)))


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2023-11-03 20:44:43,808] Trial 0 finished with value: 0.0 and parameters: {'kernel_size': 6, 'conv1_units': 16, 'conv2_units': 27, 'dropout1': 0.7767879553070001, 'conv3_units': 8, 'conv4_units': 24, 'dropout2': 0.6339202185210618, 'dense_units': 68, 'dropout3': 0.9320201689561083, 'learning_rate': 0.06987226366249664}. Best is trial 0 with value: 0.0.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2023-11-03 20:47:32,623] Trial 1 finished with value: 0.0 and parameters: {'kernel_size': 4, 'conv1_units': 7, 'conv2_units': 29, 'dropout1': 0.9055192726652057, 'conv3_units': 23, 'conv4_units': 5, 'dropout2': 0.6958552955512065, 'dense_units': 66, 'dropout3': 0.5988626810058928, 'learning_rate': 0.13650671838528017}. Best is trial 0 with value: 0.0.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2023-11-03 20:51:33,051] Trial 2 finished with value: 0.0 and parameters: {'kernel_size': 6, 'conv1_units': 18, 'conv2_units': 16, 'dropout1': 0.8685856735810668, 'conv3_units': 16, 'conv4_units': 14, 'dropout2': 0.6457689696892548, 'dense_units': 111, 'dropout3': 0.7436134228848712, 'learning_rate': 0.12738038811974076}. Best is trial 0 with value: 0.0.


In [3]:
from itertools import chain
X_train.shape
flattened = X_train.reshape(X_train.shape[0], -1)

len(flattened)


NameError: name 'X_train' is not defined

In [21]:
y_train

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.]], dtype=float32)

In [12]:
import xgboost as xgb
features = X_train.reshape(X_train.shape[0], -1)

# Create an XGBoost DMatrix from your features and labels
dtrain = xgb.DMatrix(data=features, label=y_train)

# Set up XGBoost parameters
params = {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',  # Use an appropriate evaluation metric
}

# Train the XGBoost model
model = xgb.train(params, dtrain, num_boost_round=1)

# Make predictions
predictions = model.predict(dtrain)

In [13]:
def train_and_record_accuracy(model, train_images, train_labels, test_images, test_labels, num_epochs):
    history = {"train_accuracy": [], "val_accuracy": []}
    for epoch in range(num_epochs):
        model.fit(train_images, train_labels, epochs=1, validation_data=(test_images, test_labels), batch_size=64)
        train_accuracy = model.evaluate(train_images, train_labels, verbose=0)[1]
        val_accuracy = model.evaluate(test_images, test_labels, verbose=0)[1]
        history["train_accuracy"].append(train_accuracy)
        history["val_accuracy"].append(val_accuracy)
        print(f"Epoch {epoch + 1}: Train Accuracy = {train_accuracy:.4f}, Validation Accuracy = {val_accuracy:.4f}")
    return history

In [14]:
import pandas as pd
post_search_df = study.trials_dataframe()
post_search_df

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_conv1_units,params_conv2_units,params_conv3_units,params_conv4_units,params_dense_units,params_dropout1,params_dropout2,params_dropout3,params_kernel_size,params_learning_rate,state
0,0,0.0,2023-11-03 20:39:30.807939,2023-11-03 20:44:43.807108,0 days 00:05:12.999169,16,27,8,24,68,0.776788,0.63392,0.93202,6,0.069872,COMPLETE
1,1,0.0,2023-11-03 20:44:43.809105,2023-11-03 20:47:32.623467,0 days 00:02:48.814362,7,29,23,5,66,0.905519,0.695855,0.598863,4,0.136507,COMPLETE
2,2,0.0,2023-11-03 20:47:32.625968,2023-11-03 20:51:33.050330,0 days 00:04:00.424362,18,16,16,14,111,0.868586,0.645769,0.743613,6,0.12738,COMPLETE


In [15]:
from IPython.display import Image

fig1 = optuna.visualization.plot_slice(study)
Image(fig1.to_image(format="png"))

ImportError: Tried to import 'plotly' but failed. Please make sure that the package is installed correctly to use this feature. Actual error: No module named 'plotly'.

In [None]:
fig2 = optuna.visualization.plot_param_importances(study)
Image(fig2.to_image(format="png"))

In [17]:
best_model = Sequential()
best_model.add(Conv2D(best_params['conv1_units'], kernel_size=(3, 3), activation='relu', input_shape=(240, 240, 3)))
best_model.add(Conv2D(best_params['conv2_units'], kernel_size=(3, 3), activation='relu'))
best_model.add(MaxPooling2D(pool_size=(2, 2)))
best_model.add(Dropout(best_params['dropout1']))
best_model.add(Conv2D(best_params['conv3_units'], kernel_size=(3, 3), activation='relu'))
best_model.add(MaxPooling2D(pool_size=(2, 2)))
best_model.add(Conv2D(best_params['conv4_units'], kernel_size=(3, 3), activation='relu'))
best_model.add(MaxPooling2D(pool_size=(2, 2)))
best_model.add(Dropout(best_params['dropout2']))
best_model.add(Flatten())
best_model.add(Dense(best_params['dense_units'], activation='relu'))
best_model.add(Dropout(best_params['dropout3']))
best_model.add(Dense(9, activation='softmax'))

best_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

num_epochs = 10
history = train_and_record_accuracy(best_model, X_train, y_train, X_test, y_test, num_epochs)

train_accuracy_values = history["train_accuracy"]
val_accuracy_values = history["val_accuracy"]

Epoch 1: Train Accuracy = 0.1878, Validation Accuracy = 0.0000
Epoch 2: Train Accuracy = 0.2149, Validation Accuracy = 0.0000
Epoch 3: Train Accuracy = 0.2657, Validation Accuracy = 0.0000
Epoch 4: Train Accuracy = 0.1946, Validation Accuracy = 0.0000
Epoch 5: Train Accuracy = 0.2166, Validation Accuracy = 0.0000
Epoch 6: Train Accuracy = 0.2809, Validation Accuracy = 0.0000
Epoch 7: Train Accuracy = 0.2606, Validation Accuracy = 0.0000
Epoch 8: Train Accuracy = 0.2741, Validation Accuracy = 0.0000
Epoch 9: Train Accuracy = 0.1692, Validation Accuracy = 0.0000
Epoch 10: Train Accuracy = 0.3723, Validation Accuracy = 0.0000


In [19]:
import pickle

with open("best_model.pkl", 'wb') as file:
    pickle.dump(best_model, file)

In [None]:
import matplotlib.pyplot as plt

epochs = range(1, len(train_accuracy_values) + 1)

# Plot the training and validation accuracy
plt.figure(figsize=(10, 5))
plt.plot(epochs, train_accuracy_values, 'b', label='Training Accuracy')
plt.plot(epochs, val_accuracy_values, 'r', label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(False)

ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Display the plot
plt.show()

ModuleNotFoundError: No module named 'matplotlib'

In [None]:
study.best_params

{'kernel_size': 8,
 'conv1_units': 20,
 'conv2_units': 25,
 'dropout1': 0.9242546115857789,
 'conv3_units': 8,
 'conv4_units': 6,
 'dropout2': 0.6887097227436749,
 'dense_units': 125,
 'dropout3': 0.6843400442253731,
 'learning_rate': 0.09215853447702334}