In [None]:
 !pip install split_folders

In [None]:
!python --version

In [None]:
import os
import shutil
import splitfolders
import pandas as pd

In [None]:
from typing import Union

class ClassifyDatasets:
    """Sort dataset images into one sub-folder per diagnosis.

    Every entry found under ``datasets`` is treated as a dataset directory that
    holds a metadata ``.csv`` file and an ``archive`` sub-directory with the
    images. Images whose metadata row has ``image_type == 'dermoscopic'`` are
    copied to ``<input_dataset>/<diagnosis>/``.
    """

    # Suffixes shutil knows how to unpack (e.g. '.zip', '.tar', '.tar.gz').
    # Previously this attribute was never defined, so unpack_archive() always
    # raised AttributeError.
    archived_extensions = tuple(
        extension
        for _name, extensions, _description in shutil.get_unpack_formats()
        for extension in extensions
    )

    def __init__(self, datasets: str = None, input_dataset: str = None) -> None:
        """Remember the source/destination roots and create the destination.

        Args:
            datasets: Directory whose entries are the individual datasets.
            input_dataset: Directory that receives the per-diagnosis folders;
                created if it does not exist yet.
        """
        self.datasets = datasets
        self.input_dataset = input_dataset  # classified images are copied here
        # State filled in by process_datasets(); initialised here so helper
        # methods can be called in any order.
        self.csvfile = None
        self.zipfile_path = None
        self.duplicate_count = 0
        if self.input_dataset is not None:
            os.makedirs(self.input_dataset, exist_ok=True)

    def list_dirs_files(self, path: str = None) -> list:
        """Return the names of the entries in ``path`` (thin ``os.listdir`` wrapper)."""
        return os.listdir(path)

    def unpack_archive(self, zip_path: str, extract_dir: str) -> None:
        """Extract ``zip_path`` into ``extract_dir`` if it has a known archive suffix.

        Files with any other suffix are ignored.
        """
        if zip_path.endswith(tuple(self.archived_extensions)):
            shutil.unpack_archive(zip_path, extract_dir=extract_dir)

    def _process_csvfile(self, csvfile: str = None) -> tuple:
        """Read the metadata CSV and return ``(file name, diagnosis, image type)`` rows.

        Missing values are replaced by ``'unknown'`` and ``.JPG`` is appended
        to every ``isic_id`` so it matches the image file names.
        """
        df = pd.read_csv(csvfile).fillna('unknown')
        df["isic_id"] = df["isic_id"].astype(str) + ".JPG"
        columns = ["isic_id", "diagnosis", "image_type"]
        return tuple(df[columns].values)

    def _copy_file(self, src_path: str, dest_path: str) -> bool:
        """Copy ``src_path`` to ``dest_path``; return True on success.

        Failures are reported on stdout instead of being raised so one bad
        file does not abort the whole run. ``shutil.Error`` increments
        ``self.duplicate_count``.
        """
        try:
            shutil.copy(src_path, dest_path)
            return True
        except FileNotFoundError:
            print(f'ERROR: File not found error {src_path}')
        except shutil.Error:
            print(f'Destination file already exists {src_path}')
            self.duplicate_count += 1
        return False

    def _process_zipped_columns(self, zipped_columns: tuple = None, image_files_list: list = None) -> int:
        """Copy every dermoscopic image listed in ``zipped_columns``.

        Args:
            zipped_columns: Rows as produced by ``_process_csvfile``.
            image_files_list: Accepted for interface compatibility; not used.

        Returns:
            Number of images copied successfully.
        """
        if zipped_columns is None:
            zipped_columns = ()
        count = 0
        self.duplicate_count = 0
        for column in zipped_columns:
            image_path = os.path.join(self.zipfile_path, column[0])
            classified_dataset_path = os.path.join(self.input_dataset, column[1])
            if os.path.exists(image_path) and column[2] == 'dermoscopic':
                print(image_path)
                os.makedirs(classified_dataset_path, exist_ok=True)
                if self._copy_file(image_path, classified_dataset_path):
                    count += 1
        return count

    def classify_dataset_using_archived(self, image_files_list: list = None) -> Union[int, None]:
        """Classify the images of the dataset selected by ``process_datasets``.

        Reads ``self.csvfile`` and copies the matching images found under
        ``self.zipfile_path``. Returns the number of images copied.
        """
        zipped_columns = self._process_csvfile(csvfile=self.csvfile)
        print(self.zipfile_path)
        return self._process_zipped_columns(zipped_columns, image_files_list)

    def filter_image_list(self, path: str = None):
        """Return ``path`` when it contains ``'.JPG'``, otherwise ``None``."""
        if path is not None and '.JPG' in path:
            return path
        return None

    def process_datasets(self, dataset_dir: str = None) -> Union[bool, int, None]:
        """Classify one dataset directory located under ``self.datasets``.

        Returns:
            ``False`` when ``dataset_dir`` is ``None``; the number of copied
            images when a CSV file was found; ``None`` (after printing a
            message) when the CSV file or the ``archive`` folder is missing.
        """
        if dataset_dir is None:
            return False
        dataset_with_metadata = os.path.join(self.datasets, dataset_dir)
        self.csvfile = None
        self.zipfile_path = os.path.join(dataset_with_metadata, 'archive')
        # A stray file or an incomplete dataset is reported instead of
        # aborting the whole run with an exception from os.listdir().
        if not os.path.isdir(self.zipfile_path):
            print(f'Zip and CSV not found in {dataset_with_metadata}')
            return None
        image_files_list = [
            image_name
            for image_name in self.list_dirs_files(self.zipfile_path)
            if self.filter_image_list(image_name) is not None
        ]
        for file in self.list_dirs_files(dataset_with_metadata):
            if os.path.splitext(file)[1] == '.csv':
                self.csvfile = os.path.join(dataset_with_metadata, file)
        if self.csvfile:
            return self.classify_dataset_using_archived(image_files_list)
        print(f'Zip and CSV not found in {dataset_with_metadata}')
        return None

    def _main(self):
        """Process every entry of ``self.datasets`` and print the per-dataset results."""
        list_dirs_files = self.list_dirs_files(self.datasets)
        result_processed_datasets = list(map(self.process_datasets,
                                             list_dirs_files))
        print(result_processed_datasets)


In [None]:
os.getcwd()

In [None]:
!ls ../input/dermoscopic/datasets
input_folder_path = "/kaggle/input/dermoscopic/datasets"
input_dataset = 'input_dataset'

In [None]:
# Sort the raw Kaggle datasets into one folder per diagnosis.
cd = ClassifyDatasets(datasets=input_folder_path,
                      input_dataset=input_dataset)
# _main() prints its own summary and returns nothing, so wrapping it in
# print() only added a stray "None" to the cell output.
cd._main()

In [None]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Number of epochs for the first (feature-extraction) training stage.
epochs = 15
# Number of additional epochs for the second (fine-tuning) training stage.
second_epochs = 15
# Directory that holds one sub-folder of images per class.
input_location = "../input/skin-diseases-image-dataset/IMG_CLASSES"
# Directory (relative to the working directory) that receives the train/val/test split.
output = "output" 
# Every entry of input_location is treated as a class folder further below.
list_of_folders = os.listdir(input_location)
print(list_of_folders)

In [None]:
def generate_datasets(folders_to_copy, loc=input_location, output=output, is_temp_loc=True,
                      seed=42, ratio=(0.80, 0.1, 0.1)):
    """Split class folders into train/val/test sets with ``splitfolders.ratio``.

    Args:
        folders_to_copy: Names of the class folders (inside ``loc``) to use.
            Only consulted when ``is_temp_loc`` is true.
        loc: Directory containing one sub-folder per class.
        output: Directory that receives the split.
        is_temp_loc: When true, only ``folders_to_copy`` are staged in a
            scratch directory and split; when false, everything in ``loc``
            is split directly.
        seed: Random seed forwarded to ``splitfolders.ratio``.
        ratio: ``(train, val, test)`` fractions forwarded to ``splitfolders.ratio``.
    """
    if not is_temp_loc:
        splitfolders.ratio(loc, output=output, seed=seed, ratio=ratio)
        return

    temp_loc = "temp"  # scratch directory holding only the selected folders
    # A previous run that failed half-way would leave this directory behind
    # and make copytree() raise FileExistsError, so start from a clean slate.
    if os.path.isdir(temp_loc):
        shutil.rmtree(temp_loc)
    try:
        for folder_name in folders_to_copy:
            shutil.copytree(os.path.join(loc, folder_name),
                            os.path.join(temp_loc, folder_name))
        splitfolders.ratio(temp_loc, output=output, seed=seed, ratio=ratio)
    finally:
        # Always remove the scratch copy, even when copying or splitting failed.
        shutil.rmtree(temp_loc, ignore_errors=True)


In [None]:
# Create the output directory together with its train/val/test sub-directories.
def generate_output_dir(output=output, exist_ok=True):
    """Create ``output`` plus its ``train``, ``val`` and ``test`` sub-directories.

    Existing directories are left untouched; nothing is deleted. ``exist_ok``
    is forwarded to ``os.makedirs``.
    """
    directories = (f'{output}', f'{output}/train', f'{output}/val', f'{output}/test')
    for directory in directories:
        os.makedirs(directory, exist_ok=exist_ok)

In [None]:
# Specify the folders you want to copy
# folders_to_copy = ["1. Eczema 1677", "3. Atopic Dermatitis - 1.25k","8. Seborrheic Keratoses and other Benign Tumors - 1.8k"]  # Replace with the actual folder names
# Use every class folder found in input_location.
folders_to_copy = list_of_folders[:]
# Number of classes; later used as the size of the model's output layer.
num_classification = len(folders_to_copy)
generate_output_dir()
# is_temp_loc=False: split input_location directly (folders_to_copy is not consulted on this path).
generate_datasets(folders_to_copy, loc=input_location, output=output, is_temp_loc=False)


> On Kaggle we need to create a directory for our data after splitting, but when running the notebook on our own PC/laptop we can just specify the input path and the output path.

In [None]:
import os
# Summarise the generated split: sub-directory and file counts per directory.
for dirpath, dirnames, filenames in os.walk(f"./{output}"):
    print(f"There are {len(dirnames)} directories and {len(filenames)} files in '{dirpath}'.")

> After preparing the input data and before we start modelling, it's always best to visualize some random images from the dataset.

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mping
import random

def plot_random_image(target_dir,target_class):
    """Show one randomly chosen image of ``target_class`` and return its pixels.

    Args:
        target_dir: Directory that contains one sub-folder per class.
        target_class: Name of the class sub-folder to sample from.

    Returns:
        The image array as read by ``matplotlib.image.imread``.
    """
    # os.path.join works with or without a trailing separator on target_dir;
    # plain string concatenation silently required one.
    target_folder = os.path.join(target_dir, target_class)
    random_image = random.choice(os.listdir(target_folder))
    img = mping.imread(os.path.join(target_folder, random_image))
    plt.imshow(img)
    plt.title(target_class)
    plt.axis("off")
    return img
    

In [None]:
def plot_random_figures(loc=None, output=output):
    """Plot one random test image for up to the first four classes in ``loc``.

    Args:
        loc: Class folder names; only the first four are shown (2x2 grid).
        output: Root of the split dataset; images are read from ``<output>/test``.
    """
    # Derive the panel count from the argument itself instead of the global
    # `num_classification`, which is reassigned later in the notebook and
    # could disagree with len(loc).
    classes_to_show = list(loc)[:4] if loc is not None else []
    fig = plt.figure(figsize=(10, 7))
    for index, target_class in enumerate(classes_to_show):
        fig.add_subplot(2,2,index + 1)
        plot_random_image(target_dir = f"./{output}/test/",target_class = target_class)
plot_random_figures(loc=folders_to_copy, output=output)

# ***Modelling***

In [None]:
from tensorflow.keras import mixed_precision
# Global Keras dtype policy: layers created after this call use "mixed_float16".
mixed_precision.set_global_policy("mixed_float16")

# Stop training when val_loss has not improved by at least 1e-4 for 6 epochs in a row.
early_stop = tf.keras.callbacks.EarlyStopping(monitor = "val_loss",patience = 6,
                                             min_delta = 0.0001)

# Multiply the learning rate by 0.2 after 4 epochs without val_loss improvement,
# never going below 1e-7.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor = "val_loss",factor = 0.2,
                                                patience = 4,min_lr = 1e-7)

> With mixed-precision training, computation can be up to about 3x faster, depending on the GPU available. Mixed precision uses float16 (half-precision) values wherever possible.

In [None]:
from tensorflow.keras.preprocessing import image_dataset_from_directory

train_dir = f"./{output}/train"
test_dir =  f"./{output}/test"
val_dir = f"./{output}/val"

def generate_train_test_validation(train_dir=train_dir, test_dir=test_dir, val_dir=val_dir,
                                   image_size=(224, 224), batch_size=32, seed=42):
    """Load the train, test and validation folders as batched datasets.

    Labels are one-hot encoded (``label_mode="categorical"``). Only the
    training set is shuffled; test and validation keep directory order so
    predictions can be matched to labels and file paths afterwards.

    Args:
        train_dir, test_dir, val_dir: Directories with one sub-folder per class.
        image_size: ``(height, width)`` every image is resized to.
        batch_size: Number of images per batch.
        seed: Seed passed to every loader.

    Returns:
        ``(train_data, test_data, val_data)``.
    """
    def _load(directory, shuffle):
        # Single place for the options shared by all three splits.
        return image_dataset_from_directory(directory, label_mode="categorical",
                                            image_size=image_size, batch_size=batch_size,
                                            shuffle=shuffle, seed=seed)

    train_data = _load(train_dir, shuffle=True)
    test_data = _load(test_dir, shuffle=False)
    val_data = _load(val_dir, shuffle=False)
    return train_data, test_data, val_data
train_data, test_data, val_data = generate_train_test_validation(train_dir, test_dir, val_dir)

> image_dataset_from_directory() imports and converts our input data into tf.data.Dataset format and it is generally faster than ImageDataGenerator().

In [None]:
# class names in training datasets
class_names = train_data.class_names
print(len(class_names))
print(class_names)

In [None]:
def auto_tune_data(train_data, test_data, val_data):
    """Return the three datasets with prefetching enabled.

    Each dataset gets ``prefetch(buffer_size=tf.data.AUTOTUNE)`` applied; the
    result is returned in the same ``(train, test, val)`` order.
    """
    prefetched = tuple(
        dataset.prefetch(buffer_size = tf.data.AUTOTUNE)
        for dataset in (train_data, test_data, val_data)
    )
    return prefetched
train_data, test_data, val_data = auto_tune_data(train_data, test_data, val_data)

> Data is prefetched to reduce computation time.

In [None]:
base_model = tf.keras.applications.EfficientNetB5(include_top = False)
base_model.trainable = False

> First we train EfficientNetB5 as a feature extractor. Feature-extraction transfer learning means reusing the pretrained weights of a model trained on another, similar dataset for our own problem. Here only the output layer of the pretrained model is replaced to fit our problem.

In [None]:
for layer_num,layer in enumerate(base_model.layers):
    print(layer_num,layer.name,layer.trainable)

> As we can see, EfficientNetB5 consists of 575 layers (not including the output layer). The most important thing to note among these layers is the rescaling layer right after the input layer: it means we don't have to rescale our data during preprocessing.

In [None]:
from tensorflow.keras.layers.experimental import preprocessing

def create_data_aug_layer():
    """Build the augmentation block placed in front of the backbone.

    Returns a ``tf.keras.Sequential`` named ``"data_augmentation_layer"``
    that applies, in order: random width, random height, random rotation
    (each with factor 0.2) and a random horizontal flip.
    """
    augmentation_steps = [
        preprocessing.RandomWidth(0.2),
        preprocessing.RandomHeight(0.2),
        preprocessing.RandomRotation(0.2),
        preprocessing.RandomFlip("horizontal"),
    ]
    return tf.keras.Sequential(augmentation_steps, name = "data_augmentation_layer")

> Data augmentation is used here to prevent overfitting. We could experiment without data augmentation and check whether the model overfits, but since we are using a transfer-learning architecture such as EfficientNet, it's best to include it because the probability of our model overfitting is very high.

In [None]:
# build classification model
def build_classification_model(units=num_classification):
    """Assemble the classifier: augmentation -> backbone -> pooling -> dense head.

    Args:
        units: Number of output classes (size of the final Dense layer).

    Returns:
        An uncompiled ``tf.keras.Model`` taking 224x224x3 images and
        returning float32 softmax probabilities.
    """
    print(units)
    data_aug = create_data_aug_layer()
    inputs = layers.Input(shape = (224,224,3),name = "input_layer")
    x = data_aug(inputs)
    # training=False keeps the backbone's BatchNormalization layers in
    # inference mode, so their moving statistics are not overwritten when
    # part of the backbone is unfrozen for fine-tuning later on.
    x = base_model(x, training = False)
    x = layers.GlobalAvgPool2D(name = "pooling_layer")(x)
    x = layers.Dense(32,activation = "relu",kernel_initializer = tf.keras.initializers.he_normal())(x)
    x = layers.Dense(units)(x)
    # Softmax is computed in float32 regardless of the global mixed-precision
    # policy, so the output probabilities are float32.
    outputs = layers.Activation("softmax",dtype = tf.float32)(x)
    model = tf.keras.Model(inputs,outputs)
    return model
model = build_classification_model(units=num_classification)

In [None]:
model.summary()

In [None]:
for layer_num,layer in enumerate(model.layers):
    print(layer_num,layer.name,layer.trainable,layer.dtype,layer.dtype_policy)

> We can clearly see here that mixed_precision policy is implemented and our EfficientNetB5 model is completely frozen. Now we can compile and fit our model.

In [None]:
def compile_model(model=model, learning_rate=0.001):
    """Compile ``model`` in place for multi-class classification.

    Uses categorical cross-entropy, the Adam optimizer with the given
    ``learning_rate``, and accuracy as the reported metric.
    """
    loss_fn = tf.keras.losses.CategoricalCrossentropy()
    optimizer = tf.keras.optimizers.Adam(learning_rate)
    model.compile(loss = loss_fn, optimizer = optimizer, metrics = ["accuracy"])
compile_model(model=model)

In [None]:
# Stage 1: train the new classification head on top of the frozen backbone.
# Only a quarter of the validation batches are evaluated after each epoch
# (validation_steps), and it is those partial results that early_stop and
# reduce_lr monitor.
# NOTE(review): int(0.25*len(val_data)) is 0 when val_data has fewer than 4 batches - confirm the validation set is large enough.
history_1 = model.fit(train_data,epochs = epochs,steps_per_epoch = len(train_data),
                     validation_data = val_data,validation_steps = int(0.25*len(val_data)),
                     callbacks = [early_stop,reduce_lr])

In [None]:
# model.evaluate() returns [loss, accuracy] for this compile configuration,
# so label both values instead of printing the pair as "Accuracy".
stage_1_val_loss, stage_1_val_accuracy = model.evaluate(val_data)
print("Validation Loss", stage_1_val_loss, "Validation Accuracy", stage_1_val_accuracy)
stage_1_test_loss, stage_1_test_accuracy = model.evaluate(test_data)
print("Testing Loss", stage_1_test_loss, "Testing Accuracy", stage_1_test_accuracy)

In [None]:
def plot_loss_curves(history):
  """
  Plot training vs. validation curves: loss first, then accuracy in a new figure.

  Args:
    history: Object whose ``history`` dict has the keys 'loss', 'val_loss',
      'accuracy' and 'val_accuracy' (e.g. the return value of ``model.fit``).
  """
  recorded = history.history
  train_loss, valid_loss = recorded['loss'], recorded['val_loss']
  train_acc, valid_acc = recorded['accuracy'], recorded['val_accuracy']
  epoch_axis = range(len(recorded['loss']))

  # Loss curves are drawn on the current figure.
  for values, label in ((train_loss, 'training_loss'), (valid_loss, 'val_loss')):
    plt.plot(epoch_axis, values, label=label)
  plt.title('Loss')
  plt.xlabel('Epochs')
  plt.legend()

  # Accuracy curves get a figure of their own.
  plt.figure()
  for values, label in ((train_acc, 'training_accuracy'), (valid_acc, 'val_accuracy')):
    plt.plot(epoch_axis, values, label=label)
  plt.title('Accuracy')
  plt.xlabel('Epochs')
  plt.legend()

In [None]:
plot_loss_curves(history_1)

> * Training Accuracy - 74.4%
> * Testing Accuracy  - 71.2%
> * Validation Accuracy - 71.7%

> Note: The number of epochs was set to 15, but training stopped at epoch 12 and it did not appear to be because of the early-stopping callback. I'm not sure why it stopped early; if you have any ideas, please do mention them.

# Fine-Tuned EfficientNetB5

In [None]:
# Unfreeze the whole backbone first ...
base_model.trainable = True

# ... then re-freeze everything except its last 30 layers, so only those
# (plus the classification head) are updated during fine-tuning.
for layer in base_model.layers[:-30]:
    layer.trainable = False

> Now, in order to improve our model's performance, we unfreeze the 30 layers closest to the output layer and let them train on our data, starting from their pre-trained weights instead of keeping those weights fixed.

In [None]:
for layer_num,layer in enumerate(model.layers):
    print(layer_num,layer.name,layer.trainable,layer.dtype_policy)

In [None]:
for layer_num,layer in enumerate(base_model.layers):
    print(layer_num,layer.name,layer.trainable)

In [None]:
compile_model(model=model, learning_rate=1e-4)

In [None]:
# Stage 2 (fine-tuning) continues the epoch numbering of stage 1.
# history_1.epoch holds the 0-based indices of the epochs that actually ran,
# so its length is the index of the next epoch. Using history_1.epoch[-1]
# repeated the last index, and adding `epochs` over-counted whenever stage 1
# stopped early.
fine_tune_start_epoch = len(history_1.epoch)
second_iter_epochs = fine_tune_start_epoch + second_epochs
history_2 = model.fit(train_data,epochs = second_iter_epochs,steps_per_epoch = len(train_data),
                     initial_epoch = fine_tune_start_epoch,
                     validation_data = val_data,validation_steps = int(0.25*len(val_data)),
                     callbacks = [early_stop,reduce_lr])

In [None]:
# model.evaluate() returns [loss, accuracy] for this compile configuration,
# so label both values instead of printing the pair as "Accuracy".
stage_2_val_loss, stage_2_val_accuracy = model.evaluate(val_data)
print("Validation Loss", stage_2_val_loss, "Validation Accuracy", stage_2_val_accuracy)
stage_2_test_loss, stage_2_test_accuracy = model.evaluate(test_data)
print("Testing Loss", stage_2_test_loss, "Testing Accuracy", stage_2_test_accuracy)

In [None]:
def compare_historys(original_history, new_history, initial_epochs):
    """
    Plot the metrics of two consecutive training runs as one continuous curve.

    Args:
        original_history: History object of the first run.
        new_history: History object of the follow-up (fine-tuning) run.
        initial_epochs: Number of epochs in the first run; a vertical line is
            drawn at ``initial_epochs - 1`` to mark where fine-tuning starts.
    """
    first_run = original_history.history
    acc = first_run["accuracy"]
    loss = first_run["loss"]

    print(len(acc))

    val_acc = first_run["val_accuracy"]
    val_loss = first_run["val_loss"]

    # Append the second run's measurements to the first run's.
    second_run = new_history.history
    total_acc = acc + second_run["accuracy"]
    total_loss = loss + second_run["loss"]
    total_val_acc = val_acc + second_run["val_accuracy"]
    total_val_loss = val_loss + second_run["val_loss"]

    print(len(total_acc))
    print(total_acc)

    # One row per subplot: position, curves, curve labels, legend spot, title.
    panels = (
        (1, total_acc, total_val_acc, 'Training Accuracy', 'Validation Accuracy',
         'lower right', 'Training and Validation Accuracy'),
        (2, total_loss, total_val_loss, 'Training Loss', 'Validation Loss',
         'upper right', 'Training and Validation Loss'),
    )
    fine_tune_marker = [initial_epochs-1, initial_epochs-1]

    plt.figure(figsize=(8, 8))
    for position, train_curve, val_curve, train_label, val_label, legend_loc, title in panels:
        plt.subplot(2, 1, position)
        plt.plot(train_curve, label=train_label)
        plt.plot(val_curve, label=val_label)
        # Vertical line spanning the current y-range at the hand-over epoch.
        plt.plot(fine_tune_marker, plt.ylim(), label='Start Fine Tuning')
        plt.legend(loc=legend_loc)
        plt.title(title)
    plt.xlabel('epoch')
    plt.show()

In [None]:
# Mark the hand-over at the number of epochs stage 1 actually ran, instead of
# a hard-coded 12 that is only right for one particular run.
compare_historys(history_1,history_2,initial_epochs = len(history_1.epoch))

> *  Training accuracy - 85.58%
> *  Testing Accuracy - 77.08%
> *  Validation Accuracy - 76.02%

# Model Evaluation

In [None]:
pred_probs = model.predict(test_data)
pred_probs[0]

In [None]:
len(pred_probs)

In [None]:
pred_classes = pred_probs.argmax(axis =1)
print(pred_classes)
print(class_names[pred_classes[0]])

In [None]:
# Recover the integer class index of every test image from its one-hot label,
# in the same order the test dataset yields them.
y_labels = [label.numpy().argmax() for _, label in test_data.unbatch()]
y_labels[:20]

In [None]:
print(len(pred_classes))
print(len(y_labels))

In [None]:
from sklearn.metrics import classification_report
print("Classification report\n",classification_report(y_labels,pred_classes))

In [None]:
classification_dict = classification_report(y_labels,pred_classes,output_dict = True)
classification_dict

In [None]:
# Keep only the per-class entries of the report (their keys are the class
# indices as strings) and map them to readable class names. Filtering on the
# key avoids relying on "accuracy" coming right after the last class entry.
classification_f1_scores = {
    class_names[int(label)]: metrics["f1-score"]
    for label, metrics in classification_dict.items()
    if isinstance(metrics, dict) and str(label).isdigit()
}
classification_f1_scores

In [None]:
f1_scores = pd.DataFrame({"class_name":list(classification_f1_scores.keys()),
                         "F1-Scores":list(classification_f1_scores.values())})
f1_scores.sort_values("F1-Scores",ascending = False)

> From the F1-Scores dataframe we can clearly see that our model performs best on Melanocytic Nevi, with an F1-Score of 0.92, and performs worst on Atopic Dermatitis.

In [None]:
from sklearn.metrics import confusion_matrix
import itertools

def make_confusion_matrix(y_true, y_pred, classes=None, figsize=(10, 10), text_size=15, norm=False, savefig=False): 
  """Makes a labelled confusion matrix comparing predictions and ground truth labels.

  If classes is passed, confusion matrix will be labelled, if not, integer class values
  will be used.

  Args:
    y_true: Array of truth labels (must be same shape as y_pred).
    y_pred: Array of predicted labels (must be same shape as y_true).
    classes: Array of class labels (e.g. string form). If `None` or empty,
      integer labels are used. Lists and numpy arrays are both accepted.
    figsize: Size of output figure (default=(10, 10)).
    text_size: Size of output figure text (default=15).
    norm: also print the row-normalized percentage in each cell (default=False).
    savefig: save confusion matrix to "confusion_matrix.png" (default=False).

  Returns:
    None. The labelled confusion matrix is drawn with matplotlib.

  Example usage:
    make_confusion_matrix(y_true=test_labels, # ground truth test labels
                          y_pred=y_preds, # predicted labels
                          classes=class_names, # array of class label names
                          figsize=(15, 15),
                          text_size=10)
  """
  # Create the confusion matrix
  cm = confusion_matrix(y_true, y_pred)
  # Row-normalize; rows without any true samples stay at 0 instead of
  # producing NaN and a divide-by-zero warning.
  row_totals = cm.sum(axis=1)[:, np.newaxis]
  cm_norm = np.divide(cm.astype("float"), row_totals,
                      out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
  n_classes = cm.shape[0] # find the number of classes we're dealing with

  # Plot the figure and make it pretty
  fig, ax = plt.subplots(figsize=figsize)
  cax = ax.matshow(cm, cmap=plt.cm.Blues) # colors will represent how 'correct' a class is, darker == better
  fig.colorbar(cax)

  # Use the class names when given. `if classes:` raised ValueError for numpy
  # arrays (ambiguous truth value), so test for None and emptiness explicitly.
  if classes is not None and len(classes) > 0:
    labels = classes
  else:
    labels = np.arange(n_classes)

  # Label the axes
  ax.set(title="Confusion Matrix",
         xlabel="Predicted label",
         ylabel="True label",
         xticks=np.arange(n_classes), # create enough axis slots for each class
         yticks=np.arange(n_classes), 
         xticklabels=labels, # axes will labeled with class names (if they exist) or ints
         yticklabels=labels)

  # Make x-axis labels appear on bottom
  ax.xaxis.set_label_position("bottom")
  ax.xaxis.tick_bottom()

  # Rotate xticks for readability & increase font size (needed for large matrices)
  plt.xticks(rotation=70, fontsize=text_size)
  plt.yticks(fontsize=text_size)

  # Cells darker than the midpoint of the value range get white text
  threshold = (cm.max() + cm.min()) / 2.

  # Plot the text on each cell
  for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    cell_text = f"{cm[i, j]}"
    if norm:
      cell_text = f"{cm[i, j]} ({cm_norm[i, j]*100:.1f}%)"
    plt.text(j, i, cell_text,
             horizontalalignment="center",
             color="white" if cm[i, j] > threshold else "black",
             size=text_size)

  # Save the figure to the current working directory
  if savefig:
    fig.savefig("confusion_matrix.png")

In [None]:
make_confusion_matrix(y_labels,pred_classes,classes = class_names,figsize = (20,20))

* > From the confusion matrix we can clearly observe that the model is getting confused between Melanocytic Nevi and Melanoma, Melanocytic Nevi and Benign Keratosis-like Lesions, Tinea Ringworm Candidiasis and other Fungal Infections, and Psoriasis pictures Lichen Planus and related diseases.
* > To examine why our model is getting confused between the diseases mentioned above, we can look at the data ourselves or consult a doctor to find out whether these diseases can be classified properly just by looking at their images. Skin diseases often cannot be classified properly from an image alone; further testing is required.

**Let us see what our most wrong predictions are to understand more about our model's performance**

In [None]:
# list_files is a static factory of tf.data.Dataset, so call it on the class
# rather than through the prefetched test dataset, and decode the byte-string
# tensors so the paths are stored as ordinary text.
# NOTE(review): only "*.jpg" files are matched - confirm every test image has
# that exact suffix, otherwise this list will be shorter than y_labels.
filepaths = [
    filepath.numpy().decode("utf-8")
    for filepath in tf.data.Dataset.list_files(f"./{output}/test/*/*.jpg",
                                               shuffle=False)
]
filepaths[:10]

In [None]:
prediction_df = pd.DataFrame({"img_path": filepaths,
                        "y_true": y_labels,
                        "y_pred": pred_classes,
                        "pred_conf": pred_probs.max(axis=1), # get the maximum prediction probability value
                        "y_true_classname": [class_names[i] for i in y_labels],
                        "y_pred_classname": [class_names[i] for i in pred_classes]}) 
prediction_df.head()

In [None]:
prediction_df["correct_pred"] = prediction_df["y_true"]==prediction_df["y_pred"]
prediction_df.head()

In [None]:
# The 50 misclassified test images the model was most confident about.
top_50_wrong = (
    prediction_df.loc[~prediction_df["correct_pred"]]
    .sort_values("pred_conf", ascending=False)
    .head(50)
)
top_50_wrong.head(10)

In [None]:
tf.saved_model.save(model, "skin_disease_saved_model")
model.save("skin_disease_model.h5", overwrite=True)

# Dump the state of kaggle

In [None]:
# import dill
# dill.dump_session('/kaggle/working/state.db')

In [None]:
# import inspect

# for name, obj in inspect.getmembers(globals()):
#     print(name)  
#     if inspect.isfunction(obj) and inspect.isrecursive(obj):
#         print(name)


# load the state of kaggle `state.db`

In [None]:
# import dill
# dill.load_session('/kaggle/working/state.db')

# create zip of saved model

In [None]:
!ls
!rm skin_disease_saved_model.zip
!zip -r /kaggle/working/skin_disease_saved_model.zip /kaggle/working/skin_disease_saved_model

In [None]:
import os
print(os.getcwd())
!ls ../

In [None]:
from IPython.display import FileLink
FileLink(r'skin_disease_saved_model.zip')

In [None]:
FileLink(r'skin_disease_model.h5')

# load model and evaluate testing data

In [None]:
from tensorflow.keras.models import load_model

# Load the model using the TensorFlow SavedModel format
loaded_saved_model = tf.saved_model.load("skin_disease_saved_model")

# Load the model using the Keras HDF5 format
loaded_h5_model = load_model("skin_disease_model.h5")

# Compile the loaded model with new configurations
loaded_h5_model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["accuracy"]
)

In [None]:
!python --version

In [None]:
# # evaluate testing data
# test_results_h5 = loaded_h5_model.evaluate(test_data)

# # print("Test results (SavedModel):", test_results_saved)
# print("Test results (HDF5):", test_results_h5)

In [None]:
!ls ../input/individual-images

In [None]:
from tensorflow.keras.preprocessing import image
import numpy as np
from tensorflow.keras.applications.vgg16 import decode_predictions

In [None]:

def image_predict(image_path=None, model=None):
    """Classify a single image file and report the winning class.

    Args:
        image_path: Path of the image to classify.
        model: Trained Keras model taking batches of 224x224 RGB images.

    Returns:
        ``(predicted_class_index, confidence_percent)``: the index of the most
        probable class and its probability as a percentage rounded to two
        decimals.

    Raises:
        ValueError: If ``image_path`` or ``model`` is missing.
    """
    if image_path is None or model is None:
        raise ValueError("image_predict() needs both an image_path and a model")

    # Load the image at the size the network was trained on and add the batch axis.
    img = image.load_img(image_path, target_size=(224, 224))
    img_array = np.expand_dims(image.img_to_array(img), axis=0)
    # No division by 255 here: this notebook's training pipeline feeds
    # unscaled 0-255 pixels, so the model must see the same range at
    # inference time.

    # One row of class probabilities for the single image.
    predictions = model.predict(img_array)
    print("Predicted probabilities:", predictions)

    predicted_class_index = int(np.argmax(predictions[0]))

    # Confidence of the winning class as a percentage.
    predicted_percentage = float(predictions[0][predicted_class_index]) * 100
    print(predicted_percentage)
    rounded_percentage = round(predicted_percentage, 2)
    print(rounded_percentage)
    return predicted_class_index, rounded_percentage

print(image_predict("/kaggle/input/disease4/ISIC_0066759.jpg",model=loaded_h5_model))

# **Retrain the model by using previous trained model**

# Get newly training, validation and testing datasets

In [None]:
# Specify the folders you want to copy
new_train_dir = "new_train_data"
folders_to_retrain = ["10. Warts Molluscum and other Viral Infections - 2103","7. Psoriasis pictures Lichen Planus and related diseases - 2k"]  # Replace with the actual folder names
num_classification = len(folders_to_retrain)
generate_output_dir(output=new_train_dir)
generate_datasets(folders_to_retrain, loc=input_location, output=new_train_dir)

In [None]:

train_dir_re = f"./{new_train_dir}/train"
test_dir_re =  f"./{new_train_dir}/test"
val_dir_re = f"./{new_train_dir}/val"

train_data_re, test_data_re, val_data_re = generate_train_test_validation(train_dir_re, test_dir_re, val_dir_re)
train_data_re, test_data_re, val_data_re = auto_tune_data(train_data_re, test_data_re, val_data_re)

In [None]:
# Compile the loaded model with new configurations
compile_model(loaded_h5_model)

# Assuming you have new_train_data and new_val_data for retraining
history_retrain = loaded_h5_model.fit(
    new_train_data,
    epochs=15,
    steps_per_epoch=len(new_train_data),
    validation_data=new_val_data,
    validation_steps=int(0.25 * len(new_val_data)),
    callbacks=[early_stop, reduce_lr]
)


In [None]:
# get versions of imported packages
# import some packages to check

import pkg_resources
import types
def get_imports():
    """Yield the root package name of every module or class in ``globals()``.

    Plain variables and functions are skipped, so an ordinary global can no
    longer be mistaken for an installed package of the same name. Two import
    names are translated to their distribution names ("PIL" -> "Pillow",
    "sklearn" -> "scikit-learn").
    """
    # Iterate over a snapshot: the namespace may gain names while this
    # generator is being consumed, which would otherwise raise
    # "dictionary changed size during iteration".
    for name, val in list(globals().items()):
        if isinstance(val, types.ModuleType):
            # Split ensures you get root package,
            # not just imported function
            name = val.__name__.split(".")[0]
        elif isinstance(val, type):
            name = val.__module__.split(".")[0]
        else:
            # Not an imported module or class - nothing to report.
            continue

        # Some packages are weird and have different
        # imported names vs. system names
        if name == "PIL":
            name = "Pillow"
        elif name == "sklearn":
            name = "scikit-learn"

        yield name
imports = list(set(get_imports()))

requirements = []
for m in pkg_resources.working_set:
    if m.project_name in imports and m.project_name!="pip":
        requirements.append((m.project_name, m.version))

for r in requirements:
    print("{}=={}".format(*r))

In [None]:
!pip show protobuf