In [1]:
import os
import pandas as pd
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras import layers, models
from tqdm import tqdm
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.metrics import Precision, Recall, AUC
# from sklearn.model_selection import train_test_split

In [2]:
#Function to preprocess image - resize and normalize
def preprocess_image(image_path, target_size=(224, 224)):
    """Load an image from disk and return it as a normalized RGB float array.

    Args:
        image_path: Path of the image file to read.
        target_size: Output size handed to ``cv2.resize``, which interprets it
            as ``(width, height)``.

    Returns:
        A ``float32`` array with channels in RGB order and values scaled to
        the range [0, 1].

    Raises:
        ValueError: If OpenCV cannot read the file (missing, unreadable or not
            a decodable image).
    """
    image = cv2.imread(image_path) #Read the image (OpenCV loads it in BGR order)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        raise ValueError(f"Could not read image (missing or undecodable file): {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) #Convert BGR to RGB
    image = cv2.resize(image, target_size) #Resize the image
    image = image.astype(np.float32) / 255.0  #Normalize pixel values between 0 and 1
    return image

In [3]:
# Load the metadata file
# The first CSV column is used as the DataFrame index.
metadata_path = '/Users/arberite/Desktop/Assigment/dataset/metadata.csv'
df = pd.read_csv(metadata_path, index_col=0)

In [4]:
#Create Train Sets

#Set the directory path for the training images
train_dir_path = '/Users/arberite/Desktop/Assigment/dataset/images/train/'

# Build the file list up front, in sorted order: os.listdir() returns entries in
# arbitrary order and also yields hidden files (e.g. macOS '.DS_Store') and
# sub-directories, which would produce an empty/invalid image id below.
train_files = sorted(
    name for name in os.listdir(train_dir_path)
    if not name.startswith('.') and os.path.isfile(os.path.join(train_dir_path, name))
)

#Preprocess the training images and labels
X_train = []
y_train = []
for dir_item in tqdm(train_files):
    image_id = dir_item.split('.')[0]  # Extract image id (file name up to the first dot)
    lesion_type = df.loc[image_id, 'lesion_type']  # Single row/column lookup (no chained indexing)
    image_path = os.path.join(train_dir_path, dir_item)  # Get image path
    X_train.append(preprocess_image(image_path))  # Append preprocessed image to list
    y_train.append(lesion_type)  # Append lesion type to list

# Convert lists to numpy arrays (the names are rebound so the lists can be freed)
X_train = np.array(X_train)
y_train = np.array(y_train)

100%|██████████████████████████████████████| 8515/8515 [00:37<00:00, 227.07it/s]


In [5]:
#Create Test Sets

#Set the directory path for the testing images
test_dir_path = '/Users/arberite/Desktop/Assigment/dataset/images/test/'

# Build the file list up front, in sorted order: os.listdir() returns entries in
# arbitrary order and also yields hidden files (e.g. macOS '.DS_Store') and
# sub-directories, which would produce an empty/invalid image id below.
test_files = sorted(
    name for name in os.listdir(test_dir_path)
    if not name.startswith('.') and os.path.isfile(os.path.join(test_dir_path, name))
)

#Preprocess the testing images and labels
X_test = []
y_test = []
for dir_item in tqdm(test_files):
    image_id = dir_item.split('.')[0]  # Extract image id (file name up to the first dot)
    lesion_type = df.loc[image_id, 'lesion_type']  # Single row/column lookup (no chained indexing)
    image_path = os.path.join(test_dir_path, dir_item)  # Get image path
    X_test.append(preprocess_image(image_path))  # Append preprocessed image to list
    y_test.append(lesion_type)  # Append lesion type to list

# Convert lists to numpy arrays (the names are rebound so the lists can be freed)
X_test = np.array(X_test)
y_test = np.array(y_test)

100%|██████████████████████████████████████| 1000/1000 [00:04<00:00, 232.29it/s]


In [6]:
#Create Validation Sets

#Set the directory path for the validation images
validation_dir_path = '/Users/arberite/Desktop/Assigment/dataset/images/validation/'

# Build the file list up front, in sorted order: os.listdir() returns entries in
# arbitrary order and also yields hidden files (e.g. macOS '.DS_Store') and
# sub-directories, which would produce an empty/invalid image id below.
validation_files = sorted(
    name for name in os.listdir(validation_dir_path)
    if not name.startswith('.') and os.path.isfile(os.path.join(validation_dir_path, name))
)

#Preprocess the validation images and labels
X_val = []
y_val = []
for dir_item in tqdm(validation_files):
    image_id = dir_item.split('.')[0]  # Extract image id (file name up to the first dot)
    lesion_type = df.loc[image_id, 'lesion_type']  # Single row/column lookup (no chained indexing)
    image_path = os.path.join(validation_dir_path, dir_item)  # Get image path
    X_val.append(preprocess_image(image_path))  # Append preprocessed image to list
    y_val.append(lesion_type)  # Append lesion type to list

# Convert lists to numpy arrays (the names are rebound so the lists can be freed)
X_val = np.array(X_val)
y_val = np.array(y_val)

100%|████████████████████████████████████████| 500/500 [00:02<00:00, 233.16it/s]


In [7]:
# Function to create the CNN model
def create_simple_cnn(input_shape, num_classes):
    """Assemble a small, uncompiled sequential CNN classifier.

    The network is three Conv2D (3x3, ReLU) + MaxPooling2D (2x2) stages with
    32, 64 and 128 filters, followed by Flatten, a 128-unit ReLU Dense layer
    and a softmax Dense output layer.

    Args:
        input_shape: Shape of one input sample; given to the first Conv2D layer.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The constructed ``models.Sequential`` instance.
    """
    # Convolutional feature extractor
    conv_stages = [
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
    ]
    # Fully connected classification head
    classifier_head = [
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ]
    return models.Sequential(conv_stages + classifier_head)

In [8]:
#Get number of classes and input shape
#One output unit per distinct label value present in the training labels
#NOTE(review): assumes every class occurs in y_train and labels are integer ids
#starting at 0, as the sparse cross-entropy loss expects - TODO confirm
num_classes = np.unique(y_train).size
#Shape of a single sample: everything after the leading sample axis of X_train
input_shape = X_train.shape[1:]

In [9]:
#Create the model
#Seed Python, NumPy and TensorFlow before any weights are created so that weight
#initialisation (and the shuffling done later by model.fit) is repeatable between
#runs, as far as the hardware allows.
SEED = 42
tf.keras.utils.set_random_seed(SEED)
model = create_simple_cnn(input_shape, num_classes)

In [10]:
#Compile the model
#Adam optimizer, sparse categorical cross-entropy loss (targets are class ids,
#not one-hot vectors) and accuracy as the reported metric.
compile_options = {
    'optimizer': 'adam',
    'loss': 'sparse_categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

In [11]:
#Summary of the model
#Prints every layer with its output shape and parameter count, so the
#architecture can be checked before training starts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 111, 111, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 54, 54, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 52, 52, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 26, 26, 128)       0

In [12]:
# Train the model
batch_size = 64
epochs = 2
# Keep the returned History object so the per-epoch training/validation metrics
# can be inspected or plotted afterwards. Test-set prediction is intentionally
# not done here: it has its own cell, and running it twice only repeats a full
# inference pass over X_test.
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
)

Epoch 1/2
Epoch 2/2


In [13]:
#Make predictions on the test set
#The model's last layer is a softmax over num_classes units, so each row of the
#result holds one score per class for the corresponding test image.
y_test_pred = model.predict(X_test)



In [14]:
#Get the predicted labels
#For each test sample, pick the index of the highest-scoring class (axis 1 = classes)
y_test_pred_labels = y_test_pred.argmax(axis=1)

In [15]:
# Save the trained model
# Written to the current working directory; the '.h5' suffix selects the HDF5 format.
# NOTE(review): the truncated `saving_api.save_model(` output under this cell looks
# like Keras' legacy-HDF5 warning; consider the native '.keras' format if every
# consumer of this file can be updated - TODO confirm.
model.save('prediction_model.h5') 

  saving_api.save_model(


In [18]:
#Report the versions of the main libraries used in this notebook
for label, module in (
    ("Pandas version:", pd),
    ("Numpy version:", np),
    ("OpenCV version:", cv2),
    ("TensorFlow version:", tf),
):
    print(label, module.__version__)

Pandas version: 1.5.3
Numpy version: 1.24.3
OpenCV version: 4.8.0
TensorFlow version: 2.13.0
