# Package

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import random
import pickle
import time
import itertools
import PIL
from tqdm import tqdm

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, LeakyReLU
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.utils import to_categorical

from keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger

from sklearn.metrics import confusion_matrix






In [2]:
IMG_SIZE = 64

# Preprocess Data

## Training Data

In [3]:
DATADIR = r"C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Skripsi dataset\Undersampling Training 4515 (NV 6705-5500=1205)\Pembagian Berdasarkan Kanker atau Tidak\Training Data"
CATEGORIES = ["Kanker", "Non-Kanker"]

In [4]:
training_data = []


def create_training_data():
    for category in CATEGORIES:

        path = os.path.join(DATADIR,category)  # create path to class
        class_num = CATEGORIES.index(category)  # get the classification  (0,1,2,etc)

        for img in tqdm(os.listdir(path)):  # iterate over each image per class
            try:
                img_array = cv2.imread(os.path.join(path,img) ,cv2.IMREAD_COLOR)  # convert to array
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))  # resize to normalize data size
                training_data.append([new_array, class_num])  # add this to our training_data
            except Exception as e:  # in the interest in keeping the output clean...
                pass
            #except OSError as e:
            #    print("OSErrroBad img most likely", e, os.path.join(path,img))
            #except Exception as e:
            #    print("general exception", e, os.path.join(path,img))

create_training_data()

100%|██████████████████████████████████████████████████████████████████████████████| 1954/1954 [00:41<00:00, 47.34it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2561/2561 [00:55<00:00, 45.84it/s]


In [5]:
print(len(training_data))

4515


In [6]:
#Shuffle training data
random.shuffle(training_data)

In [7]:
#Turning into matrix for feature and label
X = []
y = []

for features,label in training_data:
    X.append(features)
    y.append(label)


X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
y = np.array(y)

In [8]:
print(X.shape)
print(y.shape)

(4515, 64, 64, 3)
(4515,)


## Test Data

In [9]:
DATADIR_TEST = r"C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Skripsi dataset\Undersampling Training 4515 (NV 6705-5500=1205)\Pembagian Berdasarkan Kanker atau Tidak\Test Data"
CATEGORIES = ["Kanker", "Non-Kanker"]

In [10]:
test_data = []


def create_test_data():
    for category in CATEGORIES:

        path = os.path.join(DATADIR_TEST,category)  # create path to class
        class_num = CATEGORIES.index(category)  # get the classification  (0,1,2,etc)

        for img in tqdm(os.listdir(path)):  # iterate over each image per class
            try:
                img_array = cv2.imread(os.path.join(path,img) ,cv2.IMREAD_COLOR)  # convert to array
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))  # resize to normalize data size
                test_data.append([new_array, class_num])  # add this to our training_data
            except Exception as e:  # in the interest in keeping the output clean...
                pass
            #except OSError as e:
            #    print("OSErrroBad img most likely", e, os.path.join(path,img))
            #except Exception as e:
            #    print("general exception", e, os.path.join(path,img))

create_test_data()

100%|████████████████████████████████████████████████████████████████████████████████| 307/307 [00:06<00:00, 45.04it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1205/1205 [00:25<00:00, 46.75it/s]


In [11]:
print(len(test_data))

1512


In [12]:
#Turning into matrix for feature and label
X_Test= []
y_Test = []

for features,label in test_data:
    X_Test.append(features)
    y_Test.append(label)


X_Test= np.array(X_Test).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
y_Test = np.array(y_Test)

In [13]:
print(X_Test.shape)
print(y_Test.shape)

(1512, 64, 64, 3)
(1512,)


# Save Data

In [14]:
#Training Data
path = r'C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Skripsi dataset\Undersampling Training 4515 (NV 6705-5500=1205)\Pembagian Berdasarkan Kanker atau Tidak\\'
pickle_out = open(path + "X.pickle.64","wb")
pickle.dump(X, pickle_out)
pickle_out.close()

pickle_out = open(path + "y.pickle.64","wb")
pickle.dump(y, pickle_out)
pickle_out.close()

In [15]:
#Test Data
path = r'C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Skripsi dataset\Undersampling Training 4515 (NV 6705-5500=1205)\Pembagian Berdasarkan Kanker atau Tidak\\'
pickle_out = open(path + "X.pickle.64.Test","wb")
pickle.dump(X_Test, pickle_out)
pickle_out.close()

pickle_out = open(path + "y.pickle.64.Test","wb")
pickle.dump(y_Test, pickle_out)
pickle_out.close()

# Load Data

In [16]:
#Training data
path = r'C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Skripsi dataset\Undersampling Training 4515 (NV 6705-5500=1205)\Pembagian Berdasarkan Kanker atau Tidak\\'
pickle_in = open(path + "X.pickle.224","rb")
X = pickle.load(pickle_in)

pickle_in = open(path + "y.pickle.224","rb")
y = pickle.load(pickle_in)

In [7]:
#Test data
path = r'C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Skripsi dataset\Undersampling Training 4515 (NV 6705-5500=1205)\Pembagian Berdasarkan Kanker atau Tidak\\'
pickle_in = open(path + "X.pickle.224.Test","rb")
X_Test = pickle.load(pickle_in)

pickle_in = open(path + "y.pickle.224.Test","rb")
y_Test = pickle.load(pickle_in)

# Build CNN Architecture

In [17]:
#Normalize Data
X = X/255.0

In [18]:
y_one_hot = to_categorical(y, num_classes=2)

In [66]:
NAME = "Deteksi Kanker Berdasarkan Penyakit-CNN-{}".format(int(time.time()))

model = Sequential([
    Conv2D(filters=64, kernel_size=(3, 3), activation=LeakyReLU(alpha=0.01), padding = 'same', strides = 2, input_shape=(X.shape[1:])),
    AveragePooling2D(pool_size=(3, 3), strides=2),
    Conv2D(filters=128, kernel_size=(3, 3), activation=LeakyReLU(alpha=0.01), padding = 'same', strides = 2, input_shape=(X.shape[1:])),
    AveragePooling2D(pool_size=(3, 3), strides=2),
    Flatten(),
    Dense(units=64, activation=LeakyReLU(alpha=0.01)),
    Dense(units=128, activation=LeakyReLU(alpha=0.01)),
    Dense(units=256, activation=LeakyReLU(alpha=0.01)),
    Dense(units=2, activation='softmax')
])

In [67]:
model.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_28 (Conv2D)          (None, 112, 112, 64)      1792      
                                                                 
 average_pooling2d_24 (Aver  (None, 55, 55, 64)        0         
 agePooling2D)                                                   
                                                                 
 conv2d_29 (Conv2D)          (None, 28, 28, 128)       73856     
                                                                 
 average_pooling2d_25 (Aver  (None, 13, 13, 128)       0         
 agePooling2D)                                                   
                                                                 
 flatten_13 (Flatten)        (None, 21632)             0         
                                                                 
 dense_63 (Dense)            (None, 64)              

In [68]:
#ModelCheckpoint callback saves a model at some interval. 
filepath=r"C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Model\Pembagian Berdasarkan Kanker atau Tidak\Undersampling Training 4515 (NV 6705-5500=1205)\weights-improvement-{epoch:02d}-{val_recall:.2f}.hdf5" #File name includes epoch and validation accuracy.
#Use Mode = max for accuracy and min for loss. 
checkpoint = ModelCheckpoint(filepath, monitor='val_recall', verbose=1, save_best_only=True, mode='max')


early_stop = EarlyStopping(monitor='val_recall', patience=10, verbose=1, mode='max')

#CSVLogger logs epoch, acc, loss, val_acc, val_loss
log_csv = CSVLogger('Model sendiri 6.csv', separator=',', append=False)

callbacks_list = [checkpoint, early_stop, log_csv]
#callbacks_list = [checkpoint, log_csv]

In [69]:
model.compile(loss='categorical_crossentropy',
              optimizer='Adam',
              metrics=['Recall'],
              )

model.fit(X, y_one_hot,
          batch_size=32,
          epochs=100,
          validation_split=0.2,
          callbacks=callbacks_list)

Epoch 1/100
Epoch 1: val_recall improved from -inf to 0.42525, saving model to C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Model\Pembagian Berdasarkan Kanker atau Tidak\Undersampling Training 4515 (NV 6705-5500=1205)\weights-improvement-01-0.43.hdf5
Epoch 2/100
Epoch 2: val_recall improved from 0.42525 to 0.58583, saving model to C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Model\Pembagian Berdasarkan Kanker atau Tidak\Undersampling Training 4515 (NV 6705-5500=1205)\weights-improvement-02-0.59.hdf5
Epoch 3/100
Epoch 3: val_recall improved from 0.58583 to 0.60465, saving model to C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Model\Pembagian Berdasarkan Kanker atau Tidak\Undersampling Training 4515 (NV 6705-5500=1205)\weights-improvement-03-0.60.hdf5
Epoch 4/100
Epoch 4: val_recall improved from 0.60465 to 0.61794, saving model to C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Model\Pembagian Berdasarkan Kanker atau Tidak\Undersampling Training 4515 (NV 6705

Epoch 24/100
Epoch 24: val_recall did not improve from 0.70210
Epoch 25/100
Epoch 25: val_recall improved from 0.70210 to 0.70432, saving model to C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Model\Pembagian Berdasarkan Kanker atau Tidak\Undersampling Training 4515 (NV 6705-5500=1205)\weights-improvement-25-0.70.hdf5
Epoch 26/100
Epoch 26: val_recall improved from 0.70432 to 0.72868, saving model to C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Model\Pembagian Berdasarkan Kanker atau Tidak\Undersampling Training 4515 (NV 6705-5500=1205)\weights-improvement-26-0.73.hdf5
Epoch 27/100
Epoch 27: val_recall did not improve from 0.72868
Epoch 28/100
Epoch 28: val_recall did not improve from 0.72868
Epoch 29/100
Epoch 29: val_recall did not improve from 0.72868
Epoch 30/100
Epoch 30: val_recall did not improve from 0.72868
Epoch 31/100
Epoch 31: val_recall did not improve from 0.72868
Epoch 32/100
Epoch 32: val_recall did not improve from 0.72868
Epoch 33/100
Epoch 33: val_reca

<keras.src.callbacks.History at 0x2da5d2732e0>

# Load Model Built

In [None]:
path = r'C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Model\Pembagian Berdasarkan Kanker atau Tidak\Resnet50. Size 224, Batch 32, FC 1000, Iter 125, val 0.2\\'
model_path = path + "weights-improvement-10-0.85" + ".hdf5"
model = tf.keras.models.load_model(model_path)

# Predict

## Predict From Existed and Prepared Data

In [None]:
X_Test = X_Test/255.0

In [None]:
predictions = model.predict(x=X_Test, steps=len(X_Test), verbose=1)
print("")
print(predictions)
print("")
print(f'Prediction shape: {predictions.shape}')

In [None]:
#Confusion matrix Berdasarkan Penyakit
cm = confusion_matrix(y_true=y_Test, y_pred=np.argmax(predictions, axis=-1))

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix')

    print(cm)
    print("")

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')


cm_plot_labels = ["Kanker", "Non-Kanker"]
plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title='Confusion Matrix')

In [None]:
def calculate_accuracy(confusion_matrix):
    correct_predictions = np.sum(np.diag(confusion_matrix))
    total_predictions = np.sum(confusion_matrix)
    accuracy = correct_predictions / total_predictions
    return accuracy

# Assuming 'cm' is the confusion matrix obtained
accuracy = calculate_accuracy(cm)
print(f"Accuracy: {accuracy * 100:.2f}%")

In [None]:
from sklearn.metrics import classification_report

# Assuming 'cm' is the confusion matrix obtained
class_names = ["Kanker", "Non-Kanker"]

# Calculate precision, recall, and F1-score for each class
classification_rep = classification_report(y_true=y_Test, y_pred=np.argmax(predictions, axis=-1),
                                           target_names=class_names)

print("Classification Report:")
print(classification_rep)

#Precision = TP/(TP + FP)
#Recall = TP/(TP +  FN)

## Predict Input From User  (Belum Beres)

In [None]:
#Harusnya bukan begini karena ini bukan colab
input_predict = files.upload()

In [None]:
CATEGORIES = ["Kanker", "Non-Kanker"]
def prepare(content):
    img_array = cv2.imdecode(np.frombuffer(content, np.uint8), cv2.IMREAD_COLOR)
    new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
    new_array = new_array.reshape(-1, IMG_SIZE, IMG_SIZE, 3)
    normalized_data = new_array / 255.0
    return normalized_data

file_content = next(iter(input_predict.values()))
prediction = model.predict([prepare(file_content)])

In [None]:
print(f'Kategori yang ada = {CATEGORIES}')
print(f'Peluang Kelas = {prediction}')
print("")

predicted_class = np.argmax(prediction)

#akiec, bcc, mel
if predicted_class == 0 or predicted_class == 1 or predicted_class == 4:
  kanker = "kanker"
  print(f'Prediksi dari gambar berkelas {CATEGORIES[predicted_class]}, {kanker}.')
#bkl, df, nv, vasc
else:
  kanker = "non-kanker"
  print(f'Prediksi dari gambar adalah {CATEGORIES[predicted_class]}, {kanker}.')