# Package

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import random
import pickle
import time
import itertools
import PIL
from tqdm import tqdm

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, LeakyReLU
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.utils import to_categorical

from keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger

from sklearn.metrics import confusion_matrix






In [2]:
IMG_SIZE = 224

# Preprocess Data

## Training Data

In [None]:
DATADIR = r"C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Skripsi dataset\Undersampling Training 4515 (NV 6705-5500=1205)\Pembagian Berdasarkan Kanker atau Tidak\Training Data"
CATEGORIES = ["Kanker", "Non-Kanker"]

In [None]:
training_data = []


def create_training_data():
    for category in CATEGORIES:

        path = os.path.join(DATADIR,category)  # create path to class
        class_num = CATEGORIES.index(category)  # get the classification  (0,1,2,etc)

        for img in tqdm(os.listdir(path)):  # iterate over each image per class
            try:
                img_array = cv2.imread(os.path.join(path,img) ,cv2.IMREAD_COLOR)  # convert to array
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))  # resize to normalize data size
                training_data.append([new_array, class_num])  # add this to our training_data
            except Exception as e:  # in the interest in keeping the output clean...
                pass
            #except OSError as e:
            #    print("OSErrroBad img most likely", e, os.path.join(path,img))
            #except Exception as e:
            #    print("general exception", e, os.path.join(path,img))

create_training_data()

In [None]:
print(len(training_data))

In [None]:
#Shuffle training data
random.shuffle(training_data)

In [None]:
#Turning into matrix for feature and label
X = []
y = []

for features,label in training_data:
    X.append(features)
    y.append(label)


X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
y = np.array(y)

In [None]:
print(X.shape)
print(y.shape)

## Test Data

In [None]:
DATADIR_TEST = r"C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Skripsi dataset\Undersampling Training 4515 (NV 6705-5500=1205)\Pembagian Berdasarkan Kanker atau Tidak\Test Data"
CATEGORIES = ["Kanker", "Non-Kanker"]

In [None]:
test_data = []


def create_test_data():
    for category in CATEGORIES:

        path = os.path.join(DATADIR_TEST,category)  # create path to class
        class_num = CATEGORIES.index(category)  # get the classification  (0,1,2,etc)

        for img in tqdm(os.listdir(path)):  # iterate over each image per class
            try:
                img_array = cv2.imread(os.path.join(path,img) ,cv2.IMREAD_COLOR)  # convert to array
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))  # resize to normalize data size
                test_data.append([new_array, class_num])  # add this to our training_data
            except Exception as e:  # in the interest in keeping the output clean...
                pass
            #except OSError as e:
            #    print("OSErrroBad img most likely", e, os.path.join(path,img))
            #except Exception as e:
            #    print("general exception", e, os.path.join(path,img))

create_test_data()

In [None]:
print(len(test_data))

In [None]:
#Turning into matrix for feature and label
X_Test= []
y_Test = []

for features,label in test_data:
    X_Test.append(features)
    y_Test.append(label)


X_Test= np.array(X_Test).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
y_Test = np.array(y_Test)

In [None]:
print(X_Test.shape)
print(y_Test.shape)

# Save Data

In [None]:
#Training Data
path = r'C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Skripsi dataset\Undersampling Training 4515 (NV 6705-5500=1205)\Pembagian Berdasarkan Kanker atau Tidak\\'
pickle_out = open(path + "X.pickle.224","wb")
pickle.dump(X, pickle_out)
pickle_out.close()

pickle_out = open(path + "y.pickle.224","wb")
pickle.dump(y, pickle_out)
pickle_out.close()

In [None]:
#Test Data
path = r'C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Skripsi dataset\Undersampling Training 4515 (NV 6705-5500=1205)\Pembagian Berdasarkan Kanker atau Tidak\\'
pickle_out = open(path + "X.pickle.224.Test","wb")
pickle.dump(X_Test, pickle_out)
pickle_out.close()

pickle_out = open(path + "y.pickle.224.Test","wb")
pickle.dump(y_Test, pickle_out)
pickle_out.close()

# Load Data

In [3]:
#Training data
path = r'C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Skripsi dataset\Undersampling Training 4515 (NV 6705-5500=1205)\Pembagian Berdasarkan Kanker atau Tidak\\'
pickle_in = open(path + "X.pickle.224","rb")
X = pickle.load(pickle_in)

pickle_in = open(path + "y.pickle.224","rb")
y = pickle.load(pickle_in)

In [4]:
#Test data
path = r'C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Skripsi dataset\Undersampling Training 4515 (NV 6705-5500=1205)\Pembagian Berdasarkan Kanker atau Tidak\\'
pickle_in = open(path + "X.pickle.224.Test","rb")
X_Test = pickle.load(pickle_in)

pickle_in = open(path + "y.pickle.224.Test","rb")
y_Test = pickle.load(pickle_in)

# Build CNN Architecture

In [5]:
#Normalize Data
X = X/255.0

In [7]:
model = Sequential()
pretrained_model = tf.keras.applications.ResNet50V2(include_top = False,
                                                 input_shape = (224,224,3),
                                                  pooling = 'max', classes = 2,
                                                  weights = None)

for layer in pretrained_model.layers:
    layer.trainable = True
    
model.add(pretrained_model)
model.add(Flatten())
model.add(Dense (1000, activation = 'relu'))
model.add(Dense (2 , activation = 'softmax'))




In [8]:
pretrained_model.summary()

Model: "resnet50v2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 230, 230, 3)          0         ['input_1[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 112, 112, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 pool1_pad (ZeroPadding2D)   (None, 114, 114, 64)         0         ['conv1_conv[0][0]']          
                                                                                         

 tchNormalization)                                                                                
                                                                                                  
 conv2_block3_preact_relu (  (None, 56, 56, 256)          0         ['conv2_block3_preact_bn[0][0]
 Activation)                                                        ']                            
                                                                                                  
 conv2_block3_1_conv (Conv2  (None, 56, 56, 64)           16384     ['conv2_block3_preact_relu[0][
 D)                                                                 0]']                          
                                                                                                  
 conv2_block3_1_bn (BatchNo  (None, 56, 56, 64)           256       ['conv2_block3_1_conv[0][0]'] 
 rmalization)                                                                                     
          

 conv3_block2_1_relu (Activ  (None, 28, 28, 128)          0         ['conv3_block2_1_bn[0][0]']   
 ation)                                                                                           
                                                                                                  
 conv3_block2_2_pad (ZeroPa  (None, 30, 30, 128)          0         ['conv3_block2_1_relu[0][0]'] 
 dding2D)                                                                                         
                                                                                                  
 conv3_block2_2_conv (Conv2  (None, 28, 28, 128)          147456    ['conv3_block2_2_pad[0][0]']  
 D)                                                                                               
                                                                                                  
 conv3_block2_2_bn (BatchNo  (None, 28, 28, 128)          512       ['conv3_block2_2_conv[0][0]'] 
 rmalizati

                                                                                                  
 conv3_block4_3_conv (Conv2  (None, 14, 14, 512)          66048     ['conv3_block4_2_relu[0][0]'] 
 D)                                                                                               
                                                                                                  
 conv3_block4_out (Add)      (None, 14, 14, 512)          0         ['max_pooling2d_1[0][0]',     
                                                                     'conv3_block4_3_conv[0][0]'] 
                                                                                                  
 conv4_block1_preact_bn (Ba  (None, 14, 14, 512)          2048      ['conv3_block4_out[0][0]']    
 tchNormalization)                                                                                
                                                                                                  
 conv4_blo

 D)                                                                 0]']                          
                                                                                                  
 conv4_block3_1_bn (BatchNo  (None, 14, 14, 256)          1024      ['conv4_block3_1_conv[0][0]'] 
 rmalization)                                                                                     
                                                                                                  
 conv4_block3_1_relu (Activ  (None, 14, 14, 256)          0         ['conv4_block3_1_bn[0][0]']   
 ation)                                                                                           
                                                                                                  
 conv4_block3_2_pad (ZeroPa  (None, 16, 16, 256)          0         ['conv4_block3_1_relu[0][0]'] 
 dding2D)                                                                                         
          

 conv4_block5_2_relu (Activ  (None, 14, 14, 256)          0         ['conv4_block5_2_bn[0][0]']   
 ation)                                                                                           
                                                                                                  
 conv4_block5_3_conv (Conv2  (None, 14, 14, 1024)         263168    ['conv4_block5_2_relu[0][0]'] 
 D)                                                                                               
                                                                                                  
 conv4_block5_out (Add)      (None, 14, 14, 1024)         0         ['conv4_block4_out[0][0]',    
                                                                     'conv4_block5_3_conv[0][0]'] 
                                                                                                  
 conv4_block6_preact_bn (Ba  (None, 14, 14, 1024)         4096      ['conv4_block5_out[0][0]']    
 tchNormal

                                                                                                  
 conv5_block2_preact_relu (  (None, 7, 7, 2048)           0         ['conv5_block2_preact_bn[0][0]
 Activation)                                                        ']                            
                                                                                                  
 conv5_block2_1_conv (Conv2  (None, 7, 7, 512)            1048576   ['conv5_block2_preact_relu[0][
 D)                                                                 0]']                          
                                                                                                  
 conv5_block2_1_bn (BatchNo  (None, 7, 7, 512)            2048      ['conv5_block2_1_conv[0][0]'] 
 rmalization)                                                                                     
                                                                                                  
 conv5_blo

In [9]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50v2 (Functional)     (None, 2048)              23564800  
                                                                 
 flatten (Flatten)           (None, 2048)              0         
                                                                 
 dense (Dense)               (None, 1000)              2049000   
                                                                 
 dense_1 (Dense)             (None, 2)                 2002      
                                                                 
Total params: 25615802 (97.72 MB)
Trainable params: 25570362 (97.54 MB)
Non-trainable params: 45440 (177.50 KB)
_________________________________________________________________


In [10]:
#ModelCheckpoint callback saves a model at some interval. 
filepath=r"C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Model\Pembagian Berdasarkan Kanker atau Tidak\Undersampling Training 4515 (NV 6705-5500=1205)\weights-improvement-{epoch:02d}-{val_recall:.2f}.hdf5" #File name includes epoch and validation accuracy.
#Use Mode = max for accuracy and min for loss. 
checkpoint = ModelCheckpoint(filepath, monitor='val_recall', verbose=1, save_best_only=True, mode='max')


early_stop = EarlyStopping(monitor='val_recall', patience=10, verbose=1, mode='max')

#CSVLogger logs epoch, acc, loss, val_acc, val_loss
log_csv = CSVLogger('Resnet 50V2, Batch 32, Val 0.2, W=None.csv', separator=',', append=False)

callbacks_list = [checkpoint, early_stop, log_csv]
#callbacks_list = [checkpoint, log_csv]

In [11]:
model.compile(loss='categorical_crossentropy',
              optimizer='Adam',
              metrics=['Recall'],
              )

y_one_hot = to_categorical(y, num_classes=2)

model.fit(X, y_one_hot,
          batch_size=32,
          epochs=100,
          validation_split=0.2,
          callbacks=callbacks_list)




MemoryError: Unable to allocate 4.05 GiB for an array with shape (3612, 224, 224, 3) and data type float64

# Load Model Built

In [None]:
path = r'C:\Users\Z6\Desktop\Skripsi 10120060 (Jangan Sentuh)\Model\Pembagian Berdasarkan Kanker atau Tidak\Resnet50. Size 224, Batch 32, FC 1000, Iter 125, val 0.2\\'
model_path = path + "weights-improvement-10-0.85" + ".hdf5"
model = tf.keras.models.load_model(model_path)

# Predict

## Predict From Existed and Prepared Data

In [None]:
X_Test = X_Test/255.0

In [None]:
predictions = model.predict(x=X_Test, steps=len(X_Test), verbose=1)
print("")
print(predictions)
print("")
print(f'Prediction shape: {predictions.shape}')

In [None]:
#Confusion matrix Berdasarkan Penyakit
cm = confusion_matrix(y_true=y_Test, y_pred=np.argmax(predictions, axis=-1))

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix')

    print(cm)
    print("")

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')


cm_plot_labels = ["Kanker", "Non-Kanker"]
plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title='Confusion Matrix')

In [None]:
def calculate_accuracy(confusion_matrix):
    correct_predictions = np.sum(np.diag(confusion_matrix))
    total_predictions = np.sum(confusion_matrix)
    accuracy = correct_predictions / total_predictions
    return accuracy

# Assuming 'cm' is the confusion matrix obtained
accuracy = calculate_accuracy(cm)
print(f"Accuracy: {accuracy * 100:.2f}%")

In [None]:
from sklearn.metrics import classification_report

# Assuming 'cm' is the confusion matrix obtained
class_names = ["Kanker", "Non-Kanker"]

# Calculate precision, recall, and F1-score for each class
classification_rep = classification_report(y_true=y_Test, y_pred=np.argmax(predictions, axis=-1),
                                           target_names=class_names)

print("Classification Report:")
print(classification_rep)

#Precision = TP/(TP + FP)
#Recall = TP/(TP +  FN)

## Predict Input From User  (Belum Beres)

In [None]:
#Harusnya bukan begini karena ini bukan colab
input_predict = files.upload()

In [None]:
CATEGORIES = ["Kanker", "Non-Kanker"]
def prepare(content):
    img_array = cv2.imdecode(np.frombuffer(content, np.uint8), cv2.IMREAD_COLOR)
    new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
    new_array = new_array.reshape(-1, IMG_SIZE, IMG_SIZE, 3)
    normalized_data = new_array / 255.0
    return normalized_data

file_content = next(iter(input_predict.values()))
prediction = model.predict([prepare(file_content)])

In [None]:
print(f'Kategori yang ada = {CATEGORIES}')
print(f'Peluang Kelas = {prediction}')
print("")

predicted_class = np.argmax(prediction)

#akiec, bcc, mel
if predicted_class == 0 or predicted_class == 1 or predicted_class == 4:
  kanker = "kanker"
  print(f'Prediksi dari gambar berkelas {CATEGORIES[predicted_class]}, {kanker}.')
#bkl, df, nv, vasc
else:
  kanker = "non-kanker"
  print(f'Prediksi dari gambar adalah {CATEGORIES[predicted_class]}, {kanker}.')