In [1]:
# Mount Google Drive at /content/drive so the files stored there are
# reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Work from the project folder on Drive: the dataset, checkpoint and index
# paths used further down are all relative. Then list the folder contents.
%cd '/content/drive/MyDrive/Image Retrieval'
!ls

/content/drive/MyDrive/Image Retrieval
 augmentation.ipynb
 binary_scenario
 binary_scenario_augmented_400X
 checkpoint
 citra_split.ipynb
 comparing_graph.ipynb
 comparing_retrieval.ipynb
 core
'data binary splitting.ipynb'
'data subclass splitting.ipynb'
 extract_feature_binary.ipynb
 extract_feature_subclass.ipynb
 original
 paper
 reconstruction
 retrieval_1_100X.ipynb
 retrieval_1_100X_subclass.ipynb
 retrieval_1_200X.ipynb
 retrieval_1_200X_subclass.ipynb
 retrieval_1_400X.ipynb
 retrieval_1_400X_subclass.ipynb
 retrieval_1_40X_subclass.ipynb
 subclass_400
 subclass_scenario
 train_auto_encoder_magnification_1_100.ipynb
 train_auto_encoder_magnification_1_200.ipynb
 train_auto_encoder_magnification_1_400.ipynb
 train_auto_encoder_magnification_1_40.ipynb
 train_auto_encoder_subclass_1_100.ipynb
 train_auto_encoder_subclass_1_200.ipynb
 train_auto_encoder_subclass_1_400.ipynb
 train_auto_encoder_subclass_1_40.ipynb
 training_1_100
 training_1_100.h5
 training_1_100.json
 training_

In [3]:
import copy
import numpy as np
import os
import json
import cv2
import sklearn.metrics as metric
from core.AutoEncoder1 import ConvAutoEncoder
from tensorflow.keras.models import Model
import pandas as pd

In [4]:
def euclidean(a, b):
	"""Return the Euclidean (L2) distance between two vectors.

	Args:
		a: first vector, a numpy array or any array-like (list, tuple).
		b: second vector, broadcast-compatible with ``a``.

	Returns:
		The L2 norm of ``a - b`` as a numpy floating-point scalar.
	"""
	# coerce both operands so plain Python sequences (e.g. feature vectors
	# freshly parsed from JSON) can be subtracted element-wise as well
	difference = np.asarray(a) - np.asarray(b)
	return np.linalg.norm(difference)

In [5]:
def perform_search(query_features, indexed_train, max_results=5):
	"""Find the indexed feature vectors closest to a query vector.

	Args:
		query_features: feature vector of the query image.
		indexed_train: mapping whose ``"features"`` entry holds one feature
			vector per indexed image (array or nested lists).
		max_results: number of nearest entries to keep.

	Returns:
		A list of ``(distance, index)`` tuples ordered by increasing
		Euclidean distance; equal distances are ordered by increasing index.
	"""
	# convert the whole index once instead of once per row inside a loop
	features = np.asarray(indexed_train["features"])
	if features.shape[0] == 0:
		# nothing indexed, nothing to retrieve
		return []
	# broadcasting subtracts the query from every indexed vector at once;
	# each difference is flattened so the norm runs over one row per entry
	deltas = (features - np.asarray(query_features)).reshape(features.shape[0], -1)
	distances = np.linalg.norm(deltas, axis=1)
	# a stable sort keeps the lower index first when distances tie, which is
	# the order produced by sorting (distance, index) tuples
	nearest = np.argsort(distances, kind="stable")[:max_results]
	return [(float(distances[idx]), int(idx)) for idx in nearest]

In [6]:
# --- experiment configuration ---------------------------------------------
# Root folder of the dataset split and the magnification evaluated here.
base_dataset = "subclass_scenario"
magnification = "40X"

# Sub-class folder names, in the order they are traversed when indexing.
class_dir = [
    'tubular_adenoma',
    'phyllodes_tumor',
    'papillary_carcinoma',
    'mucinous_carcinoma',
    'lobular_carcinoma',
    'fibroadenoma',
    'ductal_carcinoma',
    'adenosis',
]

# Size handed to cv2.resize and to the auto-encoder builder.
IMAGE_SIZE = (256, 256)

In [7]:
print("[INFO] indexing file images BreaKHis dataset...")
# Collect the path of every test image, one class folder at a time.
dataset = []
for class_item in class_dir:
    cur_dir = os.path.join(base_dataset, 'test', magnification, class_item)
    # os.listdir returns entries in arbitrary order; sort them so the query
    # order is the same on every run
    for file in sorted(os.listdir(cur_dir)):
        # The loading step only reads paths containing ".png". Keeping any
        # other file here would leave `dataset` longer than the image array
        # and shift every later path/label lookup done by position.
        if file.endswith(".png"):
            dataset.append(os.path.join(cur_dir, file))

[INFO] indexing file images BreaKHis dataset...


In [8]:
# number of indexed test files that will be used as queries
n_indexed = len(dataset)
print("len to retrieving:", n_indexed)

len to retrieving: 199


In [9]:
print("[INFO] load images BreaKHis dataset...")
# Read every indexed PNG and resize it to the network input size.
images = []
for image_path in dataset:
    if ".png" not in image_path:
        continue
    image = cv2.imread(image_path)
    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising; fail here with the offending path rather than
    # with an opaque error inside cv2.resize
    if image is None:
        raise IOError("could not read image: {}".format(image_path))
    images.append(cv2.resize(image, IMAGE_SIZE))

[INFO] load images BreaKHis dataset...


In [10]:
# Stack the loaded images into one float32 array and divide by 255
# (maps 8-bit pixel values into the [0, 1] range).
print("[INFO] normalization...")
test_x = np.divide(np.array(images).astype("float32"), 255.0)

[INFO] normalization...


In [12]:
# Rebuild the auto-encoder for IMAGE_SIZE inputs with 3 channels.
auto_encoder = ConvAutoEncoder.build(*IMAGE_SIZE, 3)
# Restore the trained weights from the checkpoint on disk.
print("[INFO] loading auto encoder model...")
auto_encoder.load_weights("training_1_40_subclass/cp.ckpt")
# Load the pre-computed training index; later cells read its "features"
# and "labels" entries.
with open('training_1_indexed_40_subclass.json') as index_file:
  training_indexed = json.load(index_file)

[INFO] loading auto encoder model...


In [13]:
# Cut the auto-encoder at its "encoded" layer: the resulting model maps an
# input image straight to that layer's output.
latent_output = auto_encoder.get_layer("encoded").output
encoder = Model(inputs=auto_encoder.input, outputs=latent_output)

# Run every test image through the encoder to obtain its feature vector.
print("[INFO] encoding images...")
features_retrieved = encoder.predict(test_x)

[INFO] encoding images...


In [14]:
query_indexes = list(range(0, test_x.shape[0]))
# Paths and encoded images are paired by position below, so both sequences
# must have the same length; otherwise every label lookup would be shifted.
if len(dataset) != len(query_indexes):
    raise ValueError(
        "dataset has {} paths but {} images were encoded; they must match "
        "one-to-one".format(len(dataset), len(query_indexes)))
label_builder = list(np.unique(training_indexed["labels"]))
class_builder = {label_unique: [] for label_unique in label_builder}
recalls = copy.deepcopy(class_builder)
precisions = copy.deepcopy(class_builder)
# loop over the testing indexes
for i in query_indexes:
    queryFeatures = features_retrieved[i]
    results = perform_search(queryFeatures, training_indexed, max_results=5)
    labels_ret = [training_indexed["labels"][r[1]] for r in results]
    # the class name is the folder that directly contains the image; this
    # does not depend on how many path components precede it
    label_true = os.path.basename(os.path.dirname(dataset[i]))
    hits = sum(1 for label in labels_ret if label == label_true)
    # These two values equal what sklearn's weighted recall_score /
    # precision_score return when every "true" label is the query's class,
    # computed directly so no undefined-metric warnings are emitted.
    # NOTE: "recall" is the fraction of retrieved items sharing the query's
    # class, and "precision" is 1.0 as soon as one retrieved item matches.
    recall = hits / len(labels_ret) if labels_ret else 0.0
    precision = 1.0 if hits else 0.0
    # setdefault tolerates a test class that is absent from the training labels
    recalls.setdefault(label_true, []).append(recall)
    precisions.setdefault(label_true, []).append(precision)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
# Print the mean score of every class followed by the mean over classes,
# first for the recall values and then for the precision values.
comb_recall, comb_precision = [], []
for header, tag, per_class, combined in (
        ("recall values:", "recall", recalls, comb_recall),
        ("\nprecision values:", "precision", precisions, comb_precision)):
    print(header)
    for key, values in per_class.items():
        # a class without any query has no scores: show nan for it instead
        # of letting np.average warn about an empty input
        average_val = np.average(values) if len(values) else float("nan")
        print(key, average_val)
        combined.append(average_val)
    # leave such classes out of the combined figure so that one empty class
    # does not turn the whole summary into nan
    scored = [val for val in combined if not np.isnan(val)]
    print("combined " + tag, np.average(scored) if scored else float("nan"))



recall values:
adenosis 0.3272727272727273
ductal_carcinoma 0.791111111111111
fibroadenoma 0.72
lobular_carcinoma 0.49333333333333335
mucinous_carcinoma 0.32999999999999996
papillary_carcinoma 0.2
phyllodes_tumor 0.44000000000000006
tubular_adenoma 0.17142857142857146
combined recall 0.43414321789321786

precision values:
adenosis 0.7272727272727273
ductal_carcinoma 0.9555555555555556
fibroadenoma 0.84
lobular_carcinoma 0.8666666666666667
mucinous_carcinoma 0.85
papillary_carcinoma 0.35714285714285715
phyllodes_tumor 0.9
tubular_adenoma 0.5714285714285714
combined precision 0.7585082972582973
