In [None]:
!mkdir -p data
!wget https://isic-challenge-data.s3.amazonaws.com/2020/ISIC_2020_Training_JPEG.zip
!mv ISIC_2020_Training_JPEG.zip data/jpeg.zip
!unzip data/jpeg.zip -d data/jpeg
!rename.ul jpg jpeg data/jpeg/train/*.jpg
!wget https://isic-challenge-data.s3.amazonaws.com/2020/ISIC_2020_Training_GroundTruth.csv
!mv ISIC_2020_Training_GroundTruth.csv data/train.csv
!rm data/jpeg.zip


In [1]:
import os
import tensorflow as tf
import numpy as np
import pandas as pd
from model import MyModel
from data_utils import *


2022-01-20 12:19:14.025843: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# --- Paths ---------------------------------------------------------------
DATA_DIR = "data"
IMAGES_DIR = os.path.join(DATA_DIR, "jpeg", "train")  # directory holding the training JPEGs
TRAIN_CSV = os.path.join(DATA_DIR, "train.csv")  # ground-truth table, one row per image
TF_PREFIX = "train"  # not referenced by the cells visible in this notebook

# --- Split fractions (applied to the rows left after the validation hold-out) ---
TRAIN_SPLIT = 0.9
SMALL_SPLIT = 0.01

# --- Input pipeline ------------------------------------------------------
BATCH_SIZE = 32
IMAGE_SIZE = (224, 224)  # size passed to tf.image.resize in map_fn



In [3]:
# Seed NumPy's global RNG so the shuffle below (and therefore every split) is repeatable.
np.random.seed(0)

# Load the ground truth, shuffle all rows, and key the frame by image name.
original_df = (
    pd.read_csv(TRAIN_CSV)
    .sample(frac=1)
    .reset_index(drop=True)
    .set_index('image_name')
)

# The first 2000 shuffled rows become the validation hold-out; the remainder
# is what the train/test/small splits are carved from.
validation_df, original_df = original_df.iloc[:2000], original_df.iloc[2000:]

split_point = int(len(original_df) * TRAIN_SPLIT)
small_split_point = int(len(original_df) * SMALL_SPLIT)

train_df, test_df = original_df.iloc[:split_point], original_df.iloc[split_point:]
# small_df is a prefix of the same frame, so it overlaps the start of train_df.
small_df = original_df.iloc[:small_split_point]

# Bare tuple so the cell displays the four split shapes.
train_df.shape, test_df.shape, small_df.shape, validation_df.shape


((28013, 7), (3113, 7), (311, 7), (2000, 7))

In [4]:
# Lookup object used by map_fn to fetch each image's data vector and target by image name.
# It is built from the full TRAIN_CSV file, not from any of the split DataFrames.
# NOTE(review): CsvTransformer presumably comes from the `data_utils` star import — confirm.
transformer = CsvTransformer(TRAIN_CSV)


In [9]:
def map_fn(filename):
    """Turn one image path into a ``(features, target)`` example.

    Args:
        filename: Scalar string tensor with the path of a JPEG file. The
            file's base name (text before the first ``.``) is used as the
            image name when querying ``transformer``.

    Returns:
        A tuple ``(features, target)``. ``features`` is a dict with keys
        ``"image"`` (the decoded image resized to ``IMAGE_SIZE``, three
        channels), ``"image_name"`` (string tensor) and ``"data"`` (the
        result of ``transformer.get_data_vector``). ``target`` is the
        transformer's ``'target'`` value cast to ``tf.int32``.
    """
    # channels=3 forces RGB output. Without it the channel count is taken from
    # each file, so a single grayscale JPEG would break batching, and the
    # static channel dimension would be unknown to downstream layers.
    image = tf.image.decode_jpeg(tf.io.read_file(filename), channels=3)
    image = tf.image.resize(image, IMAGE_SIZE)

    # ".../<image_name>.jpeg" -> "<image_name>"
    image_name = tf.strings.split(filename, sep='/')[-1]
    image_name = tf.strings.split(image_name, sep='.')[0]
    data = transformer.get_data_vector(image_name)
    target = tf.cast(transformer.get_vector_from_image_name('target', image_name), dtype=tf.int32)

    return {"image": image,
            "image_name": image_name,
            "data": data}, target

def get_dataset(df: pd.DataFrame, images_dir, batch_size: int, cache = True):
    """Build a batched ``tf.data.Dataset`` of ``map_fn`` examples for ``df``.

    Args:
        df: Frame whose index holds the image names (file names without the
            ``.jpeg`` extension).
        images_dir: Directory containing the ``<image_name>.jpeg`` files.
        batch_size: Number of examples per batch.
        cache: When true, cache the mapped (decoded) examples so later passes
            over the dataset skip file reading and decoding.

    Returns:
        A dataset yielding batches of ``(features, target)`` as produced by
        ``map_fn``, in the row order of ``df``.
    """
    # '/' is used on purpose: map_fn splits the path on '/' to recover the image name.
    filenames = images_dir + '/' + df.index.values + ".jpeg"

    ds = tf.data.Dataset.from_tensor_slices(filenames)
    ds = ds.map(map_fn)
    if cache:
        ds = ds.cache()
    # Batch first and prefetch last, so whole batches are prepared ahead of the
    # consumer; prefetching before batching only buffers single examples.
    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    return ds

In [10]:
# Full training dataset is currently disabled; the call below matches get_dataset's signature.
# train_dataset = get_dataset(df=train_df, images_dir=IMAGES_DIR, batch_size=BATCH_SIZE, cache=True)

# First 2000 training rows only.
small_train_dataset = get_dataset(
    df=train_df.iloc[:2000],
    images_dir=IMAGES_DIR,
    batch_size=BATCH_SIZE,
)
test_dataset = get_dataset(
    df=test_df,
    images_dir=IMAGES_DIR,
    batch_size=BATCH_SIZE,
)
small_dataset = get_dataset(
    df=small_df,
    images_dir=IMAGES_DIR,
    batch_size=BATCH_SIZE,
)
# The validation dataset is the only one built without caching.
validation_dataset = get_dataset(
    df=validation_df,
    images_dir=IMAGES_DIR,
    batch_size=BATCH_SIZE,
    cache=False,
)

In [11]:
# Print the class balance (share of positive targets) for each listed split.
dfs = [(test_df, "test"), (small_df, "small")]

for df, df_name in dfs:

    # minlength=2 guarantees a [negatives, positives] pair even when a split
    # contains no positive rows; without it the unpacking raises ValueError.
    neg, pos = np.bincount(df['target'], minlength=2)
    total = neg + pos
    # Guard the percentage so a split with zero rows reports 0.00% instead of dividing by zero.
    pos_percent = 100 * pos / total if total else 0.0
    print('{} Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
        df_name, total, pos, pos_percent))


test Examples:
    Total: 3113
    Positive: 52 (1.67% of total)

small Examples:
    Total: 311
    Positive: 3 (0.96% of total)



In [12]:
# Build the model through MyModel's factory, pointing it at the "weights/" directory and
# requesting compilation. The factory is defined in model.py and is not visible here.
# NOTE(review): presumably this restores pretrained weights from "weights/" — confirm in model.py.
weights_model = MyModel.create_standard_version(load_weights_path="weights/", compile=True)
# Bare expression: shows the model's repr as the cell output.
weights_model



<model.MyModel at 0x7f9429bc5fa0>

In [None]:
# Evaluate the loaded model on the test split.
# NOTE(review): assumes MyModel follows the Keras Model.evaluate contract (returns loss/metrics) — confirm.
weights_model.evaluate(test_dataset)

2022-01-20 12:22:57.367733: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)




In [None]:
# Evaluate the loaded model on the 2000-row validation hold-out (dataset built without caching).
# NOTE(review): assumes MyModel follows the Keras Model.evaluate contract (returns loss/metrics) — confirm.
weights_model.evaluate(validation_dataset)

In [None]:

def _percentage(numerator, denominator):
    """Return ``numerator / denominator`` as a percentage, or NaN when the denominator is 0."""
    if denominator == 0:
        return float("nan")
    return numerator / denominator * 100.0


def get_stats(dataset, threshold, model=None):
    """Print prediction statistics and thresholded confusion-matrix metrics.

    Runs ``model.predict`` on every batch of ``dataset``, then prints the mean,
    standard deviation and deciles of the raw predictions, followed by the
    true/false positive/negative counts, sensitivity and specificity obtained
    by labelling a prediction positive when it is strictly above ``threshold``.

    Args:
        dataset: Iterable yielding ``(features, target)`` batches; targets are
            treated as binary labels (non-zero means positive).
        threshold: Decision threshold applied to the raw predictions.
        model: Object exposing ``predict(features)``. Defaults to the
            notebook-level ``weights_model`` so existing calls keep working.

    Returns:
        None. All results are printed.

    Raises:
        ValueError: If the number of predictions differs from the number of targets.
    """
    if model is None:
        model = weights_model

    batch_predictions, batch_targets = [], []
    for el, target in dataset:
        # Flatten per batch so a smaller final batch concatenates cleanly.
        batch_predictions.append(np.asarray(model.predict(el)).reshape(-1))
        batch_targets.append(np.asarray(target).reshape(-1))

    if not batch_predictions:
        # Stacking an empty list raises an opaque error; report the cause instead.
        print("dataset is empty - no statistics to report")
        return

    predictions = np.concatenate(batch_predictions)
    targets = np.concatenate(batch_targets)
    if predictions.shape != targets.shape:
        raise ValueError(
            f"got {predictions.size} predictions for {targets.size} targets; "
            "expected exactly one prediction per target"
        )

    print(f"pred stats - mean: {predictions.mean()}, std: {predictions.std()}")
    deciles = np.percentile(predictions, np.arange(10, 100, 10))
    print(f"pred deciles: {deciles}")

    # Count the four outcomes with boolean masks. Unlike a confusion matrix
    # sized from the observed labels, this always yields all four counts, even
    # when every label and every thresholded prediction is 0.
    predicted_positive = predictions > threshold
    actual_positive = targets.astype(bool)
    true_positives = int(np.count_nonzero(predicted_positive & actual_positive))
    false_positives = int(np.count_nonzero(predicted_positive & ~actual_positive))
    true_negatives = int(np.count_nonzero(~predicted_positive & ~actual_positive))
    false_negatives = int(np.count_nonzero(~predicted_positive & actual_positive))
    print("true_positives: %d, false_positives: %d, true_negatives: %d, false_negatives: %d" % (true_positives, false_positives, true_negatives, false_negatives))

    # NaN (printed as "nan%") marks a metric that is undefined because the
    # dataset holds no positives (sensitivity) or no negatives (specificity).
    sensitivity = _percentage(true_positives, true_positives + false_negatives)
    specificity = _percentage(true_negatives, true_negatives + false_positives)

    print(f"sensitivity: {sensitivity:.2f}%, specificity: {specificity:.2f}%")

In [None]:
# Thresholded statistics on the first 2000 training rows.
# NOTE(review): the 0.023 threshold is repeated in several cells and its origin is not shown
# in this notebook — consider a named constant in the config cell.
get_stats(dataset = small_train_dataset, threshold = 0.023)

In [None]:
# Thresholded statistics on the test split.
# NOTE(review): the 0.023 threshold is repeated in several cells and its origin is not shown
# in this notebook — consider a named constant in the config cell.
get_stats(dataset = test_dataset, threshold = 0.023)

In [None]:
# Thresholded statistics on the 2000-row validation hold-out.
# NOTE(review): the 0.023 threshold is repeated in several cells and its origin is not shown
# in this notebook — consider a named constant in the config cell.
get_stats(dataset = validation_dataset, threshold = 0.023)