## Preliminaries

### TensorFlow

Running `import tensorflow` will import the default version; currently 1.x. Version 2.x can be used by running a cell with the `tensorflow_version` magic **before** running `import tensorflow`.  For add-ons: `!pip install tensorflow-addons`



<br>

### Libraries



In [1]:
import tensorflow as tf
import tensorboard.plugins.hparams.api as hp

import logging
import typing
import sklearn.model_selection as model_selection

import numpy as np
import pandas as pd

import os
import requests
import io
import glob
import pathlib

import math
import datetime

import matplotlib.pyplot as plt
import matplotlib.image as image

import decimal

2023-09-30 12:31:01.084880: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-09-30 12:31:01.287397: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
tf.__version__

'2.13.0'

In [3]:
if os.path.basename(os.getcwd()) != 'patterns':
    root = str(pathlib.Path(os.getcwd()).parent)
    os.chdir(root)

In [4]:
os.path.basename(os.getcwd())

'patterns'

<br>
<br>

### Device

In [5]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

2023-09-30 12:31:05.560651: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-30 12:31:05.673850: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-30 12:31:05.673894: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-30 12:31:06.591680: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-30 12:31:06.591746: I tensorflow/compile

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 16005709848501014081
 xla_global_id: -1,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 5652873216
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 8480120354395469103
 physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9"
 xla_global_id: 416903419]

<br>

### Logging

In [6]:
logging.basicConfig(level=logging.INFO,
                    format='%\n(message)s\n%(asctime)s.%(msecs)03d',
                    datefmt='%Y-%m-%d %H:%M:%S', force=True)
logger = logging.getLogger(__name__)

<br>

### Directories

Clean-up

In [7]:
%%bash

pathway=$(pwd)
echo $pathway

rm -rf $pathway/checkpoints && rm -rf $pathway/cache


/home/geometry/library/discourses/patterns


<br>

Create directories for (a) model checkpoint files, and (b) caches.

In [8]:
!mkdir -p checkpoints
!mkdir -p cache/training
!mkdir -p cache/validating

<br>
<br>

## Images

In [9]:
import src.elements.attributes
import src.elements.metadata
import src.elements.settings
import src.elements.source

import src.functions.streams

import src.modelling.splits
import src.modelling.pipeline

import src.sampling.interface


<br>

Instances

In [10]:
attributes = src.elements.attributes.Attributes()
metadata = src.elements.metadata.Metadata()
settings = src.elements.settings.Settings()
source = src.elements.source.Source()

<br>

Content

In [11]:
print(f'{attributes._fields},\n{metadata._fields},\n{settings._fields}, \n{source._fields}')

('ext', 'rows', 'columns', 'channels', 'rotations'),
('url', 'key', 'features', 'path', 'labels'),
('sample', 'replace', 'class_sample_size', 'epochs', 'random_state', 'minimum_class_instances', 'batch_size', 'train_size_initial', 'train_size_evaluation', 'early_stopping_patience', 'model_checkpoints_directory', 'error_matrix_variables'), 
('url', 'index_from', 'index_to', 'index_zero_filling', 'ext', 'directory')


<br>
<br>

### Background

In [12]:
%%bash

cd images
ls *.png | wc -l


62699


<br>
<br>

### Sample Of

In [13]:
sample = src.sampling.interface.Interface(settings=settings, metadata=metadata, source=source).exc()

In [14]:
sample.head()

Unnamed: 0,image_path,name,age_approx,anatom_site_general,sex,angle,drawn,image,AK,BCC,BKL,DF,MEL,NV,SCC,VASC
0,images/ISIC_0070170-090.png,ISIC_0070170-090.png,65.0,lower extremity,male,90,1,ISIC_0070170,False,False,False,False,True,False,False,False
1,images/ISIC_0031457-180.png,ISIC_0031457-180.png,65.0,lower extremity,male,180,1,ISIC_0031457,False,False,False,True,False,False,False,False
2,images/ISIC_0072254-090.png,ISIC_0072254-090.png,40.0,lower extremity,male,90,1,ISIC_0072254,False,True,False,False,False,False,False,False
3,images/ISIC_0054606-180.png,ISIC_0054606-180.png,70.0,anterior torso,male,180,1,ISIC_0054606,False,False,False,False,False,False,True,False
4,images/ISIC_0065149-270.png,ISIC_0065149-270.png,75.0,anterior torso,male,270,1,ISIC_0065149,False,False,False,False,False,False,False,True


In [15]:
sample.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4000 entries, 0 to 3999
Data columns (total 16 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   image_path           4000 non-null   object 
 1   name                 4000 non-null   object 
 2   age_approx           4000 non-null   float64
 3   anatom_site_general  4000 non-null   object 
 4   sex                  4000 non-null   object 
 5   angle                4000 non-null   int64  
 6   drawn                4000 non-null   int64  
 7   image                4000 non-null   object 
 8   AK                   4000 non-null   bool   
 9   BCC                  4000 non-null   bool   
 10  BKL                  4000 non-null   bool   
 11  DF                   4000 non-null   bool   
 12  MEL                  4000 non-null   bool   
 13  NV                   4000 non-null   bool   
 14  SCC                  4000 non-null   bool   
 15  VASC                 4000 non-null   b

<br>
<br>

## Splitting

<br>

#### Partitions

In [16]:
partitions = src.modelling.splits.Splits(
    settings=settings, metadata=metadata).exc(sample=sample)

In [17]:
partitions.training.shape

(2800, 16)

<br>
<br>

#### Tensors

In [18]:
pipeline = src.modelling.pipeline.Pipeline(
    attributes=attributes, metadata=metadata, settings=settings)

In [19]:
training = pipeline.exc(data=partitions.training, testing=False)
validating = pipeline.exc(data=partitions.validating, testing=False)
testing = pipeline.exc(data=partitions.testing, testing=True)

2023-09-30 12:31:27.585530: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-30 12:31:27.585650: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-30 12:31:27.585669: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-30 12:31:27.585876: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-09-30 12:31:27.585901: I tensorflow/compile

In [20]:
training.element_spec, validating.element_spec, testing.element_spec

((TensorSpec(shape=(None, None, None, 3), dtype=tf.float32, name=None),
  TensorSpec(shape=(None, 8), dtype=tf.bool, name=None)),
 (TensorSpec(shape=(None, None, None, 3), dtype=tf.float32, name=None),
  TensorSpec(shape=(None, 8), dtype=tf.bool, name=None)),
 TensorSpec(shape=(None, None, None, 3), dtype=tf.float32, name=None))

In [21]:
type(training)

tensorflow.python.data.ops.prefetch_op._PrefetchDataset

<br>
<br>

## Key Variables

### Labels

In [22]:
labels = metadata.labels
labels

['AK', 'BCC', 'BKL', 'DF', 'MEL', 'NV', 'SCC', 'VASC']

<br>

### Fields

In [23]:
fields = np.setdiff1d(sample.columns, metadata.labels)
fields

array(['age_approx', 'anatom_site_general', 'angle', 'drawn', 'image',
       'image_path', 'name', 'sex'], dtype=object)

<br>

### Epochs, Batch Size, Image Size

Note

> MATRIX $\rightarrow$ input shape $\rightarrow$ (width, height, channels)

In [24]:
epochs = settings.epochs
BATCH_SIZE = settings.batch_size
MATRIX = (attributes.columns, attributes.rows, attributes.channels)

<br>
<br>

## CNN

### References

* https://keras.io/applications/
* https://www.tensorflow.org/tutorials/images/transfer_learning



<br>

### Base

Exclude the final Global Average Pooling Layer and the densely connected layer/s.

In [25]:
vgg = tf.keras.applications.VGG19(include_top=False, input_shape=MATRIX, weights='imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5


<br>

Settings

Case: Do not train the parameters of the transfered models

In [26]:
vgg.trainable = False

<br>

Summary thus far

* `vgg.summary()`
* `for layer in vgg.layers:  print(layer.name)`
* `[layer.name for layer in vgg.layers]`
* `tf.keras.utils.plot_model(vgg, show_shapes=True, expand_nested=True)`

In [27]:
vgg.summary()

Model: "vgg19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [28]:
vgg.input

<KerasTensor: shape=(None, 224, 224, 3) dtype=float32 (created by layer 'input_1')>

In [29]:
vgg.output

<KerasTensor: shape=(None, 7, 7, 512) dtype=float32 (created by layer 'block5_pool')>

<br>

### Final Segment: Flatten, FCD, FCD

*Particular*

In [30]:
flattening = tf.keras.layers.Flatten()

In [31]:
alpha = tf.keras.layers.Dense(512, activation='relu', name='Alpha')
alpha_drop = tf.keras.layers.Dropout(rate=0.3, name='AlphaDrop')
beta = tf.keras.layers.Dense(512, activation='relu', name='Beta')
beta_drop = tf.keras.layers.Dropout(rate=0.3, name='BetaDrop')

<br>

*Common*

In [32]:
classifier = tf.keras.layers.Dense(len(labels), activation=tf.nn.softmax)

<br>

### Model

In [33]:
tree = tf.keras.models.Sequential([vgg, flattening, alpha, alpha_drop,
                            beta, beta_drop, classifier])

In [34]:
model = tf.keras.models.Sequential([vgg, flattening, alpha, alpha_drop,
                            beta, beta_drop, classifier])

In [35]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg19 (Functional)          (None, 7, 7, 512)         20024384  
                                                                 
 flatten (Flatten)           (None, 25088)             0         
                                                                 
 Alpha (Dense)               (None, 512)               12845568  
                                                                 
 AlphaDrop (Dropout)         (None, 512)               0         
                                                                 
 Beta (Dense)                (None, 512)               262656    
                                                                 
 BetaDrop (Dropout)          (None, 512)               0         
                                                                 
 dense (Dense)               (None, 8)                

<br>

### Compile

In [36]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy())

<br>

### Fit Function Parameters

Epochs

In [37]:
print(f"Number of epochs: {epochs}")

Number of epochs: 60


<br>

Steps per epoch

> steps_per_epoch = steps_per_epoch if partitions.training.shape[0] % BATCH_SIZE == 0 else steps_per_epoch + 1



In [39]:
steps_per_epoch = math.ceil( partitions.training.shape[0]/BATCH_SIZE )
print(f'Training: {partitions.training.shape}')
print(f"Steps per epoch: {steps_per_epoch}")

Training: (2800, 16)
Steps per epoch: 88


<br>

Validation steps

In [None]:
validation_steps = math.ceil(partitions..shape[0] / BATCH_SIZE)
# validation_steps = validation_steps if validating_.shape[0] % BATCH_SIZE == 0 else validation_steps + 1
print(f'Validation: {validating_.shape}')
print(f"Validation Steps: {validation_steps}")

In [None]:
test_steps = steps=math.ceil(testing_.shape[0] / BATCH_SIZE)
# test_steps = test_steps if testing_.shape[0] % BATCH_SIZE == 0 else test_steps + 1
print(f'Testing: {testing_.shape}')
print(f'Testing Steps: {test_steps}')

<br>

### Early Stopping

In [None]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    verbose=1,
    patience=8,
    mode='min',
    restore_best_weights=True)


<br>

### Model Checkpoint

In [None]:
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='/content/checkpoints/model_{epoch}.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    save_freq='epoch')

<br>

### Proceed

In [None]:
history = model.fit(x=training,
                    steps_per_epoch=steps_per_epoch,
                    epochs=epochs,
                    verbose='auto',
                    callbacks=[early_stopping, model_checkpoint],
                    validation_data=validating,
                    validation_steps=validation_steps,
                    validation_freq=1)

<br>
<br>

### Properties of the History Callback

#### Type

In [None]:
type(history)

<br>

#### Model Summary

In [None]:
history.model.summary()

<br>

#### Configurations per Attached Layers

In [None]:
for i in history.model.layers:
  print(i.get_config())

<br>

#### Loss Records

In [None]:
print('\n')
print(f"Keys: {history.history.keys()}\n")


# measures = np.array( [history.epoch, history.history['loss'], history.history['val_loss'] ] ).T
# measuresof = pd.DataFrame(measures, columns=['epoch', 'training_loss', 'validation_loss'])
# measuresof['epoch'] = measuresof.epoch.astype(int)

measuresof = pd.DataFrame(data={'epoch': np.array(history.epoch),
                                'training_loss': np.array(history.history['loss']),
                                'validation_loss': np.array(history.history['val_loss']) })
measuresof.info()


In [None]:
print(measuresof)

<br>

#### Predicting


`testing_p`:

*   https://colab.research.google.com/drive/1VUKfZMM0OvfgSiyEZch_-jI0gocDHpZ_#scrollTo=SkO7toZGqwYL



In [70]:
test_pred = history.model.predict(testing_p, steps=test_steps)
print(f'\n{type(test_pred)}')
print(f'\n{test_pred.shape}')
print(f'\n{test_pred}')


<class 'numpy.ndarray'>

(780, 8)

[[1.9048020e-01 1.5179668e-01 1.1169320e-01 ... 7.3379353e-02
  8.0241941e-02 1.2815093e-01]
 [1.3249104e-03 1.2126936e-01 1.7083633e-03 ... 5.3341538e-02
  7.9592496e-01 2.5324707e-04]
 [7.6133788e-02 1.6869329e-01 1.3538252e-01 ... 2.0644665e-01
  9.4475538e-02 9.8796114e-02]
 ...
 [7.6144934e-02 4.7196262e-02 1.8681829e-01 ... 1.2626271e-01
  2.9189961e-02 1.9672801e-01]
 [7.6144934e-02 4.7196262e-02 1.8681829e-01 ... 1.2626271e-01
  2.9189961e-02 1.9672801e-01]
 [5.4372195e-04 1.9317244e-01 9.9090554e-05 ... 1.4153143e-03
  7.8696585e-01 3.2624826e-06]]


In [71]:
test_input = testing_[['image_paths']]
print(test_input)

                                  image_paths
1476  images/ISIC_0014628_downsampled-090.png
3139              images/ISIC_0031201-270.png
3832              images/ISIC_0030555-270.png
1082              images/ISIC_0071069-180.png
3008              images/ISIC_0031329-270.png
...                                       ...
786               images/ISIC_0025471-180.png
2078              images/ISIC_0065828-270.png
1980              images/ISIC_0070103-000.png
2108              images/ISIC_0070980-180.png
2746              images/ISIC_0033031-180.png

[780 rows x 1 columns]


In [72]:
test_output = pd.DataFrame(test_pred, columns=labels)
print(test_output)

          MEL        NV       BCC            AK       BKL        DF      VASC  \
0    0.190480  0.151797  0.111693  5.747471e-02  0.206783  0.073379  0.080242   
1    0.001325  0.121269  0.001708  4.427134e-05  0.026133  0.053342  0.795925   
2    0.076134  0.168693  0.135383  7.177285e-02  0.148299  0.206447  0.094476   
3    0.268292  0.179805  0.073932  2.548975e-02  0.240177  0.039787  0.085343   
4    0.076145  0.047196  0.186818  2.156095e-01  0.122050  0.126263  0.029190   
..        ...       ...       ...           ...       ...       ...       ...   
775  0.067132  0.147632  0.135383  7.957631e-02  0.139315  0.258463  0.076522   
776  0.076145  0.047196  0.186818  2.156095e-01  0.122050  0.126263  0.029190   
777  0.076145  0.047196  0.186818  2.156095e-01  0.122050  0.126263  0.029190   
778  0.076145  0.047196  0.186818  2.156095e-01  0.122050  0.126263  0.029190   
779  0.000544  0.193172  0.000099  5.559305e-08  0.017800  0.001415  0.786966   

          SCC  
0    0.1281

In [73]:
pd.concat([test_input, test_output], axis=1, ignore_index=True)

Unnamed: 0,0,1,2,3,4,5,6,7,8
1476,images/ISIC_0014628_downsampled-090.png,,,,,,,,
3139,images/ISIC_0031201-270.png,,,,,,,,
3832,images/ISIC_0030555-270.png,,,,,,,,
1082,images/ISIC_0071069-180.png,,,,,,,,
3008,images/ISIC_0031329-270.png,,,,,,,,
...,...,...,...,...,...,...,...,...,...
774,,0.125163,0.090145,0.159662,1.260680e-01,0.161445,0.107124,0.054427,0.175966
775,,0.067132,0.147632,0.135383,7.957631e-02,0.139315,0.258463,0.076522,0.095976
776,,0.076145,0.047196,0.186818,2.156095e-01,0.122050,0.126263,0.029190,0.196728
777,,0.076145,0.047196,0.186818,2.156095e-01,0.122050,0.126263,0.029190,0.196728


In [74]:
test_input.join(test_output)

Unnamed: 0,image_paths,MEL,NV,BCC,AK,BKL,DF,VASC,SCC
1476,images/ISIC_0014628_downsampled-090.png,,,,,,,,
3139,images/ISIC_0031201-270.png,,,,,,,,
3832,images/ISIC_0030555-270.png,,,,,,,,
1082,images/ISIC_0071069-180.png,,,,,,,,
3008,images/ISIC_0031329-270.png,,,,,,,,
...,...,...,...,...,...,...,...,...,...
786,images/ISIC_0025471-180.png,,,,,,,,
2078,images/ISIC_0065828-270.png,,,,,,,,
1980,images/ISIC_0070103-000.png,,,,,,,,
2108,images/ISIC_0070980-180.png,,,,,,,,


<br>
<br>

### The Saved Models

In [75]:
!cd checkpoints && ls -l

total 1623832
-rw-r--r-- 1 root root 237541576 Sep 25 18:09 model_14.h5
-rw-r--r-- 1 root root 237541576 Sep 25 18:10 model_17.h5
-rw-r--r-- 1 root root 237541576 Sep 25 18:06 model_1.h5
-rw-r--r-- 1 root root 237541576 Sep 25 18:06 model_2.h5
-rw-r--r-- 1 root root 237541576 Sep 25 18:06 model_3.h5
-rw-r--r-- 1 root root 237541576 Sep 25 18:07 model_6.h5
-rw-r--r-- 1 root root 237541576 Sep 25 18:08 model_9.h5


<br>

An example

In [76]:
list_of_checkpoints = glob.glob('checkpoints/*.h5')
print(list_of_checkpoints)

['checkpoints/model_1.h5', 'checkpoints/model_2.h5', 'checkpoints/model_17.h5', 'checkpoints/model_9.h5', 'checkpoints/model_14.h5', 'checkpoints/model_6.h5', 'checkpoints/model_3.h5']


In [77]:
rng = np.random.default_rng(seed=5)
code = rng.integers(low=0, high=(len(list_of_checkpoints) - 1), size=1)[0]
print(code)

4


In [81]:
random_model = tf.keras.models.load_model(f'{list_of_checkpoints[code]}')
random_model_predictions = random_model.predict(testing_p, steps=test_steps)
print(random_model_predictions)

[[9.57333073e-02 1.03312969e-01 1.59602597e-01 ... 1.43591404e-01
  6.27234504e-02 1.56186104e-01]
 [6.68303482e-03 1.95415735e-01 6.68148370e-03 ... 1.29330084e-01
  6.10966027e-01 1.48870156e-03]
 [1.40112847e-01 1.24886684e-01 1.33774132e-01 ... 1.04847111e-01
  7.85754174e-02 1.52267545e-01]
 ...
 [7.18198344e-02 6.83472604e-02 1.81775182e-01 ... 1.40529633e-01
  4.12811637e-02 1.68315202e-01]
 [7.18198344e-02 6.83472604e-02 1.81775182e-01 ... 1.40529633e-01
  4.12811637e-02 1.68315202e-01]
 [7.93763844e-04 9.64659601e-02 5.40196052e-05 ... 1.41012669e-03
  8.88074815e-01 2.55195255e-06]]


<br>
<br>

#### Random & Best

In [82]:
(random_model_predictions != test_pred).any()

True

<br>
<br>

## Evaluations

In [83]:
testing_.head()

Unnamed: 0,image,age_approx,anatom_site_general,sex,angle,name,drawn,image_paths,MEL,NV,BCC,AK,BKL,DF,VASC,SCC
1476,ISIC_0014628_downsampled,75.0,unknown,male,90,ISIC_0014628_downsampled-090.png,1,images/ISIC_0014628_downsampled-090.png,0,0,0,0,1,0,0,0
3139,ISIC_0031201,45.0,unknown,male,270,ISIC_0031201-270.png,1,images/ISIC_0031201-270.png,0,0,0,0,0,0,1,0
3832,ISIC_0030555,70.0,lower extremity,male,270,ISIC_0030555-270.png,1,images/ISIC_0030555-270.png,0,0,0,0,0,1,0,0
1082,ISIC_0071069,85.0,head/neck,male,180,ISIC_0071069-180.png,1,images/ISIC_0071069-180.png,1,0,0,0,0,0,0,0
3008,ISIC_0031329,55.0,head/neck,female,270,ISIC_0031329-270.png,1,images/ISIC_0031329-270.png,0,0,0,0,1,0,0,0


In [84]:
testing_[labels].sum(axis=0)

MEL     97
NV      98
BCC     98
AK      97
BKL     98
DF      97
VASC    97
SCC     98
dtype: int64

In [85]:
testing_.shape

(780, 16)

<br>

### Test Data Predictions

* In vector form for histograms: `data.reshape(data.size, -1)`

<br>

A set of predictions, and the corresponding ground truth

In [86]:
predictions = test_pred
ground_truth = testing_[labels].values

<br>

Thresholds

In [87]:
thresholds = np.arange(0, 0.9, 0.05)

<br>

A function that resets predictions w.r.t. a threshold, and subsequently determines the class that has the highest plausibility value, per record.  The class that has the highest values is set to 1, others are set to zero.  Remember, the prediction values are in the range $[0 \quad 1]$.

In [88]:
def limits(threshold, data):
  data = np.where(data > threshold, data, 0)
  return ( (data == data.max(axis=1, keepdims=True)) & (data > 0) ).astype(int)

In [89]:
def normal(x):
  y = decimal.Decimal(x)
  return float(decimal.Decimal(y.quantize(decimal.Decimal('.01'), rounding=decimal.ROUND_HALF_UP)))

<br>

Confusion matrix functions

In [90]:
def true_positive(threshold, data, truth):
  prediction = limits(threshold, data)
  instances = ((truth == prediction) & (truth == 1)).astype(int)

  n_per_class = instances.sum(axis=0, keepdims=True).squeeze(axis=0).tolist()

  return [normal(threshold)] + n_per_class


In [91]:
def true_negative(threshold, data, truth):
  prediction = limits(threshold, data)
  instances = ((truth == prediction) & (truth == 0)).astype(int)

  n_per_class = instances.sum(axis=0, keepdims=True).squeeze(axis=0).tolist()

  return [normal(threshold)] + n_per_class


In [92]:
def false_positive(threshold, data, truth):
  prediction = limits(threshold, data)
  instances = ((prediction == 1) & (truth == 0)).astype(int)

  n_per_class = instances.sum(axis=0, keepdims=True).squeeze(axis=0).tolist()

  return [normal(threshold)] + n_per_class

In [93]:
def false_negative(threshold, data, truth):
  prediction = limits(threshold, data)
  instances = ((prediction == 0) & (truth == 1)).astype(int)

  n_per_class = instances.sum(axis=0, keepdims=True).squeeze(axis=0).tolist()

  return [normal(threshold)] + n_per_class

<br>



In [94]:
tn = [true_negative(threshold, predictions, ground_truth) for threshold in thresholds]
fn = [false_negative(threshold, predictions, ground_truth) for threshold in thresholds]
tp = [true_positive(threshold, predictions, ground_truth) for threshold in thresholds]
fp = [false_positive(threshold, predictions, ground_truth) for threshold in thresholds]

In [95]:
pd.DataFrame(np.array(tn), columns=['threshold'] + labels)

Unnamed: 0,threshold,MEL,NV,BCC,AK,BKL,DF,VASC,SCC
0,0.0,622.0,645.0,679.0,404.0,641.0,637.0,660.0,649.0
1,0.05,622.0,645.0,679.0,404.0,641.0,637.0,660.0,649.0
2,0.1,622.0,645.0,679.0,404.0,641.0,637.0,660.0,649.0
3,0.15,622.0,645.0,679.0,404.0,641.0,637.0,660.0,649.0
4,0.2,622.0,646.0,682.0,409.0,666.0,640.0,660.0,682.0
5,0.25,629.0,651.0,682.0,683.0,681.0,649.0,660.0,682.0
6,0.3,644.0,658.0,682.0,683.0,682.0,663.0,660.0,682.0
7,0.35,655.0,666.0,682.0,683.0,682.0,680.0,664.0,682.0
8,0.4,664.0,675.0,682.0,683.0,682.0,683.0,669.0,682.0
9,0.45,672.0,682.0,682.0,683.0,682.0,683.0,671.0,682.0


In [96]:
pd.DataFrame(np.array(tp), columns=['thresholds'] + labels)

Unnamed: 0,thresholds,MEL,NV,BCC,AK,BKL,DF,VASC,SCC
0,0.0,44.0,30.0,0.0,84.0,17.0,24.0,51.0,7.0
1,0.05,44.0,30.0,0.0,84.0,17.0,24.0,51.0,7.0
2,0.1,44.0,30.0,0.0,84.0,17.0,24.0,51.0,7.0
3,0.15,44.0,30.0,0.0,84.0,17.0,24.0,51.0,7.0
4,0.2,44.0,30.0,0.0,83.0,7.0,23.0,51.0,0.0
5,0.25,44.0,30.0,0.0,0.0,0.0,20.0,51.0,0.0
6,0.3,37.0,27.0,0.0,0.0,0.0,13.0,50.0,0.0
7,0.35,30.0,20.0,0.0,0.0,0.0,2.0,48.0,0.0
8,0.4,23.0,7.0,0.0,0.0,0.0,0.0,48.0,0.0
9,0.45,17.0,3.0,0.0,0.0,0.0,0.0,43.0,0.0


In [97]:
pd.DataFrame(np.array(fp), columns=['thresholds'] + labels)

Unnamed: 0,thresholds,MEL,NV,BCC,AK,BKL,DF,VASC,SCC
0,0.0,61.0,37.0,3.0,279.0,41.0,46.0,23.0,33.0
1,0.05,61.0,37.0,3.0,279.0,41.0,46.0,23.0,33.0
2,0.1,61.0,37.0,3.0,279.0,41.0,46.0,23.0,33.0
3,0.15,61.0,37.0,3.0,279.0,41.0,46.0,23.0,33.0
4,0.2,61.0,36.0,0.0,274.0,16.0,43.0,23.0,0.0
5,0.25,54.0,31.0,0.0,0.0,1.0,34.0,23.0,0.0
6,0.3,39.0,24.0,0.0,0.0,0.0,20.0,23.0,0.0
7,0.35,28.0,16.0,0.0,0.0,0.0,3.0,19.0,0.0
8,0.4,19.0,7.0,0.0,0.0,0.0,0.0,14.0,0.0
9,0.45,11.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0


In [98]:
pd.DataFrame(np.array(fn), columns=['thresholds'] + labels)

Unnamed: 0,thresholds,MEL,NV,BCC,AK,BKL,DF,VASC,SCC
0,0.0,53.0,68.0,98.0,13.0,81.0,73.0,46.0,91.0
1,0.05,53.0,68.0,98.0,13.0,81.0,73.0,46.0,91.0
2,0.1,53.0,68.0,98.0,13.0,81.0,73.0,46.0,91.0
3,0.15,53.0,68.0,98.0,13.0,81.0,73.0,46.0,91.0
4,0.2,53.0,68.0,98.0,14.0,91.0,74.0,46.0,98.0
5,0.25,53.0,68.0,98.0,97.0,98.0,77.0,46.0,98.0
6,0.3,60.0,71.0,98.0,97.0,98.0,84.0,47.0,98.0
7,0.35,67.0,78.0,98.0,97.0,98.0,95.0,49.0,98.0
8,0.4,74.0,91.0,98.0,97.0,98.0,97.0,49.0,98.0
9,0.45,80.0,95.0,98.0,97.0,98.0,97.0,54.0,98.0


<br>
<br>

## Appendix

In [99]:
import datetime

In [100]:
namestring = datetime.datetime.now()

In [101]:
namestring.strftime('%Y%m%d.%H%M%S')

'20230925.181625'