In [3]:
import sys
sys.version_info

sys.version_info(major=3, minor=11, micro=6, releaselevel='final', serial=0)

In [2]:
import torch
import torch.nn as nn
from torch.cuda.amp import autocast
from easydict import EasyDict as edict
import yaml
import pandas as pd
import os, argparse, sys
# from ISIC2020_Dataset import ISIC2020TorchDataset, NORM_CHANNEL_MEAN, NORM_CHANNEL_STD
from torch.utils.data import DataLoader
import numpy as np
import torchvision.transforms as T
from time import perf_counter
from sklearn.metrics import confusion_matrix, roc_auc_score
from torchsummary import summary

# from DarwinNet_659MF import DarwinNetV2, IntDarwinNetV2, ResHead

In [4]:
import os
from torchvision.datasets.folder import pil_loader
from torch.utils.data import Dataset

NORM_CHANNEL_MEAN = [0.806, 0.621, 0.592]
NORM_CHANNEL_STD = [0.151, 0.177, 0.203]

class ISIC2020TorchDataset(Dataset):
    def __init__(self, data_files, isic_image_dir, targets=None, transform=None):
        self.isic_image_dir = isic_image_dir
        self.data = data_files
        self.targets = targets
        self.transform = transform

    def __len__(self):
        print(len(self.data))
        return len(self.data)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        image_path = os.path.join(self.isic_image_dir, self.data[idx])
        image = pil_loader(image_path)
        if self.transform is not None:
            image = self.transform(image)
            
        if self.targets is not None:
            target = self.targets[idx]
            return image, target
        else: 
            return image, self.data[idx]

In [6]:
"""

Module for building ResNet Module according to different config file
"""
from torch.nn import Module
import torch.nn as nn
import warnings
from collections import OrderedDict


def conv2d(w_in, w_out, k, *, stride=1, groups=1, dilation=1, bias=False):
    """Helper for building a conv2d layer."""
    assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues."
    s, p, g, b = stride, (k - 1) // 2, groups, bias
    return nn.Conv2d(w_in, w_out, k, stride=s, padding=p, groups=g, bias=b)


def norm2d(w_in):
    """Helper for building a norm2d layer."""
    return nn.BatchNorm2d(num_features=w_in, eps=1e-5, momentum=0.1)


def pool2d(_w_in, k, *, stride=1):
    """Helper for building a pool2d layer."""
    assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues."
    return nn.MaxPool2d(k, stride=stride, padding=(k - 1) // 2)


def pool2d_average(_w_in, k, *, stride=1):
    """Helper for building a pool2d layer."""
    assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues."
    return nn.AvgPool2d(k, stride=stride, padding=(k - 1) // 2)


def gap2d(_w_in):
    """Helper for building a gap2d layer."""
    return nn.AdaptiveAvgPool2d((1, 1))


def linear(w_in, w_out, *, bias=False):
    """Helper for building a linear layer."""
    return nn.Linear(w_in, w_out, bias=bias)


def activation():
    """Helper for building an activation layer."""
    return torch.nn.SiLU()


def get_transformation_function(func_name):
    "Returns the transformation function for ResNet Module"
    functions_available = {"basic": BasicTransform, "bottleneck": BottleneckTransform}
    try:
        func = functions_available[func_name]
        return func
    except:
        raise (ValueError("Function not available"))


class BasicTransform(Module):
    """Basic transformation: 3x3, BN, AF, 3x3, BN."""

    expansion: int = 1

    def __init__(
        self,
        inplanes,
        channels,
        stride=1,
        downsample=None,
        groups=1,
        base_width=64,
        dilation=1,
        attn=False,
    ):
        super(BasicTransform, self).__init__()
        if groups != 1 or base_width != 64:
            raise ValueError("BasicBlock only supports groups=1 and base_width=64")
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")

        self.stride = stride
        self.inplanes = inplanes
        self.channels = channels

        if stride != 1:
            self.avgpool = nn.AvgPool2d(2, stride=stride)
            self.conv1 = conv2d(self.inplanes, self.channels, 3, groups=4)
        else:
            self.conv1 = conv2d(
                self.inplanes, self.channels, 3, stride=self.stride, groups=4
            )

        # self.conv1 = conv2d(self.inplanes, self.channels, 3, stride=self.stride)
        self.bn1 = norm2d(self.channels)
        self.relu = activation()
        self.conv2 = conv2d(self.channels, self.channels, 3, groups=4)
        self.bn2 = norm2d(self.channels)
        self.downsample = downsample
        self.attn = attn
        # self.softmax = nn.Softmax()

    def forward(self, x):
        identity = x

        if self.stride != 1:
            out = self.avgpool(x)
            out = self.conv1(out)
        else:
            out = self.conv1(x)
        # out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.attn:
            # print("Using Attn in Basic Block")
            if self.downsample is not None:
                identity = self.downsample(x)
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    out = torch.nn.functional.softmax(out)
                out = torch.mul(out, identity)
            else:
                out += identity
        else:
            if self.downsample is not None:
                identity = self.downsample(x)
            out += identity

        out = self.relu(out)

        return out


class BottleneckTransform(Module):
    """Bottleneck transformation: 1x1, BN, AF, 3x3, BN, AF, 1x1, BN."""

    expansion: int = 2

    def __init__(
        self,
        inplanes,
        channels,
        stride=1,
        downsample=None,
        groups=1,
        base_width=64,
        dilation=1,
        attn=False,
    ):
        super(BottleneckTransform, self).__init__()
        width = int(channels * (base_width / 64.0)) * groups
        self.stride = stride
        self.conv1 = conv2d(inplanes, width, 1, groups=1)
        self.bn1 = norm2d(width)

        if stride != 1:
            self.avgpool = nn.AvgPool2d(2, stride=stride)
            self.conv2 = conv2d(width, width, 3, stride=1, groups=4, dilation=dilation)
        else:
            self.conv2 = conv2d(
                width, width, 3, stride=stride, groups=4, dilation=dilation
            )

        # self.conv2 = conv2d(width, width, 3, stride=stride, groups=groups, dilation=dilation)
        self.bn2 = norm2d(width)
        self.conv3 = conv2d(width, channels * self.expansion, 1, groups=1)
        self.bn3 = norm2d(channels * self.expansion)
        self.relu = activation()
        self.downsample = downsample
        self.stride = stride
        self.width = width
        self.inplanes = inplanes
        self.channels = channels
        self.groups = groups
        self.dilation = dilation
        self.downsample = downsample
        self.attn = attn

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        if self.stride != 1:
            out = self.avgpool(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.attn:
            if self.downsample is not None:
                identity = self.downsample(x)
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    out = torch.nn.functional.softmax(out)
                out = torch.mul(out, identity)
            else:
                out += identity
        else:
            if self.downsample is not None:
                identity = self.downsample(x)
            out += identity

        out = self.relu(out)

        return out


class ResHead(Module):
    """ResNet head: AvgPool, 1x1."""

    def __init__(self, w_in, num_classes):
        super(ResHead, self).__init__()
        self.w_in = w_in
        self.num_classes = num_classes
        self.avg_pool = gap2d(w_in)
        self.fc = linear(w_in, num_classes, bias=True)

    def forward(self, x):
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        return x
        # x = self.fc(x)
        # return x


class DarwinStemIN(Module):
    """ResNet stem for ImageNet: 7x7, BN, AF, MaxPool."""

    def __init__(self, w_in, w_out):
        super(DarwinStemIN, self).__init__()
        self.w_in = w_in
        self.w_out = w_out

        self.relu = activation()

        self.conv1 = conv2d(w_in, w_out, 3, stride=2)
        self.bn1 = norm2d(w_out)
        self.conv2 = conv2d(w_out, w_out, 3, stride=1, groups=4)
        self.bn2 = norm2d(w_out)

        self.pool = pool2d_average(w_out, 3, stride=2)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)

        x = self.pool(x)

        return x


class DarwinNetV2(Module):
    """DarwinNetV2 model."""

    def __init__(self, blockspecs, input_shape, num_classes, in_chans = 3, model_structure=[[None]]):
        super(DarwinNetV2, self).__init__()
        self.blockspecs = blockspecs
        
        self.channels = [max(4, b[0] - b[0] % 4) for b in self.blockspecs]
        self.model_depth = [b[1] for b in self.blockspecs]
        self.attn = [b[6] for b in self.blockspecs]
        
        
        self.replace_stride_with_dilation = [False for i in range(len(self.channels))]
        self.module_types = ["basic", "basic", "bottleneck", "bottleneck"]

        self.num_channels = len(self.channels)
        self.blocks = []
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.dilation = 1
        self.base_width = 64
        self.groups = 1
        assert self.num_channels == len(self.model_depth)
        self._construct_imagenet()

    def _construct_imagenet(self):
        depth = self.model_depth
        self.inplanes = 32
        self.stem = DarwinStemIN(self.input_shape[2], self.inplanes)

        for i in range(len(self.channels)):
            if i == 0:
                self.blocks.append(
                    self._make_layer(
                        get_transformation_function(self.module_types[i]),
                        self.channels[i],
                        stride=1,
                        depth=depth[i],
                        attn=self.attn[i],
                    )
                )
            else:
                self.blocks.append(
                    self._make_layer(
                        get_transformation_function(self.module_types[i]),
                        self.channels[i],
                        stride=2,
                        depth=depth[i],
                        dilate=self.replace_stride_with_dilation[i - 1],
                        attn=self.attn[i],
                    )
                )
        self.blocks = nn.ModuleList(self.blocks)
        self.head = ResHead(
            self.channels[self.num_channels - 1]
            * get_transformation_function(
                self.module_types[self.num_channels - 1]
            ).expansion,
            self.num_classes,
        )
        # model = nn.Sequential(*[self.stem, self.blocks, self.head])
        # return model

    def _make_layer(self, module_type, channels, stride, depth, dilate=False, attn=False):
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1

        if stride != 1:
            downsample = nn.Sequential(
                nn.AvgPool2d(2, stride=stride),
                conv2d(
                    self.inplanes,
                    channels * module_type.expansion,
                    1,
                    stride=1,
                    groups=1,
                ),
                norm2d(channels * module_type.expansion),
            )
        elif self.inplanes != channels * module_type.expansion:
            downsample = nn.Sequential(
                conv2d(
                    self.inplanes,
                    channels * module_type.expansion,
                    1,
                    stride=stride,
                    groups=1,
                ),
                norm2d(channels * module_type.expansion),
            )

        layers = []
        layers.append(
            module_type(
                self.inplanes,
                channels,
                stride,
                downsample,
                self.groups,
                self.base_width,
                previous_dilation,
                attn=attn,
            )
        )
        self.inplanes = channels * module_type.expansion
        for _ in range(1, depth):
            layers.append(
                module_type(
                    self.inplanes,
                    channels,
                    groups=self.groups,
                    base_width=self.base_width,
                    dilation=self.dilation,
                    attn=attn,
                )
            )

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.stem(x)
        for i in range(self.num_channels):
            x = self.blocks[i](x)
        x = self.head(x)
        return x
    
class IntDarwinNetV2(DarwinNetV2):
    def __init__(self, output_layer, *args):
        self.output_layer = output_layer
        super().__init__(*args)

        self._layers = []
        for l in list(self._modules.keys()):
            self._layers.append(l)
            if l == output_layer:
                break
        self.layers = OrderedDict(zip(self._layers,[getattr(self,l) for l in self._layers]))

    def _forward_impl(self, x):
        for l in self._layers:
            x = self.layers[l](x)

        return x

    def forward(self, x):
        return self._forward_impl(x)


In [7]:
def args_parser():
    parser = argparse.ArgumentParser(description='CancerNet-SCa x AttendNeXt Inferece')
    parser.add_argument('--weightspath', default='final-model.pth', type=str, help='Path to trained model weights')
    parser.add_argument('-mc', '--model-config', default='DarwinNet_XA_325MF.yml', help='Path to model config file')
    parser.add_argument('-dd', '--data-dir', type=str, default='ISIC-images/', help='Sample image directory')
    parser.add_argument('-tf', '--split-file', type=str, default='metadata.csv', help='CSV file with actual results for image directory')
    parser.add_argument('-sz', '--size', type=int, default=224, help='Square image size')
    parser.add_argument('-mp', '--mixed-precision', action='store_true', help='Flag to enable mixed-precision')
    parser.add_argument('-df', '--data-file-name', type=str, default='sample_data_predictions.csv', help='Name of csv file for predictions')
    parser.add_argument('-gi', '--gpu-id', type=int, default=0, help='GPU device ID')
    parser.add_argument("-f", required=False)

    args = parser.parse_args()
    return args

args = args_parser()
# start_time = perf_counter()

print("Starting script")

num_classes = 1

HYPERPARAMETERS = yaml.load(open(args.model_config, "r"), Loader=yaml.FullLoader)
config = edict(HYPERPARAMETERS)
device = 'cuda:{}'.format(args.gpu_id) if args.gpu_id >= 0 else 'cpu'
model = DarwinNetV2(config.model_cfg, config.input_shape, num_classes)
state_dict = torch.load(args.weightspath, map_location=torch.device('cpu'))
model.load_state_dict(state_dict)
model = model.to(torch.device('cpu'))
# summary(model,input_size=(3, 224, 224))


test_tform = T.Compose([
    T.Resize((args.size, args.size)),
    T.ToTensor(),
    T.Normalize(NORM_CHANNEL_MEAN, NORM_CHANNEL_STD)
])


data_df = pd.read_csv(args.split_file)
data_files = data_df['isic_id'].to_numpy()
actual_vals = data_df['benign_malignant'].to_numpy()
actual_diag = data_df['diagnosis'].to_numpy()
isic_dataset = ISIC2020TorchDataset(
    data_files, args.data_dir, transform=test_tform
)
test_loader = DataLoader(isic_dataset, batch_size=256, shuffle=False, num_workers=0)

# all_predictions = []
all_image_names = []
all_embeddings = []
model.eval()
start_time = perf_counter()
with torch.no_grad():
    for data, image_names in test_loader:
        data = data.to(torch.device('cpu'))
        with autocast(enabled=args.mixed_precision):
            embeddings = model((data))
            print(embeddings.detach().cpu().numpy().shape)
            # logits = torch.squeeze(model(data))
            # predictions = torch.sigmoid(logits)
            # save embeddings here
        all_embeddings.append(embeddings.detach().cpu().numpy())
        for image_name in image_names:
            all_image_names.append(image_name)
end_time = perf_counter()
actual_vals = np.array(actual_vals)
diagnosis = np.array(actual_diag)
elapsed_time = end_time - start_time
print(f"Elapsed time: {elapsed_time}")
print(np.asarray(all_embeddings).shape)
print(all_image_names)

Starting script
58
(58, 744)
Elapsed time: 13.391657224972732
(1, 58, 744)
['ISIC_1091339.JPG', 'ISIC_3069438.JPG', 'ISIC_3149813.JPG', 'ISIC_8803373.JPG', 'ISIC_9078177.JPG', 'ISIC_0015719.JPG', 'ISIC_0052212.JPG', 'ISIC_0247330.JPG', 'ISIC_0457385.JPG', 'ISIC_0647125.JPG', 'ISIC_0721601.JPG', 'ISIC_1000616.JPG', 'ISIC_1137455.JPG', 'ISIC_1444924.JPG', 'ISIC_1831970.JPG', 'ISIC_1891091.JPG', 'ISIC_1903848.JPG', 'ISIC_2075121.JPG', 'ISIC_2355513.JPG', 'ISIC_2696751.JPG', 'ISIC_2728181.JPG', 'ISIC_2803072.JPG', 'ISIC_3065175.JPG', 'ISIC_3187493.JPG', 'ISIC_3230575.JPG', 'ISIC_3305120.JPG', 'ISIC_3424187.JPG', 'ISIC_3437472.JPG', 'ISIC_3582787.JPG', 'ISIC_3678711.JPG', 'ISIC_3733653.JPG', 'ISIC_4207079.JPG', 'ISIC_4875503.JPG', 'ISIC_4970724.JPG', 'ISIC_5158910.JPG', 'ISIC_6403739.JPG', 'ISIC_6950870.JPG', 'ISIC_7059484.JPG', 'ISIC_7084553.JPG', 'ISIC_7246266.JPG', 'ISIC_7594243.JPG', 'ISIC_7703479.JPG', 'ISIC_7886280.JPG', 'ISIC_8114263.JPG', 'ISIC_8495716.JPG', 'ISIC_8671159.JPG', 'ISI

In [33]:
from PIL import Image
import matplotlib.pyplot as plt
import math

def most_common(lst):
    return max(set(lst), key=lst.count)
#     counter = 0
#     my_max = ""
#     max_index = 0

#     if len(lis) == 1:
#         my_max = ""
#         max_index = 0
#     else:
#         for i in range(1,len(lis)):
#             if lis[i] == lis[i-1]:
#                 counter += 1
#             else:
#                 counter = 1
#             if counter > my_max:
#                 my_max = counter
#                 max_index = i

def plot_similar_images(indices_list, input_ind):
    correct_vals = 0
    input_name = all_image_names[input_ind]
    img_path = os.path.join(args.data_dir + input_name)
    print("Input:")
    print(img_path)
    acc = actual_vals[input_ind]
    print(f"Benign/Malignant: {actual_vals[input_ind]}")
    img = Image.open(img_path).convert("RGB")
#     plt.imshow(img)
#     plt.show()
    print("Results")
    diagnoses = []
    for index in indices_list:
        img_name = all_image_names[index]
        img_path = os.path.join(args.data_dir + img_name)
        print(img_path)
        print(f"Benign/Malignant: {actual_vals[index]}")
        prediction = actual_vals[index]
#         print(f"Diagnosis: {prediction}")
        diagnoses.append(prediction)
        img = Image.open(img_path).convert("RGB")
#         plt.imshow(img)
#         plt.show()
        
    common_diag = most_common(diagnoses)  
    print(common_diag)
    if common_diag == acc:
        correct_vals += 1
    print(correct_vals)
    return correct_vals

In [37]:
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

def find_nearest_k(k, input_ind, all_embeddings):
    knn = NearestNeighbors(n_neighbors=k, metric="cosine")
    all_embeddings = np.asarray(all_embeddings)
    all_embeddings = all_embeddings.reshape((all_embeddings.shape[1], -1))
    embeddings = np.delete(all_embeddings, (input_ind), axis=0)
    knn.fit(embeddings)
    _, indices = knn.kneighbors(all_embeddings[input_ind].reshape((1, -1)))
    indices_list = indices.tolist()[0]
#     print(indices_list)
    for i in range(len(indices_list)):
        if indices_list[i] >= input_ind:
            indices_list[i] = indices_list[i] + 1
        
#     print(indices_list)
    return indices_list, all_embeddings
# start_time = perf_counter()
# indices_list, _= find_nearest_k(5, 2, all_embeddings)
# end_time = perf_counter()

# elapsed_time = end_time - start_time
# print(elapsed_time * 1000)
corr = 0
for i in range(58):
    indices_list, _= find_nearest_k(7, i, all_embeddings)
    corr += plot_similar_images(indices_list, input_ind)
print(corr)

Input:
ISIC-images/ISIC_7594243.JPG
Benign/Malignant: malignant
Results
ISIC-images/ISIC_1831970.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_1000616.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_4970724.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_3149813.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_4875503.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_3733653.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_0647125.JPG
Benign/Malignant: malignant
malignant
1
Input:
ISIC-images/ISIC_7594243.JPG
Benign/Malignant: malignant
Results
ISIC-images/ISIC_7703479.JPG
Benign/Malignant: benign
ISIC-images/ISIC_1241693.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_4875503.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_4970724.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_4489445.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_0721601.JPG
Benign/Malignant: benign
ISIC-images/ISIC_3437472.JPG
Benign/Malignant: benign
malignant
1
Input:
ISIC-images/ISIC_7594243.JPG
Benign/

ISIC-images/ISIC_4875503.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_9458769.JPG
Benign/Malignant: benign
ISIC-images/ISIC_0721601.JPG
Benign/Malignant: benign
benign
0
Input:
ISIC-images/ISIC_7594243.JPG
Benign/Malignant: malignant
Results
ISIC-images/ISIC_9301333.JPG
Benign/Malignant: benign
ISIC-images/ISIC_0052212.JPG
Benign/Malignant: benign
ISIC-images/ISIC_2803072.JPG
Benign/Malignant: benign
ISIC-images/ISIC_6403739.JPG
Benign/Malignant: benign
ISIC-images/ISIC_8803373.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_7886280.JPG
Benign/Malignant: benign
ISIC-images/ISIC_3424187.JPG
Benign/Malignant: malignant
benign
0
Input:
ISIC-images/ISIC_7594243.JPG
Benign/Malignant: malignant
Results
ISIC-images/ISIC_0247330.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_6950870.JPG
Benign/Malignant: benign
ISIC-images/ISIC_3149813.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_3733653.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_3187493.JPG
Benign/Malignant: benign
ISIC-image

ISIC-images/ISIC_9301333.JPG
Benign/Malignant: benign
ISIC-images/ISIC_7059484.JPG
Benign/Malignant: benign
ISIC-images/ISIC_8803373.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_2728181.JPG
Benign/Malignant: benign
benign
0
Input:
ISIC-images/ISIC_7594243.JPG
Benign/Malignant: malignant
Results
ISIC-images/ISIC_2696751.JPG
Benign/Malignant: benign
ISIC-images/ISIC_3149813.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_0647125.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_1831970.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_3733653.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_4489445.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_1241693.JPG
Benign/Malignant: malignant
malignant
1
Input:
ISIC-images/ISIC_7594243.JPG
Benign/Malignant: malignant
Results
ISIC-images/ISIC_2728181.JPG
Benign/Malignant: benign
ISIC-images/ISIC_4207079.JPG
Benign/Malignant: benign
ISIC-images/ISIC_6403739.JPG
Benign/Malignant: benign
ISIC-images/ISIC_0052212.JPG
Benign/Malignant: benign
ISIC

ISIC-images/ISIC_5158910.JPG
Benign/Malignant: benign
ISIC-images/ISIC_3230575.JPG
Benign/Malignant: benign
ISIC-images/ISIC_8811120.JPG
Benign/Malignant: benign
ISIC-images/ISIC_7886280.JPG
Benign/Malignant: benign
ISIC-images/ISIC_9458769.JPG
Benign/Malignant: benign
ISIC-images/ISIC_0015719.JPG
Benign/Malignant: benign
benign
0
Input:
ISIC-images/ISIC_7594243.JPG
Benign/Malignant: malignant
Results
ISIC-images/ISIC_3187493.JPG
Benign/Malignant: benign
ISIC-images/ISIC_2075121.JPG
Benign/Malignant: benign
ISIC-images/ISIC_3305120.JPG
Benign/Malignant: benign
ISIC-images/ISIC_3678711.JPG
Benign/Malignant: benign
ISIC-images/ISIC_1444924.JPG
Benign/Malignant: benign
ISIC-images/ISIC_7084553.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_2696751.JPG
Benign/Malignant: benign
benign
0
Input:
ISIC-images/ISIC_7594243.JPG
Benign/Malignant: malignant
Results
ISIC-images/ISIC_3069438.JPG
Benign/Malignant: malignant
ISIC-images/ISIC_7703479.JPG
Benign/Malignant: benign
ISIC-images/ISIC_49707

#### pca = PCA(n_components=10).fit_transform(embeddings)
kmeans = KMeans(init='k-means++', n_clusters=5, n_init=4)
kmeans.fit(pca)

output = kmeans.predict(pca)
x = embeddings

plt.figure(figsize=(6,6))
plt.scatter(x[output==0,0], x[output==0,1], label='category 0')
plt.scatter(x[output==1,0], x[output==1,1], label='category 1')
plt.scatter(x[output==2,0], x[output==2,1], label='category 2')
plt.scatter(x[output==3,0], x[output==3,1], label='category 3')
plt.scatter(x[output==4,0], x[output==4,1], label='category 4')
plt.legend()
plt.title("k-means clustering $k=5$")
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
plt.show()