In [5]:
# Fine-tune InceptionV3 (a successor of GoogLeNet / Inception v1) with TensorFlow Keras

import tensorflow as tf
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau


In [6]:
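# Load the ISIC 2019 ground-truth labels and split the image names into train/test lists.
# NOTE: this cell was originally described as randomly distributing images from train and val
# into random500_0 .. random500_19 so that each has 500 images; no active code in this cell does that.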

import os
import shutil
import random
import pandas as pd


from sklearn.model_selection import train_test_split
import os
import cv2

def _pair_names_with_class_idx(image_names, one_hot_labels, class_to_idx, isic_data_map):
    """Turn one-hot label rows into (image name, class index) pairs, setting "UNK" rows aside.

    Args:
        image_names (pd.Series): image names, positionally aligned with ``one_hot_labels``.
        one_hot_labels (pd.DataFrame): one row per image, one column per label code.
        class_to_idx (dict): maps a full class name to its integer index.
        isic_data_map (dict): maps a label code (e.g. "MEL") to its full class name.

    Returns:
        tuple: ``(labelled, unknown)`` where ``labelled`` is a list of
        ``(image name, class index)`` tuples and ``unknown`` is a list of the
        image names whose label column is "UNK".

    Raises:
        IndexError: if a row has no column equal to 1.
    """
    labelled, unknown = [], []
    for pos in range(len(image_names)):
        img_name = image_names.iloc[pos]
        row = one_hot_labels.iloc[pos]
        # Name of the first column flagged with 1 in this row.
        label = row[row == 1].index[0]
        if label == "UNK":
            unknown.append(img_name)
        else:
            labelled.append((img_name, class_to_idx[isic_data_map[label]]))
    return labelled, unknown


def load_isic_2019(csv_path='../ISIC_2019/ISIC_2019_Training_GroundTruth.csv',
                   test_size=0.1, random_state=1):
    """
    Read the ISIC 2019 ground-truth CSV and split it into train/test label lists.

    Only image *names* are handled here; no image files are opened.

    Args:
        csv_path (str): location of the ground-truth CSV. Its first column must be
            ``image``; the remaining columns are one-hot label columns.
        test_size (float): fraction of the rows held out for the test split (default: 0.1)
        random_state (int): seed passed to ``train_test_split`` (default: 1)

    Returns:
        Tuple[list, list, list]: ``(raw_data_train, raw_data_test, unknown_labels)``.
        ``raw_data_train`` and ``raw_data_test`` are lists of ``(image name, class index)``
        tuples. ``unknown_labels`` lists the names of the *training* images labelled "UNK";
        test images labelled "UNK" are dropped without being recorded.

    Raises:
        KeyError: if a label column (other than the last one) is not a key of the
            code-to-name map below.
        IndexError: if a row has no label column equal to 1.
    """
    raw_data_meta_df = pd.read_csv(csv_path)

    isic_data_map = {
        "MEL": "Melanoma",
        "NV": "Melanocytic_nevus",
        "BCC": "Basal_cell_carcinoma",
        "AK": "Actinic_keratosis",
        "BKL": "Benign_keratosis",
        "DF": "Dermatofibroma",
        "VASC": "Vascular_lesion",
        "SCC": "Squamous_cell_carcinoma"
    }

    # Class indices follow the CSV column order. The first column ('image') and the
    # last column are excluded (the last one is presumably "UNK" -- confirm against the CSV).
    class_codes = list(raw_data_meta_df.columns[1:-1])
    class_to_idx = {isic_data_map[code]: idx for idx, code in enumerate(class_codes)}

    X = raw_data_meta_df['image']  # only image names, not actual images
    y = raw_data_meta_df.iloc[:, 1:]

    # Stratify on the full one-hot rows so both splits keep the same label proportions.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    raw_data_train, unknown_labels = _pair_names_with_class_idx(
        X_train, y_train, class_to_idx, isic_data_map
    )
    # "UNK" images in the test split are intentionally not reported.
    raw_data_test, _ = _pair_names_with_class_idx(
        X_test, y_test, class_to_idx, isic_data_map
    )

    return raw_data_train, raw_data_test, unknown_labels

# def main():

#     # path to the directory that contains train and val

    


#     # source = os.path.join('..//../ISIC_2019', 'ISIC_2019_Training_Input')
#     # target = os.path.join('./', 'random_discovery' )
#     # files = os.listdir(source)
#     # for file in files[:500]:
#     #     print(file)
#     #     shutil.copy(os.path.join(source, file), target)

# main()
# Build the train/test lists of (image name, class index) tuples, plus the names of
# the training images whose label is "UNK".
raw_data_train, raw_data_test, unknown_labels = load_isic_2019()


In [None]:
# Backbone: InceptionV3 with ImageNet weights and no classifier head,
# configured for 256x256 inputs with 3 channels.
base_model = InceptionV3(input_shape=(256, 256, 3), include_top=False, weights='imagenet')

# Freeze every backbone layer so that only the new head below has trainable weights.
for layer in base_model.layers:
    layer.trainable = False

# New head: global average pooling -> 1024-unit ReLU layer -> 8-class softmax.
x = Dense(1024, activation='relu')(GlobalAveragePooling2D()(base_model.output))
# x = Dropout(0.5)(x)  # dropout on the 1024-unit layer is currently disabled
predictions = Dense(8, activation='softmax')(x)

# Full network: from the backbone's input to the new softmax output.
model = Model(inputs=base_model.input, outputs=predictions)

    
