In [1]:
import os
from glob import glob

import numpy as np
import pandas as pd
import tensorflow as tf
from keras.preprocessing import image
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.applications.vgg16 import VGG16
from tqdm import tqdm

In [2]:
def get_image_information(csv_path='../input/data/Data_Entry_2017.csv'):
    """Map every image name in the metadata CSV to its list of labels.

    The CSV is read, the metadata columns that are not needed are dropped, and
    the two columns that remain are used by position: column 0 as the image
    name and column 1 as a ``'|'``-separated label string.

    Parameters
    ----------
    csv_path : str, optional
        Location of the metadata CSV. Defaults to the path this notebook has
        always read from, so existing zero-argument calls are unaffected.

    Returns
    -------
    dict
        ``{image_name: [label, ...]}``. If an image name appears in more than
        one row, the last row wins.

    Raises
    ------
    KeyError
        If any of the columns listed for removal is absent from the CSV.
    """
    unused_columns = [
        'Follow-up #', 'Patient ID', 'Patient Age', 'Patient Gender', 'View Position',
        'OriginalImage[Width', 'Height]', 'OriginalImagePixelSpacing[x', 'y]', 'Unnamed: 11',
    ]
    df = pd.read_csv(csv_path).drop(columns=unused_columns)

    # Walk the rows the file actually contains instead of a hard-coded row
    # count, which raised IndexError on a shorter file and silently ignored
    # the tail of a longer one.
    names = df.iloc[:, 0]
    labels = df.iloc[:, 1]
    return {name: label.split('|') for name, label in zip(names, labels)}

In [3]:
def get_image_paths():
    """Index every PNG matching ``../input/data/images*/images/*.png`` by file name.

    Returns
    -------
    dict
        ``{file_name: path}`` where ``path`` is the string returned by ``glob``.
        If the same file name is matched more than once, the path that
        ``glob`` yields last is the one kept.
    """
    pattern = os.path.join('..', 'input', 'data', 'images*', 'images', '*.png')
    return {os.path.basename(png_path): png_path for png_path in glob(pattern)}

In [4]:
def dict_to_df(img_labels):
    """Turn the ``{image_name: [label, ...]}`` mapping into a flat table.

    The result has one row per image: an ``Image_Name`` column holding the
    dictionary key, followed by ``Disease_1`` .. ``Disease_9`` holding the
    labels in list order. Slots beyond the end of a shorter label list are
    left as missing values.

    NOTE(review): the nine column names are fixed, so the longest label list
    is expected to have nine entries -- confirm against the data set.
    """
    disease_columns = [f'Disease_{position}' for position in range(1, 10)]
    label_table = pd.DataFrame.from_dict(img_labels, orient='index', columns=disease_columns)
    return label_table.reset_index().rename(columns={'index': 'Image_Name'})

In [5]:
def path_to_tensor(img_path, shape):
    """Load one image file as an RGB array divided by 255.

    Parameters
    ----------
    img_path : str
        Path of the image file to read.
    shape : tuple
        Forwarded to ``load_img`` as ``target_size``; Keras documents this as
        ``(height, width)``.

    Returns
    -------
    Array produced by ``img_to_array`` with every value divided by 255
    (i.e. scaled to [0, 1] assuming 8-bit source images -- TODO confirm).
    """
    # ``grayscale`` is intentionally not passed: it is a deprecated alias that
    # newer Keras releases reject, and color_mode="rgb" already requests a
    # three-channel image.
    img = tf.keras.preprocessing.image.load_img(img_path, color_mode = "rgb", target_size = shape)
    return tf.keras.preprocessing.image.img_to_array(img) / 255

In [6]:
def list_of_tensors(img_paths, shape):
    """Load several images and stack them into a single batch array.

    Parameters
    ----------
    img_paths : iterable of str
        Paths of the image files to load; a tqdm progress bar is shown while
        they are read.
    shape : tuple
        Target size forwarded unchanged to ``path_to_tensor`` for every image.

    Returns
    -------
    numpy.ndarray
        The per-image arrays joined along a new leading axis, so element ``i``
        of the result is the array loaded from ``img_paths[i]``.

    Raises
    ------
    ValueError
        If ``img_paths`` is empty (``np.stack`` needs at least one array).
    """
    tensors = [path_to_tensor(img_path, shape) for img_path in tqdm(img_paths)]
    # np.stack adds a new first axis (one entry per image). np.vstack would
    # instead concatenate along the images' existing first axis and glue all
    # images together into one tall array.
    return np.stack(tensors)

In [7]:
# Label lookup: image file name -> list of labels parsed from the metadata CSV.
image_labels = get_image_information()
# Path lookup: image file name -> path of the matching PNG found by glob.
image_paths = get_image_paths()
# Tabular form of the labels: one row per image, one column per label slot.
image_info_df  = dict_to_df(image_labels)

In [8]:
# Fixed seed so that Restart & Run All reproduces exactly the same subsets.
SPLIT_SEED = 42

# Hold out 20% of all images for testing, then 20% of the remainder for
# validation (roughly 64% / 16% / 20% of the full table).
# NOTE(review): rows are split per image; if several images can belong to the
# same patient, a patient-level split may be needed to avoid leakage -- confirm.
train_val_df, test_df = train_test_split(image_info_df, test_size = 0.20, random_state = SPLIT_SEED)
train_df, val_df = train_test_split(train_val_df, test_size = 0.2, random_state = SPLIT_SEED)
print(f'Training Subset = {train_df.shape[0]} images \nValidation Subset = {val_df.shape[0]} images \nTesting Subset = {test_df.shape[0]} images')

Training Subset = 71756 images 
Validation Subset = 17940 images 
Testing Subset = 22424 images


In [9]:
# Spatial size intended for loaded images; the model in this notebook is built
# for input_shape = (224, 224, 3).
img_shape = (224, 224)

# NOTE(review): the disabled code below is kept for reference only. It reads a
# 'Path' column, but dict_to_df only produces 'Image_Name' and
# 'Disease_1'..'Disease_9', so a path column (for example
# train_df['Image_Name'].map(image_paths)) has to be added before these lines
# can be re-enabled. Loading every image into memory at once may also not fit
# in RAM -- TODO confirm before enabling.
# train_tensors = []
# for key in image_paths:
#     train_tensors = list_of_tensors()
# train_tensors = list_of_tensors(train_df['Path'].to_list(), shape = img_shape)
# val_tensors = list_of_tensors(val_df['Path'].to_list(), shape = img_shape)
# test_tensors = list_of_tensors(test_df['Path'].to_list(), shape = img_shape)
# path_to_tensor(image_paths["00015852_001.png"], img_shape)

In [10]:
# Small convolutional classifier: three 3x3 convolution stages (the first two
# followed by 2x2 max pooling), then a 512-unit dense layer with 50% dropout
# and a 10-way softmax output.
model = tf.keras.models.Sequential([
    # Feature extractor
    layers.Conv2D(32, (3, 3), activation = 'relu', input_shape = (224, 224, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation = 'relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation = 'relu'),
    layers.Flatten(),
    # Classifier head
    layers.Dense(512, activation = 'relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation = 'softmax'),
])

2022-10-01 18:45:24.060355: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [11]:
# The model's last layer already applies softmax, so the loss receives
# probabilities, not logits. from_logits must therefore be False; with True the
# loss would apply softmax a second time and train on the wrong quantity.
# NOTE(review): the labels in this notebook are split on '|' into several
# columns per image, which suggests a multi-label problem, whereas sparse
# categorical cross-entropy expects one integer class per sample -- confirm
# the intended target encoding before training.
model.compile(
    optimizer = 'adam',
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = False),
    metrics = ['accuracy'],
)

In [12]:
# results = model.fit(train_imgs, train_lbls, epochs = 30, validation_split = 0.1)

In [13]:
# base_model = VGG16(weights='imagenet', include_top = False, input_shape = train_tensors.shape[1:])

# add_model = Sequential()
# add_model.add(Flatten(input_shape = base_model.output_shape[1:]))
# add_model.add(Dropout(0.2))
# add_model.add(Dense(256, activation='relu'))
# add_model.add(Dropout(0.2))
# add_model.add(Dense(50, activation='relu'))
# add_model.add(Dropout(0.2))
# add_model.add(Dense(1, activation='sigmoid'))

# model = Model(inputs = base_model.input, outputs = add_model(base_model.output))

# model.summary()
# add_model.summary()