In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split

def get_image_paths(directory):
    image_paths = []
    for subdir, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".png"):
                image_paths.append(os.path.join(subdir, file))
    return image_paths


dataset_directory = '/kaggle/input/data'  
image_paths = get_image_paths(dataset_directory)

csv_file_path = '/kaggle/input/data/Data_Entry_2017.csv'  
data_entry = pd.read_csv(csv_file_path)

image_name_to_path_dict = {os.path.basename(path): path for path in image_paths}
filtered_data_entry = data_entry[data_entry['Image Index'].isin(image_name_to_path_dict.keys())]
filtered_data_entry['Full Image Path'] = filtered_data_entry['Image Index'].apply(lambda x: image_name_to_path_dict[x])


mlb = MultiLabelBinarizer()
labels_list = filtered_data_entry['Finding Labels'].str.split('|').tolist()
encoded_labels = mlb.fit_transform(labels_list)

tf_dataset = pd.DataFrame({
    'Image Path': filtered_data_entry['Full Image Path'],
    'Label': list(encoded_labels)
})


train_df, validation_df = train_test_split(tf_dataset, test_size=0.2, random_state=42)


def parse_image(filename, label):
    image = tf.io.read_file(filename)
    image = tf.image.decode_png(image, channels=1)
    image = tf.image.resize(image, [240, 240])
    image = image / 255.0
    label = tf.cast(label, dtype=tf.float32)
    return image, label


train_dataset = tf.data.Dataset.from_tensor_slices((train_df['Image Path'].values, np.vstack(train_df['Label'].values)))
train_dataset = train_dataset.map(parse_image).shuffle(buffer_size=1000).batch(32)

validation_dataset = tf.data.Dataset.from_tensor_slices((validation_df['Image Path'].values, np.vstack(validation_df['Label'].values)))
validation_dataset = validation_dataset.map(parse_image).batch(32)



In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout

In [3]:
all_labels = data_entry['Finding Labels'].str.split('|').explode().unique()
num_classes = len(all_labels)

In [4]:
inputs= keras.Input(shape=(240, 240,1))


x = Conv2D(128, kernel_size=(3, 3), activation='relu',padding='SAME')(inputs)
x = Conv2D(128, kernel_size=(3, 3),padding='SAME')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.ReLU()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.3)(x)
x = Conv2D(128, kernel_size=(3, 3), activation='relu',padding='SAME')(x)
x = Conv2D(128, kernel_size=(3, 3),padding='SAME')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.ReLU()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.3)(x)

x = Conv2D(64, kernel_size=(3, 3), activation='relu',padding='SAME')(inputs)
x = Conv2D(64, kernel_size=(3, 3),padding='SAME')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.ReLU()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.3)(x)
x = Conv2D(64, kernel_size=(3, 3), activation='relu',padding='SAME')(x)
x = Conv2D(64, kernel_size=(3, 3),padding='SAME')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.ReLU()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.3)(x)


x = Conv2D(32, kernel_size=(3, 3), activation='relu',padding='SAME')(x)
x = Conv2D(32, kernel_size=(3, 3),padding='SAME')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.ReLU()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.3)(x)
x = Conv2D(32, kernel_size=(3, 3), activation='relu',padding='SAME')(x)
x = Conv2D(32, kernel_size=(3, 3),padding='SAME')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.ReLU()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.3)(x)



x = Conv2D(16, kernel_size=(3, 3), activation='relu',padding='SAME')(x)
x = Conv2D(16, kernel_size=(3, 3),padding='SAME')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.ReLU()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.3)(x)
x = Conv2D(16, kernel_size=(3, 3), activation='relu',padding='SAME')(x)
x = Conv2D(16, kernel_size=(3, 3),padding='SAME')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.ReLU()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.3)(x)


x = Flatten(name='flatten')(x)
x = Dense(units=300, activation='relu')(x)
outputs = Dense(units=num_classes, activation='sigmoid')(x)
model= keras.Model(inputs=inputs, outputs=outputs)

In [5]:
all_labels

array(['Cardiomegaly', 'Emphysema', 'Effusion', 'No Finding', 'Hernia',
       'Infiltration', 'Mass', 'Nodule', 'Atelectasis', 'Pneumothorax',
       'Pleural_Thickening', 'Pneumonia', 'Fibrosis', 'Edema',
       'Consolidation'], dtype=object)

In [6]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

In [7]:
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer


csv_file_path = '/kaggle/input/data/Data_Entry_2017.csv'
data_entry = pd.read_csv(csv_file_path)

labels = data_entry['Finding Labels'].str.split('|').tolist()

mlb = MultiLabelBinarizer()
mlb.fit(labels)

binary_labels = mlb.transform(labels)

class_counts = binary_labels.sum(axis=0)
n_samples = len(labels)

class_weights = (n_samples / class_counts)

min_weight = min(class_weights)
class_weights_normalized = class_weights / min_weight

class_weights = {i: weight for i, weight in enumerate(class_weights_normalized)}

class_weights


{0: 5.221991521757937,
 1: 21.743876080691642,
 2: 12.93357617313049,
 3: 26.209726443768997,
 4: 4.532627468649094,
 5: 23.99085850556439,
 6: 35.801304863582445,
 7: 265.90748898678413,
 8: 3.0341308937368052,
 9: 10.439467312348668,
 10: 1.0,
 11: 9.534196809350814,
 12: 17.831905465288035,
 13: 42.18099231306779,
 14: 11.384571859675594}

In [8]:
class_counts

array([11559,  2776,  4667,  2303, 13317,  2516,  1686,   227, 19894,
        5782, 60361,  6331,  3385,  1431,  5302])

In [9]:
model.compile(
  optimizer='adam',
  loss=tf.losses.BinaryCrossentropy(),
  metrics=['accuracy'])

In [10]:
def get_early_stopping():
    
    return tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
     
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                  patience=6, min_lr=0.000001)

In [11]:
from tensorflow import keras

In [12]:
checkpoint_filepath = "./checkpoint"
checkpoint_callback = keras.callbacks.ModelCheckpoint(
        checkpoint_filepath,
        monitor="val_accuracy",
        save_best_only=True,
        save_weights_only=True,
)

In [13]:
early_stopping = get_early_stopping()

callbacks = [checkpoint_callback, early_stopping, reduce_lr]

In [None]:
history = model.fit(train_dataset, validation_data=validation_dataset, batch_size=32, epochs=30,
                    class_weight=class_weights, callbacks=callbacks)

Epoch 1/30


2023-12-27 19:03:26.605697: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel/dropout_2/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


