In [1]:
# !unzip /content/drive/MyDrive/Hira/CheXpert-v1.0-small.zip

In [2]:
import pandas as pd

def load_data(csv):
    df = pd.read_csv(csv)
    # Keep only Path and Pneumonia columns
    df = df[["Path", "Pneumonia"]]
    # Remove rows where Pneumonia is empty
    df = df[df["Pneumonia"].notna()]
    # Remove -1
    df = df[df['Pneumonia'] != -1]
    # df = df.replace(-1, 0)
    return df

# Load data from csv files
train_df = load_data("CheXpert-v1.0-small/train.csv").head(200)
valid_df = load_data("CheXpert-v1.0-small/valid.csv").head(100)

train_df['Pneumonia'].value_counts()

1.0    137
0.0     63
Name: Pneumonia, dtype: int64

In [3]:
import cv2
import numpy as np

def load_image(path):
    # Load image from path
    img = cv2.imread(path)
    # Convert image to RGB format
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Resize image to 224 x 224
    img = cv2.resize(img, (224, 224))
    return img

def extract_data(df):
    data = []
    labels = []
    for index, row in df.iterrows():
        # Load image from path
        img = load_image(row["Path"])
        # Append image to data list
        data.append(img)
        # Append label to labels list
        labels.append(row["Pneumonia"])
    # Convert lists to numpy arrays
    data = np.array(data)
    labels = np.array(labels)
    return data, labels

# Extract data from train and valid dataframes
train_data, train_labels = extract_data(train_df)
valid_data, valid_labels = extract_data(valid_df)

In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def augment_data(data, labels):
    # Calculate number of zeros and ones
    zeros = np.count_nonzero(labels == 0)
    ones = np.count_nonzero(labels == 1)
    # Calculate difference between zeros and ones
    diff = abs(zeros - ones)
    # Create ImageDataGenerator object for augmentation
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode="nearest"
    )
    if zeros > ones:
        # Augment ones to balance classes
        augment_indices = np.random.choice(np.where(labels == 1)[0], diff)
        for index in augment_indices:
            # Reshape image to 4D tensor for augmentation
            img = data[index].reshape((1, ) + data[index].shape)
            # Generate augmented images
            aug_imgs = datagen.flow(img, batch_size=1)
            # Append augmented images to data list
            data = np.append(data, aug_imgs[0], axis=0)
            # Append labels to labels list
            labels = np.append(labels, 1)
    elif ones > zeros:
        # Augment zeros to balance classes
        augment_indices = np.random.choice(np.where(labels == 0)[0], diff)
        for index in augment_indices:
            # Reshape image to 4D tensor for augmentation
            img = data[index].reshape((1, ) + data[index].shape)
            # Generate augmented images
            aug_imgs = datagen.flow(img, batch_size=1)
            # Append augmented images to data list
            data = np.append(data, aug_imgs[0], axis=0)
            # Append labels to labels list
            labels = np.append(labels, 0)
    return data, labels

# Augment train data and labels
train_data, train_labels = augment_data(train_data, train_labels)
valid_data, valid_labels = augment_data(valid_data, valid_labels)

In [5]:
def shuffle_data(data, labels):
    # Get shuffled indices
    indices = np.arange(len(data))
    np.random.shuffle(indices)
    # Shuffle data and labels using shuffled indices
    data = data[indices]
    labels = labels[indices]
    return data, labels

# Shuffle train data and labels
train_data, train_labels = shuffle_data(train_data, train_labels)
valid_data, valid_labels = shuffle_data(valid_data, valid_labels)

In [6]:
def check_balance(arr):
    num_zeros = sum(arr == 0)
    num_ones = sum(arr == 1)
    if num_zeros == num_ones:
        return True, num_zeros, num_ones
    else:
        return False, num_zeros, num_ones

# Check if training labels are balanced after augmentation
train_balanced = check_balance(train_labels)
print("Training data is balanced:", train_balanced)

# Check if validation labels are balanced after augmentation
valid_balanced = check_balance(valid_labels)
print("Validation data is balanced:", valid_balanced)

Training data is balanced: (True, 137, 137)
Validation data is balanced: (True, 96, 96)


In [7]:
# pip install tensorflow_addons

In [8]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, Reshape
from tensorflow.keras.optimizers import Adam
from vit_keras import vit

# Define CNN 1 layers
cnn_input1 = Input(shape=(224, 224, 3))
cnn_layer1 = Conv2D(filters=32, kernel_size=3, activation='relu')(cnn_input1)
cnn_layer1 = MaxPooling2D(pool_size=2)(cnn_layer1)
cnn_layer1 = Conv2D(filters=64, kernel_size=3, activation='relu')(cnn_layer1)
cnn_layer1 = MaxPooling2D(pool_size=2)(cnn_layer1)
cnn_layer1 = Conv2D(filters=128, kernel_size=3, activation='relu')(cnn_layer1)
cnn_layer1 = MaxPooling2D(pool_size=2)(cnn_layer1)
cnn_layer1 = Flatten()(cnn_layer1)
cnn_output1 = Dense(units=64, activation='relu')(cnn_layer1)

# Define CNN 2 layers
cnn_input2 = Input(shape=(224, 224, 3))
cnn_layer2 = Conv2D(filters=32, kernel_size=3, activation='relu')(cnn_input2)
cnn_layer2 = MaxPooling2D(pool_size=2)(cnn_layer2)
cnn_layer2 = Conv2D(filters=64, kernel_size=3, activation='relu')(cnn_layer2)
cnn_layer2 = MaxPooling2D(pool_size=2)(cnn_layer2)
cnn_layer2 = Conv2D(filters=128, kernel_size=3, activation='relu')(cnn_layer2)
cnn_layer2 = MaxPooling2D(pool_size=2)(cnn_layer2)
cnn_layer2 = Flatten()(cnn_layer2)
cnn_output2 = Dense(units=64, activation='relu')(cnn_layer2)

# Define ViT layers
vit_input1 = Input(shape=(224, 224, 3))
vit_layer1 = vit.vit_b16(
    image_size=224,
    activation='sigmoid',
    pretrained=True,
    include_top=True
)(vit_input1)
vit_output1 = Reshape((1000,))(vit_layer1)

# Concatenate CNN and ViT layers
concat_layer = Concatenate()([cnn_output1, cnn_output2, vit_output1])
output = Dense(units=1, activation='sigmoid')(concat_layer)

# Define the model
model = tf.keras.Model(inputs=[cnn_input1, cnn_input2, vit_input1], outputs=output)



In [9]:
model.compile(loss="binary_crossentropy", optimizer=Adam(0.0001), metrics=["accuracy"])
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 input_2 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 222, 222, 32  896         ['input_1[0][0]']                
                                )                                                             

In [10]:
history = model.fit([train_data, train_data, train_data], train_labels, epochs=10, batch_size=4, 
                    validation_data=([valid_data,valid_data, valid_data], valid_labels))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10