In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle

import tensorflow as tf
from tensorflow.keras import layers, models
import joblib

2023-12-11 14:46:40.663402: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-11 14:46:40.663454: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-11 14:46:40.697040: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-11 14:46:40.767289: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Encoder that maps every distinct 'Finding Labels' string to an integer class id.
label_encoder = LabelEncoder()

# Read the annotation table, keep the two columns used by this notebook and
# derive both helper columns in a single step:
#   - Diseases: the pipe-separated findings split into a list
#   - Labels:   the integer id of the full 'Finding Labels' string
df = pd.read_csv('../Data/sample_labels.csv')[['Image Index', 'Finding Labels']]
df = df.assign(
    Diseases=df['Finding Labels'].apply(lambda finding: finding.split('|')),
    Labels=label_encoder.fit_transform(df['Finding Labels']),
)

# Persist the fitted encoder so class ids can be decoded outside this notebook.
joblib.dump(label_encoder, 'label_encoder.joblib')


['label_encoder.joblib']

In [3]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
train_df, test_df = train_test_split(
    df,
    random_state=42,
    test_size=0.2,
)


In [4]:
def load_and_preprocess_images(image_paths, labels):
    """Load images from disk and return them as a normalised RGB array.

    Every file is read with OpenCV, converted from BGR to RGB, resized to
    224x224 and scaled by 1/255.

    Args:
        image_paths: Iterable of file paths to read.
        labels: Labels paired with ``image_paths``; returned unchanged. As
            before, only as many images are loaded as there are
            (path, label) pairs.

    Returns:
        Tuple ``(images, labels)`` where ``images`` is a NumPy array built
        from the preprocessed images and ``labels`` is the object passed in.

    Raises:
        FileNotFoundError: If a path cannot be read as an image.
    """
    images = []
    for path, _ in zip(image_paths, labels):
        img = cv2.imread(path)
        # cv2.imread signals a missing/unreadable file by returning None rather
        # than raising; fail here with the offending path instead of letting
        # cvtColor raise an opaque assertion error.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (224, 224))
        img = img / 255.0
        images.append(img)
    return np.array(images), labels


In [5]:
def construct_image_paths(image_folder, image_indices):
    """Return the path of every file name in ``image_indices`` under ``image_folder``.

    Args:
        image_folder: Directory that contains the images.
        image_indices: Iterable of image file names.

    Returns:
        A list with one joined path per file name, in iteration order.
    """
    full_paths = []
    for file_name in image_indices:
        full_paths.append(os.path.join(image_folder, file_name))
    return full_paths


In [6]:
# Directory holding the image files referenced by the 'Image Index' column.
image_folder = '../Data/images2'

# Full file paths for the training split first, then the test split.
train_image_paths, test_image_paths = (
    construct_image_paths(image_folder, split['Image Index'])
    for split in (train_df, test_df)
)


In [7]:
# Generator for the training data: pixel values are rescaled to [0, 1] and random
# shear / zoom / horizontal flips are applied; 20% of the rows are reserved so a
# "validation" subset can be requested from the same dataframe.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    validation_split=0.2,
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    rescale=1 / 255,
)


In [8]:
# Batches for the "training" part of train_df. class_mode="raw" passes the integer
# 'Labels' column through unchanged as the target.
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    directory=image_folder,
    x_col="Image Index",
    y_col="Labels",
    subset="training",
    class_mode="raw",
    target_size=(224, 224),
    batch_size=10,
)

Found 3588 validated image filenames.


In [9]:
# Validation images must not be augmented: train_datagen applies random shear,
# zoom and horizontal flips, which would make the validation metrics noisy and
# not comparable to the (un-augmented) test evaluation. Use a generator that
# only rescales. The validation_split value must stay equal to the one used by
# train_datagen: Keras picks the subset from the dataframe order and the split
# fraction, so the same fraction selects the same rows and the training and
# validation subsets remain disjoint.
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

validation_generator = val_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=image_folder,
    x_col="Image Index",
    y_col="Labels",
    target_size=(224, 224),
    batch_size=10,
    class_mode="raw",
    subset="validation"
)

Found 896 validated image filenames.


In [11]:
# VGG16 convolutional base with ImageNet weights; the original classifier head is
# left out so a task-specific head can be stacked on top.
base_model = tf.keras.applications.VGG16(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
)


In [12]:
# Freeze every layer of the pretrained VGG16 base so that only the layers
# stacked on top of it are updated during training.
for layer in base_model.layers:
    layer.trainable = False


In [13]:
# Classifier: frozen VGG16 features -> flatten -> 256-unit ReLU layer -> dropout
# -> softmax with one output per class known to the label encoder.
model = models.Sequential()
model.add(base_model)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(len(label_encoder.classes_), activation='softmax'))


In [14]:
# Targets are integer class ids (not one-hot vectors), hence the sparse loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [15]:
# Train for 4 epochs, evaluating on the validation subset after each one.
history = model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=4,
)

Epoch 1/4


2023-12-11 14:47:08.384146: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904
2023-12-11 14:47:09.702119: I external/local_xla/xla/service/service.cc:168] XLA service 0x7f03e83034e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-12-11 14:47:09.702137: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 4070, Compute Capability 8.9
2023-12-11 14:47:09.711235: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1702286229.794761   29880 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/4
Epoch 3/4
Epoch 4/4


In [18]:
# Evaluate on the held-out split. Only rescaling is applied (no augmentation), and
# shuffle=False keeps the predictions in the same order as the generator's labels.
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=image_folder,
    x_col="Image Index",
    y_col="Labels",
    target_size=(224, 224),
    batch_size=10,
    class_mode="raw",
    shuffle=False
)

predictions = model.predict(test_generator)
predicted_labels = np.argmax(predictions, axis=1)

# Take the ground truth from the generator rather than from test_df:
# flow_from_dataframe only keeps rows whose image file it could validate, so the
# generator's labels are the ones that line up with the predictions.
true_labels = np.asarray(test_generator.labels)
assert len(predicted_labels) == len(true_labels), "predictions and labels are misaligned"

# Compare predicted labels with true labels
accuracy = np.mean(predicted_labels == true_labels)

print(f"Test Accuracy: {accuracy * 100:.2f}%")

Found 1122 validated image filenames.
Test Accuracy: 54.01%


In [17]:
# Print the first (up to) 100 test examples with their true and predicted class
# names, then the number of matches among them.
predictions = model.predict(test_generator)
predicted_labels = np.argmax(predictions, axis=1)

# Never index past the end of a test set that has fewer than 100 rows.
true_label_ids = test_df['Labels'].values
n_examples = min(100, len(true_label_ids), len(predicted_labels))

# Decode all ids in one call each instead of two encoder calls per row.
true_classes = label_encoder.inverse_transform(true_label_ids[:n_examples])
predicted_classes = label_encoder.inverse_transform(predicted_labels[:n_examples])

count = 0
for i, (true_class, predicted_class) in enumerate(zip(true_classes, predicted_classes)):
    print(f"Example {i+1}: True Class: {true_class}, Predicted Class: {predicted_class}")
    if true_class == predicted_class:
        count += 1

print(count)

Example 1: True Class: No Finding, Predicted Class: No Finding
Example 2: True Class: No Finding, Predicted Class: No Finding
Example 3: True Class: Mass|Nodule, Predicted Class: No Finding
Example 4: True Class: No Finding, Predicted Class: No Finding
Example 5: True Class: Consolidation|Mass, Predicted Class: No Finding
Example 6: True Class: No Finding, Predicted Class: No Finding
Example 7: True Class: No Finding, Predicted Class: No Finding
Example 8: True Class: Fibrosis, Predicted Class: No Finding
Example 9: True Class: No Finding, Predicted Class: No Finding
Example 10: True Class: No Finding, Predicted Class: No Finding
Example 11: True Class: Infiltration, Predicted Class: No Finding
Example 12: True Class: No Finding, Predicted Class: No Finding
Example 13: True Class: No Finding, Predicted Class: No Finding
Example 14: True Class: Infiltration, Predicted Class: No Finding
Example 15: True Class: Atelectasis, Predicted Class: No Finding
Example 16: True Class: Effusion|Infi

In [4]:
# Save the model to a single .h5 file.
# NOTE(review): this cell needs `model` from the model-building/training cells. The
# NameError recorded in its output suggests it was last run on a kernel where those
# cells had not been executed — re-run it after training.
# NOTE(review): save_model is imported here but the cell calls model.save instead.
from tensorflow.keras.models import save_model
model.save('aayush_xray_classif.h5')

NameError: name 'model' is not defined

In [17]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 flatten (Flatten)           (None, 25088)             0         
                                                                 
 dense (Dense)               (None, 256)               6422784   
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 244)               62708     
                                                                 
Total params: 21200180 (80.87 MB)
Trainable params: 6485492 (24.74 MB)
Non-trainable params: 14714688 (56.13 MB)
_________________________________________________________________


In [19]:
from keras.models import model_from_json

# Export the model in two parts: the architecture as JSON text and the weights
# as a separate .h5 file. Requires `model` to exist in the current kernel.
# NOTE(review): model_from_json is imported but not used in this cell —
# presumably intended for reloading the architecture later; confirm.
model_json = model.to_json()
with open("model_architecture.json", "w") as json_file:
    json_file.write(model_json)


model.save_weights("model_weights.h5")
