In [1]:
import os
import numpy as np
import tensorflow as tf
import random
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense,
                                     Embedding, Bidirectional, LSTM, Attention, Flatten,
                                     Input, Concatenate)
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split


In [3]:
# Define dataset paths
DATASET_PATH = "/kaggle/input/chest-xray-pneumonia/chest_xray"
IMG_SIZE = 224

# Function to load images and labels
def load_images_from_folder(folder):
    images = []
    labels = []
    file_paths = []
    
    for category in ["NORMAL", "PNEUMONIA"]:
        path = os.path.join(folder, category)
        label = 0 if category == "NORMAL" else 1  # Binary labels: 0 for normal, 1 for pneumonia
        
        for img_name in os.listdir(path):
            img_path = os.path.join(path, img_name)
            img = load_img(img_path, target_size=(IMG_SIZE, IMG_SIZE), color_mode="grayscale")
            img = img_to_array(img) / 255.0  # Normalize pixels
            images.append(img)
            labels.append(label)
            file_paths.append(img_path)
    
    return np.array(images), np.array(labels), file_paths

# Load train, validation, and test sets
X_train_img, y_train, train_paths = load_images_from_folder(os.path.join(DATASET_PATH, "train"))
X_val_img, y_val, val_paths = load_images_from_folder(os.path.join(DATASET_PATH, "val"))
X_test_img, y_test, test_paths = load_images_from_folder(os.path.join(DATASET_PATH, "test"))


In [4]:
# Function to generate synthetic text reports based on labels
def generate_synthetic_report(label):
    normal_reports = [
        "The lungs appear clear with no significant abnormalities.",
        "No evidence of infection, tumor, or pleural effusion.",
        "Lung fields are normal with no signs of consolidation."
    ]
    
    pneumonia_reports = [
        "Chest X-ray shows increased opacity in the right lung suggestive of pneumonia.",
        "Findings consistent with bacterial pneumonia, with patchy infiltrates.",
        "Diffuse consolidation and bronchial thickening noted, indicating infection."
    ]
    
    return random.choice(normal_reports if label == 0 else pneumonia_reports)

# Create synthetic text dataset
train_reports = [generate_synthetic_report(label) for label in y_train]
val_reports = [generate_synthetic_report(label) for label in y_val]
test_reports = [generate_synthetic_report(label) for label in y_test]


In [5]:
MAX_VOCAB = 5000
MAX_LENGTH = 50  # Shorter length since reports are small

# Tokenizer setup
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=MAX_VOCAB, oov_token="<OOV>")
tokenizer.fit_on_texts(train_reports)

# Convert text to sequences & pad
X_train_txt = pad_sequences(tokenizer.texts_to_sequences(train_reports), maxlen=MAX_LENGTH)
X_val_txt = pad_sequences(tokenizer.texts_to_sequences(val_reports), maxlen=MAX_LENGTH)
X_test_txt = pad_sequences(tokenizer.texts_to_sequences(test_reports), maxlen=MAX_LENGTH)


In [6]:
# Image Model (CNN)
image_input = Input(shape=(IMG_SIZE, IMG_SIZE, 1))
x = Conv2D(32, (3, 3), activation="relu", padding="same")(image_input)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Conv2D(64, (3, 3), activation="relu", padding="same")(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation="relu")(x)
image_output = Model(image_input, x)

# Text Model (BiLSTM + Attention)
text_input = Input(shape=(MAX_LENGTH,))
embedding_layer = Embedding(input_dim=MAX_VOCAB, output_dim=128, input_length=MAX_LENGTH)(text_input)
lstm_out = Bidirectional(LSTM(64, return_sequences=True))(embedding_layer)

# Custom Attention Layer
attention_layer = Attention()([lstm_out, lstm_out])
flat_layer = Flatten()(attention_layer)
text_output = Dense(128, activation="relu")(flat_layer)
text_model = Model(text_input, text_output)

# Fusion Layer
combined = Concatenate()([image_output.output, text_model.output])
fusion_output = Dense(64, activation="relu")(combined)
final_output = Dense(1, activation="sigmoid")(fusion_output)

# Compile Model
model = Model(inputs=[image_input, text_input], outputs=final_output)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss="binary_crossentropy",
              metrics=["accuracy"])




In [7]:
history = model.fit(
    [X_train_img, X_train_txt], y_train,
    validation_data=([X_val_img, X_val_txt], y_val),
    batch_size=32,
    epochs=10
)


Epoch 1/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 49ms/step - accuracy: 0.8922 - loss: 0.2439 - val_accuracy: 1.0000 - val_loss: 9.9889e-20
Epoch 2/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 45ms/step - accuracy: 1.0000 - loss: 4.7276e-20 - val_accuracy: 1.0000 - val_loss: 9.9887e-20
Epoch 3/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 45ms/step - accuracy: 1.0000 - loss: 4.8321e-20 - val_accuracy: 1.0000 - val_loss: 9.9887e-20
Epoch 4/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 46ms/step - accuracy: 1.0000 - loss: 4.7272e-20 - val_accuracy: 1.0000 - val_loss: 9.9887e-20
Epoch 5/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 46ms/step - accuracy: 1.0000 - loss: 4.8805e-20 - val_accuracy: 1.0000 - val_loss: 9.9887e-20
Epoch 6/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 46ms/step - accuracy: 1.0000 - loss: 4.8934e-20 - val_accuracy: 1.0000 

In [8]:
# Evaluate the model
test_loss, test_acc = model.evaluate([X_test_img, X_test_txt], y_test)
print(f"Test Accuracy: {test_acc:.4f}")


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 1.0000 - loss: 1.3457e-19
Test Accuracy: 1.0000


In [9]:
def predict_diagnosis(image_path, text_report):
    # Preprocess image
    img = load_img(image_path, target_size=(IMG_SIZE, IMG_SIZE), color_mode="grayscale")
    img = img_to_array(img) / 255.0
    img = np.expand_dims(img, axis=0)

    # Preprocess text
    text_seq = pad_sequences(tokenizer.texts_to_sequences([text_report]), maxlen=MAX_LENGTH)

    # Predict
    prediction = model.predict([img, text_seq])[0][0]
    return "Pneumonia" if prediction > 0.5 else "Normal"

# Test with new data
test_image_path = test_paths[0]  # Example test image
test_text_report = generate_synthetic_report(1)  # Generate a pneumonia report
print("Diagnosis:", predict_diagnosis(test_image_path, test_text_report))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 300ms/step
Diagnosis: Pneumonia
