In [None]:
# Step 1: Import necessary libraries
import os
import shutil

import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Step 2: Kaggle input folder that holds the raw images of each organ.
# The keys double as the class (sub-folder) names of the organized dataset.
# NOTE(review): the 'lung' entry points at a cardiomegaly dataset — confirm it
# contains the images intended for this class.
source_dirs = {
    'brain': '/kaggle/input/brain-tumor-mri-dataset',
    'breast': '/kaggle/input/breast-cancer-patients-mris',
    'liver': '/kaggle/input/liver-dataset',
    'lung': '/kaggle/input/cardiomegaly-disease-prediction-using-cnn'
}

# Step 3: Root of the organized dataset and its three split folders.
base_dir = '/kaggle/working/organized_dataset'
train_dir, val_dir, test_dir = (
    os.path.join(base_dir, split_name)
    for split_name in ('train', 'validation', 'test')
)

# Step 4: Make sure every split folder has one sub-folder per organ.
for split_dir in (train_dir, val_dir, test_dir):
    for organ in source_dirs:
        os.makedirs(os.path.join(split_dir, organ), exist_ok=True)

# Step 5: File suffixes treated as images (matched against lower-cased names).
supported_extensions = ('.jpg', '.jpeg', '.png')

# Step 6: Define helper function to retrieve all image paths in nested directories
def get_image_paths(root_dir, extensions=None):
    """Return the paths of all image files found anywhere under ``root_dir``.

    The directory tree is walked recursively and a file is kept when its
    lower-cased name ends with one of the given extensions.

    Args:
        root_dir: Directory to search. A directory that does not exist simply
            yields an empty list, because ``os.walk`` produces nothing for it.
        extensions: Optional suffix or iterable of suffixes (e.g. ``'.jpg'``)
            to accept. Defaults to the notebook-level ``supported_extensions``.

    Returns:
        A sorted list of file paths. Sorting matters because ``os.walk`` lists
        entries in arbitrary order, which would otherwise make any seeded
        split of the result differ between runs.
    """
    if extensions is None:
        extensions = supported_extensions
    if isinstance(extensions, str):
        # A bare string would otherwise be iterated character by character.
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    image_paths = [
        os.path.join(root, file)
        for root, _dirs, files in os.walk(root_dir)
        for file in files
        if file.lower().endswith(suffixes)
    ]
    image_paths.sort()
    return image_paths

# Step 7: Split each organ's images 80/10/10 and copy them into the organized folders
for organ, paths in source_dirs.items():
    if not os.path.exists(paths):
        print(f"Path does not exist: {paths}")
        continue

    # Sort before splitting: directory listing order is arbitrary, so without
    # this the fixed random_state would not select the same files on each run.
    images = sorted(get_image_paths(paths))
    print(f"Found {len(images)} images for organ '{organ}'.")

    if len(images) == 0:
        print(f"No images found for organ: {organ}")
        continue

    # Split images into 80% train, 10% validation, 10% test.
    # train_test_split raises ValueError when a side of the split would be
    # empty (very small image sets); report it and move on to the next organ
    # instead of aborting the whole cell.
    try:
        train_imgs, temp_imgs = train_test_split(images, test_size=0.2, random_state=42)
        val_imgs, test_imgs = train_test_split(temp_imgs, test_size=0.5, random_state=42)
    except ValueError as err:
        print(f"Not enough images to split for organ '{organ}': {err}")
        continue

    # Copy images to train, validation, and test directories.
    split_targets = (
        (train_dir, train_imgs),
        (val_dir, val_imgs),
        (test_dir, test_imgs),
    )
    for split_dir, split_imgs in split_targets:
        for img_path in split_imgs:
            # Name the copy after its path relative to the source root, so that
            # same-named files from different sub-folders no longer overwrite
            # each other in the flat destination folder.
            unique_name = os.path.relpath(img_path, paths).replace(os.sep, '_')
            shutil.copy(img_path, os.path.join(split_dir, organ, unique_name))

print("Dataset organized successfully.")

# Step 8: Define model parameters
batch_size = 32
epochs = 20  # Increased to improve training

# Step 9: Initialize the image generators.
# Only the training generator augments; validation and test images are merely
# rescaled so that the reported metrics are measured on unmodified images.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,      # Small rotation
    width_shift_range=0.1,  # Slight horizontal shift
    height_shift_range=0.1, # Slight vertical shift
    zoom_range=0.1          # Slight zoom
)
test_datagen = ImageDataGenerator(rescale=1./255)

# Step 10: Load training, validation and test data.
# Each split is read from its own folder of the organized dataset. Previously
# the validation batches were carved out of train_dir through the augmenting
# generator, and the dedicated validation folder was never read.
train_data = train_datagen.flow_from_directory(
    train_dir,
    target_size=(128, 128),
    batch_size=batch_size,
    class_mode='categorical',
    color_mode='grayscale',
    shuffle=True
)

validation_data = test_datagen.flow_from_directory(
    val_dir,
    target_size=(128, 128),
    batch_size=batch_size,
    class_mode='categorical',
    color_mode='grayscale',
    shuffle=False
)

# Load testing data
test_data = test_datagen.flow_from_directory(
    test_dir,
    target_size=(128, 128),
    batch_size=batch_size,
    class_mode='categorical',
    color_mode='grayscale',
    shuffle=False
)

# Step 11: Build the model using Transfer Learning (VGG16)
# The convolutional base is loaded from a local weights file and frozen, so
# only the layers added around it are trained.
base_model = VGG16(
    weights='/kaggle/input/vgg16-dataset/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',  # Update this path
    include_top=False,
    input_shape=(128, 128, 3)
)
base_model.trainable = False  # Freeze VGG16 layers

model = Sequential()
model.add(Input(shape=(128, 128, 1)))                     # Grayscale input
model.add(Conv2D(3, (1, 1), activation='relu'))           # 1x1 conv maps 1 channel to the 3 VGG16 expects
model.add(base_model)
model.add(Flatten())
model.add(Dense(128, activation='relu'))                  # Classification head
model.add(Dense(len(source_dirs), activation='softmax'))  # One output per organ

# Step 12: Compile the model
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

# Step 13: Number of batches in each split.
# len() of a directory iterator counts the final partial batch as well. The
# previous `samples // batch_size` dropped that batch (so some test images were
# never evaluated) and evaluated to 0 for a split smaller than one batch.
# These values are kept for reference only; fit/evaluate below rely on the
# iterators' own length rather than on explicit step counts.
steps_per_epoch = len(train_data)
validation_steps = len(validation_data)
test_steps = len(test_data)

# Step 14: Train the model.
# NOTE(review): with floor-divided step counts, newer Keras versions are also
# reported to exhaust the generator mid-training ("input ran out of data") —
# confirm against the installed version; omitting the step arguments avoids it.
history = model.fit(
    train_data,
    validation_data=validation_data,
    epochs=epochs
)

# Step 15: Evaluate the model on the full test set
test_loss, test_accuracy = model.evaluate(test_data)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Step 16: Plot training & validation accuracy and loss side by side
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

# One entry per panel: axes, history keys, curve labels, title and y-axis label.
panels = (
    (accuracy_ax, 'accuracy', 'val_accuracy',
     'Train Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'val_loss',
     'Train Loss', 'Validation Loss', 'Model Loss', 'Loss'),
)

for ax, train_key, val_key, train_label, val_label, title, y_label in panels:
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend(loc='upper left')
    ax.grid()

plt.show()


In [23]:
# Persist the trained model to the Kaggle working directory under a
# team-specific file name, then report where it was written.
model_path = '/kaggle/working/team12_model.h5'
model.save(filepath=model_path)

print(f"Model saved successfully at: {model_path}")


Model saved successfully at: /kaggle/working/team12_model.h5


In [26]:
# Saves the model to the same file path that the previous cell already wrote
# via `model_path`, overwriting that file.
# NOTE(review): this cell looks redundant with the previous one — confirm
# whether it is still needed.
model.save('/kaggle/working/team12_model.h5')

In [21]:
## Testing part: classify one image with the trained model, using a confidence threshold
# `load_img` and `img_to_array` are imported in the notebook's import cell.

# Path of the image to classify
test_image_path = "/kaggle/input/liver-test2/WhatsApp Image 2024-10-28 at 22.05.15_7406f608.jpg"

# Preprocess exactly like the training generators: grayscale, 128x128,
# pixel values scaled to [0, 1].
test_image = load_img(test_image_path, color_mode='grayscale', target_size=(128, 128))
test_image = img_to_array(test_image)  # Convert to array
test_image = test_image / 255.0  # Normalize the image
test_image = np.expand_dims(test_image, axis=0)  # Add batch dimension

# Make a prediction: one row of class probabilities for the single image
predictions = model.predict(test_image)

# Predictions whose top probability is below this value are reported as unknown
confidence_threshold = 0.5  # Adjust this value based on your needs

# Index and probability of the most likely class
predicted_class = np.argmax(predictions, axis=1)
predicted_confidence = np.max(predictions, axis=1)

# The training generator maps organ name -> class index; invert it so the
# predicted index can be turned back into an organ name.
class_indices = train_data.class_indices
organ_labels = {v: k for k, v in class_indices.items()}

if predicted_confidence[0] < confidence_threshold:
    print("Predicted Organ: Unknown")
else:
    predicted_organ = organ_labels[predicted_class[0]]  # Get the predicted organ name
    print(f"Predicted Organ: {predicted_organ} (Confidence: {predicted_confidence[0]:.2f})")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step
Predicted Organ: liver (Confidence: 0.99)
