<a href="https://colab.research.google.com/github/atharvadesai1/BE-Project-Codes/blob/main/liver_tumor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
from google.colab import drive
drive.mount('/content/drive')

!unzip "/content/drive/MyDrive/liver_segmentation_dataset.zip" -d "/content/dataset"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Archive:  /content/drive/MyDrive/liver_segmentation_dataset.zip
replace /content/dataset/segmentations/segmentation-0.nii? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: /content/dataset/segmentations/segmentation-0.nii  
  inflating: /content/dataset/segmentations/segmentation-1.nii  
  inflating: /content/dataset/segmentations/segmentation-10.nii  
  inflating: /content/dataset/segmentations/segmentation-100.nii  
  inflating: /content/dataset/segmentations/segmentation-101.nii  
  inflating: /content/dataset/segmentations/segmentation-102.nii  
  inflating: /content/dataset/segmentations/segmentation-103.nii  
  inflating: /content/dataset/segmentations/segmentation-104.nii  
  inflating: /content/dataset/segmentations/segmentation-105.nii  
  inflating: /content/dataset/segmentations/segmentation-106.nii  
  inflating: /content/dataset/segmentations

In [31]:
!pip install nibabel lime scikit-image shap
!pip install -q tensorflow==2.12.0

Collecting numpy>=1.22 (from nibabel)
  Using cached numpy-2.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
  Using cached numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Using cached numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (19.5 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.23.5
    Uninstalling numpy-1.23.5:
      Successfully uninstalled numpy-1.23.5
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 2.0.2 which is incompatible.
tensorflow-text 2.18.1 requires tensorflow<2.19,>=2.18.0, but you have tensorflow 2.12.0 which is incompatible.
orbax-checkpoint 0.11.13 requires jax>=0.5.0, but you have jax 0.4.30 which is incompati

In [44]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import backend as K
from sklearn.model_selection import train_test_split
import nibabel as nib
import glob
from tqdm import tqdm
import lime
from lime import lime_image
from skimage.segmentation import mark_boundaries

In [45]:
def load_nifti_file(filepath):
    """Read the NIfTI image at ``filepath`` and return its voxel data as an array.

    The data is obtained by calling ``get_fdata()`` on the image nibabel loads.
    """
    return nib.load(filepath).get_fdata()

def _case_id(filename):
    """Return the token between the first '-' and the next '.', or None when there is no '-'.

    Example: ``"volume-12.nii.gz"`` -> ``"12"``.
    """
    parts = filename.split('-')
    if len(parts) < 2:
        return None
    return parts[1].split('.')[0]


def load_dataset(base_dir):
    """Pair every CT volume under ``base_dir`` with its segmentation mask.

    Expected layout: CT files matching ``*.nii*`` inside ``base_dir/volume_pt*/`` and
    mask files matching ``*.nii*`` inside ``base_dir/segmentations/``.

    For each CT file the mask is searched in this order (first existing file wins):
      1. same filename with "volume" replaced by "segmentation";
      2. ``segmentation-<id>.nii`` where ``<id>`` is the case id parsed from the CT name;
      3. any ``segmentation-<id>.*`` file with the same case id (covers a ``.nii`` volume
         whose mask is stored as ``.nii.gz`` and vice versa);
      4. a mask file with exactly the same filename as the CT file.

    Args:
        base_dir: Root folder of the dataset.

    Returns:
        ``(ct_scans, masks)``: two lists of file paths of equal length, where
        ``masks[i]`` is the mask for ``ct_scans[i]``. CT files without a mask are
        skipped and reported with a printed warning.
    """
    seg_dir = os.path.join(base_dir, "segmentations")
    seg_files = sorted(glob.glob(os.path.join(seg_dir, "*.nii*")))

    # Lookup tables over the masks that actually exist on disk.
    mask_by_name = {os.path.basename(f): f for f in seg_files}
    mask_by_id = {}
    for seg_file in seg_files:
        seg_name = os.path.basename(seg_file)
        if not seg_name.startswith("segmentation-"):
            continue
        seg_id = _case_id(seg_name)
        if seg_id is not None:
            # seg_files is sorted, so on duplicate ids the first name in sort order is kept.
            mask_by_id.setdefault(seg_id, seg_file)

    ct_scans = []
    masks = []

    for vol_dir in sorted(glob.glob(os.path.join(base_dir, "volume_pt*"))):
        for ct_file in sorted(glob.glob(os.path.join(vol_dir, "*.nii*"))):
            ct_filename = os.path.basename(ct_file)
            vol_id = _case_id(ct_filename)

            # Candidate mask paths in priority order; None entries are skipped below.
            candidates = [
                os.path.join(seg_dir, ct_filename.replace("volume", "segmentation")),
            ]
            if vol_id is not None:
                candidates.append(os.path.join(seg_dir, f"segmentation-{vol_id}.nii"))
                candidates.append(mask_by_id.get(vol_id))
            candidates.append(mask_by_name.get(ct_filename))

            mask_file = next((c for c in candidates if c and os.path.exists(c)), None)

            if mask_file is None:
                print(f"Warning: No mask found for {ct_file}")
                continue

            ct_scans.append(ct_file)
            masks.append(mask_file)

    return ct_scans, masks

In [46]:
# Root folder that holds the "volume_pt*" and "segmentations" subfolders.
base_dir = "/content/dataset"  # Update with your actual path
ct_scans, masks = load_dataset(base_dir)

print(f"Found {len(ct_scans)} CT scans and {len(masks)} corresponding masks")

# Show the first matched pair as a quick sanity check of the pairing.
if ct_scans:
    print("\nSample CT scan:", ct_scans[0])
    print("Corresponding mask:", masks[0])

Found 51 CT scans and 51 corresponding masks

Sample CT scan: /content/dataset/volume_pt1/volume-0.nii
Corresponding mask: /content/dataset/segmentations/segmentation-0.nii


In [47]:
def preprocess_image(image, target_size=(256, 256)):
    """Min-max normalize a CT slice to [0, 1], resize it and append a channel axis.

    Args:
        image: Array holding one CT slice (assumed 2D).
        target_size: Output size as ``(height, width)``, i.e. numpy row/column order.

    Returns:
        float32 array; for a 2D input its shape is ``(height, width, 1)``.
    """
    image = image.astype(np.float32)

    # Scale by this slice's own intensity range; the epsilon avoids division by zero
    # on constant slices.
    lowest = np.min(image)
    highest = np.max(image)
    image = (image - lowest) / (highest - lowest + 1e-8)

    # cv2.resize expects dsize as (width, height) -- the reverse of numpy's
    # (rows, cols) order -- so swap the pair to keep non-square sizes correct.
    height, width = target_size
    image = cv2.resize(image, (width, height))

    # Trailing channel axis for the model input.
    return np.expand_dims(image, axis=-1)

def preprocess_mask(mask, target_size=(256, 256)):
    """Resize a label slice and convert it to a binary foreground mask.

    Args:
        mask: Array holding one segmentation slice (assumed 2D).
        target_size: Output size as ``(height, width)``, i.e. numpy row/column order.

    Returns:
        float32 array of 0.0/1.0 values; for a 2D input its shape is
        ``(height, width, 1)``.
    """
    # cv2.resize expects dsize as (width, height), the reverse of numpy's order.
    height, width = target_size
    # Nearest-neighbour keeps the original label values. The default bilinear
    # interpolation blends labels at region borders, and the "> 0" test below would
    # then turn every blended pixel into foreground, growing the mask.
    mask = cv2.resize(mask, (width, height), interpolation=cv2.INTER_NEAREST)

    # Every non-zero label is treated as foreground.
    # NOTE(review): if the dataset uses several labels (e.g. liver and tumor), this
    # merges them into one class -- confirm that is intended.
    mask = (mask > 0).astype(np.float32)
    return np.expand_dims(mask, axis=-1)

In [48]:
class LiverTumorDataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields batches of (CT slice, binary mask) pairs.

    Each sample is read from a pair of NIfTI files. For a 3D volume only the middle
    slice along the last axis is used; 2D inputs are used as they are. Slices are
    passed through ``preprocess_image`` / ``preprocess_mask``.

    Args:
        ct_paths: List of CT file paths.
        mask_paths: List of mask file paths, aligned index-by-index with ``ct_paths``.
        batch_size: Maximum number of samples per batch.
        target_size: Slice size as ``(height, width)`` after preprocessing.
        shuffle: Whether to reshuffle the sample order at the end of every epoch.

    Raises:
        ValueError: If the two path lists differ in length or ``batch_size < 1``.
    """

    def __init__(self, ct_paths, mask_paths, batch_size=8, target_size=(256, 256), shuffle=True):
        # Let the Keras base class initialise its own state.
        super().__init__()

        if len(ct_paths) != len(mask_paths):
            raise ValueError(
                f"ct_paths and mask_paths must have the same length "
                f"(got {len(ct_paths)} and {len(mask_paths)})"
            )
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1 (got {batch_size})")

        self.ct_paths = ct_paths
        self.mask_paths = mask_paths
        self.batch_size = batch_size
        self.target_size = target_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch; the last batch may be smaller than ``batch_size``."""
        # Rounding up keeps the remainder samples in the epoch and still yields one
        # batch when there are fewer samples than batch_size.
        return int(np.ceil(len(self.ct_paths) / self.batch_size))

    def __getitem__(self, index):
        """Load and preprocess batch number ``index``.

        Returns:
            ``(X, y)``: float32 arrays of shape ``(n, height, width, 1)`` where ``n``
            is the number of samples in this batch.
        """
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        ct_paths = [self.ct_paths[k] for k in indexes]
        mask_paths = [self.mask_paths[k] for k in indexes]

        # Size the arrays by the actual number of samples so a short final batch is
        # not padded with all-zero images.
        X = np.zeros((len(indexes), *self.target_size, 1), dtype=np.float32)
        y = np.zeros((len(indexes), *self.target_size, 1), dtype=np.float32)

        for i, (ct_path, mask_path) in enumerate(zip(ct_paths, mask_paths)):
            # Load NIfTI files
            ct_data = load_nifti_file(ct_path)
            mask_data = load_nifti_file(mask_path)

            # For 3D volumes, use the middle slice along the last axis. The slice
            # index comes from the CT shape and is applied to the mask as well.
            if len(ct_data.shape) == 3:
                mid_slice = ct_data.shape[2] // 2
                ct_slice = ct_data[:, :, mid_slice]
                mask_slice = mask_data[:, :, mid_slice]
            else:  # Handle 2D slices directly
                ct_slice = ct_data
                mask_slice = mask_data

            X[i,] = preprocess_image(ct_slice, self.target_size)
            y[i,] = preprocess_mask(mask_slice, self.target_size)

        return X, y

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.ct_paths))
        if self.shuffle:
            np.random.shuffle(self.indexes)

In [49]:
def unet_model(input_size=(256, 256, 1)):
    """Build a three-level U-Net that outputs a single-channel sigmoid map.

    The encoder has three double-convolution stages (64, 128, 256 filters), each
    followed by 2x2 max pooling, then a 512-filter bottleneck. The decoder mirrors
    it: 2x upsampling, concatenation with the matching encoder output along the
    channel axis, and a double convolution. A final 1x1 convolution with sigmoid
    activation produces the output.

    Args:
        input_size: Shape of one input sample, passed to ``Input``.

    Returns:
        An uncompiled Keras ``Model``.
    """
    def double_conv(tensor, filters):
        """Two stacked 3x3, same-padded, ReLU convolutions with ``filters`` channels."""
        tensor = Conv2D(filters, 3, activation='relu', padding='same')(tensor)
        return Conv2D(filters, 3, activation='relu', padding='same')(tensor)

    def up_block(tensor, skip, filters):
        """Upsample ``tensor`` 2x, concatenate ``skip`` in front of it, then double_conv."""
        upsampled = UpSampling2D(size=(2, 2))(tensor)
        merged = concatenate([skip, upsampled], axis=3)
        return double_conv(merged, filters)

    inputs = Input(input_size)

    # Encoder path
    enc1 = double_conv(inputs, 64)
    down1 = MaxPooling2D(pool_size=(2, 2))(enc1)

    enc2 = double_conv(down1, 128)
    down2 = MaxPooling2D(pool_size=(2, 2))(enc2)

    enc3 = double_conv(down2, 256)
    down3 = MaxPooling2D(pool_size=(2, 2))(enc3)

    # Bottleneck
    bottleneck = double_conv(down3, 512)

    # Decoder path with skip connections
    dec3 = up_block(bottleneck, enc3, 256)
    dec2 = up_block(dec3, enc2, 128)
    dec1 = up_block(dec2, enc1, 64)

    outputs = Conv2D(1, 1, activation='sigmoid')(dec1)

    return Model(inputs=inputs, outputs=outputs)

In [50]:
def dice_coef(y_true, y_pred, smooth=1):
    """Smoothed Dice coefficient computed over all elements of both tensors.

    Both inputs are flattened, then the value
    ``(2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth)``
    is returned. ``smooth`` keeps the ratio defined when both sums are zero.
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)

    overlap = K.sum(truth * pred)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(pred) + smooth
    return numerator / denominator

def dice_loss(y_true, y_pred):
    """Loss form of the Dice coefficient: ``1 - dice_coef(y_true, y_pred)``."""
    score = dice_coef(y_true, y_pred)
    return 1 - score

In [None]:
# Fail fast when the dataset cell produced nothing to train on.
if len(ct_scans) == 0:
    raise ValueError("No matching CT scans and masks found! Check your dataset structure.")
if len(masks) == 0:
    raise ValueError("No masks found! Check your dataset paths.")

# Hold out 20% of the volumes for validation; the fixed seed keeps the split reproducible.
train_ct, val_ct, train_mask, val_mask = train_test_split(
    ct_scans, masks, test_size=0.2, random_state=42)

# Create data generators.
train_gen = LiverTumorDataGenerator(train_ct, train_mask)
# Validation is not shuffled so the monitored metrics are computed on the same
# samples, in the same order, every epoch.
val_gen = LiverTumorDataGenerator(val_ct, val_mask, shuffle=False)

# Create model
model = unet_model()
model.compile(optimizer=Adam(learning_rate=1e-4),
              loss=dice_loss,
              metrics=[dice_coef, 'binary_accuracy'])

# Callbacks
# Dice is a higher-is-better score, so both callbacks that monitor it state
# mode='max' explicitly. With the default mode='auto' Keras infers the direction
# from the metric name, which is not guaranteed to treat a custom metric such as
# 'val_dice_coef' as something to maximize.
callbacks = [
    ModelCheckpoint('best_model.h5', monitor='val_dice_coef', save_best_only=True, mode='max'),
    EarlyStopping(monitor='val_dice_coef', mode='max', patience=10, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)
]

# Train
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=50,
    callbacks=callbacks
)

# Save final model
model.save('liver_tumor_segmentation.h5')
print("Model saved as liver_tumor_segmentation.h5")



Epoch 1/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15s/step - binary_accuracy: 0.1745 - dice_coef: 0.0923 - loss: 0.9077 



In [None]:
def explain_with_lime(model, image, num_samples=1000):
    """Explain the model's foreground prediction for one image with LIME and plot it.

    LIME's image explainer is built for classifiers: it sends batches of RGB images
    to the prediction function and expects one score per class back. The
    segmentation model takes single-channel input and returns a per-pixel map, so a
    small adapter is placed between them (see ``classifier_fn`` below).

    Args:
        model: Trained segmentation model whose ``predict`` accepts ``(N, H, W, 1)``
            input and returns per-pixel foreground probabilities.
        image: Batch of shape ``(N, H, W, 1)``; only the first sample is explained.
        num_samples: Number of perturbed images LIME generates.

    Returns:
        The LIME explanation object. Both pseudo-classes (0 = background,
        1 = foreground) are explained, so ``top_labels`` is populated.
    """
    foreground_label = 1

    def classifier_fn(rgb_batch):
        """Adapt the segmentation model to LIME's (N, H, W, 3) -> (N, n_classes) contract."""
        # LIME replicates the grayscale image into three identical channels; keep one.
        gray_batch = np.asarray(rgb_batch)[..., :1].astype(np.float32)
        pred_masks = np.asarray(model.predict(gray_batch, verbose=0))
        # Collapse each predicted map to one number: its mean foreground probability.
        fg_score = pred_masks.reshape(len(pred_masks), -1).mean(axis=1)
        return np.stack([1.0 - fg_score, fg_score], axis=1)

    explainer = lime_image.LimeImageExplainer()
    explanation = explainer.explain_instance(
        image[0, :, :, 0].astype('double'),
        classifier_fn,
        top_labels=2,  # explain both pseudo-classes so the foreground one is always available
        hide_color=0,
        num_samples=num_samples
    )

    # Highlight the superpixels that push the foreground score up.
    temp, mask = explanation.get_image_and_mask(
        foreground_label,
        positive_only=True,
        num_features=5,
        hide_rest=False
    )

    # The input is already scaled to [0, 1], so it is shown as-is (clipped for
    # safety) instead of being rescaled as if it were in [-1, 1].
    plt.imshow(mark_boundaries(np.clip(temp, 0, 1), mask))
    plt.title('LIME Explanation')
    plt.show()
    return explanation

# Example usage: explain the first sample of the first validation batch.
sample_image, sample_mask = next(iter(val_gen))
lime_explanation = explain_with_lime(model=model, image=sample_image)