<a href="https://colab.research.google.com/github/douglasmasho/MedAlgo/blob/main/TumorGrade2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
import zipfile
import os

# Mount Google Drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
!pip install monai

Collecting monai
  Downloading monai-1.3.2-py3-none-any.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.9->monai)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.9->monai)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.9->monai)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.9->monai)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.9->monai)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.9->monai)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-

In [None]:
import os
import glob
from sklearn.model_selection import train_test_split

extract_dir = '/content/drive/MyDrive/BRATS'

def find_nifti_files(root_dir, label):
    """Recursively find all .nii files in subdirectories and assign a label."""
    file_paths = []
    for subdir, _, _ in os.walk(root_dir):
        file_paths.extend(glob.glob(os.path.join(subdir, '*.nii')))
    return file_paths, [label] * len(file_paths)

# Define paths
hgg_path = os.path.join(extract_dir, "MICCAI_BraTS_2019_Data_Training",'HGG')
lgg_path = os.path.join(extract_dir, "MICCAI_BraTS_2019_Data_Training",'LGG')

# Find all .nii files
hgg_files, hgg_labels = find_nifti_files(hgg_path, 1)  # High-grade glioma
lgg_files, lgg_labels = find_nifti_files(lgg_path, 0)  # Low-grade glioma

print(f'HGG files: {len(hgg_files)}')
print(f'LGG files: {len(lgg_files)}')

# Combine and split data
all_files = hgg_files + lgg_files
all_labels = hgg_labels + lgg_labels

print(f'All files: {len(all_files)}')
print(f'All labels: {len(all_labels)}')

# Ensure non-empty lists before splitting
if len(all_files) == 0 or len(all_labels) == 0:
    raise ValueError("The file paths or labels are empty. Check the directory and file extensions.")

train_files, val_files, train_labels, val_labels = train_test_split(
    all_files, all_labels, test_size=0.2, stratify=all_labels, random_state=42
)

In [None]:
import monai
from monai.transforms import Compose, LoadImage, ScaleIntensity, EnsureChannelFirst, Resize, ToTensor
from monai.data import Dataset, DataLoader

# Define transformations
transforms = Compose([
    LoadImage(image_only=True),  # Load NIfTI image
    ScaleIntensity(),            # Normalize intensity
    EnsureChannelFirst(),        # Add channel dimension
    Resize((128, 128, 128)),     # Resize to a fixed size
    ToTensor()                   # Convert to tensor
])

# Create custom dataset class
class CustomDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        label = self.labels[idx]
        image = image_path  # Load image directly
        if self.transform:
            image = self.transform(image)
        return {'image': image, 'label': label}

# Create datasets and dataloaders
train_dataset = CustomDataset(train_files, train_labels, transform=transforms)
val_dataset = CustomDataset(val_files, val_labels, transform=transforms)

batch_size = 4
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=2)


In [None]:
!pip install monai


In [None]:
import os
import nibabel as nib
import numpy as np
from PIL import Image
import torch
import monai
from monai.transforms import (
    Compose, LoadImage, EnsureChannelFirst, ScaleIntensity,
    Resize, ToTensor, RandAffine, RandRotate90
)
from monai.data import DataLoader, ImageDataset
from monai.networks.nets import DenseNet121
from torch.optim import Adam
from torch.utils.tensorboard import SummaryWriter
from monai.losses import DiceLoss

# Step 1: Slice .nii Files into .jpg Images (only t1ce sequences)

# Define paths
input_dir = '/content/drive/MyDrive/BRATS'  # Directory with nii files
output_dir = '/content/drive/MyDrive/BRATS_JPG'  # Output directory for jpg images

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Iterate over all .nii files in the input directory
for root, dirs, files in os.walk(input_dir):
    for file in files:
        if ('t1ce' in file) and (file.endswith('.nii') or file.endswith('.nii.gz')):
            file_path = os.path.join(root, file)
            nii_data = nib.load(file_path)
            images = nii_data.get_fdata()

            # Determine the class from the directory name
            if 'HGG' in root:
                label = 'HGG'
            elif 'LGG' in root:
                label = 'LGG'
            else:
                continue  # Skip if the directory is not labeled as HGG or LGG

            # Create class-specific output directory under BRATS_JPG
            class_output_dir = os.path.join(output_dir, label)
            os.makedirs(class_output_dir, exist_ok=True)

            # Normalize and convert each slice to an image
            for i in range(images.shape[2]):  # Assuming we're slicing along the third axis
                slice_data = images[:, :, i]
                slice_data = (slice_data - slice_data.min()) / (slice_data.max() - slice_data.min())  # Normalize to [0, 1]
                slice_data = (255 * slice_data).astype(np.uint8)  # Scale to [0, 255]

                # Convert to PIL Image and save as .jpg in the appropriate class directory
                img = Image.fromarray(slice_data)
                img.save(os.path.join(class_output_dir, f"{file}_{i}.jpg"))

print("Slicing complete!")

# Step 2: Use .jpg Images for Training

# Define paths to your .jpg data
data_dir = '/content/drive/MyDrive/BRATS_JPG'

# Prepare image files and corresponding labels
image_files = []
labels = []
for label_dir in ['HGG', 'LGG']:
    label_path = os.path.join(data_dir, label_dir)
    label = 0 if label_dir == 'HGG' else 1  # HGG = 0, LGG = 1
    for fname in os.listdir(label_path):
        if fname.endswith('.jpg'):
            image_files.append(os.path.join(label_path, fname))
            labels.append(label)

# Define transformations
train_transforms = Compose([
    LoadImage(image_only=True),
    EnsureChannelFirst(),
    ScaleIntensity(),
    Resize(spatial_size=(128, 128)),
    RandAffine(prob=0.5, rotate_range=(0.1, 0.1)),
    RandRotate90(prob=0.5),
    ToTensor()
])

val_transforms = Compose([
    LoadImage(image_only=True),
    EnsureChannelFirst(),
    ScaleIntensity(),
    Resize(spatial_size=(128, 128)),
    ToTensor()
])

# Prepare the dataset
train_ds = ImageDataset(image_files, labels, transform=train_transforms)
val_ds = ImageDataset(image_files, labels, transform=val_transforms)

train_loader = DataLoader(train_ds, batch_size=8, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=8, shuffle=False)

# Define the model, loss, and optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DenseNet121(spatial_dims=2, in_channels=1, out_channels=2).to(device)
loss_function = DiceLoss(to_onehot_y=True, softmax=True)
optimizer = Adam(model.parameters(), lr=1e-4)

# Training loop and validation loop
num_epochs = 50
best_acc = 0.0
writer = SummaryWriter()

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    epoch_acc = 0
    for batch_data in train_loader:
        inputs, labels = batch_data[0].to(device), batch_data[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += (outputs.argmax(dim=1) == labels).float().mean().item()

    epoch_loss /= len(train_loader)
    epoch_acc /= len(train_loader)

    model.eval()
    val_loss = 0
    val_acc = 0
    with torch.no_grad():
        for val_data in val_loader:
            val_inputs, val_labels = val_data[0].to(device), val_data[1].to(device)
            val_outputs = model(val_inputs)
            val_loss += loss_function(val_outputs, val_labels).item()
            val_acc += (val_outputs.argmax(dim=1) == val_labels).float().mean().item()

    val_loss /= len(val_loader)
    val_acc /= len(val_loader)

    writer.add_scalar('Loss/train', epoch_loss, epoch)
    writer.add_scalar('Loss/val', val_loss, epoch)
    writer.add_scalar('Accuracy/train', epoch_acc, epoch)
    writer.add_scalar('Accuracy/val', val_acc, epoch)

    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}")

    # Save best model
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')

writer.close()

print("Training complete!")


  slice_data = (slice_data - slice_data.min()) / (slice_data.max() - slice_data.min())  # Normalize to [0, 1]
  slice_data = (255 * slice_data).astype(np.uint8)  # Scale to [0, 255]


KeyboardInterrupt: 

In [None]:
import os
from PIL import Image
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Define the path where the files are located
directory = '/content/drive/MyDrive/BRATS_JPG/HGG'

# Iterate over all files in the directory and attempt to open them
for root, dirs, files in os.walk(directory):
    for filename in files:
        file_path = os.path.join(root, filename)

        try:
            # Attempt to open the image to check if it's a valid image file
            with Image.open(file_path) as img:
                img.verify()  # This will raise an exception if the image is not valid
            print(f"Verified: {file_path}")

        except (IOError, SyntaxError) as e:
            print(f"File is corrupted or not a valid image: {file_path}, removing it.")
            os.remove(file_path)  # Remove the corrupt file

print("Image verification and cleanup complete.")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Verified: /content/drive/MyDrive/BRATS_JPG/HGG/BraTS19_TCIA05_396_1_t1ce.nii_116.jpg
Verified: /content/drive/MyDrive/BRATS_JPG/HGG/BraTS19_TCIA05_396_1_t1ce.nii_117.jpg
Verified: /content/drive/MyDrive/BRATS_JPG/HGG/BraTS19_TCIA05_396_1_t1ce.nii_118.jpg
Verified: /content/drive/MyDrive/BRATS_JPG/HGG/BraTS19_TCIA05_396_1_t1ce.nii_119.jpg
Verified: /content/drive/MyDrive/BRATS_JPG/HGG/BraTS19_TCIA05_396_1_t1ce.nii_120.jpg
Verified: /content/drive/MyDrive/BRATS_JPG/HGG/BraTS19_TCIA05_396_1_t1ce.nii_121.jpg
Verified: /content/drive/MyDrive/BRATS_JPG/HGG/BraTS19_TCIA05_396_1_t1ce.nii_122.jpg
Verified: /content/drive/MyDrive/BRATS_JPG/HGG/BraTS19_TCIA05_396_1_t1ce.nii_123.jpg
Verified: /content/drive/MyDrive/BRATS_JPG/HGG/BraTS19_TCIA05_396_1_t1ce.nii_124.jpg
Verified: /content/drive/MyDrive/BRATS_JPG/HGG/BraTS19_TCIA05_396_1_t1ce.nii_125.jpg
Verified: /content/drive/MyDrive/BRATS_JPG/HGG/BraTS19_TCIA05_396_1_t1ce.nii_126.jpg


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from monai.networks.nets import DenseNet121
from monai.transforms import Compose, ScaleIntensity, Resize, RandAffine, RandRotate, RandZoom, ToTensor
from monai.data import Dataset, DataLoader
from imblearn.over_sampling import RandomOverSampler
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import os
import glob
from sklearn.model_selection import train_test_split
import nibabel as nib

extract_dir = '/content/drive/MyDrive/BRATS'

# Define paths
hgg_path = os.path.join(extract_dir, "MICCAI_BraTS_2019_Data_Training", 'HGG')
lgg_path = os.path.join(extract_dir, "MICCAI_BraTS_2019_Data_Training", 'LGG')

# Define a function to find all t1ce .nii files within each subject's directory
def find_t1ce_files(root_dir, label):
    subject_dirs = [os.path.join(root_dir, d) for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
    file_paths = []
    for subject_dir in subject_dirs:
        t1ce_files = glob.glob(os.path.join(subject_dir, '*t1ce*.nii'))
        if t1ce_files:
            file_paths.append(t1ce_files[0])  # Taking the first t1ce file as an example
    return file_paths, [label] * len(file_paths)

# Find all t1ce files
hgg_files, hgg_labels = find_t1ce_files(hgg_path, 1)
lgg_files, lgg_labels = find_t1ce_files(lgg_path, 0)

all_files = hgg_files + lgg_files
all_labels = hgg_labels + lgg_labels

# Convert lists to numpy arrays for resampling
all_files_np = np.array(all_files)
all_labels_np = np.array(all_labels)

# Oversample the minority class
ros = RandomOverSampler(random_state=42)
resampled_files, resampled_labels = ros.fit_resample(all_files_np.reshape(-1, 1), all_labels_np)
resampled_files = resampled_files.flatten()

# Split resampled data
train_files, val_files, train_labels, val_labels = train_test_split(
    resampled_files, resampled_labels, test_size=0.2, stratify=resampled_labels, random_state=42
)

# Define transformations with augmentation
transforms = Compose([
    ScaleIntensity(),
    Resize((128, 128)),
    RandAffine(prob=0.5),
    RandRotate(range_x=(0, 15), prob=0.5),
    RandZoom(min_zoom=0.9, max_zoom=1.1, prob=0.5),
    ToTensor()
])

# Create custom dataset class for 2D slices
class CustomDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None, augment_minority=False):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        self.augment_minority = augment_minority

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        label = self.labels[idx]

        # Load the NIfTI file
        img = nib.load(image_path).get_fdata()

        # Choose a random slice along the axial plane (e.g., middle slice)
        slice_index = img.shape[2] // 2
        image_slice = img[:, :, slice_index]

        # Add channel dimension for grayscale
        image_slice = np.expand_dims(image_slice, axis=0)

        if self.augment_minority and label == 0:
            if self.transform:
                image_slice = self.transform(image_slice)
        else:
            if self.transform:
                image_slice = self.transform(image_slice)

        return {'image': image_slice, 'label': label}

# Create datasets and dataloaders
batch_size = 4
train_dataset = CustomDataset(train_files, train_labels, transform=transforms, augment_minority=True)
val_dataset = CustomDataset(val_files, val_labels, transform=transforms)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=2)

# Define the model for 2D input
model = DenseNet121(spatial_dims=2, in_channels=1, out_channels=2)

# Define optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_function = torch.nn.CrossEntropyLoss()

# Compute class weights
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(train_labels), y=train_labels)
class_weight_dict = dict(enumerate(class_weights))

# Training loop
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    for batch in train_loader:
        images = batch['image'].to(device)
        labels = batch['label'].to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        correct_predictions += (predicted == labels).sum().item()
        total_predictions += labels.size(0)

    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = correct_predictions / total_predictions
    print(f'Epoch {epoch + 1}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')

    # Validation step
    model.eval()
    val_loss = 0.0
    val_correct_predictions = 0
    val_total_predictions = 0

    with torch.no_grad():
        for batch in val_loader:
            images = batch['image'].to(device)
            labels = batch['label'].to(device)

            outputs = model(images)
            loss = loss_function(outputs, labels)
            val_loss += loss.item()

            # Calculate validation accuracy
            _, predicted = torch.max(outputs, 1)
            val_correct_predictions += (predicted == labels).sum().item()
            val_total_predictions += labels.size(0)

    val_epoch_loss = val_loss / len(val_loader)
    val_epoch_accuracy = val_correct_predictions / val_total_predictions
    print(f'Validation Loss: {val_epoch_loss:.4f}, Validation Accuracy: {val_epoch_accuracy:.4f}')

print('Training complete!')

  self.pid = os.fork()
  self.pid = os.fork()


Epoch 1, Loss: 0.6253, Accuracy: 0.6570
Validation Loss: 0.5586, Validation Accuracy: 0.6538
Epoch 2, Loss: 0.5694, Accuracy: 0.7101
Validation Loss: 0.5572, Validation Accuracy: 0.7019
Epoch 3, Loss: 0.5662, Accuracy: 0.6860
Validation Loss: 0.6547, Validation Accuracy: 0.6442
Epoch 4, Loss: 0.5669, Accuracy: 0.7053
Validation Loss: 0.4459, Validation Accuracy: 0.7788
Epoch 5, Loss: 0.5684, Accuracy: 0.7077
Validation Loss: 0.4789, Validation Accuracy: 0.7212
Epoch 6, Loss: 0.5263, Accuracy: 0.7319
Validation Loss: 0.4731, Validation Accuracy: 0.7500
Epoch 7, Loss: 0.5328, Accuracy: 0.7464
Validation Loss: 0.5784, Validation Accuracy: 0.5769
Epoch 8, Loss: 0.5077, Accuracy: 0.7440
Validation Loss: 0.4839, Validation Accuracy: 0.7596
Epoch 9, Loss: 0.4992, Accuracy: 0.7585
Validation Loss: 0.4137, Validation Accuracy: 0.8269
Epoch 10, Loss: 0.4938, Accuracy: 0.7633
Validation Loss: 0.6930, Validation Accuracy: 0.6058
Epoch 11, Loss: 0.5128, Accuracy: 0.7681
Validation Loss: 0.4098, Val