In [40]:
import numpy as np
import os
import cv2  # For resizing images
import pydicom  # For handling DICOM files
import pandas as pd
from sklearn.model_selection import train_test_split

In [50]:
def load_dicom_images(folder_path, image_size=(128, 128)):
    """Load every ``.dcm`` file in a folder as a resized, scaled image array.

    Each matching file is read with pydicom, resized with OpenCV, divided by
    its own maximum pixel value, and paired with a synthetic label derived
    from the number embedded in its filename.

    Parameters
    ----------
    folder_path : str
        Directory scanned (non-recursively) for names ending in ``.dcm``.
    image_size : sequence of int, default (128, 128)
        Target size as ``(image_size[0], image_size[1])`` = (rows, columns)
        of each returned image.

    Returns
    -------
    images : numpy.ndarray
        Array of shape ``(n_files, image_size[0], image_size[1], 1)``.
        With no matching files the first dimension is 0.
    labels : list of str
        ``"normal"`` when the file number is even, ``"abmormal"`` otherwise,
        in the same order as ``images``.

    Raises
    ------
    IndexError, ValueError
        If a ``.dcm`` filename does not contain ``-<integer>.`` (for example
        ``1-001.dcm``), so the file number cannot be parsed.
    """
    height, width = image_size[0], image_size[1]
    images = []
    labels = []

    # os.listdir yields names in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded downstream split) reproducible.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".dcm"):
            continue

        # Load the DICOM file and take its pixel data as a NumPy array
        dicom_path = os.path.join(folder_path, filename)
        dicom_data = pydicom.dcmread(dicom_path)
        image = dicom_data.pixel_array

        # cv2.resize expects dsize as (width, height) and returns an array of
        # shape (height, width), which is what the final reshape relies on.
        image_resized = cv2.resize(image, (width, height))

        # Scale by the image's own maximum (gives [0, 1] for non-negative
        # pixel data). A maximum of zero, e.g. a blank slice, cannot be used
        # as a divisor, so such an image is left unscaled instead of being
        # turned into NaN/inf.
        peak = np.max(image_resized)
        if peak != 0:
            image_normalized = image_resized / peak
        else:
            image_normalized = image_resized.astype(np.float64)
        images.append(image_normalized)

        # Extract the number from the filename, e.g. 001 from 1-001.dcm
        file_number = int(filename.split('-')[1].split('.')[0])
        # Synthetic label: even file numbers are "normal", odd ones are not.
        # NOTE(review): "abmormal" looks like a typo for "abnormal"; the
        # spelling is kept because it is part of the returned data.
        label = "normal" if file_number % 2 == 0 else "abmormal"
        labels.append(label)

    # Stack into one array and add a trailing channel dimension
    images = np.array(images).reshape(-1, height, width, 1)

    return images, labels

In [69]:
# Read every DICOM image in the dataset folder together with its label
folder_path = '/MYWork/NU/ML/Dataset'
images, labels = load_dicom_images(folder_path)

# Put the label strings in a one-column frame and one-hot encode that column
labels = pd.DataFrame(labels, columns=["Case"])
labels_one_hot = pd.get_dummies(labels)
# A bare expression that is not the cell's last statement is not rendered
# by default; move it to the end of a cell to inspect the encoded frame.
labels_one_hot
labels_one_hot_np = labels_one_hot.to_numpy(dtype=np.float32)

# Hold out 20% of the samples; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    images,
    labels_one_hot_np,
    test_size=0.2,
    random_state=42,
)
