In [42]:
import numpy as np
import os
import cv2  # For resizing images
import pydicom  # For handling DICOM files
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [43]:
# dataset_1
def load_dicom_images(folder_path, image_size=(128, 128)):
    """Load every ``.dcm`` file in a folder as a normalized grayscale stack.

    Each file is read with ``pydicom.dcmread``; its ``pixel_array`` is resized
    with ``cv2.resize`` and then divided by its own maximum value.

    Parameters
    ----------
    folder_path : str
        Directory scanned (non-recursively) for file names ending in ``.dcm``.
    image_size : tuple of int, default (128, 128)
        Target size handed to ``cv2.resize`` as ``dsize``, i.e.
        ``(width, height)``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_images, height, width, 1)``. Images appear in
        sorted file-name order. An all-zero image stays all zeros.

    Notes
    -----
    Only the maximum is used for scaling, so an image containing negative
    pixel values will not end up inside ``[0, 1]``.
    """
    # cv2.resize takes (width, height) but returns an array shaped
    # (height, width); keep both names so the final reshape uses the right axes.
    width, height = image_size
    images = []

    # os.listdir returns entries in arbitrary order; sorting makes the position
    # of each image in the output reproducible from run to run.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".dcm"):
            continue

        # Load the DICOM file and pull out its pixel data as a NumPy array
        dicom_data = pydicom.dcmread(os.path.join(folder_path, filename))
        image = dicom_data.pixel_array

        # Resize to the common target size
        image_resized = cv2.resize(image, (width, height))

        # Scale by the image's own peak value. A blank (all-zero) image would
        # otherwise divide by zero and fill the sample with NaN.
        peak = np.max(image_resized)
        divisor = peak if peak != 0 else 1
        images.append(image_resized / divisor)

    # Stack and add the trailing channel dimension
    return np.array(images).reshape(-1, height, width, 1)

In [44]:
# dataset_2
def load_png_images(folder_path, image_size=(128, 128), extensions=(".jpeg",)):
    """Load raster image files from a folder as a normalized grayscale stack.

    Despite the function name, the default filter selects ``.jpeg`` files;
    pass ``extensions`` to load other formats (for example ``(".png",)``).

    Parameters
    ----------
    folder_path : str
        Directory scanned (non-recursively) for matching files.
    image_size : tuple of int, default (128, 128)
        Target size handed to ``cv2.resize`` as ``dsize``, i.e.
        ``(width, height)``.
    extensions : str or iterable of str, default (".jpeg",)
        File-name suffixes to accept. Matching is case-sensitive.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_images, height, width, 1)`` with pixel values
        divided by 255. Images appear in sorted file-name order.

    Raises
    ------
    ValueError
        If a matching file cannot be read by ``cv2.imread``.
    """
    # Accept a single suffix as well as a collection of suffixes
    if isinstance(extensions, str):
        extensions = (extensions,)
    extensions = tuple(extensions)

    # cv2.resize takes (width, height) but returns an array shaped
    # (height, width); keep both names so the final reshape uses the right axes.
    width, height = image_size
    images = []

    # os.listdir returns entries in arbitrary order; sorting makes the position
    # of each image in the output reproducible from run to run.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(extensions):
            continue

        # Read the image file as single-channel grayscale
        image_path = os.path.join(folder_path, filename)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cv2.resize.
            raise ValueError(f"Could not read image file: {image_path}")

        # Resize to the common target size
        image_resized = cv2.resize(image, (width, height))

        # Rescale pixel values to [0, 1]
        images.append(image_resized / 255.0)

    # Stack and add the trailing channel dimension
    return np.array(images).reshape(-1, height, width, 1)

In [54]:
# Load both image sets and the label table, then create the train/test split.
# NOTE: these paths are absolute and machine-specific; adjust them when running elsewhere.
folder_path1 = '/MYWork/NU/ML/ML Project/Dataset'
folder_path2 = "/MYWork/NU/ML/ML Project/Dataset2"
images1 = load_dicom_images(folder_path1)
images2 = load_png_images(folder_path2)

# Stack along the sample axis: all DICOM images first, then the second dataset
images = np.concatenate([images1, images2], axis=0)

labels = pd.read_csv("/MYWork/NU/ML/ML Project/image_lables.csv")
labels_flattend = labels["image_labeling"].values.ravel()

# Keep the fitted encoder so encoded values can be mapped back to the original
# class names later (label_encoder.classes_ / label_encoder.inverse_transform).
label_encoder = LabelEncoder()
labels_Encoded = label_encoder.fit_transform(labels_flattend).astype(float)

# Images and labels are paired purely by position, so the counts must match.
# NOTE(review): this also assumes the CSV rows are in the same order as the
# images returned by the two loaders - confirm against how the CSV was built.
if len(images) != len(labels_Encoded):
    raise ValueError(
        f"Found {len(images)} images but {len(labels_Encoded)} labels; "
        "each image needs exactly one label row."
    )

X_train, X_test, y_train, y_test = train_test_split(
    images, labels_Encoded, test_size=0.2, random_state=42
)

(20,)