# Data Preprocessing

In [1]:
import cv2
import os

def load_images_from_folder(folder):
    images = []
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder, filename))
        if img is not None:
            images.append(img)
    return images

# Load images
image_folder = '/Users/alirazi/Downloads/Data-2/24_chromosomes_object'
images = load_images_from_folder(image_folder)

# Preprocess images (e.g., resize, normalize, enhance contrast)
# Implement advanced preprocessing for separating overlapping chromosomes.


In [2]:
import cv2
import numpy as np

def preprocess_image(image, target_size=(128, 128)):
    # Resize the image to the target size
    image = cv2.resize(image, target_size)
    
    # Normalize pixel values to the range [0, 1]
    image = image.astype('float32') / 255.0
    
    # Enhance contrast using histogram equalization
    image = cv2.equalizeHist(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    
    return image

# Example usage
preprocessed_images = [preprocess_image(image) for image in images]


# Data Splitting

In [3]:
import os
import xml.etree.ElementTree as ET  # Assuming your annotations are in XML format

def load_annotations(annotation_dir):
    annotations = []

    # Iterate through XML annotation files in the directory
    for filename in os.listdir(annotation_dir):
        if filename.endswith('.xml'):
            annotation_path = os.path.join(annotation_dir, filename)
            annotations.append(annotation_path)

    return annotations


In [4]:
import pathlib 
path_train_eda = "/Users/alirazi/Downloads/Data-2/24_chromosomes_object/JEPG"
data_train_eda_dir = pathlib.Path(path_train_eda)

In [5]:
import pathlib 
path_test_eda = "/Users/alirazi/Downloads/Data-2/24_chromosomes_object/JEPG"
data_test_eda_dir = pathlib.Path(path_test_eda)

In [6]:
x_test_eda_dir = os.path.join(data_test_eda_dir, 'test')
y_test_eda_dir = os.path.join(data_test_eda_dir, 'testannot')

In [7]:
x_train_dir = os.path.join(data_train_eda_dir, 'train')
y_train_dir = os.path.join(data_train_eda_dir, 'trainannot')

In [8]:
import os
import pathlib

# Define the directory paths
path_train_eda = "/Users/alirazi/Downloads/Data-2/24_chromosomes_object/JEPG"
path_test_eda = "/Users/alirazi/Downloads/Data-2/24_chromosomes_object/JEPG"

# Convert paths to pathlib.Path objects
data_train_eda_dir = pathlib.Path(path_train_eda)
data_test_eda_dir = pathlib.Path(path_test_eda)

# Define the subdirectories for training and testing data
x_train_dir = data_train_eda_dir / 'train'
y_train_dir = data_train_eda_dir / 'trainannot'
x_test_dir = data_test_eda_dir / 'test'
y_test_dir = data_test_eda_dir / 'testannot'


In [9]:
import os
import cv2
import xml.etree.ElementTree as ET

def load_data(image_dir, annotation_dir):
    images = []
    labels = []

    for filename in os.listdir(image_dir):
        if filename.endswith('.jpg'):
            image_path = os.path.join(image_dir, filename)
            annotation_path = os.path.join(annotation_dir, filename.replace('.jpg', '.xml'))

            # Load the image
            image = cv2.imread(image_path)
            images.append(image)

            # Load and parse the XML annotation
            tree = ET.parse(annotation_path)
            root = tree.getroot()

            # Assuming a binary classification problem where 'chromosome' is class 1, and 'non-chromosome' is class 0
            label = 1 if any(obj.find('name').text == 'chromosome' for obj in root.findall('object')) else 0
            labels.append(label)

    return images, labels

# Define paths to image and annotation directories
image_dir = '/Users/alirazi/Downloads/Data-2/24_chromosomes_object/JEPG'
annotation_dir = '/Users/alirazi/Downloads/Data-2/24_chromosomes_object/annotations'

# Load data
images, labels = load_data(image_dir, annotation_dir)


In [10]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)


In [12]:
from tensorflow import keras

model = keras.Sequential([
    keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(2, activation='softmax')  # Two output classes: chromosome or non-chromosome
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


2023-10-14 13:21:54.134392: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


NameError: name 'img_height' is not defined

In [11]:
# Define functions for loading and preprocessing
def load_and_preprocess_data(image_path, annotation_path):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)  # Adjust channels as per your images
    image = tf.image.resize(image, (img_height, img_width))
    image = tf.image.per_image_standardization(image)  # Normalize pixel values
    
    # Load and preprocess your annotations here
    annotation = preprocess_annotations(annotation_path)

    return image, annotation


In [12]:
import os
import xml.etree.ElementTree as ET
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Define paths to image and annotation directories
image_dir = '/Users/alirazi/Downloads/Data-2/24_chromosomes_object/JEPG'
annotation_dir = '/Users/alirazi/Downloads/Data-2/24_chromosomes_object/annotations'

# Function to extract ROIs from annotations
def extract_rois_from_annotations(annotation_dir, image_dir):
    rois = []
    labels = []
    for filename in os.listdir(annotation_dir):
        if filename.endswith('.xml'):
            annotation_path = os.path.join(annotation_dir, filename)
            image_filename = os.path.splitext(filename)[0] + '.jpg'
            image_path = os.path.join(image_dir, image_filename)

            tree = ET.parse(annotation_path)
            root = tree.getroot()

            for object in root.findall('object'):
                name = object.find('name').text
                bndbox = object.find('bndbox')
                xmin = int(bndbox.find('xmin').text)
                ymin = int(bndbox.find('ymin').text)
                xmax = int(bndbox.find('xmax').text)
                ymax = int(bndbox.find('ymax').text)

                # Load the image
                img = cv2.imread(image_path)
                roi = img[ymin:ymax, xmin:xmax]

                rois.append(roi)
                labels.append(1 if 'chromosome' in name else 0)  # Modify as needed

    return rois, labels




In [None]:
# Extract ROIs and labels from annotations
rois, labels = extract_rois_from_annotations(annotation_dir, image_dir)

In [None]:

# Split your data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(rois, labels, test_size=0.2, random_state=42)

In [None]:



# Convert your data to NumPy arrays
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

# Now you have X_train, y_train for training and X_test, y_test for testing

In [12]:
patha0='/Users/alirazi/Downloads/Data-2/24_chromosomes_object/annotations/103064.xml'
image = cv2.imread(path0)
tree = ET.parse(patha0)

NameError: name 'path0' is not defined