In [1]:
import pandas as pd
from PIL import Image

from imgaug import augmenters as iaa
import imageio
import numpy as np
import os
from imgaug.augmentables.kps import KeypointsOnImage, Keypoint

import cv2
from pathlib import Path

In [3]:
### Divide image into quadrants, keeping track of kinetoplast and nucleus
# Annotation table. The splitting code in this cell reads its 'filename', 'x'
# and 'y' columns to decide which quadrant each annotated point falls into.
data = pd.read_csv('/Users/clee/Documents/Nolan_Lab/chagas/peerj-10-13470-s001_all.csv')

# Output folders: a quadrant holding at least one annotated point is saved under
# pos_dir, every other quadrant under neg_dir.
pos_dir = '/Users/clee/Documents/Nolan_Lab/chagas/split_images/positive_images'
neg_dir = '/Users/clee/Documents/Nolan_Lab/chagas/split_images/negative_images'

def is_in_quadrant(x, y, quad, width, height):
    """Report whether the point (x, y) lies in quadrant `quad` of an image.

    The image is split at x = width // 2 and y = height // 2:

        1: x <  width // 2 and y <  height // 2
        2: x >= width // 2 and y <  height // 2
        3: x <  width // 2 and y >= height // 2
        4: x >= width // 2 and y >= height // 2

    A point lying exactly on a split line therefore belongs to the quadrant
    with the larger coordinate.

    Returns:
        The truth value of the test above, or None when `quad` is not one
        of 1, 2, 3, 4.
    """
    if quad not in (1, 2, 3, 4):
        return None

    low_x_side = quad in (1, 3)
    low_y_side = quad in (1, 2)

    mid_x = width // 2
    x_matches = x < mid_x if low_x_side else x >= mid_x
    # `and` short-circuits: y is only compared once the x test has passed.
    return x_matches and (y < height // 2 if low_y_side else y >= height // 2)
    
def process_image(image_name, data, pos_dir, neg_dir,
                  image_dir='/Users/clee/Documents/Nolan_Lab/chagas/all'):
    """Split one image into four quadrants and file each one as positive or negative.

    A quadrant is "positive" when at least one row of `data` for this image
    has an (x, y) point that `is_in_quadrant` places inside it.

    Args:
        image_name: File name of the image inside `image_dir`; it is also the
            value matched against the 'filename' column of `data`.
        data: DataFrame with 'filename', 'x' and 'y' columns.
        pos_dir: Folder receiving quadrants that contain an annotated point.
        neg_dir: Folder receiving all other quadrants.
        image_dir: Folder holding the source images. Defaults to the location
            that was previously hard-coded in this function.

    Raises:
        FileNotFoundError: if the source image does not exist.

    Each quadrant is written as '<stem>_quad<N>.jpg', where <stem> is the
    file name without its final extension.
    """
    image_path = os.path.join(image_dir, image_name)
    # splitext only strips the last extension, so 'a.b.jpg' keeps the stem 'a.b'
    # rather than collapsing to 'a' and colliding with other files.
    stem = os.path.splitext(image_name)[0]

    with Image.open(image_path) as img:
        width, height = img.size
        half_w, half_h = width // 2, height // 2

        # Crop boxes are (left, upper, right, lower), keyed by quadrant number.
        boxes = {
            1: (0, 0, half_w, half_h),
            2: (half_w, 0, width, half_h),
            3: (0, half_h, half_w, height),
            4: (half_w, half_h, width, height),
        }

        # Check if quadrant contains t.cruzi
        annotations = data[data['filename'] == image_name]
        quad_contains_tcruzi = {quad: False for quad in boxes}
        for x, y in zip(annotations['x'], annotations['y']):
            for quad in boxes:
                if is_in_quadrant(x, y, quad, width, height):
                    quad_contains_tcruzi[quad] = True

        # Create the output folders only after the source image opened, so a
        # missing source is still the only cause of FileNotFoundError here and
        # a missing output folder is no longer misreported as a missing image.
        os.makedirs(pos_dir, exist_ok=True)
        os.makedirs(neg_dir, exist_ok=True)

        for quad, box in boxes.items():
            target_dir = pos_dir if quad_contains_tcruzi[quad] else neg_dir
            img.crop(box).save(os.path.join(target_dir, f'{stem}_quad{quad}.jpg'))

# Process each image in the dataset
# Candidate names run from field0001.jpg to field0704.jpg; names with no file
# on disk are reported and skipped by the loop below.
image_files = [f"field{str(i).zfill(4)}.jpg" for i in range(1, 705)]  

for image_name in image_files:
    try:
        process_image(image_name, data, pos_dir, neg_dir)
    except FileNotFoundError:
        # NOTE(review): a FileNotFoundError raised while saving a quadrant
        # (e.g. a missing output folder) would presumably land here too and be
        # reported as a missing image — confirm the output folders exist.
        print(f"Image {image_name} not found. Skipping...")

# List what ended up in each output folder.
positive_images = os.listdir(pos_dir)
negative_images = os.listdir(neg_dir)
print("Positive images:", positive_images)
print("Negative images:", negative_images)

Image field0002.jpg not found. Skipping...
Image field0003.jpg not found. Skipping...
Image field0013.jpg not found. Skipping...
Image field0026.jpg not found. Skipping...
Image field0031.jpg not found. Skipping...
Image field0059.jpg not found. Skipping...
Image field0063.jpg not found. Skipping...
Image field0126.jpg not found. Skipping...
Image field0156.jpg not found. Skipping...
Image field0180.jpg not found. Skipping...
Image field0246.jpg not found. Skipping...
Image field0254.jpg not found. Skipping...
Image field0289.jpg not found. Skipping...
Image field0308.jpg not found. Skipping...
Image field0327.jpg not found. Skipping...
Image field0337.jpg not found. Skipping...
Image field0352.jpg not found. Skipping...
Image field0361.jpg not found. Skipping...
Image field0402.jpg not found. Skipping...
Image field0447.jpg not found. Skipping...
Image field0451.jpg not found. Skipping...
Image field0455.jpg not found. Skipping...
Image field0462.jpg not found. Skipping...
Image field

In [14]:
### Augment Negative images
# Blacken areas due to rotation
def blacken(image, threshold=500):
    """Set every pixel whose channel mean exceeds `threshold` to [0, 0, 0].

    The array is modified in place and the same object is returned. The mean
    is taken over axis 2, and the replacement has three components.

    NOTE(review): if the input is 8-bit, the channel mean can never exceed
    255, so with the default threshold of 500 no pixel is selected and the
    image comes back unchanged — confirm the intended threshold.
    """
    too_bright = np.mean(image, axis=2) > threshold
    image[too_bright] = [0, 0, 0]
    return image

def augment_negative_image(image_path, seq, output_directory, filename, aug_count):
    """Write `aug_count` augmented PNG copies of one image.

    Args:
        image_path: Path of the image to read.
        seq: Augmenter providing `to_deterministic()`; the object it returns
            is called as `augmenter(image=...)`.
        output_directory: Base output folder. Files are written to its
            'negative_images' subfolder, which is created if missing.
        filename: Original file name; its stem is reused for the outputs.
        aug_count: Number of augmented copies to write.

    Outputs are named '<stem>_augmented_<i>.png' for i in range(aug_count).
    """
    image = imageio.imread(image_path)

    # Make sure the destination exists instead of failing on the first write.
    target_dir = os.path.join(output_directory, 'negative_images')
    os.makedirs(target_dir, exist_ok=True)
    stem = os.path.splitext(filename)[0]

    for i in range(aug_count):
        # A fresh deterministic copy is taken on every pass, so each output
        # presumably gets its own random parameters — see the imgaug docs.
        seq_det = seq.to_deterministic()
        image_aug = seq_det(image=image)
        image_aug = blacken(image_aug)

        save_path = os.path.join(target_dir, f"{stem}_augmented_{i}.png")
        imageio.imwrite(save_path, image_aug)

# Augmentations
# One pipeline shared by every negative image: two flips and one affine
# transform, with random_order=True set on the enclosing Sequential.
seq = iaa.Sequential([
    iaa.Fliplr(0.5),
    iaa.Flipud(0.2), 
    iaa.Affine(
        rotate=(-160, 160),  
        scale={"x": (0.9, 1.1), "y": (0.9, 1.1)},  
        translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)}, 
        shear=(-5, 5), 
        cval=0  # presumably the fill value for pixels exposed by the transform — confirm in imgaug docs
    )
], random_order=True)  

# Source folder of negative quadrants, and the base folder for augmented output
# (augment_negative_image writes into its 'negative_images' subfolder).
input_directory = '/Users/clee/Documents/Nolan_Lab/chagas/split_images/negative_images'
output_directory = '/Users/clee/Documents/Nolan_Lab/chagas/split_images/augmented_images'

# Number of augmented copies written per input image.
aug_count = 2

# Augment every file in the source folder that has an image extension.
for filename in os.listdir(input_directory):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        image_path = os.path.join(input_directory, filename)
        augment_negative_image(image_path, seq, output_directory, filename, aug_count)




  image = imageio.imread(image_path)


In [3]:
### Standardize - Rescale images to 1224x1632 & convert to png
def resize_and_convert_image(image_path, output_size, output_dir):
    """Resize one image, force it to RGB and save it as a PNG in `output_dir`.

    Args:
        image_path: Path of the image to read.
        output_size: Target size handed to `Image.resize` (PIL expects
            (width, height)).
        output_dir: Destination folder; created if it does not exist.

    The output keeps the source file's stem with a '.png' extension, and the
    saved path is printed.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Image.ANTIALIAS was a deprecated alias of the LANCZOS filter and no longer
    # exists in Pillow 10. Newer releases expose the filter on Image.Resampling,
    # older ones directly on the Image module.
    lanczos = getattr(Image, 'Resampling', Image).LANCZOS

    with Image.open(image_path) as img:

        if img.mode != 'RGB':
            img = img.convert('RGB')

        img = img.resize(output_size, lanczos)

        stem = os.path.splitext(os.path.basename(image_path))[0]
        output_file_path = os.path.join(output_dir, stem + '.png')

        img.save(output_file_path, 'PNG')
        print(f'Saved: {output_file_path}')

def process_directory(input_dir, output_dir, output_size):
    """Resize and convert every image under `input_dir`, mirroring its folder layout.

    Walks `input_dir` recursively. Each file whose path ends in .png, .jpg or
    .jpeg (case-insensitive) is passed to `resize_and_convert_image`, with the
    destination folder being `output_dir` joined with the file's folder
    relative to `input_dir`.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    for current_dir, _subdirs, names in os.walk(input_dir):
        for name in names:
            source_path = os.path.join(current_dir, name)
            if not source_path.lower().endswith(image_suffixes):
                continue

            mirrored_dir = os.path.join(output_dir, os.path.relpath(current_dir, input_dir))
            resize_and_convert_image(source_path, output_size, mirrored_dir)

# Size handed to Image.resize by resize_and_convert_image. PIL expects
# (width, height).
# NOTE(review): the heading says "1224x1632" — confirm 1224 is meant as the width.
desired_size = (1224, 1632)

# Images are read from the 'original' tree and written, with the same
# subfolder layout, to the 'processed' tree.
base_input_directory = '/Users/clee/Documents/Nolan_Lab/chagas/test_images/original'
base_output_directory = '/Users/clee/Documents/Nolan_Lab/chagas/test_images/processed' 

process_directory(base_input_directory, base_output_directory, desired_size)



  img = img.resize(output_size, Image.ANTIALIAS)


Saved: /Users/clee/Documents/Nolan_Lab/chagas/test_images/processed/positive_images/field0443_quad1.png
Saved: /Users/clee/Documents/Nolan_Lab/chagas/test_images/processed/positive_images/field0553_quad3.png
Saved: /Users/clee/Documents/Nolan_Lab/chagas/test_images/processed/positive_images/field0079_quad3.png
Saved: /Users/clee/Documents/Nolan_Lab/chagas/test_images/processed/positive_images/field0157_quad4.png
Saved: /Users/clee/Documents/Nolan_Lab/chagas/test_images/processed/positive_images/field0286_quad3.png
Saved: /Users/clee/Documents/Nolan_Lab/chagas/test_images/processed/positive_images/field0396_quad1.png
Saved: /Users/clee/Documents/Nolan_Lab/chagas/test_images/processed/positive_images/field0675_quad1.png
Saved: /Users/clee/Documents/Nolan_Lab/chagas/test_images/processed/positive_images/field0415_quad3.png
Saved: /Users/clee/Documents/Nolan_Lab/chagas/test_images/processed/positive_images/field0227_quad4.png
Saved: /Users/clee/Documents/Nolan_Lab/chagas/test_images/proces

In [3]:
### White balance images
def white_balance(img):
    """Return `img` white-balanced by OpenCV's simple white-balance algorithm.

    A new balancer is created via `cv2.xphoto.createSimpleWB()` on each call
    and its P parameter is set to 0.5 before balancing.
    """
    balancer = cv2.xphoto.createSimpleWB()
    balancer.setP(0.5)
    return balancer.balanceWhite(img)

def process_images(source_directory, target_directory):
    """White-balance every image under `source_directory` into `target_directory`.

    Walks `source_directory` recursively. Files whose path ends in .png, .jpg
    or .jpeg (case-insensitive) are read with `cv2.imread`, passed through
    `white_balance` and written under the same relative subfolder and file
    name inside `target_directory`. Folders are created as needed.

    One line is printed per image: success, read failure (`cv2.imread`
    returned None) or write failure (`cv2.imwrite` returned a falsy value).
    """
    os.makedirs(target_directory, exist_ok=True)

    for subdir, dirs, files in os.walk(source_directory):
        for file in files:
            filepath = os.path.join(subdir, file)
            if not filepath.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue

            img = cv2.imread(filepath)
            if img is None:
                print(f"Failed to read image: {filepath}")
                continue

            img_balanced = white_balance(img)
            relative_path = os.path.relpath(subdir, source_directory)
            target_subdir = os.path.join(target_directory, relative_path)
            os.makedirs(target_subdir, exist_ok=True)
            output_file_path = os.path.join(target_subdir, file)

            # cv2.imwrite signals failure through its return value, so only
            # report success when the file was actually written.
            if cv2.imwrite(output_file_path, img_balanced):
                print(f"Processed and saved white-balanced image: {output_file_path}")
            else:
                print(f"Failed to write image: {output_file_path}")

# White-balance the resized images from the 'processed' tree into a parallel
# 'processed_whitebalance' tree.
source_directory = '/Users/clee/Documents/Nolan_Lab/chagas/test_images/processed'
target_directory = '/Users/clee/Documents/Nolan_Lab/chagas/test_images/processed_whitebalance'

process_images(source_directory, target_directory)

Processed and saved white-balanced image: /Users/clee/Documents/Nolan_Lab/chagas/test_images/processed_whitebalance/positive_images/field0379_quad3.png
Processed and saved white-balanced image: /Users/clee/Documents/Nolan_Lab/chagas/test_images/processed_whitebalance/positive_images/field0578_quad2.png
Processed and saved white-balanced image: /Users/clee/Documents/Nolan_Lab/chagas/test_images/processed_whitebalance/positive_images/field0052_quad2.png
Processed and saved white-balanced image: /Users/clee/Documents/Nolan_Lab/chagas/test_images/processed_whitebalance/positive_images/field0608_quad2.png
Processed and saved white-balanced image: /Users/clee/Documents/Nolan_Lab/chagas/test_images/processed_whitebalance/positive_images/field0322_quad2.png
Processed and saved white-balanced image: /Users/clee/Documents/Nolan_Lab/chagas/test_images/processed_whitebalance/positive_images/field0686_quad2.png
Processed and saved white-balanced image: /Users/clee/Documents/Nolan_Lab/chagas/test_im

In [13]:
### Count number of quadrants
def count_quadrants(directory):
    """Count the .png files in `directory` by the quadrant tag in their name.

    Only names ending in '.png' (case-sensitive) are considered. A file is
    credited to the first of 'quad1', 'quad2', 'quad3', 'quad4' that occurs
    anywhere in its name; a .png with no tag still counts towards the total.

    Returns:
        (quad_counts, quad_percentages): two dicts keyed by 'quad1'..'quad4'.
        Percentages are relative to the number of .png files, and are all 0
        when the directory contains none.
    """
    tags = ('quad1', 'quad2', 'quad3', 'quad4')
    quad_counts = dict.fromkeys(tags, 0)

    png_names = [name for name in os.listdir(directory) if name.endswith('.png')]
    for name in png_names:
        first_tag = next((tag for tag in tags if tag in name), None)
        if first_tag is not None:
            quad_counts[first_tag] += 1

    total_files = len(png_names)
    if total_files == 0:
        return quad_counts, {tag: 0 for tag in quad_counts}

    quad_percentages = {
        tag: (hits / total_files) * 100 for tag, hits in quad_counts.items()
    }
    return quad_counts, quad_percentages

# Folder to summarise; swap the commented line in to count the positive set instead.
#directory_path = '/Users/clee/Documents/Nolan_Lab/chagas/test_images/processed_whitebalance/positive_images'
directory_path = '/Users/clee/Documents/Nolan_Lab/chagas/test_images/processed_whitebalance/negative_images'

quad_counts, quad_percentages = count_quadrants(directory_path)

# Report the per-quadrant file counts, then their share of all .png files.
print("Counts:")
for quad, count in quad_counts.items():
    print(f"{quad}: {count}")
print("Percentages:")
for quad, percentage in quad_percentages.items():
    print(f"{quad}: {percentage:.2f}%")

Counts:
quad1: 428
quad2: 622
quad3: 209
quad4: 486
Percentages:
quad1: 24.53%
quad2: 35.64%
quad3: 11.98%
quad4: 27.85%
