In [1]:
from pathlib import Path
import pandas as pd

data_dir = Path('../data/raw/')
labels_file = data_dir / 'labels_raw.csv'

# Read raw tag data
labels_df = pd.read_csv(labels_file, sep='\s+', header=None, names=['image_id', 'probability', 'type'])

# Modify the probability value to only keep two decimal places
labels_df['probability'] = labels_df['probability'].apply(lambda x: round(x, 2))

# return data to labels.csv
labels_df.to_csv(labels_file, sep=' ', index=False, header=False)

In [2]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from PIL import Image
import pandas as pd
from pathlib import Path
import shutil
import numpy as np
import csv
# Image data generator settings
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest',
    horizontal_flip=True
)

# file path
data_dir = Path('../data/raw/')
labels_file = data_dir / 'labels_raw.csv'
images_dir = data_dir / 'images_raw'
image_id_old = "images/cell"

# Create new image folder path
new_images_dir = Path('../data/raw/images/')
new_images_dir.mkdir(parents=True, exist_ok=True)

# Read tag data
labels_df = pd.read_csv(labels_file, sep='\s+', header=None, names=['image_id', 'probability', 'type'])

# Copy original images to new images folder
for image_file in images_dir.iterdir():
    shutil.copy(image_file, new_images_dir)


# Set new label file
new_labels_file = Path('../data/raw') / 'labels_agu.csv'
# Copy the original label data directly to the new file
new_labels_list2 = list(labels_df.to_records(index=False))

# Convert DataFrame to list, each element is a tuple with three fields
new_labels_list2 = [(row['image_id'], row['probability'], row['type']) for index, row in labels_df.iterrows()]
print(new_labels_list2[:100])

# Determine the starting ID of augmented images
start_new_id = 2625

class_2_df = labels_df[labels_df['probability'] == 0.33]
class_3_df = labels_df[labels_df['probability'] == 0.67]
# Number of existing images in each class
num_existing_class_2 = len(class_2_df)
num_existing_class_3 = len(class_3_df)

# Calculate the number of images needed for each class
total_images_needed = 400
num_images_to_generate_class_2 = total_images_needed - num_existing_class_2
print(num_images_to_generate_class_2)
num_images_to_generate_class_3 = total_images_needed - num_existing_class_3


# Images of management category 2
for index, row in class_2_df.iterrows():
    if num_images_to_generate_class_2 <= 0:
        break
    img = load_img(data_dir / row['image_id'])
    img_array = img_to_array(img)
    img_array = img_array.reshape((1,) + img_array.shape)

    i = 0
    while i < num_images_to_generate_class_2:
        batch = next(datagen.flow(img_array, batch_size=1))
        new_image_id = f'cell{start_new_id}.png'
        new_image_id2 = f"{image_id_old}{start_new_id + i}.png"
        new_labels_list2.append([new_image_id2, row['probability'], row['type']])

        # Convert Num Py array to image and save
        img_to_save = Image.fromarray(np.uint8(batch[0]))
        img_to_save.save(new_images_dir / new_image_id)
        num_existing_class_2 +=1
        start_new_id += 1
        num_images_to_generate_class_2 = total_images_needed - num_existing_class_2
        print(i,num_existing_class_2,num_images_to_generate_class_2)
# Processing images of category 3
for index, row in class_3_df.iterrows():
    if num_images_to_generate_class_3 <= 0:
        break
    img = load_img(data_dir / row['image_id'])
    img_array = img_to_array(img)
    img_array = img_array.reshape((1,) + img_array.shape)
    i = 0
    while i < num_images_to_generate_class_3:
        batch = next(datagen.flow(img_array, batch_size=1))
        new_image_id = f'cell{start_new_id}.png'
        new_image_id2 = f"{image_id_old}{start_new_id + i}.png"
        new_labels_list2.append([new_image_id2, row['probability'], row['type']])
        # Convert Num Py array to image and save
        img_to_save = Image.fromarray(np.uint8(batch[0]))
        img_to_save.save(new_images_dir / new_image_id)
        num_existing_class_3 +=1
        start_new_id += 1
        num_images_to_generate_class_3 = total_images_needed - num_existing_class_3
        print(i,num_existing_class_3,num_images_to_generate_class_3)

# Convert new label list to Data Frame
new_labels_df = pd.DataFrame(new_labels_list2, columns=['image_id', 'probability', 'type'])

# Save new label information to CS V file
new_labels_df.to_csv(new_labels_file, index=False, sep=' ', header=None)

print("The enhanced image has been generated and saved, and the corresponding label information has been updated to 'labels_agu.csv'。")

[('images/cell0001.png', 1.0, 'mono'), ('images/cell0002.png', 1.0, 'mono'), ('images/cell0003.png', 1.0, 'mono'), ('images/cell0004.png', 0.0, 'mono'), ('images/cell0005.png', 1.0, 'mono'), ('images/cell0006.png', 1.0, 'mono'), ('images/cell0007.png', 1.0, 'mono'), ('images/cell0008.png', 1.0, 'mono'), ('images/cell0009.png', 0.0, 'mono'), ('images/cell0010.png', 1.0, 'mono'), ('images/cell0011.png', 0.0, 'mono'), ('images/cell0012.png', 1.0, 'mono'), ('images/cell0013.png', 1.0, 'mono'), ('images/cell0014.png', 1.0, 'mono'), ('images/cell0015.png', 1.0, 'mono'), ('images/cell0016.png', 1.0, 'mono'), ('images/cell0017.png', 1.0, 'mono'), ('images/cell0018.png', 1.0, 'mono'), ('images/cell0019.png', 1.0, 'mono'), ('images/cell0020.png', 1.0, 'mono'), ('images/cell0021.png', 1.0, 'mono'), ('images/cell0022.png', 1.0, 'mono'), ('images/cell0023.png', 1.0, 'mono'), ('images/cell0024.png', 1.0, 'mono'), ('images/cell0025.png', 1.0, 'mono'), ('images/cell0026.png', 1.0, 'mono'), ('images/ce