In [1]:
import numpy as np
import glob
import os
import random
import re

#Function to get list of image_paths in one folder
def get_image_path_list(folder_path):
    """Return the paths of all ``.tif`` files directly inside ``folder_path``.

    The search is not recursive.

    Args:
        folder_path: Directory to search.

    Returns:
        list[str]: Paths of the form ``"<folder_path>/<name>.tif"``, sorted so
        the result does not depend on file-system enumeration order. The list
        is empty when nothing matches (including a non-existent folder).
    """
    # Escape glob metacharacters ("[", "]", "*", "?") that may occur in the
    # folder name so that only the trailing "*.tif" is treated as a pattern.
    pattern = f"{glob.escape(str(folder_path))}/*.tif"
    return sorted(glob.glob(pattern))

"""
Shuffle a list and split it into n new, non-overlapping lists:
num_lists: number of new lists to create
items_per_list: number of items in each new list
"""
def shuffle_into_lists(original_list, num_lists, items_per_list, seed=None):
    """Randomly draw ``num_lists`` disjoint sub-lists from ``original_list``.

    The input list is left untouched; a shuffled copy is sliced into
    consecutive chunks of ``items_per_list`` elements.

    Args:
        original_list: Sequence of items to draw from.
        num_lists: Number of sub-lists to create (>= 0).
        items_per_list: Number of items in each sub-list (> 0).
        seed: Optional seed for a private random generator. When ``None``
            (default) the module-level ``random`` state is used, so a prior
            ``random.seed(...)`` call still controls the result.

    Returns:
        list[list]: ``num_lists`` lists of ``items_per_list`` items each; no
        item position of ``original_list`` is used twice.

    Raises:
        ValueError: If ``num_lists`` is negative, ``items_per_list`` is not
            positive, or ``original_list`` has fewer than
            ``num_lists * items_per_list`` items.
    """
    if num_lists < 0 or items_per_list <= 0:
        raise ValueError(
            "Invalid parameters: num_lists must be >= 0 and items_per_list must be > 0."
        )

    # Validate before doing any work so a bad call has no side effects.
    total_items = len(original_list)
    if num_lists * items_per_list > total_items:
        raise ValueError("Invalid parameters: Not enough items in the original list.")

    # Shuffle a copy: callers keep their list order, which makes repeated
    # calls (e.g. re-running a notebook cell) start from the same input.
    shuffled = list(original_list)
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(shuffled)

    return [
        shuffled[start:start + items_per_list]
        for start in range(0, num_lists * items_per_list, items_per_list)
    ]

#Extracts the image number from a file path, e.g. "image_123.tif" -> 123
def extract_number_image(file_path):
    """Return the integer ``N`` from a path containing ``image_N.tif``.

    Args:
        file_path: Path string such as ``".../image_123.tif"``.

    Returns:
        int: The digits found between ``"image_"`` and ``".tif"``.

    Raises:
        ValueError: If ``file_path`` contains no ``image_<digits>.tif`` part.
    """
    # The dot is escaped so that only a literal ".tif" extension matches
    # (an unescaped "." would also accept e.g. "image_12xtif").
    match = re.search(r'image_(\d+)\.tif', file_path)

    # Fail with a descriptive error instead of an AttributeError on None.
    if match is None:
        raise ValueError(
            f"No 'image_<number>.tif' component found in path: {file_path!r}"
        )
    return int(match.group(1))

In [2]:
# Root of the dataset; the class folders are expected under "<root>/data".
datafile_path = "../../smalldataset"
parent_folder = datafile_path + "/data"

# Collect every .tif path found under <parent_folder>/<class>/<patient>/.
# os.listdir() returns entries in arbitrary order, so both directory levels
# and each per-folder file list are sorted: image_path_list then has the same
# order on every run and every machine, which a seeded shuffle relies on.
# NOTE(review): the recorded output of this cell lists a folder named
# "output" among the class folders — confirm it is meant to be scanned.
image_path_list = []
for class_name in sorted(os.listdir(parent_folder)):
    folder_class = os.path.join(parent_folder, class_name)
    if not os.path.isdir(folder_class):
        continue  # skip stray files next to the class folders

    print(folder_class)
    for patient_name in sorted(os.listdir(folder_class)):
        folder_patient = os.path.join(folder_class, patient_name)
        if os.path.isdir(folder_patient):
            image_path_list.extend(sorted(get_image_path_list(folder_patient)))

print(f"Number of all images {len(image_path_list)}")

../../smalldataset/data/CBFB_MYH11
../../smalldataset/data/control
../../smalldataset/data/NPM1
../../smalldataset/data/output
../../smalldataset/data/PML_RARA
../../smalldataset/data/RUNX1_RUNX1T1
Number of all images 11122


In [3]:
# Shuffle into n patients with n images
n_patients = 10
n_images = 10
experiment_name = "experiment_1"
# Fixed seed so that re-running this cell (or the whole notebook) reproduces
# exactly the same artificial patients.
random_seed = 42

random.seed(random_seed)
# Pass a copy so image_path_list keeps its order even if the helper shuffles
# its argument in place; otherwise every re-run would start from a different
# ordering and the seed alone would not make the cell repeatable.
shuffled_patients = shuffle_into_lists(list(image_path_list), n_patients, n_images)

output_folder = datafile_path+'/artificialdata/'+experiment_name

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Write one "images.txt" per artificial patient listing its source image paths
# (one path per line). Patient folders are numbered from 0.
for i, file_paths in enumerate(shuffled_patients):
    output_file_path = os.path.join(output_folder, f"patient{i}")
    os.makedirs(output_file_path, exist_ok=True)
    with open(output_file_path+"/images.txt", 'w') as file:
        file.writelines(path + '\n' for path in file_paths)

In [133]:
# Name of the per-patient feature file, both where it is read from (next to
# the source images) and where it is written to (artificial patient folder).
features_filename = "fnl34_bn_features_layer_7.npy"

for patient, filepath_images_list in enumerate(shuffled_patients):
    print(f"Save patient {patient + 1} features")
    array_list = []
    for filepath in filepath_images_list:
        # The feature file lives in the same folder as the image itself.
        # dirname() is used instead of searching for "/image", which breaks
        # when the substring is missing or occurs earlier in the path.
        source_folder = os.path.dirname(filepath)
        # Memory-map the file: only the single row that is needed gets read,
        # instead of loading the whole array once per image.
        features = np.load(os.path.join(source_folder, features_filename), mmap_mode="r")
        # The number in the image file name is used as the row index into
        # the feature array; np.array() copies the row into a plain ndarray.
        array_list.append(np.array(features[extract_number_image(filepath)]))

    # Stack all selected rows along a new first axis: one row per image of
    # this artificial patient.
    artificial_features = np.stack(array_list, axis=0)

    patient_folder = os.path.join(output_folder, f"patient{patient}")
    os.makedirs(patient_folder, exist_ok=True)
    output_npy_file = os.path.join(patient_folder, features_filename)
    # Save the array to the .npy file
    np.save(output_npy_file, artificial_features)


Save patient 1 features


Save patient 2 features
Save patient 3 features
Save patient 4 features
Save patient 5 features
Save patient 6 features
Save patient 7 features
Save patient 8 features
Save patient 9 features
Save patient 10 features
