#### This code serves to test Algorithm 4 for the Iterative Domain Information Framework

In [1]:
import glob as glob
import math, random
import pandas as pd
from DatasetNames import get_dataset_class_name
from ExtraFunctions import *

from MultiClassPrediction import MultiClassPrediction
from SingleClassPrediction import SingleClassPrediction
from MultiClassDescription import MultiClassDescription

from MultiClassPredictionByText import MultiClassPredictionByText
from SingleClassPredictionByText import SingleClassPredictionByText

In [2]:
# ---- Experiment configuration ----
DATASET_NAME = "PathMNIST"
NUMBER_OF_IMAGES = 3  # value passed as `n` to the image sampler
NUMBER_OF_AUGMENTATIONS = 2  # augmentations requested per task-creation call
number_of_experiment = 1  # how many times the whole experiment is repeated

# Test split of the chosen dataset; adjust to your local directory layout.
source_directory = f"../Datasets/{DATASET_NAME}/test"
_, image_paths = get_balanced_random_images(
    source_directory,
    n=NUMBER_OF_IMAGES,
)

# NOTE(review): the seed is set only after the sampler above has run. If that
# helper draws from `random`, the selected images are not reproducible - confirm.
random.seed(55)
random.shuffle(image_paths)
# Keep an untouched copy so later cells can restore the starting image order.
initial_image_paths = image_paths.copy()

# Mapping returned for this dataset; its values are the class names used below.
dataset_classes_of_interest = get_dataset_class_name(DATASET_NAME)
init_classes_of_interest = [*dataset_classes_of_interest.values()]

# One independently shuffled ordering of every class name per image.
classes_of_interest_list = [
    random.sample(init_classes_of_interest, len(init_classes_of_interest))
    for _ in image_paths
]

multiclass_image_tasks = create_multiclass_augmented_image_tasks(
    image_paths,
    classes_of_interest_list,
    number_of_classes_to_predict=len(init_classes_of_interest),
    total_number_of_augmentations=NUMBER_OF_AUGMENTATIONS,
)

#### Playground

In [3]:
# Models to evaluate; each entry names the model and the provider type.
# Uncomment an entry to include that model in the run.
model_list = [
    # dict(model_name="claude-3-5-sonnet-20241022", model_type="claude"),
    # dict(model_name="gpt-4o", model_type="openai"),
    dict(model_name="gpt-4o-mini", model_type="openai"),
]

In [4]:
# All three workers are constructed with the same batch/worker settings.
_worker_settings = {"batch_size": 1, "max_workers": 4}

mc_descriptor = MultiClassDescription(**_worker_settings)
txt_based_classifier = MultiClassPredictionByText(**_worker_settings)
txt_based_single_class_classifier = SingleClassPredictionByText(**_worker_settings)

In [5]:
# Algorithm 4 driver. For every experiment and every model:
#   1. obtain a textual description of each (augmented) image,
#   2. repeatedly ask the text classifier to narrow the candidate classes,
#      dividing the number of requested classes by k each round,
#   3. finish with a single-class prediction and log every stage into exp_results.
exp_results = []

for experiment in range(number_of_experiment):
    print(f"Experiment {experiment+1}/{number_of_experiment}")

    # Iterate over the models
    for model in model_list:
        model_name = model["model_name"]
        model_type = model["model_type"]

        print(f"Model: {model_name}")
        print(f"Model Type: {model_type}")

        # Reset the per-model state BEFORE building the tasks. Both names are
        # overwritten further down (narrowed class lists, paths returned by the
        # extract helpers), so without this reset the second model/experiment
        # would start from the previous run's leftovers.
        image_paths = list(initial_image_paths)
        classes_of_interest_list = [
            random.sample(init_classes_of_interest, len(init_classes_of_interest))
            for _ in image_paths
        ]

        multiclass_image_tasks = create_multiclass_augmented_image_tasks(image_paths, classes_of_interest_list, number_of_classes_to_predict=len(init_classes_of_interest), total_number_of_augmentations=NUMBER_OF_AUGMENTATIONS)

        mc_descriptor.update_model(model_name)
        txt_based_classifier.update_model(model_name)
        txt_based_single_class_classifier.update_model(model_name)

        # For each model, initialize the variables for the Algorithm
        n = len(init_classes_of_interest)
        k = 2
        # The narrowing loop only terminates when k >= 2 and threshold >= 2:
        # ceil(1 / k) stays 1, and k == 1 never shrinks the count.
        threshold = 2
        number_of_classes_to_predict = math.ceil(n / k)
        voting_result_list = None
        # Holds the latest classifier output; stays None when no narrowing round
        # runs, so the single-class fallback below is actually reachable.
        rs = None

        # Get the descriptions of the images
        description_results = mc_descriptor.process_images(multiclass_image_tasks, model_type=model_type)

        # Create Multiclass Image Tasks from the descriptions
        multiclass_image_tasks = []
        for item in description_results['images']:
            image_path = item['image_path']
            classes_to_use = random.sample(init_classes_of_interest, len(init_classes_of_interest))

            # Combine all the descriptions into one so that it can be seen as a single medical report and also add headings to each section
            combined_description = f"Technical Quality: {item['technical_quality']}\nAnatomical Description: {item['anatomical_description']}\nFindings: {item['findings']}"

            task = MultiClassImageTask(image_path=image_path, classes=classes_to_use)
            task.set_image_textual_description(combined_description)
            task.set_num_predictions(number_of_classes_to_predict)
            multiclass_image_tasks.append(task)

        while number_of_classes_to_predict >= threshold:

            # Prompt the model to predict "number_of_classes_to_predict" classes for each image
            rs = txt_based_classifier.process_descriptions(multiclass_image_tasks, model_type=model_type)

            # Set the predicted classes as the new classes of interest
            result_list, image_paths, classes_of_interest_list = extract_image_classes_from_multiclass_predictions(rs, number_of_classes_to_predict)

            print("Image Paths: ", image_paths)
            print("Number of Image Paths: ", len(image_paths))

            log_experiment_results(dataset_classes_of_interest, exp_results, experiment, model_name, model_type, number_of_classes_to_predict, image_paths, classes_of_interest_list)

            # Update the number of classes to predict
            number_of_classes_to_predict = math.ceil(number_of_classes_to_predict / k)

            multiclass_image_tasks = create_new_multitask_image_tasks(number_of_classes_to_predict, result_list)

        if rs:
            # Result from Multiclass Prediction
            result_list, _, _ = extract_image_classes_from_multiclass_predictions(rs)

            # Merge augmentation results
            result_list, voting_result_list = transform_result_list_to_single_image_tasks(result_list)
        else:
            # Go to SingleTask result list directly
            # NOTE(review): tasks built here come straight from paths/classes; confirm
            # the helper attaches the textual descriptions the text classifier needs.
            result_list = create_singleclass_image_tasks(image_paths, classes_of_interest_list)

        # Predict the single classes for each image
        f_res = txt_based_single_class_classifier.process_descriptions(result_list, model_type)
        correct_classes, pred_classes, image_paths = extract_single_classes(f_res, dataset_classes_of_interest)

        log_experiment_results(dataset_classes_of_interest, exp_results, experiment, model_name, model_type, number_of_classes_to_predict, image_paths, pred_classes, voting_result_list)

Experiment 1/1
Model: gpt-4o
Model Type: openai
Inside process Descriptions for MultiClassPredictionByText:  [[MultiClassImageTask(image_path='../Datasets/PathMNIST/test\\7\\image_2935.png', classes=['debris', 'mucus', 'normal colon mucosa', 'smooth muscle', 'cancer-associated stroma', 'background', 'adipose', 'colorectal adenocarcinoma epithelium', 'lymphocytes'], num_predictions=5, encoded_image=None, features=None, image_textual_description="Technical Quality: {'image_modality': 'Histological image', 'positioning': 'Standard histological section', 'image_quality': 'High resolution with clear cellular detail'}\nAnatomical Description: {'location': 'Tissue section', 'anatomical_landmarks': ['Cell nuclei', 'Fibrous stroma'], 'orientation': 'Transverse plane'}\nFindings: {'composition': {'density_characteristics': 'Variable cellular density', 'internal_architecture': 'Fibrous and cellular components'}, 'morphology': {'shape': 'Elongated spindle-shaped cells', 'margins': 'Indistinct', 's

In [6]:
# Gather every logged result row into one table for inspection.
x_sam = pd.DataFrame(data=exp_results)

In [7]:
# Bare expression: the notebook renders the results table as this cell's output.
x_sam

Unnamed: 0,experiment,number of classes,model_name,model_type,accuracy,image_paths,LLM Predicted Classes,Correct Classes,Voting Result
0,0,5,gpt-4o,openai,148 out of 200,"[../Datasets/PathMNIST/test\7\image_2935.png, ...","[[normal colon mucosa, background, smooth musc...","[cancer-associated stroma, smooth muscle, smoo...",
1,0,3,gpt-4o,openai,114 out of 200,"[../Datasets/PathMNIST/test\7\image_2935.png, ...","[[background, smooth muscle, cancer-associated...","[cancer-associated stroma, smooth muscle, smoo...",
2,0,2,gpt-4o,openai,95 out of 200,"[../Datasets/PathMNIST/test\7\image_2935.png, ...","[[cancer-associated stroma, smooth muscle], [b...","[cancer-associated stroma, smooth muscle, smoo...",
3,0,1,gpt-4o,openai,40 out of 100,"[../Datasets/PathMNIST/test\7\image_2935.png, ...","[cancer-associated stroma, cancer-associated s...","[cancer-associated stroma, smooth muscle, smoo...","[[cancer-associated stroma, smooth muscle, bac..."
4,0,5,gpt-4o-mini,openai,131 out of 200,"[../Datasets/PathMNIST/test\7\image_3832.png, ...","[[cancer-associated stroma, normal colon mucos...","[cancer-associated stroma, debris, normal colo...",
5,0,3,gpt-4o-mini,openai,101 out of 200,"[../Datasets/PathMNIST/test\8\image_1288.png, ...","[[background, colorectal adenocarcinoma epithe...","[colorectal adenocarcinoma epithelium, colorec...",
6,0,2,gpt-4o-mini,openai,82 out of 200,"[../Datasets/PathMNIST/test\8\image_1288.png, ...","[[cancer-associated stroma, colorectal adenoca...","[colorectal adenocarcinoma epithelium, colorec...",
7,0,1,gpt-4o-mini,openai,30 out of 100,"[../Datasets/PathMNIST/test\7\image_3832.png, ...","[cancer-associated stroma, debris, mucus, canc...","[cancer-associated stroma, debris, normal colo...","[[normal colon mucosa, cancer-associated strom..."


In [8]:
# Persist the collected results next to the notebook, without the index column.
results_csv_path = f"{DATASET_NAME}_results_Algorithm4_EXP1.csv"
results_frame = pd.DataFrame(exp_results)
results_frame.to_csv(results_csv_path, index=False)