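#### This notebook tests Algorithm 1 of the Iterative Domain Information Framework: the candidate classes for each image are narrowed over successive multi-class prediction rounds, followed by a final single-class prediction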

In [1]:
import glob as glob
import math, random
import pandas as pd
from DatasetNames import get_dataset_class_name
from ExtraFunctions import *
from MultiClassPrediction import MultiClassPrediction, MultiClassImageTask
from SingleClassPrediction import SingleClassPrediction

In [2]:
DATASET_NAME = "PathMNIST"
NUMBER_OF_IMAGES = 100 # Total number of images to be used
SEED = 55 # Seed for every draw made through the `random` module in this cell

# Seed BEFORE the first random operation so the image selection below starts
# from a known RNG state as well, not only the shuffle.
# NOTE(review): presumably get_balanced_random_images samples through the
# `random` module -- confirm; if it uses another RNG, that one needs seeding too.
random.seed(SEED)

source_directory = f"../Datasets/{DATASET_NAME}/test"  # Change this to your test directory path
_, image_paths = get_balanced_random_images(source_directory, n = NUMBER_OF_IMAGES)

random.shuffle(image_paths)
# Keep a pristine copy of the shuffled paths; later cells rebind `image_paths`.
initial_image_paths = image_paths.copy()

# Mapping whose values are the class names used as prediction candidates.
dataset_classes_of_interest = get_dataset_class_name(DATASET_NAME)
init_classes_of_interest = list(dataset_classes_of_interest.values())

# One shuffled class order, shared by value (not by reference) across all images:
# `[order] * n` would make every entry the very same list object, so an in-place
# change to one image's candidates would silently affect all of them.
_shuffled_classes = random.sample(init_classes_of_interest, len(init_classes_of_interest))
classes_of_interest_list = [list(_shuffled_classes) for _ in image_paths]

#### Playground

In [3]:
# How many times the whole experiment is repeated
number_of_experiment = 1

# Models under evaluation: one dict per model, holding its name and provider type
model_list = [
    {"model_name": name, "model_type": "openai"}
    for name in ("gpt-4o", "gpt-4o-mini")
]

In [4]:
# Run Algorithm 1 for every (experiment, model) pair:
#   1. repeatedly ask the model for `number_of_classes_to_predict` candidate classes
#      per image, shrinking that number by a factor of k each round;
#   2. finish with a single-class prediction over the remaining candidates.
# One entry is logged into `exp_results` per round and one for the final prediction.
mcp = MultiClassPrediction(batch_size = 1, max_workers=2)
scp = SingleClassPrediction(batch_size = 1, max_workers=2)

exp_results = []

# Iterate over the number of experiments
for experiment in range(number_of_experiment):
    print(f"Experiment {experiment+1}/{number_of_experiment}")

    # Iterate over the models
    for model in model_list:
        model_name = model["model_name"]
        model_type = model["model_type"]

        print(f"Model: {model_name}")
        print(f"Model Type: {model_type}")

        # Update the model
        mcp.update_model(model_name)
        scp.update_model(model_name)

        # For each model, initialize the variables for the Algorithm
        n = len(init_classes_of_interest)
        k = 2          # Shrink factor applied to the candidate count each round
        threshold = 2  # Smallest candidate count still handled by the multi-class rounds
        number_of_classes_to_predict = math.ceil(n / k)
        # Work on a copy so the pristine path list cannot be altered through this name
        image_paths = initial_image_paths.copy()
        # Same shuffled class order for every image, but as independent lists:
        # `[order] * n` would alias one list object across all images.
        class_order = random.sample(init_classes_of_interest, len(init_classes_of_interest))
        classes_of_interest_list = [list(class_order) for _ in image_paths]
        # Reset per model: otherwise, when the loop below never runs, `rs` is either
        # undefined (first model) or left over from the previous model.
        rs = None

        while number_of_classes_to_predict >= threshold:

            # Create the multiclass image tasks
            multiclass_image_tasks = create_multiclass_augmented_image_tasks(image_paths, classes_of_interest_list, number_of_classes_to_predict=number_of_classes_to_predict)

            # Prompt the model to predict "number_of_classes_to_predict" classes for each image
            rs = mcp.process_images(multiclass_image_tasks, model_type)

            # Set the predicted classes as the new classes of interest
            result_list, image_paths, classes_of_interest_list = extract_image_classes_from_multiclass_predictions(rs, number_of_classes_to_predict)

            # The ground-truth key is the name of the image's parent directory.
            # Normalise separators first so both "a\\b\\c.png" and "a/b/c.png" work;
            # splitting on "\\" alone raises IndexError for POSIX-style paths.
            image_paths_classes = [
                dataset_classes_of_interest[image_path.replace("\\", "/").split("/")[-2]]
                for image_path in image_paths
            ]
            curr_pred_acc = compute_top_n_accuracy(image_paths_classes, classes_of_interest_list)

            # print(f"Predicted Classes: {classes_of_interest_list}")
            # print(f"Correct Classes: {image_paths_classes}")
            print(f"Accuracy: {curr_pred_acc}")

            log_experiment_results(dataset_classes_of_interest, exp_results, experiment, model_name, model_type, number_of_classes_to_predict, image_paths, classes_of_interest_list)

            # Update the number of classes to predict
            number_of_classes_to_predict = math.ceil(number_of_classes_to_predict / k)

        if rs:
            # Result from Multiclass Prediction
            result_list, _, _ = extract_image_classes_from_multiclass_predictions(rs)
        else:
            # Go to SingleTask result list directly
            result_list = create_singleclass_image_tasks(image_paths, classes_of_interest_list)

        # Predict the single classes for each image
        f_res = scp.process_images(result_list, model_type)
        correct_classes, pred_classes, image_paths = extract_single_classes(f_res, dataset_classes_of_interest)

        # Count the number of matches
        pred_result = count_matches(correct_classes, pred_classes)

        # print(f"Predicted Classes: {pred_classes}")
        # print(f"Correct Classes: {correct_classes}")
        print(f"Accuracy: {pred_result}")

        # NOTE(review): the count logged here is the value that ended the loop above
        # (1 with k = 2 and threshold = 2); with other settings it may not be 1 even
        # though this stage predicts a single class -- confirm this is intended.
        log_experiment_results(dataset_classes_of_interest, exp_results, experiment, model_name, model_type, number_of_classes_to_predict, image_paths, pred_classes)

Experiment 1/1
Model: gpt-4o
Model Type: openai
Accuracy: 78 out of 100
Predicted Classes of Interest: [['debris', 'mucus', 'background', 'lymphocytes', 'cancer-associated stroma'], ['cancer-associated stroma', 'colorectal adenocarcinoma epithelium', 'mucus', 'debris', 'lymphocytes'], ['lymphocytes', 'background', 'cancer-associated stroma', 'smooth muscle', 'debris'], ['background', 'debris', 'colorectal adenocarcinoma epithelium', 'cancer-associated stroma', 'lymphocytes'], ['cancer-associated stroma', 'colorectal adenocarcinoma epithelium', 'lymphocytes', 'smooth muscle', 'background'], ['colorectal adenocarcinoma epithelium', 'mucus', 'lymphocytes', 'background', 'cancer-associated stroma'], ['cancer-associated stroma', 'debris', 'background', 'mucus', 'lymphocytes'], ['smooth muscle', 'cancer-associated stroma', 'lymphocytes', 'background', 'colorectal adenocarcinoma epithelium'], ['debris', 'cancer-associated stroma', 'lymphocytes', 'smooth muscle', 'background'], ['smooth muscle

In [5]:
# Collect the logged experiment entries into a DataFrame for inspection
x_sam = pd.DataFrame(data=exp_results)

In [6]:
# Display the results DataFrame built from exp_results
x_sam

Unnamed: 0,experiment,number of classes,model_name,model_type,accuracy,image_paths,LLM Predicted Classes,Correct Classes,Voting Result
0,0,5,gpt-4o,openai,78 out of 100,"[../Datasets/PathMNIST/test\1\image_5521.png, ...","[[cancer-associated stroma, debris, background...","[background, debris, cancer-associated stroma,...",
1,0,3,gpt-4o,openai,63 out of 100,"[../Datasets/PathMNIST/test\2\image_6417.png, ...","[[cancer-associated stroma, colorectal adenoca...","[debris, background, cancer-associated stroma,...",
2,0,2,gpt-4o,openai,61 out of 100,"[../Datasets/PathMNIST/test\4\image_1905.png, ...","[[smooth muscle, cancer-associated stroma], [c...","[mucus, cancer-associated stroma, debris, smoo...",
3,0,1,gpt-4o,openai,48 out of 100,"[../Datasets/PathMNIST/test\4\image_1905.png, ...","[smooth muscle, smooth muscle, colorectal aden...","[mucus, cancer-associated stroma, debris, smoo...",
4,0,5,gpt-4o-mini,openai,83 out of 100,"[../Datasets/PathMNIST/test\0\image_3756.png, ...","[[mucus, lymphocytes, smooth muscle, colorecta...","[adipose, adipose, background, mucus, colorect...",
5,0,3,gpt-4o-mini,openai,56 out of 100,"[../Datasets/PathMNIST/test\0\image_2150.png, ...","[[debris, smooth muscle, mucus], [smooth muscl...","[adipose, normal colon mucosa, background, smo...",
6,0,2,gpt-4o-mini,openai,52 out of 100,"[../Datasets/PathMNIST/test\3\image_3284.png, ...","[[lymphocytes, colorectal adenocarcinoma epith...","[lymphocytes, cancer-associated stroma, backgr...",
7,0,1,gpt-4o-mini,openai,36 out of 100,"[../Datasets/PathMNIST/test\7\image_4939.png, ...","[cancer-associated stroma, mucus, lymphocytes,...","[cancer-associated stroma, background, lymphoc...",


In [8]:
# Persist the logged results to a CSV named after the dataset (row index omitted)
pd.DataFrame(data=exp_results).to_csv(
    path_or_buf=f"{DATASET_NAME}_results_Algorithm1_EXP1_batch_size_1.csv",
    index=False,
)