#### This code serves to analyse the results from different datasets and models

In [18]:
from ExtraFunctions import *
import numpy as np
import pandas as pd
import random

In [2]:
def get_bootstrap_results(rs, random_state=1000):
    result = pd.DataFrame({
        'LLM Predicted Classes': eval(rs['LLM Predicted Classes'].values[0]),
        'Correct Classes': eval(rs['Correct Classes'].values[0]),
    })


    bootstrap_df = result.sample(n=len(result), replace=True, random_state=random_state)
    return compute_top_n_accuracy( bootstrap_df['Correct Classes'].values.tolist(), bootstrap_df['LLM Predicted Classes'].values.tolist())

In [3]:
DATASET_NAME = "PathMNIST"
ALGORITHM_NUMBER = 5

In [8]:
dataset_list = ['OrganCMNIST', 'PathMNIST', 'BloodMNIST', 'DermaMNIST', 'OctMNIST']
algorithm_list = [2, 3, 5]
model_name = "gpt-4o-mini"

In [30]:
# Set the seed for reproducibility
random.seed(66)  # 42 is arbitrary, you can use any number

# Generate 30 random numbers
numbers = [random.randint(1, 1000) for _ in range(100)]

print(numbers)

[73, 320, 446, 946, 253, 806, 457, 302, 263, 883, 803, 574, 98, 763, 887, 949, 864, 507, 189, 480, 536, 132, 940, 804, 734, 72, 732, 373, 746, 172, 418, 746, 722, 469, 655, 518, 32, 542, 115, 864, 394, 139, 380, 25, 902, 10, 728, 212, 714, 480, 630, 211, 685, 327, 619, 495, 973, 314, 207, 299, 72, 205, 776, 944, 329, 994, 683, 898, 362, 813, 981, 416, 80, 318, 90, 414, 280, 18, 626, 121, 360, 486, 619, 425, 58, 670, 408, 575, 721, 102, 758, 869, 619, 437, 324, 490, 843, 381, 811, 376]


In [36]:
algo_full_name = {
    2: "Image Augmentations",
    3: "Image Augmentations + Feature Description",
    5: "Image Augmentations + Image Textual Description"
}

In [37]:
total_result = []

for dataset in dataset_list:
    for algorithm in algorithm_list:
        print(f"Dataset: {dataset}, Algorithm: {algorithm}")
        rs =pd.read_csv(f'./Results/{dataset}/Batch_Size_5/{dataset}_results_Algorithm{algorithm}_EXP1.csv')
        rs_gpt_4o_mini = rs[(rs['number of classes'] == 1) & (rs['model_name'] == 'gpt-4o-mini')]

        result_of_bootstrap = []
        for number in numbers:
            final_acc = int(get_bootstrap_results(rs_gpt_4o_mini, random_state=number).split()[0])
            result_of_bootstrap.append(final_acc)

        # print(np.mean(result_of_bootstrap), np.std(result_of_bootstrap))
        total_result.append([dataset, algo_full_name[algorithm], np.mean(result_of_bootstrap), np.std(result_of_bootstrap)])

Dataset: OrganCMNIST, Algorithm: 2
Dataset: OrganCMNIST, Algorithm: 3
Dataset: OrganCMNIST, Algorithm: 5
Dataset: PathMNIST, Algorithm: 2
Dataset: PathMNIST, Algorithm: 3
Dataset: PathMNIST, Algorithm: 5
Dataset: BloodMNIST, Algorithm: 2
Dataset: BloodMNIST, Algorithm: 3
Dataset: BloodMNIST, Algorithm: 5
Dataset: DermaMNIST, Algorithm: 2
Dataset: DermaMNIST, Algorithm: 3
Dataset: DermaMNIST, Algorithm: 5
Dataset: OctMNIST, Algorithm: 2
Dataset: OctMNIST, Algorithm: 3
Dataset: OctMNIST, Algorithm: 5


In [41]:
pd.DataFrame(total_result, columns=['Dataset', 'Algorithm', 'Mean', 'Std']).T.to_csv(f'./Results/Bootstrap_Results_{model_name}_Batch_Size_5.csv', index=False)

In [164]:
rs[(rs['experiment'] == 0) & (rs['model_type'] == 'openai')] #.to_csv(f'{DATASET_NAME}_results_Algorithm{ALGORITHM_NUMBER}_EXP1.csv', index=False)

Unnamed: 0,experiment,number of classes,model_name,model_type,accuracy,image_paths,LLM Predicted Classes,Correct Classes,Voting Result
0,0,5,gpt-4o-mini,openai,124 out of 200,['../Datasets/PathMNIST/test\\8\\image_1754.pn...,"[['background', 'debris', 'mucus', 'cancer-ass...","['colorectal adenocarcinoma epithelium', 'debr...",
1,0,3,gpt-4o-mini,openai,108 out of 200,['../Datasets/PathMNIST/test\\8\\image_1754.pn...,"[['debris', 'background', 'cancer-associated s...","['colorectal adenocarcinoma epithelium', 'debr...",
2,0,2,gpt-4o-mini,openai,77 out of 200,['../Datasets/PathMNIST/test\\7\\image_2935.pn...,"[['normal colon mucosa', 'cancer-associated st...","['cancer-associated stroma', 'smooth muscle', ...",
3,0,1,gpt-4o-mini,openai,30 out of 100,['../Datasets/PathMNIST/test\\8\\image_1754.pn...,"['colorectal adenocarcinoma epithelium', 'lymp...","['colorectal adenocarcinoma epithelium', 'debr...","[['cancer-associated stroma', 'debris', 'color..."
4,0,5,gpt-4o,openai,144 out of 200,['../Datasets/PathMNIST/test\\8\\image_1754.pn...,"[['background', 'mucus', 'cancer-associated st...","['colorectal adenocarcinoma epithelium', 'debr...",
5,0,3,gpt-4o,openai,124 out of 200,['../Datasets/PathMNIST/test\\7\\image_2935.pn...,"[['smooth muscle', 'background', 'normal colon...","['cancer-associated stroma', 'smooth muscle', ...",
6,0,2,gpt-4o,openai,107 out of 200,['../Datasets/PathMNIST/test\\8\\image_1754.pn...,"[['cancer-associated stroma', 'colorectal aden...","['colorectal adenocarcinoma epithelium', 'debr...",
7,0,1,gpt-4o,openai,43 out of 100,['../Datasets/PathMNIST/test\\7\\image_2935.pn...,"['smooth muscle', 'cancer-associated stroma', ...","['cancer-associated stroma', 'smooth muscle', ...","[['background', 'smooth muscle', 'cancer-assoc..."


In [165]:
rs_gpt_4o = rs[(rs['number of classes'] == 1) & (rs['model_name'] == 'gpt-4o')]
rs_gpt_4o_mini = rs[(rs['number of classes'] == 1) & (rs['model_name'] == 'gpt-4o-mini')]

In [166]:
rs_gpt_4o

Unnamed: 0,experiment,number of classes,model_name,model_type,accuracy,image_paths,LLM Predicted Classes,Correct Classes,Voting Result
7,0,1,gpt-4o,openai,43 out of 100,['../Datasets/PathMNIST/test\\7\\image_2935.pn...,"['smooth muscle', 'cancer-associated stroma', ...","['cancer-associated stroma', 'smooth muscle', ...","[['background', 'smooth muscle', 'cancer-assoc..."


In [167]:
rs_gpt_4o_mini

Unnamed: 0,experiment,number of classes,model_name,model_type,accuracy,image_paths,LLM Predicted Classes,Correct Classes,Voting Result
3,0,1,gpt-4o-mini,openai,30 out of 100,['../Datasets/PathMNIST/test\\8\\image_1754.pn...,"['colorectal adenocarcinoma epithelium', 'lymp...","['colorectal adenocarcinoma epithelium', 'debr...","[['cancer-associated stroma', 'debris', 'color..."


#### Get Boostrap results

In [None]:
# Generate 30 random numbers and use a seed to keep the results consistent
rand.

In [13]:
# Generate 30 random numbers with a seed of 1000
random_numbers = np.random.randint(0, 1000, 30, seed=1000)

TypeError: randint() got an unexpected keyword argument 'seed'

In [12]:
random_numbers

array([222, 471, 929, 290,  45, 333, 641, 247, 792, 409, 947, 498, 601,
        64,  11, 348,  47, 335, 365, 875, 756, 458, 271, 157, 718, 167,
       285, 702, 590, 919])

In [168]:
random_states = [1000, 2000]

for random_state in random_states:
    print(f'Random State: {random_state}')
    print('GPT-4o')
    print(get_bootstrap_results(rs_gpt_4o, random_state))
    print('GPT-4o-mini')
    print(get_bootstrap_results(rs_gpt_4o_mini, random_state))
    print('')

Random State: 1000
GPT-4o
54 out of 100
GPT-4o-mini
32 out of 100

Random State: 2000
GPT-4o
42 out of 100
GPT-4o-mini
22 out of 100



In [123]:
from medmnist import PathMNIST
dataset = PathMNIST(split='test', download=True, size = 224)

Using downloaded and verified file: C:\Users\azeezidris\.medmnist\pathmnist_224.npz


In [25]:
from Important_script_1 import save_npz_images

In [124]:
save_npz_images("C:\\Users\\azeezidris\\.medmnist\\pathmnist_224.npz", 'PathMNIST', 'test')

Saved 7180 images from test split
Number of classes: 9


In [125]:
from PIL import Image
import os
from pathlib import Path
from typing import Set, Tuple

def get_image_sizes(folder_path: str) -> Set[Tuple[int, int]]:
    """
    Recursively scan a folder and its subfolders to get all unique image sizes.
    
    Args:
        folder_path (str): Path to the folder containing images
        
    Returns:
        Set[Tuple[int, int]]: Set of unique image sizes as (width, height) tuples
        
    Example:
        sizes = get_image_sizes("path/to/image/folder")
        if len(sizes) == 1:
            print("All images have the same size:", next(iter(sizes)))
        else:
            print("Found different image sizes:", sizes)
    """
    # Common image extensions
    IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}
    
    # Set to store unique image sizes
    sizes = set()
    
    # Convert to Path object for easier handling
    root_path = Path(folder_path)
    
    try:
        # Walk through all files in the folder and subfolders
        for file_path in root_path.rglob('*'):
            # Check if the file is an image based on extension
            if file_path.suffix.lower() in IMAGE_EXTENSIONS:
                try:
                    # Open image and get size
                    with Image.open(file_path) as img:
                        sizes.add(img.size)
                except Exception as e:
                    print(f"Error processing {file_path}: {str(e)}")
                    continue
    
    except Exception as e:
        print(f"Error accessing folder {folder_path}: {str(e)}")
        return set()
    
    return sizes

# Example usage
def check_image_sizes(folder_path: str) -> None:
    """
    Check and report if all images in a folder have the same size.
    
    Args:
        folder_path (str): Path to the folder containing images
    """
    sizes = get_image_sizes(folder_path)
    
    if not sizes:
        print("No valid images found in the folder.")
    elif len(sizes) == 1:
        width, height = next(iter(sizes))
        print(f"All images have the same size: {width}x{height} pixels")
    else:
        print("Found different image sizes:")
        for width, height in sorted(sizes):
            print(f"- {width}x{height} pixels")

In [138]:
check_image_sizes('../Datasets/BloodMNIST/test')

All images have the same size: 224x224 pixels
