### Zero-Shot Scenario

In [1]:
import base64
import httpx
import anthropic
import json
import random
import glob as glob

In [2]:
client = anthropic.Anthropic(api_key='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')

In [3]:
# Open the image file and encode it as a base64 string
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

In [4]:
def zero_shot_claude(client, images, classes_to_classify, image_media_type = 'image/png'):

    img_msgs = []

    for count, image_path in enumerate(images):
        img_msgs.extend(
            [{
                    "type": "text",
                    "text": f"Image {count}:"
                },
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": image_media_type,
                        "data": encode_image(image_path),
                    },
                }]
        )

    img_msgs.append({
        "type": "text",
        "text": f"Classify each of the following images into one of these classes: {classes_to_classify}. Return your response only as a JSON, with each Image and its predicted class. Example: Image: 1, Class: Cat"
    })

    message = client.messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=1024,
        
        messages=[{"role": "user",
            "content": img_msgs}]
    )

    return message.content

In [28]:
result = zero_shot_claude(client, ['d1.jpeg', 'd2.jpeg', 'd3.jpeg', 'd4.jpeg', 'cat1.jpeg', 'cat2.jpeg', 'cat3.jpeg', 'cat4.jpeg'], ['Cat', 'Dog'], 'image/jpeg')

In [32]:
json.loads(result[0].text)

{'Image 0': 'Dog',
 'Image 1': 'Dog',
 'Image 2': 'Dog',
 'Image 3': 'Dog',
 'Image 4': 'Cat',
 'Image 5': 'Cat',
 'Image 6': 'Cat',
 'Image 7': 'Cat'}

In [52]:
class_list = ['actinic keratoses and intraepithelial carcinoma', 'basal cell carcinoma', 'benign keratosis-like lesions', 'dermatofibroma', 'melanoma', 'melanocytic nevi', 'vascular lesions']

In [46]:
samples_to_select = []
for i in range(7):
    samples_to_select.extend(random.sample(glob.glob(f'../realistic-al/datasets/MIO-TCD-Classification/train/{i}/*'), 2))
random.shuffle(samples_to_select)

In [53]:
correct_class = [class_list[int(item.split('/')[-2])] for item in samples_to_select]

In [57]:
med_result = zero_shot_claude(client, samples_to_select, class_list, 'image/png')

In [56]:
correct_class

['basal cell carcinoma',
 'benign keratosis-like lesions',
 'melanoma',
 'vascular lesions',
 'melanocytic nevi',
 'dermatofibroma',
 'benign keratosis-like lesions',
 'actinic keratoses and intraepithelial carcinoma',
 'actinic keratoses and intraepithelial carcinoma',
 'basal cell carcinoma',
 'vascular lesions',
 'melanocytic nevi',
 'dermatofibroma',
 'melanoma']

In [66]:
preds = list(json.loads(med_result[0].text).values())

In [67]:
correct_count = 0
for i in range(len(correct_class)):

    if correct_class[i].lower() == preds[i].lower():
        correct_count += 1


In [68]:
correct_count

2

In [70]:
correct_count * 100 / len(preds)

14.285714285714286

### Research Question Evaluation

In [25]:
# Get 10 test images from each of the classes. Determine how many of those are predicted correctly by MDM - Check here for class label definition - https://github.com/MedMNIST/MedMNIST/blob/main/medmnist/info.py

class_ground_truth_mapping = {
    'CIFAR10': {
        '0': 'Airplane',
        '1': 'Automobile',
        '2': 'Bird',
        '3': 'Cat',
        '4': 'Deer',
        '5': 'Dog',
        '6': 'Frog',
        '7': 'Horse',
        '8': 'Ship',
        '9': 'Truck'
    },

    'DermaMNIST': {
        '0': 'actinic keratoses and intraepithelial carcinoma',
        '1': 'basal cell carcinoma',
        '2': 'benign keratosis-like lesions',
        '3': 'dermatofibroma',
        '4': 'melanoma',
        '5': 'melanocytic nevi',
        '6': 'vascular lesions'
    },

    'OctMNIST': {
        "0": "choroidal neovascularization",
        "1": "diabetic macular edema",
        "2": "drusen",
        "3": "normal",
    },

    'PneumoniaMNIST': {
        "0": "normal", 
        "1": "pneumonia"
        },

    'STL10': {
        '0': 'airplane',
        '1': 'bird',
        '2': 'car',
        '3': 'cat',
        '4': 'deer',
        '5': 'dog',
        '6': 'horse',
        '7': 'monkey',
        '8': 'ship',
        '9': 'truck'
    }
}

In [6]:
def count_matching_samples_positionwise(list1, list2):
    """
    This function takes two lists of strings and returns the count of matching samples at the same positions.
    It only executes if both lists have the same length.
    
    :param list1: First list of strings
    :param list2: Second list of strings
    :return: Number of matching samples at the same positions or an error message if lengths differ
    """
    # Check if both lists have the same length
    if len(list1) != len(list2):
        return "Lists have different lengths and cannot be compared position-wise."
    
    # Count matching samples at the same positions
    matches = sum(1 for x, y in zip(list1, list2) if x.lower() == y.lower())
    
    return matches

In [7]:
def get_class_samples(prompt_to_use, list_of_images, classes_to_classify, ground_truths):

    # Pass the images through the model and get its predictions
    result = zero_shot_claude(client, list_of_images, classes_to_classify)

    matches = list(json.loads(result[0].text).values())

    print(matches)
    print(ground_truths)
    
    # Compare the predictions with the ground truth
    matches = count_matching_samples_positionwise(matches, ground_truths)

    accuracy = matches / len(ground_truths)

    return accuracy

In [8]:

import os
import glob
import random

def list_files_and_sample_subfolders(folder_path, sample_size=1, random_seed = 5):
    """
    List all files in each subfolder and select a random sample from the list.
    
    :param folder_path: Path to the main folder
    :param sample_size: Number of random samples to select from each subfolder
    :return: Dictionary with subfolder paths as keys and lists of random sampled files as values
    """
    # Get a list of all subfolders in the folder_path
    subfolders = [f.path for f in os.scandir(folder_path) if f.is_dir()]

    random.seed(random_seed)
    
    # Dictionary to store random samples from each subfolder
    sampled_files = []
    
    # Iterate over each subfolder
    for subfolder in subfolders:
        # List all files in the subfolder
        files = glob.glob(os.path.join(subfolder, '*'))
        
        sampled_files.extend(random.sample(files, sample_size))

    random.shuffle(sampled_files)
    return sampled_files

def get_samples_from_each_folder(dataset_path, number_per_class, random_seed = 5):
    return list_files_and_sample_subfolders(dataset_path, number_per_class, random_seed)

def get_ground_truth_from_path(file_lists, label_mapping):
    ground_truth_labels = []

    for file in file_lists:
        # Split the file path by '/'
        parts = file.split('/')
        
        # Handle cases where the semi-last item is a number
        if parts[-2].isdigit():
            class_label = int(parts[-2])

            class_label = label_mapping[class_label]

        else:
            class_label = parts[-2]
        
        ground_truth_labels.append(class_label)

    return ground_truth_labels

#### Research Question 1 -     How well does MDM give a correct low-level reasoning on shape, texture, and color in an in-domain dataset?  

In [9]:
# Test for CIFAR10 sample
dataset = 'CIFAR10'
dataset_classes = list(class_ground_truth_mapping[dataset].values())

samples_to_use = get_samples_from_each_folder(f'./Datasets/{dataset}/test', 1)
ground_truths_for_samples = get_ground_truth_from_path(samples_to_use, dataset_classes)

In [11]:
# Test for CIFAR10 sample
dataset = 'CIFAR10'
dataset_classes = list(class_ground_truth_mapping[dataset].values())

print(dataset_classes)

samples_to_use = get_samples_from_each_folder(f'./Datasets/{dataset}/test', 1)

print(samples_to_use)
ground_truths_for_samples = get_ground_truth_from_path(samples_to_use, dataset_classes)

print(ground_truths_for_samples)

['Airplane', 'Automobile', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 'Horse', 'Ship', 'Truck']
['./Datasets/CIFAR10/test/Airplane/stealth_bomber_s_001453.png', './Datasets/CIFAR10/test/Frog/rana_clamitans_s_000197.png', './Datasets/CIFAR10/test/Ship/lifeboat_s_000406.png', './Datasets/CIFAR10/test/Automobile/funny_wagon_s_000357.png', './Datasets/CIFAR10/test/Dog/puppy_s_000486.png', './Datasets/CIFAR10/test/Horse/lippizan_s_000335.png', './Datasets/CIFAR10/test/Deer/elk_s_000398.png', './Datasets/CIFAR10/test/Cat/true_cat_s_000467.png', './Datasets/CIFAR10/test/Bird/bird_s_002425.png', './Datasets/CIFAR10/test/Truck/moving_van_s_001714.png']
['Airplane', 'Frog', 'Ship', 'Automobile', 'Dog', 'Horse', 'Deer', 'Cat', 'Bird', 'Truck']


In [15]:
get_class_samples('', samples_to_use, dataset_classes, ground_truths_for_samples)

['Ship', 'Bird', 'Truck', 'Truck', 'Dog', 'Cat', 'Deer', 'Bird', 'Bird', 'Bird']
['Airplane', 'Frog', 'Ship', 'Automobile', 'Dog', 'Horse', 'Deer', 'Cat', 'Bird', 'Truck']


0.3

In [71]:
get_class_samples('', samples_to_use, dataset_classes, ground_truths_for_samples)

['Ship', 'Cat', 'Truck', 'Automobile', 'Dog', 'Cat', 'Deer', 'Bird', 'Bird', 'Bird']
['Airplane', 'Frog', 'Ship', 'Automobile', 'Dog', 'Horse', 'Deer', 'Cat', 'Bird', 'Truck']


0.4

In [79]:
get_class_samples('', samples_to_use, dataset_classes, ground_truths_for_samples)

['Ship', 'Cat', 'Truck', 'Truck', 'Dog', 'Bird', 'Deer', 'Bird', 'Bird', 'Bird']
['Airplane', 'Frog', 'Ship', 'Automobile', 'Dog', 'Horse', 'Deer', 'Cat', 'Bird', 'Truck']


0.3

In [94]:
dataset = 'CIFAR10'
dataset_classes = list(class_ground_truth_mapping[dataset].values())
samples_to_use = get_samples_from_each_folder(f'./Datasets/{dataset}/test', 1, 6)
ground_truths_for_samples = get_ground_truth_from_path(samples_to_use, dataset_classes)
get_class_samples('', samples_to_use, dataset_classes, ground_truths_for_samples)

['Ship', 'Horse', 'Dog', 'Bird', 'Dog', 'Bird', 'Automobile', 'Automobile', 'Dog', 'Cat']
['Ship', 'Horse', 'Truck', 'Frog', 'Deer', 'Bird', 'Airplane', 'Automobile', 'Cat', 'Dog']


0.4

In [97]:
dataset = 'PneumoniaMNIST'
dataset_classes = list(class_ground_truth_mapping[dataset].values())
samples_to_use = get_samples_from_each_folder(f'./Datasets/{dataset}/test', 1, 6)
ground_truths_for_samples = get_ground_truth_from_path(samples_to_use, dataset_classes)
get_class_samples('', samples_to_use, dataset_classes, ground_truths_for_samples)

['pneumonia', 'normal']
['pneumonia', 'normal']


1.0

In [16]:
dataset = 'DermaMNIST'
dataset_classes = list(class_ground_truth_mapping[dataset].values())
samples_to_use = get_samples_from_each_folder(f'./Datasets/{dataset}/test', 1, 6)
ground_truths_for_samples = get_ground_truth_from_path(samples_to_use, dataset_classes)
get_class_samples('', samples_to_use, dataset_classes, ground_truths_for_samples)

['vascular lesions', 'basal cell carcinoma', 'melanocytic nevi', 'melanoma', 'melanoma', 'melanoma', 'melanoma']
['vascular lesions', 'actinic keratoses and intraepithelial carcinoma', 'melanoma', 'basal cell carcinoma', 'melanocytic nevi', 'benign keratosis-like lesions', 'dermatofibroma']


0.14285714285714285

In [13]:
dataset_classes

['actinic keratoses and intraepithelial carcinoma',
 'basal cell carcinoma',
 'benign keratosis-like lesions',
 'dermatofibroma',
 'melanoma',
 'melanocytic nevi',
 'vascular lesions']

### Introduce Reasoning Options

In [40]:
def zero_shot_claude_with_reasoning(client, images, classes_to_classify, image_media_type = 'image/png'):

    img_msgs = []

    for count, image_path in enumerate(images):
        print(image_path)
        img_msgs.extend(
            [{
                    "type": "text",
                    "text": f"Image {count}:"
                },
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": image_media_type,
                        "data": encode_image(image_path),
                    },
                }]
        )

    img_msgs.append({
        "type": "text",
        "text": f"""You have been tasked to analyze the individual low-level visual characteristics of the given images in terms of shape, text, and color. 
        With that information, Classify each of the following images into one of these classes: {classes_to_classify}. 
        Return your response only as a JSON, with each Image, its reasoning and its predicted class. 
        Example: Image: 1, Reasoning: Presented hairy looking features with color perculiar to cats, Class: Cat"""
    })

    message = client.messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=1024,
        
        messages=[{"role": "user",
            "content": img_msgs}]
    )

    return message.content

In [41]:
res = zero_shot_claude_with_reasoning(client, samples_to_use, dataset_classes)

./Datasets/STL10/test/3/7667.png
./Datasets/STL10/test/6/5069.png
./Datasets/STL10/test/2/271.png
./Datasets/STL10/test/4/4089.png
./Datasets/STL10/test/9/7197.png
./Datasets/STL10/test/8/5024.png
./Datasets/STL10/test/7/5991.png
./Datasets/STL10/test/1/7727.png
./Datasets/STL10/test/0/5423.png
./Datasets/STL10/test/5/4073.png


{'Image 0': {'Reasoning': 'The image shows a person holding a large cat-like animal with tufted ears and spotted fur pattern typical of wild felines. The shape and coloration suggest this is likely a lynx or similar wild cat species.',
  'Class': 'cat'},
 'Image 1': {'Reasoning': 'The image depicts a large brown quadruped animal with a long neck and legs grazing on grass. The distinctive shape and coloration are characteristic of a horse.',
  'Class': 'horse'},
 'Image 2': {'Reasoning': 'The image shows a small, light blue vehicle with a rounded, bubble-like shape and a front-opening door. The compact design and unique styling are typical of certain micro or city cars.',
  'Class': 'car'},
 'Image 3': {'Reasoning': 'The image depicts a brown animal with antlers lying on the ground. The shape of the body and the presence of antlers are characteristic features of a deer.',
  'Class': 'deer'},
 'Image 4': {'Reasoning': 'The image shows a brown-furred animal with a long tail and expressive

In [46]:
for i in range(len(samples_to_use)):
    print(class_ground_truth_mapping['STL10'][samples_to_use[i].split('/')[-2]])

cat
horse
car
deer
truck
ship
monkey
bird
airplane
dog


In [43]:
class_ground_truth_mapping['STL10']

{'0': 'airplane',
 '1': 'bird',
 '2': 'car',
 '3': 'cat',
 '4': 'deer',
 '5': 'dog',
 '6': 'horse',
 '7': 'monkey',
 '8': 'ship',
 '9': 'truck'}

In [18]:
json.loads(res[0].text)

{'Image 0': {'Reasoning': 'The image shows a circular shape with a pinkish-red color. There are no distinct textures or patterns visible. The uniform coloration and round shape are consistent with certain types of skin lesions.',
  'Class': 'vascular lesions'},
 'Image 1': {'Reasoning': 'This image displays an irregularly shaped pink/red patch with some darker areas. The texture appears slightly rough or scaly. The coloration and irregular border are indicative of certain skin conditions.',
  'Class': 'actinic keratoses and intraepithelial carcinoma'},
 'Image 2': {'Reasoning': 'The image shows a dark brown, irregularly shaped lesion with asymmetrical borders. The color is not uniform, with variations in brown shades. These features are typical of potentially concerning moles or skin growths.',
  'Class': 'melanoma'},
 'Image 3': {'Reasoning': 'This image presents a circular lesion with a light pink/tan color and a slightly raised appearance. The borders are well-defined and the color 

In [26]:
# Test for CIFAR10 sample
dataset = 'STL10' #'CIFAR10'
dataset_classes = list(class_ground_truth_mapping[dataset].values())

print(dataset_classes)

samples_to_use = get_samples_from_each_folder(f'./Datasets/{dataset}/test', 1)

print(samples_to_use)
# ground_truths_for_samples = get_ground_truth_from_path(samples_to_use, dataset_classes)

# print(ground_truths_for_samples)

# res = zero_shot_claude_with_reasoning(client, samples_to_use, dataset_classes)

['airplane', 'bird', 'car', 'cat', 'deer', 'dog', 'horse', 'monkey', 'ship', 'truck']
['./Datasets/STL10/test/3/7667.png', './Datasets/STL10/test/6/5069.png', './Datasets/STL10/test/2/271.png', './Datasets/STL10/test/4/4089.png', './Datasets/STL10/test/9/7197.png', './Datasets/STL10/test/8/5024.png', './Datasets/STL10/test/7/5991.png', './Datasets/STL10/test/1/7727.png', './Datasets/STL10/test/0/5423.png', './Datasets/STL10/test/5/4073.png']


In [20]:
json.loads(res[0].text)

{'Image 0': {'Reasoning': 'The image shows a long, sleek shape with wings and a tail, typical of an airplane. It appears to be a commercial airliner based on its size and design. The color is predominantly white with some blue markings.',
  'Class': 'Airplane'},
 'Image 1': {'Reasoning': 'The image depicts a furry animal with pointed ears, whiskers, and a feline face shape. The coloration is typical of a domestic cat, with a mix of light and dark fur patterns.',
  'Class': 'Cat'},
 'Image 2': {'Reasoning': 'The image shows a large, quadrupedal animal with antlers, slender legs, and a distinctive deer-like body shape. The coloration is brown, typical of many deer species.',
  'Class': 'Deer'},
 'Image 3': {'Reasoning': 'The image depicts a large, rectangular vehicle with wheels and a cab section. The shape and size are consistent with a truck, and it appears to be a cargo or shipping truck based on its design.',
  'Class': 'Truck'},
 'Image 4': {'Reasoning': 'The image shows a quadruped

In [27]:
def description_agent(client, images, image_media_type = 'image/png'):

    img_msgs = []

    for count, image_path in enumerate(images):
        print(image_path)
        img_msgs.extend(
            [{
                    "type": "text",
                    "text": f"Image {count}:"
                },
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": image_media_type,
                        "data": encode_image(image_path),
                    },
                }]
        )

    img_msgs.append({
        "type": "text",
        "text": f"""
        Provide a detailed description of what you see. 
        Do not return the name of the item, only describe the physical features, colors, and any other properties of interest. 
        But do not return the name or category of the item.
        Return your response only as a JSON, with each Image, its reasoning. 
        Example: Image: 1, Reasoning: Hairy looking features with red color."""
    })

    message = client.messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=1024,
        
        messages=[{"role": "user",
            "content": img_msgs}]
    )

    return message.content

In [28]:
description_results = description_agent(client, samples_to_use)

./Datasets/STL10/test/3/7667.png
./Datasets/STL10/test/6/5069.png
./Datasets/STL10/test/2/271.png
./Datasets/STL10/test/4/4089.png
./Datasets/STL10/test/9/7197.png
./Datasets/STL10/test/8/5024.png
./Datasets/STL10/test/7/5991.png
./Datasets/STL10/test/1/7727.png
./Datasets/STL10/test/0/5423.png
./Datasets/STL10/test/5/4073.png


In [29]:
descriptions = json.loads(description_results[0].text)

In [30]:
descriptions

{'Image 0': {'Reasoning': 'A person with blonde hair is shown embracing and smiling with a large, furry animal. The animal has gray and brown fur, pointed ears, and appears to be some type of wild feline species. The image conveys a sense of affection between the human and animal.'},
 'Image 1': {'Reasoning': 'A large, muscular animal with reddish-brown fur is shown grazing on green grass. The animal has a long, thick neck, sturdy legs, and a flowing mane. It appears to be in an open field or pasture area.'},
 'Image 2': {'Reasoning': 'A small, compact vehicle in a bright turquoise color is displayed. It has a rounded, bubble-like shape with a clear dome top. The front appears to open upwards like a hatch. The vehicle looks retro or vintage in style.'},
 'Image 3': {'Reasoning': 'Two animals are visible in a natural setting. One is lying down, with a reddish-brown coat and distinctive antlers. The other is standing, with a similar coloration but no visible antlers. They appear to be in

In [47]:
def feature_agent(client, images, classes_to_classify, image_media_type = 'image/png'):

    img_msgs = []

    for count, image_path in enumerate(images):
        print(image_path)
        img_msgs.extend(
            [{
                    "type": "text",
                    "text": f"Image {count}:"
                },
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": image_media_type,
                        "data": encode_image(image_path),
                    },
                }]
        )

    img_msgs.append({
        "type": "text",
        "text": f"""Provide a detailed explanation of what features would be focused on if the given image is truly predicted as one of these classes: {classes_to_classify}. """ +  
        """ Return your response only as a JSON, with each Image, and for each class the specific features that would make it fit into the given class. 
        Do not say no features for the class are found, find at least 3 features that can make the image belong to the given class.
        You have to provide at least one feature that makes the image belong into the specified class. Example: 
        {Image: 1, Class: Cat, Reasoning: Presented hairy looking features with color perculiar to cats, Class: Dog, Reasoning: Seems to have a color similar to that of dogs, ..., Class N: Reasoning: Features perculair to class N are present. },
        {...},
        {Image: 2, Class: Cat, Reasoning: Has features partaining to the external features of a cat, Class: Dog, Reasoning: Does seem to look like a dog, ..., Class N: redness of class N present in the image, ...}"""
    })

    message = client.messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=4096,
        
        messages=[{"role": "user",
            "content": img_msgs}]
    )

    return message.content

In [32]:
dataset_classes

['airplane',
 'bird',
 'car',
 'cat',
 'deer',
 'dog',
 'horse',
 'monkey',
 'ship',
 'truck']

In [31]:
features = feature_agent(client, samples_to_use, dataset_classes)

./Datasets/STL10/test/3/7667.png
./Datasets/STL10/test/6/5069.png
./Datasets/STL10/test/2/271.png
./Datasets/STL10/test/4/4089.png
./Datasets/STL10/test/9/7197.png
./Datasets/STL10/test/8/5024.png
./Datasets/STL10/test/7/5991.png
./Datasets/STL10/test/1/7727.png
./Datasets/STL10/test/0/5423.png
./Datasets/STL10/test/5/4073.png


In [33]:
print(features[0].text)

{
  "Image 0": {
    "airplane": "Large metallic body, elongated shape, potential for wings or tail visible",
    "bird": "Feather-like textures, curved beak-like shape, perched posture",
    "car": "Rounded shape, potential for wheels or headlights, metallic appearance",
    "cat": "Fur-like texture, pointed ear shapes, curled up posture",
    "deer": "Fur-like texture, potential antler shapes, slender body form",
    "dog": "Fur-like texture, canine facial features, affectionate posture with human",
    "horse": "Large body size, potential for mane-like features, muscular form",
    "monkey": "Furry appearance, arms wrapped around human, primate-like face",
    "ship": "Large size, potential for hull-like shape, metallic appearance",
    "truck": "Large size, boxy shape, potential for wheels or headlights"
  },
  "Image 1": {
    "airplane": "Elongated body, potential for wings, standing on grass-like surface",
    "bird": "Long neck, standing on grass, potential for beak or feathers

In [34]:
def zero_shot_with_features_claude(client, image_path, classes_to_classify, img_description, class_feats, image_media_type = 'image/png'):

    img_msgs = [
            {
                "type": "text",
                "text": f"Image:"
            },
            {
                "type": "image",
                "source": {
                "type": "base64",
                "media_type": image_media_type,
                "data": encode_image(image_path),
            },
            }
        ]


    img_msgs.append({
        "type": "text",
        "text": f"""
            Here is a description of the given image: {img_description}.
            For each of the possible classes to predict the image, here are the feature properties of each class that could have been used: {class_feats}.
            Given the image, revise its description and the class predictions, find more features that has become clearer based on the description and class predictions.
            The original description can be wrong, so modify as needed.
            Classify the image into one of these classes: {classes_to_classify}. 
            Return your response only as a JSON, with the predicted class and and an improved reasoning for the answer. 
            Example: Prediction: Class Cat, Reasoning: Based on the feature description and its features, we recon that the image would be a cat. 
            """
    })

    message = client.messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=1024,
        
        messages=[{"role": "user",
            "content": img_msgs}]
    )

    return message.content

In [37]:
description_json = descriptions
feature_json = json.loads(features[0].text)

for i in range(len(samples_to_use)):
    key = f"Image {i}"

    description_for_img = description_json[key]
    feature_for_img = feature_json[key]

    # print(description_for_img)
    # print(feature_for_img)

    final_res = zero_shot_with_features_claude(client, samples_to_use[i], dataset_classes, description_for_img, feature_for_img, image_media_type = 'image/png')

    print(samples_to_use[i])
    print(f"The result is: {final_res}")

    # Refine the features based on the descriptions and predict which class the image belongs to

./Datasets/STL10/test/3/7667.png
The result is: [TextBlock(text='{\n  "Prediction": "Cat",\n  "Reasoning": "After reviewing the image description and potential class features, the most likely classification is \'cat\'. The image depicts a large, furry animal with pointed ears and gray-brown fur, which aligns closely with feline characteristics. The animal\'s size and wild appearance suggest it may be a larger wild cat species rather than a domestic cat, but it still falls within the \'cat\' category. The affectionate interaction between the human and the animal, with the person embracing it, is also consistent with cat-like behavior. While some features might overlap with other animals like a dog, the combination of the fur texture, ear shape, and overall description points more strongly to a feline species. The other classes (airplane, bird, car, deer, horse, monkey, ship, truck) do not match the described features or context of the image."\n}', type='text')]
./Datasets/STL10/test/6/5

In [39]:
class_ground_truth_mapping['STL10']

{'0': 'airplane',
 '1': 'bird',
 '2': 'car',
 '3': 'cat',
 '4': 'deer',
 '5': 'dog',
 '6': 'horse',
 '7': 'monkey',
 '8': 'ship',
 '9': 'truck'}

In [36]:
samples_to_use

['./Datasets/STL10/test/3/7667.png',
 './Datasets/STL10/test/6/5069.png',
 './Datasets/STL10/test/2/271.png',
 './Datasets/STL10/test/4/4089.png',
 './Datasets/STL10/test/9/7197.png',
 './Datasets/STL10/test/8/5024.png',
 './Datasets/STL10/test/7/5991.png',
 './Datasets/STL10/test/1/7727.png',
 './Datasets/STL10/test/0/5423.png',
 './Datasets/STL10/test/5/4073.png']

In [68]:
print(features[0].text)

{
  "Image 0": {
    "Airplane": "Elongated shape, metallic appearance, reflection on water surface reminiscent of aircraft",
    "Automobile": "Streamlined shape, potential for wheels hidden beneath water, metallic exterior",
    "Bird": "Elongated neck-like structure, water-based habitat, streamlined body shape",
    "Cat": "Pointed ear-like protrusion, sleek body shape, potential for whisker-like reflections",
    "Deer": "Elongated neck-like structure, potential antler-like reflection, water-based habitat",
    "Dog": "Elongated snout-like shape, potential for ears hidden by water, streamlined body",
    "Frog": "Water-based habitat, streamlined body shape, potential for eyes above water",
    "Horse": "Elongated neck-like structure, streamlined body, potential mane-like reflection",
    "Ship": "Elongated hull-like shape, water-based setting, metallic appearance",
    "Truck": "Elongated body shape, potential for cab-like structure, metallic appearance"
  },
  "Image 1": {
    "Ai

In [65]:
print(features[0].text)

{
  "Image 0": {
    "Airplane": "Elongated shape reminiscent of an aircraft fuselage",
    "Automobile": "Metallic surface could be part of a vehicle body",
    "Bird": "Curved shape could be interpreted as a bird's neck",
    "Cat": "Reflective surface resembles a cat's eye",
    "Deer": "Smooth curves reminiscent of deer antlers",
    "Dog": "Shiny surface similar to a dog's wet nose",
    "Frog": "Rounded shape could be seen as a frog's eye",
    "Horse": "Curved line might represent a horse's mane",
    "Ship": "Reflective quality reminiscent of water around a ship",
    "Truck": "Metal surface could be part of a truck's body"
  },
  "Image 1": {
    "Airplane": "Branches could be mistaken for airplane wings",
    "Automobile": "Rounded shape of leaves resembles car headlights",
    "Bird": "Leaf patterns similar to bird feathers",
    "Cat": "Leaf shapes resemble cat ears",
    "Deer": "Branch structure similar to deer antlers",
    "Dog": "Leaf cluster shape resembles a dog's fl

In [62]:
print(features[0].text)

{
  "Image 0": {
    "Bird": "Elongated neck and beak shape consistent with waterfowl",
    "Cat": "No feline features present",
    "Deer": "Long neck could be mistaken for a deer's, but overall shape doesn't match",
    "Dog": "No canine features visible",
    "Frog": "Aquatic setting, but body shape incompatible",
    "Horse": "No equine characteristics present",
    "Airplane": "No aircraft-like features",
    "Automobile": "No vehicular elements",
    "Ship": "Presence of water, but no vessel visible",
    "Truck": "No automotive features present"
  },
  "Image 1": {
    "Cat": "Furry texture and ear shape consistent with felines",
    "Dog": "Facial features could be mistaken for a small dog breed",
    "Bird": "No avian characteristics visible",
    "Deer": "No deer-like features present",
    "Frog": "No amphibian traits",
    "Horse": "No equine features",
    "Airplane": "No aircraft elements",
    "Automobile": "No vehicular components",
    "Ship": "No maritime features",
 

In [59]:
print(features[0].text)

{
  "Image 0": {
    "Bird": "Displays a long-necked water bird with a reflection, likely a heron or egret",
    "Airplane": "No aircraft-like features present",
    "Automobile": "No vehicle-like features present",
    "Cat": "No feline characteristics visible",
    "Deer": "No deer-like features present",
    "Dog": "No canine characteristics visible",
    "Frog": "No amphibian features present",
    "Horse": "No equine characteristics visible",
    "Ship": "No nautical vessel features present",
    "Truck": "No truck-like features present"
  },
  "Image 1": {
    "Cat": "Shows a feline face with fur, whiskers, and pointed ears",
    "Airplane": "No aircraft-like features present",
    "Automobile": "No vehicle-like features present",
    "Bird": "No avian characteristics visible",
    "Deer": "No deer-like features present",
    "Dog": "No canine characteristics visible",
    "Frog": "No amphibian features present",
    "Horse": "No equine characteristics visible",
    "Ship": "No n