# 9 way 5 shot GPT4-V experiment

#

In [1]:
## 
import os
from openai import OpenAI
import random
import base64
from tqdm import tqdm

In [31]:
# OpenAI API Key, removed for submission
# api_key = ""
api_key = ""

In [3]:
# helper functions
# Function to encode the image
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
    
def parse_response(response):
    # TODO: figure out how to output probs
    return response.choices[0].message.content

In [4]:
def format_predictions(gpt4v_response, categories):
    formatted_predictions = {}
    # irregular answer in the response, check if they fall into any of the 9 categories
    num_irregular = 0
    num_not_in_categories = 0
    num_more_than_one_categories = 0
    not_in_any = list()
    for key, value in gpt4v_response.items():
        if value['answer'] not in categories:
            num_irregular += 1
            print(key, value['answer'])
            # try to find if it contains an answer from the 9 categories
            find = list()
            for cat in categories:
                cat = cat.lower()
                if cat in value['answer'].lower():
                    print("find", cat)
                    find.append(cat)
            if "spurcewood" in value['answer'].lower():
                print("find spurcewood, typo in the categories, should be sprucewood")
                find.append("sprucewood") # typo in the categories
                
            if len(find) == 0:
                num_not_in_categories += 1
                print("not in categories")
                not_in_any.append((key, value['answer']))
                formatted_predictions[key] = "not sure"
            elif len(find) > 1:
                print("more than one categories")
                num_more_than_one_categories += 1
                formatted_predictions[key] = find[0]
            else:
                formatted_predictions[key] = find[0] # reformatted answer
        else:
            formatted_predictions[key] = value['answer']
    # print out stats
    print("num_irregular", num_irregular)
    print("num_not_in_categories", num_not_in_categories)
    print("num_more_than_one_categories", num_more_than_one_categories)
    # print("not_in_any", not_in_any)
    return formatted_predictions, not_in_any

In [5]:
categories = ['horsetail', 'bone', 'sprucewood', # typo spurcewood, should be sprucewood, but coexist in the filenames
              'fern', 'antler', 'ivory', 'beechwood', 
              'beforeuse', 'barley']

In [6]:
background_prompt = "An archaeologist conducting lithic use-wear analysis would examine the microscopic wear patterns on the tool's surface to infer its function and the materials it was used on. Typically it involves looking for certain types of wear on the stone tool, such as:\n * Polishing: Smooth areas that result from the tool being used on a particular material.\n * Striations: Lines that can indicate the direction of movement during use.\n * Microflaking: Tiny, often barely visible, chips along the edge of the tool that can show how it was used."
print(background_prompt)

An archaeologist conducting lithic use-wear analysis would examine the microscopic wear patterns on the tool's surface to infer its function and the materials it was used on. Typically it involves looking for certain types of wear on the stone tool, such as:
 * Polishing: Smooth areas that result from the tool being used on a particular material.
 * Striations: Lines that can indicate the direction of movement during use.
 * Microflaking: Tiny, often barely visible, chips along the edge of the tool that can show how it was used.


In [7]:
# pull out the test data:
test_dir = "./LUWA-FSL-GPT4V/Query-with-label-sequence/texture-png/"
test_images_all = os.listdir(test_dir)
test_images_x50 = [img for img in test_images_all if "50x" in img.lower() or "x50" in img.lower()]
test_images_x20 = [img for img in test_images_all if "20x" in img.lower() or "x20" in img.lower()]
print(len(test_images_x50), len(test_images_x20), len(test_images_all))

45 45 90


In [8]:
support_path = './LUWA-FSL-GPT4V/Support/9w10s/texture_png/'
example_files = os.listdir(support_path)
print(example_files)

['TRIBO_001_FLINTC1SL_FERN_9h_50X_003_001.png', 'TRIBO_010_FLINT1_BEECHWOOD_005_5h_X50_012_001.png', 'TRIBO_001_FLINTA2SM_BARLEY_9h_50X_020_001.png', 'TRIBO_001_FLINTC1SL_FERN_9h_20X_012_001.png', 'TRIBO_001_FLINTC1SL_FERN_9h_20X_002_001.png', 'TRIBO_022_FLINT1_ANTLER_5h_20X_046_001.png', 'TRIBO_015_FLINT1_BU_X50-002_001.png', 'TRIBO_001_FLINTB1SH_HORSETAIL_9h_20X_005_001.png', 'TRIBO_015_FLINT_SPRUCEWOOD_5h_50X_032_001.png', 'TRIBO_022_FLINT1_ANTLER_5h_20X_002_001.png', 'TRIBO_001_FLINTA1SM_BARLEY_9h_20X_002_001.png', 'TRIBO_001_FLINTB1SH_HORSETAIL_9h_20X_004_001.png', 'TRIBO_014_FLINT1_IVORY_5h_20X_006_001.png', 'TRIBO_005_FLINT1_BEECHWOOD_002_X50-001_001.png', 'TRIBO_005_FLINT1_BEECHWOOD_002_X20-001_001.png', 'TRIBO_014_FLINT1_IVORY_5h_50X_043_001.png', 'TRIBO_001_FLINTA2SM_BARLEY_9h_20X_020_001.png', 'TRIBO_013_FLINT1_BONE_002_5h_X20_004_001.png', 'TRIBO_001_FLINTC1SL_FERN_9h_50X_002_001.png', 'TRIBO_010_FLINT1_BEECHWOOD_005_5h_X50_003_001.png', 'TRIBO_001_FLINTC1SL_FERN_9h_20X_003

In [10]:
# encode example iamges and pair with the categories based on the file name
# example_dict_x50 = {}
# example_dict_x20 = {}

def register_file(file, categories, example_dict, support_path):
    filename = file.lower()
    for cat in categories:
        if cat in filename:
            if cat not in example_dict:
                example_dict[cat] = [encode_image(support_path + file)]
            else:
                example_dict[cat].append(encode_image(support_path + file))
            print(cat, "X", filename)
            break
    if "spurcewood" in filename: # consider the typo
        if "sprucewood" not in example_dict:
            example_dict["sprucewood"] = [encode_image(support_path + file)]
        else:
            example_dict["sprucewood"].append(encode_image(support_path + file))
    # add before use
    elif "bu" in filename: # consider BU
        print(filename)
        if "beforeuse" not in example_dict:
            example_dict["beforeuse"] = [encode_image(support_path + file)]
        else:
            example_dict["beforeuse"].append(encode_image(support_path + file))

# for file in example_files:
#     filename = file.lower()
#     if "50x" in filename or "x50" in filename:
#         register_file(file, categories, example_dict_x50, support_path)
#     elif "20x" in filename or "x20" in filename:
#         register_file(file, categories, example_dict_x20, support_path)
#     else:
#         print("not a valid file", file)
    

In [11]:
example_dict = {}
for file in example_files:
    filename = file.lower()
    register_file(file, categories, example_dict, support_path)

fern X tribo_001_flintc1sl_fern_9h_50x_003_001.png
beechwood X tribo_010_flint1_beechwood_005_5h_x50_012_001.png
barley X tribo_001_flinta2sm_barley_9h_50x_020_001.png
fern X tribo_001_flintc1sl_fern_9h_20x_012_001.png
fern X tribo_001_flintc1sl_fern_9h_20x_002_001.png
antler X tribo_022_flint1_antler_5h_20x_046_001.png
tribo_015_flint1_bu_x50-002_001.png
horsetail X tribo_001_flintb1sh_horsetail_9h_20x_005_001.png
sprucewood X tribo_015_flint_sprucewood_5h_50x_032_001.png
antler X tribo_022_flint1_antler_5h_20x_002_001.png
barley X tribo_001_flinta1sm_barley_9h_20x_002_001.png
horsetail X tribo_001_flintb1sh_horsetail_9h_20x_004_001.png
ivory X tribo_014_flint1_ivory_5h_20x_006_001.png
beechwood X tribo_005_flint1_beechwood_002_x50-001_001.png
beechwood X tribo_005_flint1_beechwood_002_x20-001_001.png
ivory X tribo_014_flint1_ivory_5h_50x_043_001.png
barley X tribo_001_flinta2sm_barley_9h_20x_020_001.png
bone X tribo_013_flint1_bone_002_5h_x20_004_001.png
fern X tribo_001_flintc1sl_fe

In [12]:
# print(len(example_dict_x50), len(example_dict_x20))
# for k in example_dict_x50:
#     print(k, len(example_dict_x50[k])) 
# for k in example_dict_x20:
#     print(k, len(example_dict_x20[k]))
print(len(example_dict))
for k in example_dict:
    print(k, len(example_dict[k]))

9
fern 10
beechwood 10
barley 10
antler 10
beforeuse 10
horsetail 10
sprucewood 10
ivory 10
bone 10


In [13]:
random.seed(42)

In [14]:
def make_base_prompt_fewshot(categories, background_prompt, example_dict, num_shots=5, shuffle_show=False, shuffle_example=False):
    task_prompt = "Now there are a few demonstration images from exactly nine use-types:" + str(categories)
    base_request_content = [
        {
            "type": "text",
            "text": background_prompt + "\n\n" + task_prompt
        }
    ]
    demonstration_pairs = list()
    # make example dictionary into a list of pairs (cat, img)
    # adding beforeuse as the beginning.
    # demonstration_pairs.append(("beforeuse", example_dict["beforeuse"][0]))
    for cat in categories: # category + beforeuse
        if shuffle_example:
            for cat_img in random.sample(example_dict[cat], num_shots):
                demonstration_pairs.append((cat, cat_img))
        else:
            for cat_img in example_dict[cat][:num_shots]:
                demonstration_pairs.append((cat, cat_img))
    # # adding beforeuse as the end.
    # demonstration_pairs.append(("beforeuse", example_dict["beforeuse"][0]))
    # shuffle the list
    if shuffle_show:
        random.shuffle(demonstration_pairs)
    for (cat, cat_img) in demonstration_pairs:
        if cat == "beforeuse":
            prompt = "This is an image taken with advanced microscopes showing no trace of use on any meterial, so the use-type is {}.\n".format(cat)
        else:
            prompt = "This is an image taken with advanced microscopes containing traces created by meterial {}, so the use-type is {}.\n".format(cat, cat)
        print(prompt)
        base_request_content.append(
            {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{cat_img}"
                }
            }
        )
        base_request_content.append(
            {
            "type": "text",
            "text": prompt
            }
        )
    return base_request_content

## whole set experiments

In [15]:
base_request_content = make_base_prompt_fewshot(categories, background_prompt, example_dict, num_shots=5, shuffle_show=True, shuffle_example=True)

This is an image taken with advanced microscopes containing traces created by meterial bone, so the use-type is bone.

This is an image taken with advanced microscopes containing traces created by meterial sprucewood, so the use-type is sprucewood.

This is an image taken with advanced microscopes containing traces created by meterial barley, so the use-type is barley.

This is an image taken with advanced microscopes containing traces created by meterial horsetail, so the use-type is horsetail.

This is an image taken with advanced microscopes containing traces created by meterial horsetail, so the use-type is horsetail.

This is an image taken with advanced microscopes containing traces created by meterial sprucewood, so the use-type is sprucewood.

This is an image taken with advanced microscopes containing traces created by meterial fern, so the use-type is fern.

This is an image taken with advanced microscopes containing traces created by meterial ivory, so the use-type is ivory.

In [17]:
print(len(base_request_content))

91


In [16]:
instruction = "This is another image, do the traces on this image belong to which of the nine use-types as demonstrated above? Answer in exactly one word: "

In [19]:
gpt4v_response = {}
for test_image in tqdm(test_images_all):
    # encode the query image
    test_image_encoded = encode_image(test_dir + test_image)
    # test prompt
    test_image_prompt = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{test_image_encoded}"
            }
    }
    test_image_instruction = {
        "type": "text",
        "text": instruction
    }
    request_content = base_request_content + [test_image_prompt, test_image_instruction]
    # make request
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {
                "role": "user",
                "content": request_content
            }
        ],
        max_tokens=300,
        temperature=0.0,
    )
    # parse response
    gpt4v_response[test_image] = {
        'response': response,
        'answer': parse_response(response)
    }

100%|██████████| 90/90 [52:26<00:00, 34.96s/it] 


In [20]:
formatted, not_in_any = format_predictions(gpt4v_response, categories)

TRIBO_012_FLINT_SPRUCEWOOD_5h_20X_010_001.png The traces in this image belong to the use-type "antler."
find antler
TRIBO_023_FLINT1_BU_X20-003_001.png The traces in this image do not belong to any of the nine use-types demonstrated above.
not in categories
num_irregular 2
num_not_in_categories 1
num_more_than_one_categories 0


In [21]:
# save the answer as a readable json file
import json
with open('gpt4v_predictions_label_sequence_9w5s_20+50_tex_shuffle.json', 'w') as fp:
    json.dump(formatted, fp)

## whole experiment heightmap

In [22]:
additional_prompt = "They also use heightmap to help them understand the traces on the stone tools. The heightmap indicates the height of the surface of the stone tool with colors to better reflect the details of the surface and help identify the traces."

In [23]:
background_prompt = background_prompt + "\n"+ additional_prompt
print(background_prompt)

An archaeologist conducting lithic use-wear analysis would examine the microscopic wear patterns on the tool's surface to infer its function and the materials it was used on. Typically it involves looking for certain types of wear on the stone tool, such as:
 * Polishing: Smooth areas that result from the tool being used on a particular material.
 * Striations: Lines that can indicate the direction of movement during use.
 * Microflaking: Tiny, often barely visible, chips along the edge of the tool that can show how it was used.
They also use heightmap to help them understand the traces on the stone tools. The heightmap indicates the height of the surface of the stone tool with colors to better reflect the details of the surface and help identify the traces.


In [24]:
# pull out the test data:
test_dir = "./LUWA-FSL-GPT4V/Query-with-label-sequence/heightmap_png/"
test_images_all = os.listdir(test_dir)
# test_images_x50 = [img for img in test_images_all if "50x" in img.lower() or "x50" in img.lower()]
# test_images_x20 = [img for img in test_images_all if "20x" in img.lower() or "x20" in img.lower()]
# print(len(test_images_x50), len(test_images_x20), len(test_images_all))
print(len(test_images_all))

90


In [25]:
support_path = './LUWA-FSL-GPT4V/Support/9w10s/heightmap_png/'
example_files = os.listdir(support_path)
print(example_files)

['TRIBO_001_FLINTC1SL_FERN_9h_50X_003_001.png', 'TRIBO_010_FLINT1_BEECHWOOD_005_5h_X50_012_001.png', 'TRIBO_001_FLINTA2SM_BARLEY_9h_50X_020_001.png', 'TRIBO_001_FLINTC1SL_FERN_9h_20X_012_001.png', 'TRIBO_001_FLINTC1SL_FERN_9h_20X_002_001.png', 'TRIBO_022_FLINT1_ANTLER_5h_20X_046_001.png', 'TRIBO_015_FLINT1_BU_X50-002_001.png', 'TRIBO_001_FLINTB1SH_HORSETAIL_9h_20X_005_001.png', 'TRIBO_015_FLINT_SPRUCEWOOD_5h_50X_032_001.png', 'TRIBO_022_FLINT1_ANTLER_5h_20X_002_001.png', 'TRIBO_001_FLINTA1SM_BARLEY_9h_20X_002_001.png', 'TRIBO_001_FLINTB1SH_HORSETAIL_9h_20X_004_001.png', 'TRIBO_014_FLINT1_IVORY_5h_20X_006_001.png', 'TRIBO_005_FLINT1_BEECHWOOD_002_X50-001_001.png', 'TRIBO_005_FLINT1_BEECHWOOD_002_X20-001_001.png', 'TRIBO_014_FLINT1_IVORY_5h_50X_043_001.png', 'TRIBO_001_FLINTA2SM_BARLEY_9h_20X_020_001.png', 'TRIBO_013_FLINT1_BONE_002_5h_X20_004_001.png', 'TRIBO_001_FLINTC1SL_FERN_9h_50X_002_001.png', 'TRIBO_010_FLINT1_BEECHWOOD_005_5h_X50_003_001.png', 'TRIBO_001_FLINTC1SL_FERN_9h_20X_003

In [26]:
example_dict = {}
for file in example_files:
    filename = file.lower()
    register_file(file, categories, example_dict, support_path)

print(len(example_dict))
for k in example_dict:
    print(k, len(example_dict[k]))

fern X tribo_001_flintc1sl_fern_9h_50x_003_001.png
beechwood X tribo_010_flint1_beechwood_005_5h_x50_012_001.png
barley X tribo_001_flinta2sm_barley_9h_50x_020_001.png
fern X tribo_001_flintc1sl_fern_9h_20x_012_001.png
fern X tribo_001_flintc1sl_fern_9h_20x_002_001.png
antler X tribo_022_flint1_antler_5h_20x_046_001.png
tribo_015_flint1_bu_x50-002_001.png
horsetail X tribo_001_flintb1sh_horsetail_9h_20x_005_001.png
sprucewood X tribo_015_flint_sprucewood_5h_50x_032_001.png
antler X tribo_022_flint1_antler_5h_20x_002_001.png
barley X tribo_001_flinta1sm_barley_9h_20x_002_001.png
horsetail X tribo_001_flintb1sh_horsetail_9h_20x_004_001.png
ivory X tribo_014_flint1_ivory_5h_20x_006_001.png
beechwood X tribo_005_flint1_beechwood_002_x50-001_001.png
beechwood X tribo_005_flint1_beechwood_002_x20-001_001.png
ivory X tribo_014_flint1_ivory_5h_50x_043_001.png
barley X tribo_001_flinta2sm_barley_9h_20x_020_001.png
bone X tribo_013_flint1_bone_002_5h_x20_004_001.png
fern X tribo_001_flintc1sl_fe

In [27]:
def make_base_prompt_fewshot(categories, background_prompt, example_dict, num_shots=5, shuffle_show=False, shuffle_example=False):
    task_prompt = "Now there are a few demonstration heightmap images from exactly nine use-types:" + str(categories)
    base_request_content = [
        {
            "type": "text",
            "text": background_prompt + "\n\n" + task_prompt
        }
    ]
    demonstration_pairs = list()
    # make example dictionary into a list of pairs (cat, img)
    # adding beforeuse as the beginning.
    # demonstration_pairs.append(("beforeuse", example_dict["beforeuse"][0]))
    for cat in categories: # category + beforeuse
        if shuffle_example:
            for cat_img in random.sample(example_dict[cat], num_shots):
                demonstration_pairs.append((cat, cat_img))
        else:
            for cat_img in example_dict[cat][:num_shots]:
                demonstration_pairs.append((cat, cat_img))
    # # adding beforeuse as the end.
    # demonstration_pairs.append(("beforeuse", example_dict["beforeuse"][0]))
    # shuffle the list
    if shuffle_show:
        random.shuffle(demonstration_pairs)
    for (cat, cat_img) in demonstration_pairs:
        if cat == "beforeuse":
            prompt = "This is an heightmap image taken with advanced microscopes showing no trace of use on any meterial, so the use-type is {}.\n".format(cat)
        else:
            prompt = "This is an heightmap image taken with advanced microscopes containing traces created by meterial {}, so the use-type is {}.\n".format(cat, cat)
        print(prompt)
        base_request_content.append(
            {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{cat_img}"
                }
            }
        )
        base_request_content.append(
            {
            "type": "text",
            "text": prompt
            }
        )
    return base_request_content

In [28]:
base_request_content = make_base_prompt_fewshot(categories, background_prompt, example_dict, num_shots=5, shuffle_show=True, shuffle_example=True)

This is an heightmap image taken with advanced microscopes containing traces created by meterial sprucewood, so the use-type is sprucewood.

This is an heightmap image taken with advanced microscopes containing traces created by meterial beechwood, so the use-type is beechwood.

This is an heightmap image taken with advanced microscopes containing traces created by meterial beechwood, so the use-type is beechwood.

This is an heightmap image taken with advanced microscopes containing traces created by meterial beechwood, so the use-type is beechwood.

This is an heightmap image taken with advanced microscopes containing traces created by meterial sprucewood, so the use-type is sprucewood.

This is an heightmap image taken with advanced microscopes containing traces created by meterial barley, so the use-type is barley.

This is an heightmap image taken with advanced microscopes containing traces created by meterial bone, so the use-type is bone.

This is an heightmap image taken with a

In [30]:
instruction = "This is another heightmap image, do the traces on this image belong to which of the nine use-types as demonstrated above? Answer in exactly one word: "

In [32]:
gpt4v_response = {}
for test_image in tqdm(test_images_all):
    # encode the query image
    test_image_encoded = encode_image(test_dir + test_image)
    # test prompt
    test_image_prompt = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{test_image_encoded}"
            }
    }
    test_image_instruction = {
        "type": "text",
        "text": instruction
    }
    request_content = base_request_content + [test_image_prompt, test_image_instruction]
    # make request
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {
                "role": "user",
                "content": request_content
            }
        ],
        max_tokens=300,
        temperature=0.0,
    )
    # parse response
    gpt4v_response[test_image] = {
        'response': response,
        'answer': parse_response(response)
    }

100%|██████████| 90/90 [57:52<00:00, 38.58s/it]  


In [33]:
formatted, not_in_any = format_predictions(gpt4v_response, categories)

TRIBO_012_FLINT_SPRUCEWOOD_5h_20X_008_001.png Barley
find barley
num_irregular 1
num_not_in_categories 0
num_more_than_one_categories 0


In [34]:
# save the answer as a readable json file
import json
with open('gpt4v_predictions_label_sequence_9w5s_20+50_hm_shuffle.json', 'w') as fp:
    json.dump(formatted, fp)

## 50x experiments

In [15]:
base_request_content = make_base_prompt_fewshot(categories, background_prompt, example_dict_x50, num_shots=5, shuffle_show=True, shuffle_example=False)

This is an image taken with advanced microscopes containing traces created by meterial bone, so the use-type is bone.

This is an image taken with advanced microscopes containing traces created by meterial horsetail, so the use-type is horsetail.

This is an image taken with advanced microscopes containing traces created by meterial sprucewood, so the use-type is sprucewood.

This is an image taken with advanced microscopes containing traces created by meterial beechwood, so the use-type is beechwood.

This is an image taken with advanced microscopes containing traces created by meterial barley, so the use-type is barley.

This is an image taken with advanced microscopes containing traces created by meterial fern, so the use-type is fern.

This is an image taken with advanced microscopes containing traces created by meterial beechwood, so the use-type is beechwood.

This is an image taken with advanced microscopes containing traces created by meterial ivory, so the use-type is ivory.



In [16]:
instruction = "This is another image, do the traces on this image belong to which of the nine use-types as demonstrated above? Answer in exactly one word: "
# does the order matter?

In [17]:
gpt4v_response = {}
for test_image in tqdm(test_images_x50):
    # encode the query image
    test_image_encoded = encode_image(test_dir + test_image)
    # test prompt
    test_image_prompt = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{test_image_encoded}"
            }
    }
    test_image_instruction = {
        "type": "text",
        "text": instruction
    }
    request_content = base_request_content + [test_image_prompt, test_image_instruction]
    # make request
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {
                "role": "user",
                "content": request_content
            }
        ],
        max_tokens=300,
        temperature=0.0,
    )
    # parse response
    gpt4v_response[test_image] = {
        'response': response,
        'answer': parse_response(response)
    }

100%|██████████| 45/45 [26:33<00:00, 35.40s/it]


In [18]:
formatted, not_in_any = format_predictions(gpt4v_response, categories)

TRIBO_001_FLINTA1SM_BARLEY_9h_50X_003_001.png The traces in this image do not match any of the nine demonstrated use-types.
not in categories
TRIBO_011_FLINT1_ANTLER_001_3h_X50-003_001.png The traces in this image belong to the use-type "bone."
find bone
TRIBO_011_FLINT1_BU_X50-002_001.png The traces in this image belong to the use-type "bone."
find bone
num_irregular 3
num_not_in_categories 1
num_more_than_one_categories 0


In [19]:
# save the answer as a readable json file
import json
with open('gpt4v_predictions_label_sequence_9w5s_shuffle.json', 'w') as fp:
    json.dump(formatted, fp)

## 20x experiment

In [20]:
base_request_content = make_base_prompt_fewshot(categories, background_prompt, example_dict_x20, num_shots=5, shuffle_show=True, shuffle_example=False)

This is an image taken with advanced microscopes containing traces created by meterial horsetail, so the use-type is horsetail.

This is an image taken with advanced microscopes containing traces created by meterial sprucewood, so the use-type is sprucewood.

This is an image taken with advanced microscopes containing traces created by meterial beechwood, so the use-type is beechwood.

This is an image taken with advanced microscopes containing traces created by meterial horsetail, so the use-type is horsetail.

This is an image taken with advanced microscopes containing traces created by meterial barley, so the use-type is barley.

This is an image taken with advanced microscopes containing traces created by meterial antler, so the use-type is antler.

This is an image taken with advanced microscopes showing no trace of use on any meterial, so the use-type is beforeuse.

This is an image taken with advanced microscopes containing traces created by meterial fern, so the use-type is fer

In [21]:
instruction = "This is another image, do the traces on this image belong to which of the nine use-types as demonstrated above? Answer in exactly one word: "

In [22]:
gpt4v_response = {}
for test_image in tqdm(test_images_x20):
    # encode the query image
    test_image_encoded = encode_image(test_dir + test_image)
    # test prompt
    test_image_prompt = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{test_image_encoded}"
            }
    }
    test_image_instruction = {
        "type": "text",
        "text": instruction
    }
    request_content = base_request_content + [test_image_prompt, test_image_instruction]
    # make request
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {
                "role": "user",
                "content": request_content
            }
        ],
        max_tokens=300,
        temperature=0.0,
    )
    # parse response
    gpt4v_response[test_image] = {
        'response': response,
        'answer': parse_response(response)
    }

100%|██████████| 45/45 [24:52<00:00, 33.16s/it]


In [23]:
formatted, not_in_any = format_predictions(gpt4v_response, categories)

TRIBO_011_FLINT1_ANTLER_001_5h_X20-005_001.png Bone
find bone
TRIBO_012_FLINT_SPRUCEWOOD_5h_20X_010_001.png The traces in this image do not match any of the nine demonstrated use-types.
not in categories
TRIBO_023_FLINT1_BU_X20-003_001.png The traces in this image do not belong to any of the nine use-types demonstrated above.
not in categories
TRIBO_020_FLINT1_BONE_5h_X20-001_001.png The traces in this image belong to the use-type "antler."
find antler
TRIBO_025_FLINT1_IVORY_X20_5h-002_001.png The traces in the image belong to the use-type "horsetail."
find horsetail
TRIBO_001_FLINTA2SM_BARLEY_9h_20X_012_001.png The traces in this image do not match any of the nine demonstrated use-types.
not in categories
num_irregular 6
num_not_in_categories 3
num_more_than_one_categories 0


In [24]:
# save the answer as a readable json file
import json
with open('gpt4v_predictions_label_sequence_9w5sx20_shuffle.json', 'w') as fp:
    json.dump(formatted, fp)

## Heightmap experiment

In [27]:
additional_prompt = "They also use heightmap to help them understand the traces on the stone tools. The heightmap indicates the height of the surface of the stone tool with colors to better reflect the details of the surface and help identify the traces."

In [29]:
print(additional_prompt)

They also use heightmap to help them understand the traces on the stone tools. The heightmap indicates the height of the surface of the stone tool with colors to better reflect the details of the surface and help identify the traces.


In [35]:
background_prompt = background_prompt + "\n"+ additional_prompt

In [36]:
print(background_prompt)

An archaeologist conducting lithic use-wear analysis would examine the microscopic wear patterns on the tool's surface to infer its function and the materials it was used on. Typically it involves looking for certain types of wear on the stone tool, such as:
 * Polishing: Smooth areas that result from the tool being used on a particular material.
 * Striations: Lines that can indicate the direction of movement during use.
 * Microflaking: Tiny, often barely visible, chips along the edge of the tool that can show how it was used.
They also use heightmap to help them understand the traces on the stone tools. The heightmap indicates the height of the surface of the stone tool with colors to better reflect the details of the surface and help identify the traces.


In [31]:
# pull out the test data:
test_dir = "./LUWA-FSL-GPT4V/Query-with-label-sequence/heightmap_png/"
test_images_all = os.listdir(test_dir)
test_images_x50 = [img for img in test_images_all if "50x" in img.lower() or "x50" in img.lower()]
test_images_x20 = [img for img in test_images_all if "20x" in img.lower() or "x20" in img.lower()]
print(len(test_images_x50), len(test_images_x20), len(test_images_all))

45 45 90


In [32]:
support_path = './LUWA-FSL-GPT4V/Support/9w10s/heightmap_png/'
example_files = os.listdir(support_path)
print(example_files)

['TRIBO_001_FLINTC1SL_FERN_9h_50X_003_001.png', 'TRIBO_010_FLINT1_BEECHWOOD_005_5h_X50_012_001.png', 'TRIBO_001_FLINTA2SM_BARLEY_9h_50X_020_001.png', 'TRIBO_001_FLINTC1SL_FERN_9h_20X_012_001.png', 'TRIBO_001_FLINTC1SL_FERN_9h_20X_002_001.png', 'TRIBO_022_FLINT1_ANTLER_5h_20X_046_001.png', 'TRIBO_015_FLINT1_BU_X50-002_001.png', 'TRIBO_001_FLINTB1SH_HORSETAIL_9h_20X_005_001.png', 'TRIBO_015_FLINT_SPRUCEWOOD_5h_50X_032_001.png', 'TRIBO_022_FLINT1_ANTLER_5h_20X_002_001.png', 'TRIBO_001_FLINTA1SM_BARLEY_9h_20X_002_001.png', 'TRIBO_001_FLINTB1SH_HORSETAIL_9h_20X_004_001.png', 'TRIBO_014_FLINT1_IVORY_5h_20X_006_001.png', 'TRIBO_005_FLINT1_BEECHWOOD_002_X50-001_001.png', 'TRIBO_005_FLINT1_BEECHWOOD_002_X20-001_001.png', 'TRIBO_014_FLINT1_IVORY_5h_50X_043_001.png', 'TRIBO_001_FLINTA2SM_BARLEY_9h_20X_020_001.png', 'TRIBO_013_FLINT1_BONE_002_5h_X20_004_001.png', 'TRIBO_001_FLINTC1SL_FERN_9h_50X_002_001.png', 'TRIBO_010_FLINT1_BEECHWOOD_005_5h_X50_003_001.png', 'TRIBO_001_FLINTC1SL_FERN_9h_20X_003

In [33]:
# encode example iamges and pair with the categories based on the file name
example_dict_x50 = {}
example_dict_x20 = {}
for file in example_files:
    filename = file.lower()
    if "50x" in filename or "x50" in filename:
        register_file(file, categories, example_dict_x50, support_path)
    elif "20x" in filename or "x20" in filename:
        register_file(file, categories, example_dict_x20, support_path)
    else:
        print("not a valid file", file)

fern X tribo_001_flintc1sl_fern_9h_50x_003_001.png
beechwood X tribo_010_flint1_beechwood_005_5h_x50_012_001.png
barley X tribo_001_flinta2sm_barley_9h_50x_020_001.png
fern X tribo_001_flintc1sl_fern_9h_20x_012_001.png
fern X tribo_001_flintc1sl_fern_9h_20x_002_001.png
antler X tribo_022_flint1_antler_5h_20x_046_001.png
tribo_015_flint1_bu_x50-002_001.png
horsetail X tribo_001_flintb1sh_horsetail_9h_20x_005_001.png
sprucewood X tribo_015_flint_sprucewood_5h_50x_032_001.png
antler X tribo_022_flint1_antler_5h_20x_002_001.png
barley X tribo_001_flinta1sm_barley_9h_20x_002_001.png
horsetail X tribo_001_flintb1sh_horsetail_9h_20x_004_001.png
ivory X tribo_014_flint1_ivory_5h_20x_006_001.png
beechwood X tribo_005_flint1_beechwood_002_x50-001_001.png
beechwood X tribo_005_flint1_beechwood_002_x20-001_001.png
ivory X tribo_014_flint1_ivory_5h_50x_043_001.png
barley X tribo_001_flinta2sm_barley_9h_20x_020_001.png
bone X tribo_013_flint1_bone_002_5h_x20_004_001.png
fern X tribo_001_flintc1sl_fe

In [34]:
print(len(example_dict_x50), len(example_dict_x20))
for k in example_dict_x50:
    print(k, len(example_dict_x50[k])) 
for k in example_dict_x20:
    print(k, len(example_dict_x20[k]))

9 9
fern 5
beechwood 5
barley 5
beforeuse 5
sprucewood 5
ivory 5
bone 5
horsetail 5
antler 4
fern 5
antler 6
horsetail 5
barley 5
ivory 5
beechwood 5
bone 5
beforeuse 5
sprucewood 5


In [37]:
def make_base_prompt_fewshot(categories, background_prompt, example_dict, num_shots=5, shuffle_show=False, shuffle_example=False):
    task_prompt = "Now there are a few demonstration heightmap images from exactly nine use-types:" + str(categories)
    base_request_content = [
        {
            "type": "text",
            "text": background_prompt + "\n\n" + task_prompt
        }
    ]
    demonstration_pairs = list()
    # make example dictionary into a list of pairs (cat, img)
    # adding beforeuse as the beginning.
    # demonstration_pairs.append(("beforeuse", example_dict["beforeuse"][0]))
    for cat in categories: # category + beforeuse
        if shuffle_example:
            for cat_img in random.sample(example_dict[cat], num_shots):
                demonstration_pairs.append((cat, cat_img))
        else:
            for cat_img in example_dict[cat][:num_shots]:
                demonstration_pairs.append((cat, cat_img))
    # # adding beforeuse as the end.
    # demonstration_pairs.append(("beforeuse", example_dict["beforeuse"][0]))
    # shuffle the list
    if shuffle_show:
        random.shuffle(demonstration_pairs)
    for (cat, cat_img) in demonstration_pairs:
        if cat == "beforeuse":
            prompt = "This is an heightmap image taken with advanced microscopes showing no trace of use on any meterial, so the use-type is {}.\n".format(cat)
        else:
            prompt = "This is an heightmap image taken with advanced microscopes containing traces created by meterial {}, so the use-type is {}.\n".format(cat, cat)
        print(prompt)
        base_request_content.append(
            {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{cat_img}"
                }
            }
        )
        base_request_content.append(
            {
            "type": "text",
            "text": prompt
            }
        )
    return base_request_content

### 50x experiments

In [43]:
base_request_content = make_base_prompt_fewshot(categories, background_prompt, example_dict_x20, num_shots=5, shuffle_show=True, shuffle_example=False)

This is an heightmap image taken with advanced microscopes containing traces created by meterial horsetail, so the use-type is horsetail.

This is an heightmap image taken with advanced microscopes containing traces created by meterial sprucewood, so the use-type is sprucewood.

This is an heightmap image taken with advanced microscopes containing traces created by meterial sprucewood, so the use-type is sprucewood.

This is an heightmap image taken with advanced microscopes containing traces created by meterial barley, so the use-type is barley.

This is an heightmap image taken with advanced microscopes containing traces created by meterial bone, so the use-type is bone.

This is an heightmap image taken with advanced microscopes showing no trace of use on any meterial, so the use-type is beforeuse.

This is an heightmap image taken with advanced microscopes containing traces created by meterial antler, so the use-type is antler.

This is an heightmap image taken with advanced micros

In [44]:
instruction = "This is another heightmap image, do the traces on this image belong to which of the nine use-types as demonstrated above? Answer in exactly one word: "

In [45]:
gpt4v_response = {}
for test_image in tqdm(test_images_x20):
    # encode the query image
    test_image_encoded = encode_image(test_dir + test_image)
    # test prompt
    test_image_prompt = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{test_image_encoded}"
            }
    }
    test_image_instruction = {
        "type": "text",
        "text": instruction
    }
    request_content = base_request_content + [test_image_prompt, test_image_instruction]
    # make request
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {
                "role": "user",
                "content": request_content
            }
        ],
        max_tokens=300,
        temperature=0.0,
    )
    # parse response
    gpt4v_response[test_image] = {
        'response': response,
        'answer': parse_response(response)
    }

100%|██████████| 45/45 [34:08<00:00, 45.51s/it]


In [46]:
formatted, not_in_any = format_predictions(gpt4v_response, categories)

num_irregular 0
num_not_in_categories 0
num_more_than_one_categories 0


In [47]:
# save the answer as a readable json file
import json
with open('gpt4v_predictions_label_sequence_heightmap_9w5sx20_shuffle.json', 'w') as fp:
    json.dump(formatted, fp)