In [1]:
import json
import os

import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt


In [2]:
# Load the saved experiment results into a plain dict.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open("experiments/exp0/results/ChatGPT_4_turbo__BLIP2_flan_t5_xxl.json", encoding="utf-8") as f:
    gpt4_turbo_results = json.load(f)


In [3]:
# Per-flip results live under the "ChatGPT_4_turbo" / "BLIP2_flan_t5_xxl" /
# "flips_res" keys; keep the key of every flip whose "acc" entry is falsy.
flips_res = gpt4_turbo_results["ChatGPT_4_turbo"]["BLIP2_flan_t5_xxl"]["flips_res"]
failed_flips = [key for key, outcome in flips_res.items() if not outcome["acc"]]
print(f"Failed flips: {len(failed_flips)}")

Failed flips: 387


In [4]:
# Display the first failed flip key to see what the keys look like.
failed_flips[0]

'/flip/bafkreia2v23cs6gkkifim7jgrdebhgm4yotn6tqyg2hc3g24zeav3vcdyu'

In [6]:

# Directories holding the per-flip task JSON files and the flip images.
TASKS_DIR = "data/full_flips_set/full_val_split/tasks"
IMAGES_DIR = "data/full_flips_set/full_val_split/images"

# Load the task description of every failed flip. The task file name is built
# from the last path segment of the flip key.
tasks = []
for flip in failed_flips:
    flip_id = flip.split('/')[-1]
    with open(os.path.join(TASKS_DIR, f"_flip_{flip_id}.json"), encoding="utf-8") as f:
        task = json.load(f)
    tasks.append(task)

# Load every PNG in the image directory, keyed by file name without extension.
images = {}
for image_filename in tqdm(os.listdir(IMAGES_DIR)):
    image_id, extension = os.path.splitext(image_filename)
    if extension.lower() != ".png":
        # Skip stray non-image entries instead of failing on a rebuilt path.
        continue
    with open(os.path.join(IMAGES_DIR, image_filename), "rb") as f:
        # Image.open is lazy; convert() decodes the pixels while the file is
        # still open, so the stored image stays usable after the file closes.
        image = Image.open(f).convert("RGB")
    images[image_id] = image

100%|██████████| 7317/7317 [00:06<00:00, 1070.96it/s]


In [10]:
# Display the first loaded task record to inspect its structure.
tasks[0]

{'name': '/flip/bafkreia2v23cs6gkkifim7jgrdebhgm4yotn6tqyg2hc3g24zeav3vcdyu',
 'image_lst1': {'0': 'blob:https://scan.idena.io/8d567f09-3df2-4cda-8e1a-5255be1d703f',
  '1': 'blob:https://scan.idena.io/b03ebb6d-72d5-4c6c-807b-b2b59f468be3',
  '2': 'blob:https://scan.idena.io/abb10807-69d9-4001-a8da-8db9ec358c8b',
  '3': 'blob:https://scan.idena.io/8b957949-6cae-47fc-aa53-598c216dbc23'},
 'image_lst2': {'0': 'blob:https://scan.idena.io/8b957949-6cae-47fc-aa53-598c216dbc23',
  '1': 'blob:https://scan.idena.io/abb10807-69d9-4001-a8da-8db9ec358c8b',
  '2': 'blob:https://scan.idena.io/b03ebb6d-72d5-4c6c-807b-b2b59f468be3',
  '3': 'blob:https://scan.idena.io/8d567f09-3df2-4cda-8e1a-5255be1d703f'},
 'agreed_answer': ['Right', 'Strong'],
 'votes': {'Left': '-', 'Right': '9', 'Reported': '0'},
 'details': {'Author:': '0x792b18A9188484983A556b01733b4f1743464CCb',
  'Epoch:': '#0015',
  'Size:': '118156 bytes',
  'Created:': '10/27/2019 21:00:34',
  'Block:': '418739',
  'Tx:': '0x45f93de49f98382e

In [26]:

def plot_task(task):
    """Draw the two image orderings of a task as side-by-side vertical strips.

    Args:
        task: Mapping with "image_lst1" and "image_lst2" (each a mapping whose
            values are blob URLs) and "agreed_answer" (a sequence whose first
            element becomes the figure title).

    Returns:
        The ``(fig, ax)`` pair from ``plt.subplots(1, 2)``.

    Images are looked up in the module-level ``images`` dict using the last
    '/'-separated segment of each blob URL.
    """

    def _lookup(blob_map):
        # Resolve each blob URL to its already-loaded image.
        return [images[url.split('/')[-1]] for url in blob_map.values()]

    left_imgs = _lookup(task["image_lst1"])
    right_imgs = _lookup(task["image_lst2"])

    # Every image is scaled to the narrowest width, keeping its aspect ratio.
    target_w = min(im.size[0] for im in left_imgs + right_imgs)

    def _stack(imgs):
        # Resize each image to the shared width and stack them top to bottom.
        frames = []
        for im in imgs:
            w, h = im.size
            resized = im.resize((target_w, int(h * (target_w / w))))
            frames.append(np.array(resized))
        return np.vstack(frames)

    columns = (_stack(left_imgs), _stack(right_imgs))
    titles = ("Image List 1", "Image List 2")

    answer = task["agreed_answer"][0]

    fig, ax = plt.subplots(1, 2, figsize=(5, 5))
    for axis, column, title in zip(ax, columns, titles):
        axis.imshow(column)
        axis.axis('off')
        axis.set_title(title)

    # The agreed answer is shown as the title of the whole figure.
    fig.suptitle(f"{answer}")
    fig.tight_layout()

    return fig, ax

def save_task(task, filename):
    """Render ``task`` with ``plot_task`` and write the figure to ``filename``.

    Args:
        task: Task mapping accepted by ``plot_task``.
        filename: Destination path passed to ``fig.savefig``.

    The figure is closed even if saving raises, so a failed write in a long
    loop does not leave open figures behind.
    """
    fig, _ = plot_task(task)
    try:
        fig.savefig(filename)
    finally:
        plt.close(fig)
# Make sure the destination folder for the rendered plots exists.
output_dir = "failed_flips_plots"
os.makedirs(output_dir, exist_ok=True)

# Write one PNG per failed task, named by its position in `tasks`.
for i, task in enumerate(tqdm(tasks, total=len(tasks))):
    filename = os.path.join(output_dir, f"failed_flip_{i}.png")
    save_task(task, filename)



100%|██████████| 387/387 [00:43<00:00,  8.85it/s]


In [25]:
# Display `task`. This cell does not define it: the name is the module-level
# loop variable left bound by an earlier `for ... task in ...` loop.
# NOTE(review): this cell's execution count (25) is lower than the preceding
# cell's (26), so it was run out of order - confirm it shows the intended task
# after a fresh Restart & Run All.
task

{'name': '/flip/bafybeihykityz5jpyopp6qkd5x5prigvjpvh6sjugqmwl6ewfboldkd6li',
 'image_lst1': {'0': 'blob:https://scan.idena.io/bb1e7245-0021-4e88-a33e-4f180646b935',
  '1': 'blob:https://scan.idena.io/cc7405ff-c47e-474e-bb76-2b5508f14f96',
  '2': 'blob:https://scan.idena.io/2259b456-7c32-4847-b285-83a4e6117206',
  '3': 'blob:https://scan.idena.io/6bdd47ba-e94d-4d39-8bef-bd681f7c6fc3'},
 'image_lst2': {'0': 'blob:https://scan.idena.io/6bdd47ba-e94d-4d39-8bef-bd681f7c6fc3',
  '1': 'blob:https://scan.idena.io/cc7405ff-c47e-474e-bb76-2b5508f14f96',
  '2': 'blob:https://scan.idena.io/bb1e7245-0021-4e88-a33e-4f180646b935',
  '3': 'blob:https://scan.idena.io/2259b456-7c32-4847-b285-83a4e6117206'},
 'agreed_answer': ['Right', 'Strong'],
 'votes': {'Left': '-', 'Right': '6', 'Reported': '2'},
 'details': {'Author:': '0xa1488Ed8312DadA2AFb67C8a88119Ce5446a94B1',
  'Epoch:': '#0033',
  'Size:': '266717 bytes',
  'Created:': '2/1/2020 13:19:26',
  'Block:': '833520',
  'Tx:': '0x3eb5aee8e9e60c9a1a