In [4]:
!pip install git+https://github.com/openai/CLIP.git



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-au1dzdla
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-au1dzdla
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [None]:
import os
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import clip
from transformers import BlipProcessor, BlipForConditionalGeneration
import numpy as np
from tqdm import tqdm

# Choose the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

class TinyImageNet(Dataset):
    """Tiny ImageNet images and integer labels for the ``train`` or ``val`` split.

    Expected layout under ``root``:

    * ``train/<wnid>/images/*.JPEG`` for the training split.
    * ``val/images/<file>`` plus ``val/val_annotations.txt`` (tab-separated,
      file name in column 0 and WordNet ID in column 1) for validation.

    Attributes:
        classes: WordNet IDs; position ``i`` is the ID for label ``i``.
        class_to_idx: Inverse mapping from WordNet ID to label index.
        image_paths: Path of every sample.
        labels: Label index of every sample, aligned with ``image_paths``.

    Note:
        Label indices are split-specific: ``train`` numbers classes in sorted
        WordNet-ID order, while ``val`` numbers them in order of first
        appearance in the annotation file. Always translate labels through the
        ``classes`` list of the same dataset instance.
    """

    SPLITS = ('train', 'val')

    def __init__(self, root, split='val', transform=None):
        """Index the requested split (no image is decoded here).

        Args:
            root: Dataset root directory; ``~`` is expanded.
            split: ``'train'`` or ``'val'``.
            transform: Optional callable applied to each RGB PIL image.

        Raises:
            ValueError: If ``split`` is not one of ``SPLITS``.
        """
        # Fail fast: previously an unknown split left the sample lists undefined
        # and only surfaced later as an AttributeError in __len__.
        if split not in self.SPLITS:
            raise ValueError(f"Unknown split {split!r}; expected one of {self.SPLITS}")

        self.root = os.path.expanduser(root)
        self.transform = transform
        self.split = split

        self.classes = []
        self.class_to_idx = {}
        self.image_paths = []
        self.labels = []

        if split == 'train':
            self._index_train()
        else:
            self._index_val()

    def _index_train(self):
        """Collect samples from ``train/<wnid>/images``."""
        train_dir = os.path.join(self.root, 'train')
        # Only directories are classes; a stray file must not shift label indices.
        self.classes = sorted(
            name for name in os.listdir(train_dir)
            if os.path.isdir(os.path.join(train_dir, name))
        )
        self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}

        for class_name in self.classes:
            class_dir = os.path.join(train_dir, class_name, 'images')
            if not os.path.isdir(class_dir):
                continue

            # Sorted so sample order does not depend on filesystem listing order.
            for img_name in sorted(os.listdir(class_dir)):
                if img_name.endswith('.JPEG'):
                    self.image_paths.append(os.path.join(class_dir, img_name))
                    self.labels.append(self.class_to_idx[class_name])

    def _index_val(self):
        """Collect samples listed in ``val/val_annotations.txt``."""
        val_dir = os.path.join(self.root, 'val')
        images_dir = os.path.join(val_dir, 'images')
        val_annotations_file = os.path.join(val_dir, 'val_annotations.txt')

        with open(val_annotations_file, 'r') as f:
            for line in f:
                parts = line.strip().split('\t')
                # Blank or truncated lines carry no (file, class) pair; skip them
                # instead of raising IndexError.
                if len(parts) < 2:
                    continue
                img_name, class_id = parts[0], parts[1]

                if class_id not in self.class_to_idx:
                    self.class_to_idx[class_id] = len(self.classes)
                    self.classes.append(class_id)

                self.image_paths.append(os.path.join(images_dir, img_name))
                self.labels.append(self.class_to_idx[class_id])

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is decoded as RGB and passed through ``transform`` when one
        was supplied.
        """
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # convert() returns an independent image, so the file can be closed here.
        with Image.open(img_path) as handle:
            img = handle.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        return img, label

def get_classnames(dataset, words_file_path):
    """Translate the dataset's WordNet IDs into readable class names.

    Args:
        dataset: Object exposing a ``classes`` sequence of WordNet IDs.
        words_file_path: Tab-separated file with ``<id>\\t<name[, name...]>``
            per line; lines that do not split into exactly two fields are
            ignored.

    Returns:
        A list aligned with ``dataset.classes`` holding, for each ID, the first
        comma-separated name from the file. IDs missing from the file are
        retried without a leading ``n`` and otherwise returned unchanged.
        ``None`` when ``dataset`` has no ``classes`` attribute.
    """
    if not hasattr(dataset, 'classes'):
        return None

    # Build the ID -> first-synonym lookup table.
    id_to_name = {}
    with open(words_file_path, 'r') as handle:
        for raw_line in handle:
            fields = raw_line.strip().split('\t')
            if len(fields) != 2:
                continue
            wnid, synonyms = fields
            id_to_name[wnid] = synonyms.split(',')[0].strip()

    def readable(wnid):
        # Exact match first, then the ID without its 'n' prefix, then the raw ID.
        if wnid in id_to_name:
            return id_to_name[wnid]
        if wnid.startswith('n') and wnid[1:] in id_to_name:
            return id_to_name[wnid[1:]]
        return wnid

    return [readable(wnid) for wnid in dataset.classes]

def prepare_dataset(root_dir, batch_size=64, num_workers=4):
    """Build the Tiny ImageNet validation dataset and its DataLoader.

    Images are resized (bicubic) so the shorter side is 224, centre-cropped to
    224x224 and converted with ``ToTensor``. No mean/std normalisation is
    applied here; ``compute_image_embeddings`` applies the CLIP statistics.

    Args:
        root_dir: Tiny ImageNet root directory (the one containing ``val/``).
        batch_size: Samples per batch. Defaults to 64.
        num_workers: DataLoader worker processes. Defaults to 4; pass 0 to load
            in the main process.

    Returns:
        ``(val_dataset, val_loader)``; the loader iterates in dataset order
        (``shuffle=False``).
    """
    # Basic transform without normalization for dataset loading
    basic_transform = transforms.Compose([
        transforms.Resize(224, interpolation=transforms.InterpolationMode.BICUBIC),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])

    val_dataset = TinyImageNet(root_dir, split='val', transform=basic_transform)
    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
    )

    return val_dataset, val_loader
    
def generate_captions(blip_model, blip_processor, images, prompts):
    """Generate one BLIP caption per (prompt, image) pair.

    Args:
        blip_model: BLIP captioning model exposing ``device`` and ``generate``.
        blip_processor: Matching processor used to encode inputs and decode
            generated token IDs.
        images: Iterable of image tensors laid out as (C, H, W); values are
            clipped to [0, 1] before conversion to 8-bit.
        prompts: Iterable of text prefixes used to condition generation.

    Returns:
        A list with one entry per prompt (in order); each entry is the list of
        caption strings for ``images`` (in order).
    """
    # Feed inputs to wherever the model actually lives rather than relying on a
    # module-level global.
    model_device = blip_model.device

    # Convert tensors back to 8-bit PIL images for the BLIP processor.
    pil_images = []
    for img in images:
        np_img = img.detach().cpu().numpy().transpose(1, 2, 0)
        np_img = np.clip(np_img, 0, 1)
        # Round before casting: a bare astype(uint8) truncates, so a value that
        # lands just below an integer after the float round trip would come out
        # one level darker.
        pil_images.append(Image.fromarray(np.rint(np_img * 255).astype(np.uint8)))

    all_captions = []
    with torch.no_grad():
        for prompt in prompts:
            prompt_captions = []

            # Images are captioned one at a time.
            for pil_img in pil_images:
                inputs = blip_processor(images=pil_img, text=prompt, return_tensors="pt").to(model_device)
                generated_ids = blip_model.generate(**inputs, max_length=50)
                caption = blip_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
                prompt_captions.append(caption)

            all_captions.append(prompt_captions)

    return all_captions


def compute_image_embeddings(clip_model, images):
    """Encode images with CLIP and return unit-length feature rows.

    Args:
        clip_model: Object exposing ``encode_image``.
        images: Image batch with the three colour channels on dim 1 (the
            statistics below are broadcast as a ``(1, 3, 1, 1)`` tensor).

    Returns:
        The encoder output divided by its L2 norm along dim 1, computed without
        gradient tracking.
    """
    target = images.device

    # Per-channel CLIP statistics, shaped for broadcasting over the batch.
    channel_mean = torch.tensor((0.48145466, 0.4578275, 0.40821073), device=target).view(1, 3, 1, 1)
    channel_std = torch.tensor((0.26862954, 0.26130258, 0.27577711), device=target).view(1, 3, 1, 1)
    standardized = (images - channel_mean) / channel_std

    with torch.no_grad():
        embeddings = clip_model.encode_image(standardized)
        lengths = embeddings.norm(dim=1, keepdim=True)
        return embeddings / lengths

def compute_text_embeddings(clip_model, texts):
    """Encode tokenised text with CLIP and return unit-length feature rows.

    Args:
        clip_model: Object exposing ``encode_text``.
        texts: Input passed straight to ``clip_model.encode_text``.

    Returns:
        The encoder output divided by its L2 norm along dim 1, computed without
        gradient tracking.
    """
    with torch.no_grad():
        embeddings = clip_model.encode_text(texts)
        lengths = embeddings.norm(dim=1, keepdim=True)
        return embeddings / lengths

def image_caption_encoding(image_probs, caption_probs, K=5, xi=0.08, epsilon=1e-12):
    """Combine image and caption class probabilities over the image's top-K classes.

    For every row, the K classes with the highest image probability are
    selected and scored as ``image_prob + lambda * caption_prob`` with
    ``lambda = xi * caption_std / max(||(image_std, caption_std)||, epsilon)``,
    where the standard deviations are taken over the selected K values.

    Args:
        image_probs: Tensor of shape (batch, num_classes) from the image branch.
        caption_probs: Tensor of the same shape from the caption branch.
        K: Number of top image classes to consider; clamped to ``num_classes``.
        xi: Scaling factor for lambda.
        epsilon: Lower bound on the norm to avoid division by zero.

    Returns:
        Tensor shaped like ``image_probs``. Entries outside each row's top-K are
        zero. Rows are NOT renormalised, so they are scores rather than a
        probability distribution and a single entry may exceed 1.
    """
    num_classes = image_probs.shape[1]
    # topk raises when asked for more entries than exist.
    k = min(K, num_classes)

    _, topk_indices = torch.topk(image_probs, k=k, dim=1)

    image_topk_probs = torch.gather(image_probs, 1, topk_indices)
    caption_topk_probs = torch.gather(caption_probs, 1, topk_indices)

    # Spread of each branch over the top-K acts as its confidence. Computed in
    # float32 so that epsilon does not underflow to zero for half-precision
    # inputs. With a single selected class the sample (unbiased) std is
    # undefined, so fall back to the population std, which is 0.
    unbiased = k > 1
    image_std = torch.std(image_topk_probs.float(), dim=1, unbiased=unbiased, keepdim=True)
    caption_std = torch.std(caption_topk_probs.float(), dim=1, unbiased=unbiased, keepdim=True)

    # Euclidean norm of (image_std, caption_std), floored at epsilon. Using a
    # scalar clamp keeps everything on the inputs' own device.
    norm = torch.hypot(image_std, caption_std).clamp_min(epsilon)

    lambda_values = (xi * (caption_std / norm)).to(image_topk_probs.dtype)

    combined_topk_probs = image_topk_probs + lambda_values * caption_topk_probs

    # Write the combined scores back into a full-width tensor.
    output_probs = torch.zeros_like(image_probs)
    output_probs.scatter_(1, topk_indices, combined_topk_probs.to(output_probs.dtype))

    return output_probs

def _print_batch_examples(batch_idx, first_image_index, labels, base_preds, ice_preds,
                          image_probs, ice_probs, batch_captions, classnames,
                          caption_prompts, max_examples=5):
    """Print true class, both predictions, captions and scores for the first images of a batch."""
    print(f"\n--- Batch {batch_idx} examples ---")
    for i in range(min(max_examples, labels.shape[0])):
        true_label = labels[i].item()
        base_pred = base_preds[i].item()
        ice_pred = ice_preds[i].item()

        print(f"\nImage {first_image_index + i}:")
        print(f"  True class: {classnames[true_label]}")
        print(f"  CLIP prediction: {classnames[base_pred]}")
        print(f"  ICE prediction: {classnames[ice_pred]}")

        # batch_captions is indexed [prompt][image].
        print("  Captions:")
        for j, prompt in enumerate(caption_prompts):
            print(f"    {prompt}: {batch_captions[j][i]}")

        # NOTE: the ICE value is image_prob + lambda * caption_prob and is not
        # renormalised, so unlike the CLIP softmax probability it can exceed 100%.
        base_conf = image_probs[i, base_pred].item() * 100
        ice_conf = ice_probs[i, ice_pred].item() * 100
        print(f"  CLIP confidence: {base_conf:.2f}%")
        print(f"  ICE confidence: {ice_conf:.2f}%")


def evaluate_model(clip_model, blip_model, blip_processor, val_loader, classnames,
                   caption_prompts, print_samples=200):
    """Compare zero-shot CLIP with ICE (image + caption) accuracy on a loader.

    For each batch the image probabilities come from CLIP image features
    against "a photo of a <class>" text features; caption probabilities come
    from the mean CLIP text embedding of the BLIP captions (one per prompt)
    against the same text features. Both are fused by
    ``image_caption_encoding``.

    Args:
        clip_model: CLIP model used for image and text encoding.
        blip_model: BLIP captioning model.
        blip_processor: Processor paired with ``blip_model``.
        val_loader: Iterable yielding ``(images, labels)`` batches.
        classnames: Class names; position ``i`` names label ``i``.
        caption_prompts: Caption prefixes handed to BLIP.
        print_samples: Number of leading batches for which example predictions
            are printed. The default of 200 keeps the previous behaviour; pass
            a small number (or 0) to keep the output short.

    Returns:
        ``(base_acc, ice_acc)`` top-1 accuracies in percent.

    Raises:
        ValueError: If ``val_loader`` yields no samples.
    """
    # clip.tokenize accepts a list of strings, so tokenise all class prompts at once.
    text_inputs = clip.tokenize([f"a photo of a {c}" for c in classnames]).to(device)
    text_features = compute_text_embeddings(clip_model, text_inputs)

    total = 0
    correct_base = 0
    correct_ice = 0
    sample_count = 0

    for batch_idx, (images, labels) in enumerate(tqdm(val_loader)):
        images = images.to(device)
        labels = labels.to(device)
        batch_size = images.shape[0]
        total += batch_size

        # Image branch: cosine similarity scaled by 100, then softmax over classes.
        image_features = compute_image_embeddings(clip_model, images)
        image_logits = 100.0 * image_features @ text_features.T
        image_probs = F.softmax(image_logits, dim=1)

        # Caption branch: embed every prompt's captions and average across prompts.
        batch_captions = generate_captions(blip_model, blip_processor, images, caption_prompts)
        caption_features_list = []
        for prompt_captions in batch_captions:
            caption_tokens = clip.tokenize(prompt_captions).to(device)
            caption_features_list.append(compute_text_embeddings(clip_model, caption_tokens))
        caption_features = torch.stack(caption_features_list).mean(dim=0)

        caption_logits = 100.0 * caption_features @ text_features.T
        caption_probs = F.softmax(caption_logits, dim=1)

        ice_probs = image_caption_encoding(image_probs, caption_probs)

        base_preds = image_probs.argmax(dim=1)
        ice_preds = ice_probs.argmax(dim=1)

        correct_base += (base_preds == labels).sum().item()
        correct_ice += (ice_preds == labels).sum().item()

        if batch_idx < print_samples:
            _print_batch_examples(
                batch_idx, sample_count, labels, base_preds, ice_preds,
                image_probs, ice_probs, batch_captions, classnames, caption_prompts,
            )
            sample_count += batch_size

    # Accuracy is undefined without samples; say so instead of dividing by zero.
    if total == 0:
        raise ValueError("val_loader yielded no samples; cannot compute accuracy")

    base_acc = 100 * correct_base / total
    ice_acc = 100 * correct_ice / total

    return base_acc, ice_acc

def main():
    """Load CLIP and BLIP, evaluate both CLIP and ICE on Tiny ImageNet val, and print accuracies."""
    # The recorded run shows repeated huggingface/tokenizers fork warnings once
    # DataLoader workers start; the warning itself recommends setting this
    # variable explicitly. setdefault keeps any value chosen by the user.
    os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

    # Dataset root; words.txt (WordNet ID -> names) lives directly inside it.
    tiny_imagenet_path = '/kaggle/input/tiny-imagenet/tiny-imagenet-200'
    words_file_path = os.path.join(tiny_imagenet_path, 'words.txt')

    # Load models
    print("Loading CLIP model...")
    # clip.load also returns a preprocessing transform; it is not used because
    # prepare_dataset / compute_image_embeddings do the resizing and normalisation.
    clip_model, _ = clip.load("ViT-B/32", device=device)
    clip_model.eval()

    print("Loading BLIP model...")
    blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
    blip_model.eval()

    # Prepare dataset
    print("Preparing dataset...")
    val_dataset, val_loader = prepare_dataset(tiny_imagenet_path)
    classnames = get_classnames(val_dataset, words_file_path)

    # Sanity check: show how the first few WordNet IDs were translated.
    print("\nSample class mappings:")
    for wnid, readable_name in zip(val_dataset.classes[:20], classnames):
        print(f"{wnid} -> {readable_name}")

    # Prefixes BLIP completes into captions; one caption per prompt per image.
    caption_prompts = ["a photo of", "a picture of", "a photo containing"]

    # Evaluate
    print("Evaluating...")
    base_acc, ice_acc = evaluate_model(
        clip_model, blip_model, blip_processor, val_loader, classnames, caption_prompts
    )

    print(f"Base CLIP Accuracy: {base_acc:.2f}%")
    print(f"ICE Accuracy: {ice_acc:.2f}%")
    print(f"Improvement: {ice_acc - base_acc:.2f}%")

# Entry point: run the full evaluation when this code executes as the main module.
if __name__ == "__main__":
    main()

Using device: cuda
Loading CLIP model...
Loading BLIP model...
Preparing dataset...

Sample class mappings:
n03444034 -> go-kart
n04067472 -> reel
n04070727 -> refrigerator
n02808440 -> bathtub
n04399382 -> teddy
n04179913 -> sewing machine
n02823428 -> beer bottle
n04146614 -> school bus
n02226429 -> grasshopper
n04371430 -> swimming trunks
n07753592 -> banana
n03770439 -> miniskirt
n02056570 -> king penguin
n02906734 -> broom
n02125311 -> cougar
n04486054 -> triumphal arch
n04285008 -> sports car
n03763968 -> military uniform
n03814639 -> neck brace
n03837869 -> obelisk
Evaluating...


  0%|          | 0/157 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling 


--- Batch 0 examples ---

Image 0:
  True class: go-kart
  CLIP prediction: go-kart
  ICE prediction: go-kart
  Captions:
    a photo of: a photo of a group of people sitting on the ground
    a picture of: a picture of a group of people sitting on the ground
    a photo containing: a photo containing of a group of people in a street
  CLIP confidence: 87.30%
  ICE confidence: 87.30%

Image 1:
  True class: reel
  CLIP prediction: lifeboat
  ICE prediction: lifeboat
  Captions:
    a photo of: a photo of a sheep
    a picture of: a picture of a sheep
    a photo containing: a photo containing the image of a sheep
  CLIP confidence: 31.74%
  ICE confidence: 31.74%

Image 2:
  True class: refrigerator
  CLIP prediction: beacon
  ICE prediction: beacon
  Captions:
    a photo of: a photo of a city with a clock
    a picture of: a picture of a city with a lot of buildings
    a photo containing: a photo containing the city of london taken from a window in the tower
  CLIP confidence: 10.8

  1%|▏         | 2/157 [00:57<1:14:33, 28.86s/it]


--- Batch 1 examples ---

Image 64:
  True class: fountain
  CLIP prediction: fountain
  ICE prediction: fountain
  Captions:
    a photo of: a photo of the fountain at night
    a picture of: a picture of a fountain with lights on it
    a photo containing: a photo containing the lights of the space shuttle
  CLIP confidence: 33.81%
  ICE confidence: 41.58%

Image 65:
  True class: water tower
  CLIP prediction: water tower
  ICE prediction: water tower
  Captions:
    a photo of: a photo of a field with trees and a building in the background
    a picture of: a picture of a field with trees and a building in the background
    a photo containing: a photo containing of the site of the proposed new school
  CLIP confidence: 47.39%
  ICE confidence: 47.39%

Image 66:
  True class: maypole
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a group of people standing around a tent
    a picture of: a picture of a group of people standing around a 

  2%|▏         | 3/157 [01:27<1:15:01, 29.23s/it]


--- Batch 2 examples ---

Image 128:
  True class: reel
  CLIP prediction: lakeside
  ICE prediction: lakeside
  Captions:
    a photo of: a photo of a man on a boat
    a picture of: a picture of a man on a boat
    a photo containing: a photo containing of a man on a boat
  CLIP confidence: 23.89%
  ICE confidence: 30.22%

Image 129:
  True class: brass
  CLIP prediction: cannon
  ICE prediction: cannon
  Captions:
    a photo of: a photo of a cat in the woods
    a picture of: a picture of a cat in the woods
    a photo containing: a photo containing the image of a cat
  CLIP confidence: 56.98%
  ICE confidence: 56.98%

Image 130:
  True class: bullfrog
  CLIP prediction: tailed frog
  ICE prediction: tailed frog
  Captions:
    a photo of: a photo of a lizard on a rock
    a picture of: a picture of a lizard on the beach
    a photo containing: a photo containing with the image of a lizard
  CLIP confidence: 58.15%
  ICE confidence: 60.74%

Image 131:
  True class: meat loaf
  CLI

  3%|▎         | 4/157 [01:57<1:15:01, 29.42s/it]


--- Batch 3 examples ---

Image 192:
  True class: black widow
  CLIP prediction: black widow
  ICE prediction: black widow
  Captions:
    a photo of: a photo of a spider on a piece of wood
    a picture of: a picture of a spider on a piece of wood
    a photo containing: a photo containing the spider
  CLIP confidence: 93.26%
  ICE confidence: 93.36%

Image 193:
  True class: sombrero
  CLIP prediction: sombrero
  ICE prediction: sombrero
  Captions:
    a photo of: a photo of a family posing for a picture
    a picture of: a picture of a family posing for a picture
    a photo containing: a photo containing of a family
  CLIP confidence: 64.16%
  ICE confidence: 64.16%

Image 194:
  True class: candle
  CLIP prediction: candle
  ICE prediction: candle
  Captions:
    a photo of: a photo of a candle
    a picture of: a picture of a candle in a glass
    a photo containing: a photo containing the purple flame of a candle
  CLIP confidence: 75.63%
  ICE confidence: 82.13%

Image 195:


  3%|▎         | 5/157 [02:27<1:15:31, 29.81s/it]


--- Batch 4 examples ---

Image 256:
  True class: beach wagon
  CLIP prediction: limousine
  ICE prediction: limousine
  Captions:
    a photo of: a photo of a car driving on a bridge
    a picture of: a picture of a car driving on a bridge
    a photo containing: a photo containing the image of a car driving on a bridge
  CLIP confidence: 58.89%
  ICE confidence: 58.89%

Image 257:
  True class: lion
  CLIP prediction: lion
  ICE prediction: lion
  Captions:
    a photo of: a photo of a lion in the wild
    a picture of: a picture of a lion in the wild
    a photo containing: a photo containing of a lion in the wild
  CLIP confidence: 76.95%
  ICE confidence: 83.40%

Image 258:
  True class: freight car
  CLIP prediction: freight car
  ICE prediction: freight car
  Captions:
    a photo of: a photo of a building with a sign on it
    a picture of: a picture of a building with a sign on it
    a photo containing: a photo containing the logo of the indian bank
  CLIP confidence: 98.73

  4%|▍         | 6/157 [02:56<1:14:00, 29.41s/it]


--- Batch 5 examples ---

Image 320:
  True class: hourglass
  CLIP prediction: torch
  ICE prediction: torch
  Captions:
    a photo of: a photo of a candle with a candle in the middle
    a picture of: a picture of a candle with a candle in the middle
    a photo containing: a photo containing the image of a burning candle
  CLIP confidence: 42.85%
  ICE confidence: 42.85%

Image 321:
  True class: sandal
  CLIP prediction: sandal
  ICE prediction: sandal
  Captions:
    a photo of: a photo of a light fixture
    a picture of: a picture of a light fixture
    a photo containing: a photo containing the three lights of the person ' s house
  CLIP confidence: 54.20%
  ICE confidence: 54.20%

Image 322:
  True class: bullet train
  CLIP prediction: bullet train
  ICE prediction: bullet train
  Captions:
    a photo of: a photo of a train
    a picture of: a picture of a train
    a photo containing: a photo containing of a train at a station
  CLIP confidence: 71.09%
  ICE confidence: 7

  4%|▍         | 7/157 [03:26<1:14:07, 29.65s/it]


--- Batch 6 examples ---

Image 384:
  True class: jellyfish
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a guitar
    a picture of: a picture of a guitar with a blue background
    a photo containing: a photo containing the sun and the moon
  CLIP confidence: 32.52%
  ICE confidence: 32.52%

Image 385:
  True class: orangutan
  CLIP prediction: orangutan
  ICE prediction: orangutan
  Captions:
    a photo of: a photo of a bear in the woods
    a picture of: a picture of a bear in the woods
    a photo containing: a photo containing the bear
  CLIP confidence: 98.44%
  ICE confidence: 98.44%

Image 386:
  True class: water jug
  CLIP prediction: bucket
  ICE prediction: bucket
  Captions:
    a photo of: a photo of a group of baskets
    a picture of: a picture of a group of baskets
    a photo containing: a photo containing of a set of three baskets
  CLIP confidence: 43.55%
  ICE confidence: 49.98%

Image 387:
  True class: co

  5%|▌         | 8/157 [03:57<1:14:18, 29.92s/it]


--- Batch 7 examples ---

Image 448:
  True class: cliff dwelling
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a man in a suit and tie
    a picture of: a picture of a man in a suit and tie
    a photo containing: a photo containing the image of a man in a boat
  CLIP confidence: 94.04%
  ICE confidence: 94.04%

Image 449:
  True class: snorkel
  CLIP prediction: snorkel
  ICE prediction: snorkel
  Captions:
    a photo of: a photo of a small white object in the water
    a picture of: a picture of a small white object in the water
    a photo containing: a photo containing with the image of a man in a diving suit
  CLIP confidence: 39.21%
  ICE confidence: 39.21%

Image 450:
  True class: desk
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a bed with a teddy bear on it
    a picture of: a picture of a bed with a teddy bear on it
    a photo containing: a photo containing the im

  6%|▌         | 9/157 [04:27<1:13:59, 29.99s/it]


--- Batch 8 examples ---

Image 512:
  True class: beacon
  CLIP prediction: beacon
  ICE prediction: beacon
  Captions:
    a photo of: a photo of a lighthouse on the beach
    a picture of: a picture of a lighthouse on the beach
    a photo containing: a photo containing the lighthouse
  CLIP confidence: 75.10%
  ICE confidence: 75.24%

Image 513:
  True class: cliff
  CLIP prediction: swimming trunks
  ICE prediction: swimming trunks
  Captions:
    a photo of: a photo of a dog on the beach
    a picture of: a picture of a dog on the beach
    a photo containing: a photo containing of a dog on the beach
  CLIP confidence: 23.22%
  ICE confidence: 23.22%

Image 514:
  True class: scorpion
  CLIP prediction: scorpion
  ICE prediction: scorpion
  Captions:
    a photo of: a photo of a lizard
    a picture of: a picture of a lizard on a white surface
    a photo containing: a photo containing the image of a lizard
  CLIP confidence: 93.95%
  ICE confidence: 93.95%

Image 515:
  True cl

  6%|▋         | 10/157 [04:57<1:13:18, 29.92s/it]


--- Batch 9 examples ---

Image 576:
  True class: monarch
  CLIP prediction: monarch
  ICE prediction: monarch
  Captions:
    a photo of: a photo of a butterfly on a leaf
    a picture of: a picture of a butterfly on a leaf
    a photo containing: a photo containing the butterfly effect
  CLIP confidence: 98.63%
  ICE confidence: 98.97%

Image 577:
  True class: pay-phone
  CLIP prediction: pay-phone
  ICE prediction: pay-phone
  Captions:
    a photo of: a photo of a parking meter in a parking lot
    a picture of: a picture of a parking meter in a city
    a photo containing: a photo containing the same parking meter
  CLIP confidence: 87.74%
  ICE confidence: 87.74%

Image 578:
  True class: pay-phone
  CLIP prediction: pay-phone
  ICE prediction: pay-phone
  Captions:
    a photo of: a photo of a group of socks with different designs
    a picture of: a picture of a group of socks with different designs
    a photo containing: a photo containing the christmas socks
  CLIP confid

  7%|▋         | 11/157 [05:26<1:12:44, 29.89s/it]


--- Batch 10 examples ---

Image 640:
  True class: police van
  CLIP prediction: police van
  ICE prediction: police van
  Captions:
    a photo of: a photo of a police van
    a picture of: a picture of a police van
    a photo containing: a photo containing of the police van
  CLIP confidence: 99.85%
  ICE confidence: 105.47%

Image 641:
  True class: thatch
  CLIP prediction: thatch
  ICE prediction: thatch
  Captions:
    a photo of: a photo of a small house in the middle of a field
    a picture of: a picture of a small house in the middle of a field
    a photo containing: a photo containing the site of the first battle of the alam
  CLIP confidence: 84.57%
  ICE confidence: 84.57%

Image 642:
  True class: walking stick
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a person on a skateboard
    a picture of: a picture of a person on a skateboard
    a photo containing: a photo containing the image of a man on a skateboard
  CLIP con

  8%|▊         | 12/157 [05:56<1:11:57, 29.77s/it]


--- Batch 11 examples ---

Image 704:
  True class: boa constrictor
  CLIP prediction: boa constrictor
  ICE prediction: boa constrictor
  Captions:
    a photo of: a photo of a bird flying in the night sky
    a picture of: a picture of a bird flying in the night sky
    a photo containing: a photo containing the image of a bird
  CLIP confidence: 96.39%
  ICE confidence: 96.39%

Image 705:
  True class: refrigerator
  CLIP prediction: refrigerator
  ICE prediction: refrigerator
  Captions:
    a photo of: a photo of a glass door with a reflection
    a picture of: a picture of a room with a glass door
    a photo containing: a photo containing the inside of a building
  CLIP confidence: 74.76%
  ICE confidence: 74.76%

Image 706:
  True class: tabby
  CLIP prediction: Egyptian cat
  ICE prediction: Egyptian cat
  Captions:
    a photo of: a photo of a cat sitting on a table
    a picture of: a picture of a cat sitting on a table
    a photo containing: a photo containing of a cat
  

  8%|▊         | 13/157 [06:25<1:11:14, 29.68s/it]


--- Batch 12 examples ---

Image 768:
  True class: police van
  CLIP prediction: police van
  ICE prediction: police van
  Captions:
    a photo of: a photo of a police car on a road
    a picture of: a picture of a police car on the road
    a photo containing: a photo containing of a police car
  CLIP confidence: 88.82%
  ICE confidence: 94.82%

Image 769:
  True class: sea slug
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a coral with a red coral in the background
    a picture of: a picture of a coral with a red coral in the background
    a photo containing: a photo containing the image of a coral
  CLIP confidence: 48.97%
  ICE confidence: 48.97%

Image 770:
  True class: candle
  CLIP prediction: candle
  ICE prediction: candle
  Captions:
    a photo of: a photo of a group of people sitting around a table
    a picture of: a picture of a group of people sitting around a table
    a photo containing: a photo containing t

  9%|▉         | 14/157 [06:54<1:10:21, 29.52s/it]


--- Batch 13 examples ---

Image 832:
  True class: teapot
  CLIP prediction: teapot
  ICE prediction: teapot
  Captions:
    a photo of: a photo of a green glass vase
    a picture of: a picture of a green glass vase
    a photo containing: a photo containing of a green teapot
  CLIP confidence: 84.86%
  ICE confidence: 85.06%

Image 833:
  True class: bucket
  CLIP prediction: bucket
  ICE prediction: bucket
  Captions:
    a photo of: a photo of a cup of coffee
    a picture of: a picture of a cup on a table
    a photo containing: a photo containing the logo of the university of wisconsin
  CLIP confidence: 82.62%
  ICE confidence: 82.62%

Image 834:
  True class: dam
  CLIP prediction: dam
  ICE prediction: dam
  Captions:
    a photo of: a photo of a bridge over a river
    a picture of: a picture of a bridge over a river
    a photo containing: a photo containing the image of a bridge over a river
  CLIP confidence: 80.18%
  ICE confidence: 80.18%

Image 835:
  True class: barb

 10%|▉         | 15/157 [07:25<1:10:24, 29.75s/it]


--- Batch 14 examples ---

Image 896:
  True class: American alligator
  CLIP prediction: bullfrog
  ICE prediction: bullfrog
  Captions:
    a photo of: a photo of a green bug on a leaf
    a picture of: a picture of a small insect on the ground
    a photo containing: a photo containing the image of a frog
  CLIP confidence: 62.40%
  ICE confidence: 62.40%

Image 897:
  True class: standard poodle
  CLIP prediction: standard poodle
  ICE prediction: standard poodle
  Captions:
    a photo of: a photo of a dog sitting in the grass
    a picture of: a picture of a dog sitting in the grass
    a photo containing: a photo containing of a dog in the grass
  CLIP confidence: 18.96%
  ICE confidence: 18.96%

Image 898:
  True class: comic book
  CLIP prediction: vestment
  ICE prediction: comic book
  Captions:
    a photo of: a photo of a woman in a pink shirt
    a picture of: a picture of a woman in a pink shirt
    a photo containing: a photo containing the cover of the album ' the bes

 10%|█         | 16/157 [07:56<1:10:44, 30.10s/it]


--- Batch 15 examples ---

Image 960:
  True class: ice lolly
  CLIP prediction: ice lolly
  ICE prediction: ice lolly
  Captions:
    a photo of: a photo of a baby
    a picture of: a picture of a baby
    a photo containing: a photo containing of a baby with a paco
  CLIP confidence: 64.26%
  ICE confidence: 64.26%

Image 961:
  True class: water tower
  CLIP prediction: water tower
  ICE prediction: water tower
  Captions:
    a photo of: a photo of a water tower in the middle of a lake
    a picture of: a picture of a water tower in the middle of a lake
    a photo containing: a photo containing the water tower
  CLIP confidence: 61.28%
  ICE confidence: 68.26%

Image 962:
  True class: goose
  CLIP prediction: goose
  ICE prediction: goose
  Captions:
    a photo of: a photo of a bird standing in the water
    a picture of: a picture of a bird standing in the water
    a photo containing: a photo containing of a goose on the shore of a lake
  CLIP confidence: 98.24%
  ICE confide

 11%|█         | 17/157 [08:26<1:10:06, 30.05s/it]


--- Batch 16 examples ---

Image 1024:
  True class: candle
  CLIP prediction: confectionery
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a man sitting at a table with a cake
    a picture of: a picture of a man sitting at a table with a cake
    a photo containing: a photo containing of a man sitting at a table with a cake
  CLIP confidence: 19.38%
  ICE confidence: 19.38%

Image 1025:
  True class: frying pan
  CLIP prediction: frying pan
  ICE prediction: frying pan
  Captions:
    a photo of: a photo of a fried egg in a fry pan
    a picture of: a picture of a fried egg in a pan
    a photo containing: a photo containing the egg
  CLIP confidence: 87.60%
  ICE confidence: 93.51%

Image 1026:
  True class: potpie
  CLIP prediction: potpie
  ICE prediction: potpie
  Captions:
    a photo of: a photo of a pie on a plate
    a picture of: a picture of a pie with a slice taken out
    a photo containing: a photo containing the recipe of the chicken pot pie
  C

 11%|█▏        | 18/157 [08:56<1:09:51, 30.16s/it]


--- Batch 17 examples ---

Image 1088:
  True class: trilobite
  CLIP prediction: acorn
  ICE prediction: acorn
  Captions:
    a photo of: a photo of a yellow rock
    a picture of: a picture of a yellow rock
    a photo containing: a photo containing with the image of a gold nugg
  CLIP confidence: 61.04%
  ICE confidence: 61.04%

Image 1089:
  True class: pomegranate
  CLIP prediction: pomegranate
  ICE prediction: pomegranate
  Captions:
    a photo of: a photo of a red flower in the middle of a green plant
    a picture of: a picture of a red flower in the middle of a green plant
    a photo containing: a photo containing the red ball in the middle of the green
  CLIP confidence: 94.58%
  ICE confidence: 94.58%

Image 1090:
  True class: beaker
  CLIP prediction: beaker
  ICE prediction: beaker
  Captions:
    a photo of: a photo of three plastic cups
    a picture of: a picture of three plastic cups
    a photo containing: a photo containing with a single - cell membrane
  CLIP 

 12%|█▏        | 19/157 [09:25<1:08:36, 29.83s/it]


--- Batch 18 examples ---

Image 1152:
  True class: teddy
  CLIP prediction: Arabian camel
  ICE prediction: Arabian camel
  Captions:
    a photo of: a photo of a bunch of flowers
    a picture of: a picture of a bunch of flowers
    a photo containing: a photo containing the image of a man with a beard
  CLIP confidence: 27.93%
  ICE confidence: 27.93%

Image 1153:
  True class: cardigan
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a man in a suit and tie
    a picture of: a picture of a man in a suit and tie
    a photo containing: a photo containing from the video of the video of the manhuntd
  CLIP confidence: 69.78%
  ICE confidence: 69.78%

Image 1154:
  True class: sewing machine
  CLIP prediction: sewing machine
  ICE prediction: sewing machine
  Captions:
    a photo of: a photo of a sewing machine
    a picture of: a picture of a sewing machine
    a photo containing: a photo containing the image of a sewing machine
  CLIP c

 13%|█▎        | 20/157 [09:54<1:07:46, 29.68s/it]


--- Batch 19 examples ---

Image 1216:
  True class: punching bag
  CLIP prediction: punching bag
  ICE prediction: punching bag
  Captions:
    a photo of: a photo of a man punching a punching punching punching punching
    a picture of: a picture of a man punching a punching punching punching
    a photo containing: a photo containing the image of a man punching a punching punching
  CLIP confidence: 57.42%
  ICE confidence: 63.67%

Image 1217:
  True class: lion
  CLIP prediction: lion
  ICE prediction: lion
  Captions:
    a photo of: a photo of a lion in the grass
    a picture of: a picture of a lion in the grass
    a photo containing: a photo containing of a lion
  CLIP confidence: 84.72%
  ICE confidence: 90.92%

Image 1218:
  True class: brain coral
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a fish swimming in the ocean
    a picture of: a picture of a fish swimming in the ocean
    a photo containing: a photo contai

 13%|█▎        | 21/157 [10:26<1:08:22, 30.16s/it]


--- Batch 20 examples ---

Image 1280:
  True class: pill bottle
  CLIP prediction: pill bottle
  ICE prediction: pill bottle
  Captions:
    a photo of: a photo of a person playing with a toy
    a picture of: a picture of a person playing with a toy
    a photo containing: a photo containing the image of a person ' s hand holding a small bottle of pills
  CLIP confidence: 99.61%
  ICE confidence: 99.61%

Image 1281:
  True class: potpie
  CLIP prediction: potpie
  ICE prediction: potpie
  Captions:
    a photo of: a photo of a plate of food with a fork
    a picture of: a picture of a plate of food with a fork
    a photo containing: a photo containing of a plate of food
  CLIP confidence: 75.24%
  ICE confidence: 75.24%

Image 1282:
  True class: refrigerator
  CLIP prediction: refrigerator
  ICE prediction: refrigerator
  Captions:
    a photo of: a photo of a refrigerator in a kitchen
    a picture of: a picture of a refrigerator in a kitchen
    a photo containing: a photo conta

 14%|█▍        | 22/157 [10:56<1:08:04, 30.25s/it]


--- Batch 21 examples ---

Image 1344:
  True class: tarantula
  CLIP prediction: alp
  ICE prediction: cliff
  Captions:
    a photo of: a photo of a mountain with a blue sky
    a picture of: a picture of a mountain with a blue sky
    a photo containing: a photo containing the site of the ancient site of the ancient city of del dio
  CLIP confidence: 6.38%
  ICE confidence: 7.60%

Image 1345:
  True class: confectionery
  CLIP prediction: confectionery
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a fish tank
    a picture of: a picture of a fish tank
    a photo containing: a photo containing the image of a fish tank
  CLIP confidence: 84.33%
  ICE confidence: 84.33%

Image 1346:
  True class: lawn mower
  CLIP prediction: lawn mower
  ICE prediction: lawn mower
  Captions:
    a photo of: a photo of a field of grass with a man walking in the distance
    a picture of: a picture of a field with a lot of yellow flowers
    a photo containing: a photo contai

 15%|█▍        | 23/157 [11:25<1:06:45, 29.89s/it]


--- Batch 22 examples ---

Image 1408:
  True class: sea slug
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a snake with its mouth open
    a picture of: a picture of a snake with its mouth open
    a photo containing: a photo containing of a yellow - bellis bellis
  CLIP confidence: 37.77%
  ICE confidence: 37.77%

Image 1409:
  True class: brown bear
  CLIP prediction: baboon
  ICE prediction: baboon
  Captions:
    a photo of: a photo of a bear on a rock
    a picture of: a picture of a bear standing on a rock
    a photo containing: a photo containing the image of a bear in the wild
  CLIP confidence: 37.67%
  ICE confidence: 37.67%

Image 1410:
  True class: bell pepper
  CLIP prediction: bell pepper
  ICE prediction: bell pepper
  Captions:
    a photo of: a photo of a plate of tomatoes and peppers
    a picture of: a picture of a plate of tomatoes and peppers
    a photo containing: a photo containing the image of a pepper

 15%|█▌        | 24/157 [11:53<1:05:09, 29.39s/it]


--- Batch 23 examples ---

Image 1472:
  True class: Arabian camel
  CLIP prediction: Arabian camel
  ICE prediction: Arabian camel
  Captions:
    a photo of: a photo of a group of people riding camels in the desert
    a picture of: a picture of a group of people riding camels in the desert
    a photo containing: a photo containing the camel race
  CLIP confidence: 93.55%
  ICE confidence: 99.41%

Image 1473:
  True class: cannon
  CLIP prediction: flagpole
  ICE prediction: flagpole
  Captions:
    a photo of: a photo of a statue of a bull with a canadian flag
    a picture of: a picture of a statue of a man with a flag
    a photo containing: a photo containing the canadian flag
  CLIP confidence: 96.88%
  ICE confidence: 102.64%

Image 1474:
  True class: butcher shop
  CLIP prediction: butcher shop
  ICE prediction: butcher shop
  Captions:
    a photo of: a photo of a bunch of red roses
    a picture of: a picture of a person in a bed
    a photo containing: a photo containing

 16%|█▌        | 25/157 [12:23<1:04:28, 29.31s/it]


--- Batch 24 examples ---

Image 1536:
  True class: barn
  CLIP prediction: barn
  ICE prediction: barn
  Captions:
    a photo of: a photo of a red barn in the snow
    a picture of: a picture of a red barn with snow on the roof
    a photo containing: a photo containing the red barn
  CLIP confidence: 98.58%
  ICE confidence: 104.30%

Image 1537:
  True class: nail
  CLIP prediction: brass
  ICE prediction: brass
  Captions:
    a photo of: a photo of a gold ring
    a picture of: a picture of a gold ring
    a photo containing: a photo containing the gold ring
  CLIP confidence: 31.86%
  ICE confidence: 39.31%

Image 1538:
  True class: trilobite
  CLIP prediction: trilobite
  ICE prediction: trilobite
  Captions:
    a photo of: a photo of a man with a beard
    a picture of: a picture of a man with a beard
    a photo containing: a photo containing of a man ' s face
  CLIP confidence: 41.38%
  ICE confidence: 41.38%

Image 1539:
  True class: viaduct
  CLIP prediction: viaduct
 

 17%|█▋        | 26/157 [12:53<1:04:40, 29.62s/it]


--- Batch 25 examples ---

Image 1600:
  True class: cliff dwelling
  CLIP prediction: Egyptian cat
  ICE prediction: Egyptian cat
  Captions:
    a photo of: a photo of a white and brown dog
    a picture of: a picture of a white and brown dog
    a photo containing: a photo containing the image of a man in a suit
  CLIP confidence: 78.32%
  ICE confidence: 78.32%

Image 1601:
  True class: sewing machine
  CLIP prediction: sewing machine
  ICE prediction: sewing machine
  Captions:
    a photo of: a photo of a woman in a black dress
    a picture of: a picture of a woman with a black hair
    a photo containing: a photo containing the image of a diamond
  CLIP confidence: 32.42%
  ICE confidence: 32.42%

Image 1602:
  True class: espresso
  CLIP prediction: espresso
  ICE prediction: espresso
  Captions:
    a photo of: a photo of a cup of coffee and a plate of food
    a picture of: a picture of a cup of coffee and a plate of food
    a photo containing: a photo containing of a cup

 17%|█▋        | 27/157 [13:22<1:04:04, 29.57s/it]


--- Batch 26 examples ---

Image 1664:
  True class: scoreboard
  CLIP prediction: scoreboard
  ICE prediction: scoreboard
  Captions:
    a photo of: a photo of a stadium with a large screen
    a picture of: a picture of a stadium with a large screen
    a photo containing: a photo containing the video of the game
  CLIP confidence: 98.44%
  ICE confidence: 104.10%

Image 1665:
  True class: police van
  CLIP prediction: police van
  ICE prediction: police van
  Captions:
    a photo of: a photo of a van parked in a driveway
    a picture of: a picture of a white van parked in a parking lot
    a photo containing: a photo containing of a van parked in a driveway
  CLIP confidence: 99.02%
  ICE confidence: 102.15%

Image 1666:
  True class: vestment
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a city at night
    a picture of: a picture of a city at night
    a photo containing: a photo containing the image of the statue of liberty
  C

 18%|█▊        | 28/157 [13:53<1:04:07, 29.83s/it]


--- Batch 27 examples ---

Image 1728:
  True class: grasshopper
  CLIP prediction: grasshopper
  ICE prediction: grasshopper
  Captions:
    a photo of: a photo of a pink dress with a green flower on it
    a picture of: a picture of a pink dress with a green flower on it
    a photo containing: a photo containing with a green lizard
  CLIP confidence: 57.13%
  ICE confidence: 57.13%

Image 1729:
  True class: goose
  CLIP prediction: goose
  ICE prediction: goose
  Captions:
    a photo of: a photo of a bird in the water
    a picture of: a picture of a bird in the water
    a photo containing: a photo containing the image of a swan
  CLIP confidence: 61.96%
  ICE confidence: 68.31%

Image 1730:
  True class: stopwatch
  CLIP prediction: stopwatch
  ICE prediction: stopwatch
  Captions:
    a photo of: a photo of a person holding a clock
    a picture of: a picture of a person holding a clock
    a photo containing: a photo containing the image of a clock
  CLIP confidence: 71.83%
 

 18%|█▊        | 29/157 [14:22<1:03:29, 29.76s/it]


--- Batch 28 examples ---

Image 1792:
  True class: tractor
  CLIP prediction: tractor
  ICE prediction: tractor
  Captions:
    a photo of: a photo of a tractor with a man in the back
    a picture of: a picture of a tractor with a man in the back
    a photo containing: a photo containing the new tractor
  CLIP confidence: 97.22%
  ICE confidence: 102.93%

Image 1793:
  True class: academic gown
  CLIP prediction: academic gown
  ICE prediction: academic gown
  Captions:
    a photo of: a photo of a woman in a police uniform
    a picture of: a picture of a woman in a police uniform
    a photo containing: a photo containing of a police officer
  CLIP confidence: 83.30%
  ICE confidence: 83.30%

Image 1794:
  True class: cliff dwelling
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a cliff with a small house in the middle
    a picture of: a picture of a small village in the mountains
    a photo containing: a photo contain

 19%|█▉        | 30/157 [14:52<1:02:50, 29.69s/it]


--- Batch 29 examples ---

Image 1856:
  True class: broom
  CLIP prediction: lawn mower
  ICE prediction: lawn mower
  Captions:
    a photo of: a photo of a group of people sitting on a bench
    a picture of: a picture of a group of people sitting on a bench
    a photo containing: a photo containing of a group of people in a park
  CLIP confidence: 84.28%
  ICE confidence: 84.28%

Image 1857:
  True class: crane
  CLIP prediction: seashore
  ICE prediction: seashore
  Captions:
    a photo of: a photo of a sunset
    a picture of: a picture of a sunset
    a photo containing: a photo containing a sunset
  CLIP confidence: 17.16%
  ICE confidence: 20.45%

Image 1858:
  True class: standard poodle
  CLIP prediction: standard poodle
  ICE prediction: standard poodle
  Captions:
    a photo of: a photo of two white pup sitting on a table
    a picture of: a picture of two white pup sitting on a table
    a photo containing: a photo containing of two bily pup
  CLIP confidence: 96.88%


 20%|█▉        | 31/157 [15:22<1:02:23, 29.71s/it]


--- Batch 30 examples ---

Image 1920:
  True class: teapot
  CLIP prediction: confectionery
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a red object
    a picture of: a picture of a red object on a table
    a photo containing: a photo containing the red color of the balloon
  CLIP confidence: 10.43%
  ICE confidence: 10.57%

Image 1921:
  True class: refrigerator
  CLIP prediction: confectionery
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a man in a suit and tie
    a picture of: a picture of a man with a hat and a hat
    a photo containing: a photo containing the video of the video of the incident
  CLIP confidence: 32.40%
  ICE confidence: 33.08%

Image 1922:
  True class: bell pepper
  CLIP prediction: bell pepper
  ICE prediction: bell pepper
  Captions:
    a photo of: a photo of a bunch of oranges
    a picture of: a picture of a bunch of oranges
    a photo containing: a photo containing of a lemon tree
  CLIP confidence:

 20%|██        | 32/157 [15:53<1:02:50, 30.16s/it]


--- Batch 31 examples ---

Image 1984:
  True class: frying pan
  CLIP prediction: wok
  ICE prediction: wok
  Captions:
    a photo of: a photo of a coffee maker
    a picture of: a picture of a plate of food with a fork
    a photo containing: a photo containing the coffee filter
  CLIP confidence: 53.96%
  ICE confidence: 53.96%

Image 1985:
  True class: punching bag
  CLIP prediction: punching bag
  ICE prediction: punching bag
  Captions:
    a photo of: a photo of a man in a white shirt and a blue bag
    a picture of: a picture of a man with a punching punching punching punching punching punching punching punching punching punching punching punching punching punching
    a photo containing: a photo containing the new product
  CLIP confidence: 55.27%
  ICE confidence: 55.27%

Image 1986:
  True class: monarch
  CLIP prediction: monarch
  ICE prediction: monarch
  Captions:
    a photo of: a photo of a butterfly on a leaf
    a picture of: a picture of a butterfly on a leaf
   

 21%|██        | 33/157 [16:22<1:01:45, 29.88s/it]


--- Batch 32 examples ---

Image 2048:
  True class: beaker
  CLIP prediction: beaker
  ICE prediction: beaker
  Captions:
    a photo of: a photo of a yellow liquid being poured into a glass
    a picture of: a picture of a yellow liquid being poured into a glass
    a photo containing: a photo containing with a yellow liquid
  CLIP confidence: 37.04%
  ICE confidence: 37.04%

Image 2049:
  True class: rugby ball
  CLIP prediction: rugby ball
  ICE prediction: rugby ball
  Captions:
    a photo of: a photo of a woman playing soccer
    a picture of: a picture of a person playing soccer
    a photo containing: a photo containing a woman playing soccer
  CLIP confidence: 90.28%
  ICE confidence: 90.33%

Image 2050:
  True class: ice cream
  CLIP prediction: mashed potato
  ICE prediction: mashed potato
  Captions:
    a photo of: a photo of a plate of food with a fork
    a picture of: a picture of a plate of food with a fork
    a photo containing: a photo containing a plate of food
 

 22%|██▏       | 34/157 [16:51<1:00:55, 29.72s/it]


--- Batch 33 examples ---

Image 2112:
  True class: pay-phone
  CLIP prediction: kimono
  ICE prediction: kimono
  Captions:
    a photo of: a photo of a woman in a dress
    a picture of: a picture of a woman in a dress
    a photo containing: a photo containing of a woman in a dress
  CLIP confidence: 24.84%
  ICE confidence: 24.84%

Image 2113:
  True class: birdhouse
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a black and white cat
    a picture of: a picture of a black and white cat
    a photo containing: a photo containing the image of a black and white cat
  CLIP confidence: 21.33%
  ICE confidence: 21.33%

Image 2114:
  True class: trolleybus
  CLIP prediction: trolleybus
  ICE prediction: trolleybus
  Captions:
    a photo of: a photo of a parking lot with a bus parked in the lot
    a picture of: a picture of a parking lot with a bus parked in the lot
    a photo containing: a photo containing of the parking area at the new

 22%|██▏       | 35/157 [17:21<1:00:30, 29.76s/it]


--- Batch 34 examples ---

Image 2176:
  True class: butcher shop
  CLIP prediction: butcher shop
  ICE prediction: butcher shop
  Captions:
    a photo of: a photo of a bar with a neon sign
    a picture of: a picture of a bar with a neon sign
    a photo containing: a photo containing the video of the video of the death of the family of the late person
  CLIP confidence: 98.97%
  ICE confidence: 99.07%

Image 2177:
  True class: goose
  CLIP prediction: goose
  ICE prediction: goose
  Captions:
    a photo of: a photo of a sheep laying in a field
    a picture of: a picture of a sheep laying in the grass
    a photo containing: a photo containing the image of a sheep
  CLIP confidence: 94.38%
  ICE confidence: 95.46%

Image 2178:
  True class: cliff
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a black and white marble
    a picture of: a picture of a black and white marble
    a photo containing: a photo containing the ima

 23%|██▎       | 36/157 [17:50<59:13, 29.37s/it]  


--- Batch 35 examples ---

Image 2240:
  True class: grasshopper
  CLIP prediction: tarantula
  ICE prediction: tarantula
  Captions:
    a photo of: a photo of a pink flower
    a picture of: a picture of a pink flower
    a photo containing: a photo containing the pink flowers of the az az az az az az az az az az az az az
  CLIP confidence: 17.64%
  ICE confidence: 17.64%

Image 2241:
  True class: mashed potato
  CLIP prediction: plate
  ICE prediction: plate
  Captions:
    a photo of: a photo of a plate of food with a fork
    a picture of: a picture of a plate of food with a fork
    a photo containing: a photo containing the color of the fruit
  CLIP confidence: 32.98%
  ICE confidence: 40.58%

Image 2242:
  True class: snail
  CLIP prediction: snail
  ICE prediction: snail
  Captions:
    a photo of: a photo of a snail on a leaf
    a picture of: a picture of a snail on a leaf
    a photo containing: a photo containing the snail ' s shell
  CLIP confidence: 96.19%
  ICE confid

 24%|██▎       | 37/157 [18:20<58:59, 29.50s/it]


--- Batch 36 examples ---

Image 2304:
  True class: projectile
  CLIP prediction: punching bag
  ICE prediction: punching bag
  Captions:
    a photo of: a photo of a city with a lot of buildings
    a picture of: a picture of a city with a lot of buildings
    a photo containing: a photo containing the view of the space shuttle center
  CLIP confidence: 16.22%
  ICE confidence: 16.22%

Image 2305:
  True class: backpack
  CLIP prediction: backpack
  ICE prediction: backpack
  Captions:
    a photo of: a photo of a black bag with a red and white logo
    a picture of: a picture of a black bag with a red and white logo
    a photo containing: a photo containing the bag
  CLIP confidence: 47.39%
  ICE confidence: 54.35%

Image 2306:
  True class: brass
  CLIP prediction: fountain
  ICE prediction: fountain
  Captions:
    a photo of: a photo of a large stone in a room
    a picture of: a picture of a statue in a room
    a photo containing: a photo containing the statue of the person w

 24%|██▍       | 38/157 [18:51<59:35, 30.04s/it]


--- Batch 37 examples ---

Image 2368:
  True class: picket fence
  CLIP prediction: picket fence
  ICE prediction: picket fence
  Captions:
    a photo of: a photo of a white picket fence
    a picture of: a picture of a white picket fence
    a photo containing: a photo containing the person family ' s home in the 1930s
  CLIP confidence: 94.78%
  ICE confidence: 100.59%

Image 2369:
  True class: snail
  CLIP prediction: snail
  ICE prediction: snail
  Captions:
    a photo of: a photo of a small toro toro toro toro toro toro toro toro
    a picture of: a picture of a small animal walking on the road
    a photo containing: a photo containing of a toro toro toro toro toro toro toro toro
  CLIP confidence: 79.83%
  ICE confidence: 79.83%

Image 2370:
  True class: Yorkshire terrier
  CLIP prediction: standard poodle
  ICE prediction: standard poodle
  Captions:
    a photo of: a photo of a dog
    a picture of: a picture of a dog
    a photo containing: a photo containing of a dog
 

 25%|██▍       | 39/157 [19:21<59:04, 30.04s/it]


--- Batch 38 examples ---

Image 2432:
  True class: coral reef
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a sea turtle swimming in the ocean
    a picture of: a picture of a sea turtle swimming in the ocean
    a photo containing: a photo containing with the image of a sea turtle
  CLIP confidence: 57.76%
  ICE confidence: 57.76%

Image 2433:
  True class: poncho
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a man in a suit and tie
    a picture of: a picture of a man in a suit and tie
    a photo containing: a photo containing the image of a man in a suit
  CLIP confidence: 73.29%
  ICE confidence: 73.29%

Image 2434:
  True class: bison
  CLIP prediction: bison
  ICE prediction: bison
  Captions:
    a photo of: a photo of a bison
    a picture of: a picture of a bison
    a photo containing: a photo containing of a bison
  CLIP confidence: 94.53%
  ICE confidence: 100.39%

Im

 25%|██▌       | 40/157 [19:50<58:03, 29.77s/it]


--- Batch 39 examples ---

Image 2496:
  True class: orange
  CLIP prediction: lemon
  ICE prediction: lemon
  Captions:
    a photo of: a photo of a lemon
    a picture of: a picture of a lemon
    a photo containing: a photo containing a lemon
  CLIP confidence: 94.68%
  ICE confidence: 100.49%

Image 2497:
  True class: bullet train
  CLIP prediction: bullet train
  ICE prediction: bullet train
  Captions:
    a photo of: a photo of a street with cars parked on it
    a picture of: a picture of a street with cars parked on it
    a photo containing: a photo containing the image of a car in a parking
  CLIP confidence: 91.26%
  ICE confidence: 91.26%

Image 2498:
  True class: altar
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a church with a stained glass window
    a picture of: a picture of a church with a cross in the middle
    a photo containing: a photo containing the interior of a church
  CLIP confidence: 65.77%
  ICE confide

 26%|██▌       | 41/157 [20:20<57:32, 29.76s/it]


--- Batch 40 examples ---

Image 2560:
  True class: dragonfly
  CLIP prediction: dragonfly
  ICE prediction: dragonfly
  Captions:
    a photo of: a photo of a bug on a leaf
    a picture of: a picture of a bug on a leaf
    a photo containing: a photo containing the image of a bug
  CLIP confidence: 94.34%
  ICE confidence: 94.34%

Image 2561:
  True class: boa constrictor
  CLIP prediction: Persian cat
  ICE prediction: Persian cat
  Captions:
    a photo of: a photo of a tree with a bird in the background
    a picture of: a picture of a tree with a bird in it
    a photo containing: a photo containing the image of a man in a suit
  CLIP confidence: 11.52%
  ICE confidence: 11.52%

Image 2562:
  True class: water tower
  CLIP prediction: water tower
  ICE prediction: water tower
  Captions:
    a photo of: a photo of a water tower in the fog
    a picture of: a picture of a water tower in the fog
    a photo containing: a photo containing the water tower in the fog
  CLIP confiden

 27%|██▋       | 42/157 [20:49<56:39, 29.56s/it]


--- Batch 41 examples ---

Image 2624:
  True class: altar
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a christmas tree with candles
    a picture of: a picture of a christmas tree with candles
    a photo containing: a photo containing the christmas tree
  CLIP confidence: 50.78%
  ICE confidence: 50.78%

Image 2625:
  True class: ladybug
  CLIP prediction: ladybug
  ICE prediction: ladybug
  Captions:
    a photo of: a photo of a yellow cake with a ladybug on top
    a picture of: a picture of a yellow cake with a ladybug on top
    a photo containing: a photo containing the yellow cake
  CLIP confidence: 82.62%
  ICE confidence: 83.30%

Image 2626:
  True class: brain coral
  CLIP prediction: brain coral
  ICE prediction: brain coral
  Captions:
    a photo of: a photo of a woman with a large afro hairs
    a picture of: a picture of a man with a beard and a beard
    a photo containing: a photo containing with the image of a man wi

 27%|██▋       | 43/157 [21:18<56:03, 29.51s/it]


--- Batch 42 examples ---

Image 2688:
  True class: bison
  CLIP prediction: bison
  ICE prediction: bison
  Captions:
    a photo of: a photo of a bear that is laying down
    a picture of: a picture of a dog that is laying down
    a photo containing: a photo containing of a bear that was found in the wild
  CLIP confidence: 40.16%
  ICE confidence: 40.16%

Image 2689:
  True class: basketball
  CLIP prediction: basketball
  ICE prediction: basketball
  Captions:
    a photo of: a photo of a basketball game with the ball in the air
    a picture of: a picture of a basketball game with the ball in the air
    a photo containing: a photo containing the basketball game between the two teams
  CLIP confidence: 71.24%
  ICE confidence: 77.78%

Image 2690:
  True class: picket fence
  CLIP prediction: picket fence
  ICE prediction: picket fence
  Captions:
    a photo of: a photo of a fence in the fog
    a picture of: a picture of a fence with a sky background
    a photo containing: a 

 28%|██▊       | 44/157 [21:48<55:28, 29.45s/it]


--- Batch 43 examples ---

Image 2752:
  True class: lifeboat
  CLIP prediction: lifeboat
  ICE prediction: lifeboat
  Captions:
    a photo of: a photo of a boat in the water
    a picture of: a picture of a boat in the water
    a photo containing: a photo containing of a boat in the water
  CLIP confidence: 99.71%
  ICE confidence: 99.71%

Image 2753:
  True class: African elephant
  CLIP prediction: African elephant
  ICE prediction: African elephant
  Captions:
    a photo of: a photo of a baby elephant in a dirt field
    a picture of: a picture of a small town with a small elephant
    a photo containing: a photo containing the elephant in the village
  CLIP confidence: 90.48%
  ICE confidence: 96.48%

Image 2754:
  True class: beer bottle
  CLIP prediction: beer bottle
  ICE prediction: beer bottle
  Captions:
    a photo of: a photo of a dog
    a picture of: a picture of a dog
    a photo containing: a photo containing of a dog
  CLIP confidence: 92.38%
  ICE confidence: 92.

 29%|██▊       | 45/157 [22:18<55:18, 29.63s/it]


--- Batch 44 examples ---

Image 2816:
  True class: torch
  CLIP prediction: torch
  ICE prediction: torch
  Captions:
    a photo of: a photo of a man holding a frth
    a picture of: a picture of a man holding a frth
    a photo containing: a photo containing a man holding a frc
  CLIP confidence: 35.57%
  ICE confidence: 35.57%

Image 2817:
  True class: golden retriever
  CLIP prediction: boa constrictor
  ICE prediction: boa constrictor
  Captions:
    a photo of: a photo of a dog laying in the snow
    a picture of: a picture of a dog laying in the snow
    a photo containing: a photo containing of a dog laying in the snow
  CLIP confidence: 33.40%
  ICE confidence: 33.40%

Image 2818:
  True class: bannister
  CLIP prediction: bannister
  ICE prediction: bannister
  Captions:
    a photo of: a photo of a building with a lot of windows
    a picture of: a picture of a city with buildings and a sky
    a photo containing: a photo containing with the image of a man in a suit
  CL