In [2]:
!pip install git+https://github.com/openai/CLIP.git



Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-6n9lekho
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-6n9lekho
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting ftfy (from clip==1.0)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->clip==1.0)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->clip==1.0)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->clip==1.0)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (

In [16]:
def fine_tune_clip(clip_model, train_loader, val_loader, classnames, num_epochs=10):
    # 1. Model Preparation
    # Freeze all layers except the projection layers
    for name, param in clip_model.named_parameters():
        if 'visual.proj' not in name and 'text_projection' not in name:
            param.requires_grad = False
        else:
            print(f"Training layer: {name}")

    # 2. Optimizer Configuration
    optimizer = torch.optim.AdamW(
        filter(lambda p: p.requires_grad, clip_model.parameters()),
        lr=1e-6,  # Very conservative learning rate
        betas=(0.9, 0.98),
        eps=1e-6,
        weight_decay=0.01
    )
    
    # 3. Learning Rate Scheduling with Warmup
    warmup_steps = 500
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lambda step: min(1.0, step / warmup_steps)
    )
    
    criterion = torch.nn.CrossEntropyLoss()
    
    # 4. Text Input Preparation
    with torch.no_grad():
        text_inputs = torch.cat([clip.tokenize(f"a photo of a {c}") for c in classnames]).to(device)
        text_features = clip_model.encode_text(text_inputs)
        text_features = text_features / text_features.norm(dim=1, keepdim=True)
    
    # 5. CLIP Normalization Parameters
    clip_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073], device=device).view(1, 3, 1, 1)
    clip_std = torch.tensor([0.26862954, 0.26130258, 0.27577711], device=device).view(1, 3, 1, 1)
    
    # 6. Training Loop with Stability Checks
    best_acc = 0
    
    for epoch in range(num_epochs):
        clip_model.train()
        train_loss = 0
        grad_norms = []
        
        for batch_idx, (images, labels) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}")):
            images, labels = images.to(device), labels.to(device)
            
            # Apply CLIP normalization with numerical stability checks
            images = (images - clip_mean) / clip_std
            if torch.isnan(images).any():
                print("NaN detected in images after normalization!")
                continue
            
            # Forward pass with stability checks
            optimizer.zero_grad()
            
            image_features = clip_model.encode_image(images)
            if torch.isnan(image_features).any():
                print("NaN in image features!")
                continue
                
            image_features = image_features / image_features.norm(dim=1, keepdim=True)
            
            # Use pre-computed text features
            logit_scale = clip_model.logit_scale.exp().clamp(max=100)  # Clamp to prevent overflow
            logits = logit_scale * image_features @ text_features.T
            
            if torch.isnan(logits).any():
                print(f"NaN in logits! Scale: {logit_scale.item()}")
                continue
                
            loss = criterion(logits, labels)
            
            if torch.isnan(loss):
                print("NaN loss detected!")
                continue
                
            # Backward pass with gradient monitoring
            loss.backward()
            
            # Gradient clipping and monitoring
            grad_norm = torch.nn.utils.clip_grad_norm_(
                filter(lambda p: p.requires_grad, clip_model.parameters()),
                max_norm=1.0
            )
            grad_norms.append(grad_norm.item())
            
            if torch.isnan(grad_norm):
                print("NaN gradients detected!")
                optimizer.zero_grad()
                continue
                
            optimizer.step()
            scheduler.step()
            
            train_loss += loss.item()
            
            # Debug prints every 100 batches
            if batch_idx % 100 == 0:
                print(f"\nBatch {batch_idx}:")
                print(f"  Loss: {loss.item():.4f}")
                print(f"  Grad norm: {grad_norm:.4f}")
                print(f"  Logit scale: {logit_scale.item():.4f}")
                print(f"  Max logit: {logits.max().item():.4f}")
                print(f"  Min logit: {logits.min().item():.4f}")
        
        avg_train_loss = train_loss / len(train_loader)
        avg_grad_norm = sum(grad_norms) / len(grad_norms)
        print(f"\nEpoch {epoch+1}:")
        print(f"  Avg Train Loss: {avg_train_loss:.4f}")
        print(f"  Avg Grad Norm: {avg_grad_norm:.4f}")
        
        # Validation
        val_acc = validate_clip(clip_model, val_loader, text_features)
        print(f"  Val Acc: {val_acc:.2f}%")
        
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(clip_model.state_dict(), "best_clip_model.pth")
    
    return clip_model

def validate_clip(clip_model, val_loader, text_features):
    clip_model.eval()
    correct = 0
    total = 0
    
    clip_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073], device=device).view(1, 3, 1, 1)
    clip_std = torch.tensor([0.26862954, 0.26130258, 0.27577711], device=device).view(1, 3, 1, 1)
    
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            images = (images - clip_mean) / clip_std
            
            image_features = clip_model.encode_image(images)
            image_features = image_features / image_features.norm(dim=1, keepdim=True)
            
            logit_scale = clip_model.logit_scale.exp().clamp(max=100)
            logits = logit_scale * image_features @ text_features.T
            
            _, predicted = torch.max(logits, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
    
    return 100 * correct / total

In [9]:
def fine_tune_blip(blip_model, blip_processor, train_loader, num_epochs=3, classnames):
    optimizer = torch.optim.AdamW(blip_model.parameters(), lr=5e-5)
    best_loss = float('inf')
    
    for epoch in range(num_epochs):
        print("Blip epoch:", epoch, num_epochs)
        blip_model.train()
        epoch_loss = 0
        
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            images = images.to(device)
            
            # Convert to PIL images
            pil_images = []
            for img in images:
                np_img = img.cpu().numpy().transpose(1, 2, 0)
                np_img = np.clip(np_img, 0, 1)
                pil_img = Image.fromarray((np_img * 255).astype(np.uint8))
                pil_images.append(pil_img)
            
            # Create inputs - using classnames as prompts
            inputs = blip_processor(
                images=pil_images, 
                text=[f"a photo of a {classnames[l]}" for l in labels],
                return_tensors="pt",
                padding=True,
                truncation=True
            ).to(device)
            
            # Forward pass
            outputs = blip_model(**inputs)
            loss = outputs.loss
            
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            epoch_loss += loss.item()
        
        avg_loss = epoch_loss / len(train_loader)
        print(f"Epoch {epoch+1}: Loss = {avg_loss:.4f}")
        
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(blip_model.state_dict(), "best_blip_model.pth")
    
    return blip_model

In [19]:
import os
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import clip
from transformers import BlipProcessor, BlipForConditionalGeneration
import numpy as np
from tqdm import tqdm

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

class TinyImageNet(Dataset):
    def __init__(self, root, split='val', transform=None):
        self.root = os.path.expanduser(root)
        self.transform = transform
        self.split = split
        
        if split == 'train':
            self.image_paths = []
            self.labels = []
            
            # Process train directory which has subdirectories by class
            train_dir = os.path.join(self.root, 'train')
            self.classes = sorted(os.listdir(train_dir))
            self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}
            
            for class_name in self.classes:
                class_dir = os.path.join(train_dir, class_name, 'images')
                if not os.path.isdir(class_dir):
                    continue
                    
                for img_name in os.listdir(class_dir):
                    if img_name.endswith('.JPEG'):
                        self.image_paths.append(os.path.join(class_dir, img_name))
                        self.labels.append(self.class_to_idx[class_name])
                        
        elif split == 'val':
            self.image_paths = []
            self.labels = []
            
            # Process val directory
            val_dir = os.path.join(self.root, 'val')
            images_dir = os.path.join(val_dir, 'images')
            
            # Read val annotations
            val_annotations_file = os.path.join(val_dir, 'val_annotations.txt')
            self.classes = []
            self.class_to_idx = {}
            
            with open(val_annotations_file, 'r') as f:
                for line in f:
                    parts = line.strip().split('\t')
                    img_name, class_id = parts[0], parts[1]
                    
                    if class_id not in self.class_to_idx:
                        self.classes.append(class_id)
                        self.class_to_idx[class_id] = len(self.classes) - 1
                    
                    self.image_paths.append(os.path.join(images_dir, img_name))
                    self.labels.append(self.class_to_idx[class_id])

    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        
        img = Image.open(img_path).convert('RGB')
        
        if self.transform:
            img = self.transform(img)
            
        return img, label

def get_classnames(dataset, words_file_path):
    """Get formatted class names from dataset using WordNet mappings."""
    if hasattr(dataset, 'classes'):
        # Load WordNet ID to name mapping
        wordnet_map = {}
        with open(words_file_path, 'r') as f:
            for line in f:
                parts = line.strip().split('\t')
                if len(parts) == 2:
                    wordnet_id, names = parts
                    # Take the first name if there are multiple comma-separated ones
                    name = names.split(',')[0].strip()
                    wordnet_map[wordnet_id] = name
        
        # Replace WordNet IDs with human-readable names
        classnames = []
        for c in dataset.classes:
            if c in wordnet_map:
                classnames.append(wordnet_map[c])
            else:
                # Try without 'n' prefix if not found directly
                if c.startswith('n') and c[1:] in wordnet_map:
                    classnames.append(wordnet_map[c[1:]])
                else:
                    # Fallback to the original ID
                    classnames.append(c)
        
        return classnames
    return None

def prepare_dataset(root_dir):
    """Prepare and return the Tiny ImageNet dataset with appropriate transforms."""
    # Basic transform without normalization for dataset loading
    basic_transform = transforms.Compose([
        transforms.Resize(224, interpolation=transforms.InterpolationMode.BICUBIC),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])
    
    # Create dataset instance
    val_dataset = TinyImageNet(root_dir, split='val', transform=basic_transform)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)
    
    return val_dataset, val_loader
    
def generate_captions(blip_model, blip_processor, images, prompts):
    """Generate captions for the given images using BLIP model."""
    all_captions = []
    
    # Create PIL images from tensors
    pil_images = []
    for img in images:
        # Convert tensor to PIL image
        # Ensure values are within 0-1 range
        np_img = img.cpu().numpy().transpose(1, 2, 0)
        np_img = np.clip(np_img, 0, 1)
        pil_img = Image.fromarray((np_img * 255).astype(np.uint8))
        pil_images.append(pil_img)
    
    # Process images with BLIP
    batch_size = len(pil_images)
    with torch.no_grad():
        for prompt in prompts:
            batch_captions = []
            
            # Process each image individually
            for i in range(batch_size):
                inputs = blip_processor(images=pil_images[i], text=prompt, return_tensors="pt").to(device)
                generated_ids = blip_model.generate(**inputs, max_length=50)
                caption = blip_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
                batch_captions.append(caption)
                
            all_captions.append(batch_captions)
    
    return all_captions


def compute_image_embeddings(clip_model, images):
    """Compute CLIP image embeddings with proper normalization."""
    # Apply CLIP-specific normalization
    clip_mean = torch.tensor((0.48145466, 0.4578275, 0.40821073), device=images.device).view(1, 3, 1, 1)
    clip_std = torch.tensor((0.26862954, 0.26130258, 0.27577711), device=images.device).view(1, 3, 1, 1)
    normalized_images = (images - clip_mean) / clip_std
    
    with torch.no_grad():
        image_features = clip_model.encode_image(normalized_images)
        image_features = image_features / image_features.norm(dim=1, keepdim=True)
    return image_features

def compute_text_embeddings(clip_model, texts):
    """Compute CLIP text embeddings."""
    with torch.no_grad():
        text_features = clip_model.encode_text(texts)
        text_features = text_features / text_features.norm(dim=1, keepdim=True)
    return text_features

def image_caption_encoding(image_probs, caption_probs, K=5, xi=0.08, epsilon=1e-12):
    """
    Implement ICE method as described in the paper.
    
    Args:
        image_probs: Probability distribution from image embeddings
        caption_probs: Probability distribution from caption embeddings
        K: Number of top classes to consider
        xi: Scaling factor for lambda
        epsilon: Small constant to avoid division by zero
        
    Returns:
        Final probability distribution after ICE
    """
    # Find top K classes based on image probabilities
    topk_values, topk_indices = torch.topk(image_probs, k=K, dim=1)
    
    # Extract probabilities for top K classes
    image_topk_probs = torch.gather(image_probs, 1, topk_indices)
    caption_topk_probs = torch.gather(caption_probs, 1, topk_indices)
    
    # Compute standard deviation for confidence selection
    image_std = torch.std(image_topk_probs, dim=1, keepdim=True)
    caption_std = torch.std(caption_topk_probs, dim=1, keepdim=True)
    
    # Compute lambda based on equation 3 in the paper
    norm = torch.maximum(torch.norm(torch.cat([image_std, caption_std], dim=1), dim=1, keepdim=True), 
                         torch.tensor(epsilon, device=device))
    
    lambda_values = xi * (caption_std / norm)
    
    # Combine image and caption probabilities
    combined_topk_probs = image_topk_probs + lambda_values * caption_topk_probs
    
    # Create new probability distribution
    output_probs = torch.zeros_like(image_probs)
    output_probs.scatter_(1, topk_indices, combined_topk_probs)
    
    return output_probs

def evaluate_model(clip_model, blip_model, blip_processor, val_loader, classnames, caption_prompts):
    """Evaluate the model on the validation set."""
    text_inputs = torch.cat([clip.tokenize(f"a photo of a {c}") for c in classnames]).to(device)
    text_features = compute_text_embeddings(clip_model, text_inputs)
    
    total = 0
    correct_base = 0
    correct_ice = 0
    
    # Print sample outputs for the first few batches only
    print_samples = 200
    sample_count = 0
    
    for batch_idx, (images, labels) in enumerate(tqdm(val_loader)):
        images = images.to(device)
        labels = labels.to(device)
        batch_size = images.shape[0]
        total += batch_size
        
        # Compute image embeddings and probabilities
        image_features = compute_image_embeddings(clip_model, images)
        image_logits = 100.0 * image_features @ text_features.T
        image_probs = F.softmax(image_logits, dim=1)
        
        # Generate captions using BLIP
        batch_captions = generate_captions(blip_model, blip_processor, images, caption_prompts)
        
        # Process captions
        caption_features_list = []
        for prompt_captions in batch_captions:
            caption_tokens = clip.tokenize(prompt_captions).to(device)
            caption_features = compute_text_embeddings(clip_model, caption_tokens)
            caption_features_list.append(caption_features)
        
        # Compute centroid of caption embeddings
        caption_features = torch.stack(caption_features_list).mean(dim=0)
        
        # Compute caption probabilities
        caption_logits = 100.0 * caption_features @ text_features.T
        caption_probs = F.softmax(caption_logits, dim=1)
        
        # Apply ICE
        ice_probs = image_caption_encoding(image_probs, caption_probs)
        
        # Compute predictions
        _, base_preds = torch.max(image_probs, 1)
        _, ice_preds = torch.max(ice_probs, 1)
        
        # Update metrics
        correct_base += (base_preds == labels).sum().item()
        correct_ice += (ice_preds == labels).sum().item()
        
        # Print examples for debugging
        if batch_idx < print_samples:
            print(f"\n--- Batch {batch_idx} examples ---")
            # Print top 5 examples from the batch
            for i in range(min(5, batch_size)):
                true_label = labels[i].item()
                base_pred = base_preds[i].item()
                ice_pred = ice_preds[i].item()
                
                print(f"\nImage {sample_count + i}:")
                print(f"  True class: {classnames[true_label]}")
                print(f"  CLIP prediction: {classnames[base_pred]}")
                print(f"  ICE prediction: {classnames[ice_pred]}")
                
                # Print captions from all prompts
                print("  Captions:")
                for j, prompt in enumerate(caption_prompts):
                    print(f"    {prompt}: {batch_captions[j][i]}")
                
                # Print confidence scores for predictions
                base_conf = image_probs[i, base_pred].item() * 100
                ice_conf = ice_probs[i, ice_pred].item() * 100
                print(f"  CLIP confidence: {base_conf:.2f}%")
                print(f"  ICE confidence: {ice_conf:.2f}%")
            
            sample_count += batch_size
    
    base_acc = 100 * correct_base / total
    ice_acc = 100 * correct_ice / total
    
    return base_acc, ice_acc

def main():
    tiny_imagenet_path = '/kaggle/input/tiny-imagenet/tiny-imagenet-200'

    
    # Prepare datasets
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    
    val_transform = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])
    
    train_dataset = TinyImageNet(tiny_imagenet_path, split='train', transform=train_transform)
    val_dataset = TinyImageNet(tiny_imagenet_path, split='val', transform=val_transform)
    
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)
        
    # Load models
    clip_model, _ = clip.load("ViT-B/32", device=device)
    blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
    classnames = get_classnames(val_dataset, '/kaggle/input/tiny-imagenet/tiny-imagenet-200/words.txt')

    # Fine-tune models
    # print("Fine-tuning CLIP...")
    # clip_model = fine_tune_clip(clip_model, train_loader, val_loader, classnames)
    
    # Evaluate with fine-tuned models
    caption_prompts = ["a photo of", "a photo containing", "the object in the photo is"]
    
    print("\nEvaluating with fine-tuned models...")
    base_acc, ice_acc = evaluate_model(
        clip_model, blip_model, blip_processor, val_loader, classnames, caption_prompts
    )
    
    print(f"Base CLIP Accuracy: {base_acc:.2f}%")
    print(f"ICE Accuracy: {ice_acc:.2f}%")
    print(f"Improvement: {ice_acc - base_acc:.2f}%")
if __name__ == "__main__":
    main()

Using device: cuda

Evaluating with fine-tuned models...


  0%|          | 0/157 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling 


--- Batch 0 examples ---

Image 0:
  True class: go-kart
  CLIP prediction: go-kart
  ICE prediction: go-kart
  Captions:
    a photo of: a photo of a group of people sitting on the ground
    a photo containing: a photo containing of the scene of the attack in the area of the school in the town of kaloa
    the object in the photo is: the object in the photo is a car
  CLIP confidence: 90.38%
  ICE confidence: 90.38%

Image 1:
  True class: reel
  CLIP prediction: seashore
  ICE prediction: seashore
  Captions:
    a photo of: a photo of a sheep
    a photo containing: a photo containing a sheep
    the object in the photo is: the object in the photo is a sheep
  CLIP confidence: 49.61%
  ICE confidence: 49.61%

Image 2:
  True class: refrigerator
  CLIP prediction: beacon
  ICE prediction: beacon
  Captions:
    a photo of: a photo of a city with a clock
    a photo containing: a photo containing the city of a city
    the object in the photo is: the object in the photo is a picture

  1%|▏         | 2/157 [00:54<1:09:37, 26.95s/it]


--- Batch 1 examples ---

Image 64:
  True class: fountain
  CLIP prediction: beacon
  ICE prediction: beacon
  Captions:
    a photo of: a photo of the fountain at night
    a photo containing: a photo containing the light show at the national mall in washington, dc
    the object in the photo is: the object in the photo is a jet
  CLIP confidence: 36.69%
  ICE confidence: 36.72%

Image 65:
  True class: water tower
  CLIP prediction: water tower
  ICE prediction: water tower
  Captions:
    a photo of: a photo of a field with trees and a building in the background
    a photo containing: a photo containing of the site of the proposed new school
    the object in the photo is: the object in the photo is a large tree
  CLIP confidence: 65.33%
  ICE confidence: 65.33%

Image 66:
  True class: maypole
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a tent with people standing around it
    a photo containing: a photo containing of the tent at 

  2%|▏         | 3/157 [01:21<1:09:50, 27.21s/it]


--- Batch 2 examples ---

Image 128:
  True class: reel
  CLIP prediction: lakeside
  ICE prediction: lakeside
  Captions:
    a photo of: a photo of a man fishing on a lake
    a photo containing: a photo containing of a man fishing
    the object in the photo is: the object in the photo is a fish
  CLIP confidence: 17.11%
  ICE confidence: 24.29%

Image 129:
  True class: brass
  CLIP prediction: cannon
  ICE prediction: cannon
  Captions:
    a photo of: a photo of a cat in the woods
    a photo containing: a photo containing the image of a white cat
    the object in the photo is: the object in the photo is a white cat
  CLIP confidence: 59.18%
  ICE confidence: 59.18%

Image 130:
  True class: bullfrog
  CLIP prediction: tailed frog
  ICE prediction: tailed frog
  Captions:
    a photo of: a photo of a lizard on a rock
    a photo containing: a photo containing of a frog
    the object in the photo is: the object in the photo is a frog
  CLIP confidence: 60.74%
  ICE confidence: 

  3%|▎         | 4/157 [01:50<1:11:01, 27.85s/it]


--- Batch 3 examples ---

Image 192:
  True class: black widow
  CLIP prediction: black widow
  ICE prediction: black widow
  Captions:
    a photo of: a photo of a spider on a piece of wood
    a photo containing: a photo containing the spider
    the object in the photo is: the object in the photo is a spider
  CLIP confidence: 90.48%
  ICE confidence: 90.53%

Image 193:
  True class: sombrero
  CLIP prediction: sombrero
  ICE prediction: sombrero
  Captions:
    a photo of: a photo of two people posing for a picture
    a photo containing: a photo containing of a family
    the object in the photo is: the object in the photo is a man with a hat
  CLIP confidence: 66.11%
  ICE confidence: 66.11%

Image 194:
  True class: candle
  CLIP prediction: candle
  ICE prediction: candle
  Captions:
    a photo of: a photo of a candle
    a photo containing: a photo containing the candle
    the object in the photo is: the object in the photo is a candle
  CLIP confidence: 91.06%
  ICE confid

  3%|▎         | 5/157 [02:19<1:11:27, 28.21s/it]


--- Batch 4 examples ---

Image 256:
  True class: beach wagon
  CLIP prediction: limousine
  ICE prediction: limousine
  Captions:
    a photo of: a photo of a blue car driving on a bridge
    a photo containing: a photo containing the new ford edge
    the object in the photo is: the object in the photo is a bridge
  CLIP confidence: 66.85%
  ICE confidence: 66.85%

Image 257:
  True class: lion
  CLIP prediction: lion
  ICE prediction: lion
  Captions:
    a photo of: a photo of a lion in the wild
    a photo containing: a photo containing of a lion
    the object in the photo is: the object in the photo is a lion
  CLIP confidence: 77.15%
  ICE confidence: 83.59%

Image 258:
  True class: freight car
  CLIP prediction: freight car
  ICE prediction: freight car
  Captions:
    a photo of: a photo of a train on the tracks
    a photo containing: a photo containing of the front of the building
    the object in the photo is: the object in the photo is a red object
  CLIP confidence: 

  4%|▍         | 6/157 [02:46<1:09:55, 27.79s/it]


--- Batch 5 examples ---

Image 320:
  True class: hourglass
  CLIP prediction: torch
  ICE prediction: torch
  Captions:
    a photo of: a photo of a hour hour hour hour hour hour hour hour hour hour hour hour hour hour hour hour hour
    a photo containing: a photo containing the image of a burning candle
    the object in the photo is: the object in the photo is a candle
  CLIP confidence: 44.24%
  ICE confidence: 44.26%

Image 321:
  True class: sandal
  CLIP prediction: sandal
  ICE prediction: sandal
  Captions:
    a photo of: a photo of a light fixture
    a photo containing: a photo containing the light bulb of a lightbulb
    the object in the photo is: the object in the photo is a candle
  CLIP confidence: 40.75%
  ICE confidence: 40.75%

Image 322:
  True class: bullet train
  CLIP prediction: bullet train
  ICE prediction: bullet train
  Captions:
    a photo of: a photo of a train on the tracks
    a photo containing: a photo containing of a train at a station
    the ob

  4%|▍         | 7/157 [03:14<1:10:03, 28.03s/it]


--- Batch 6 examples ---

Image 384:
  True class: jellyfish
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a guitar with a blue background
    a photo containing: a photo containing the sun and a rocket
    the object in the photo is: the object in the photo is a rocket
  CLIP confidence: 52.93%
  ICE confidence: 52.93%

Image 385:
  True class: orangutan
  CLIP prediction: orangutan
  ICE prediction: orangutan
  Captions:
    a photo of: a photo of a baby gorilla in the wild
    a photo containing: a photo containing the video of a gorilla
    the object in the photo is: the object in the photo is a gorilla
  CLIP confidence: 99.32%
  ICE confidence: 100.59%

Image 386:
  True class: water jug
  CLIP prediction: bucket
  ICE prediction: bucket
  Captions:
    a photo of: a photo of a group of baskets
    a photo containing: a photo containing of a set of three baskets
    the object in the photo is: the object in the photo is a 

  5%|▌         | 8/157 [03:42<1:08:58, 27.78s/it]


--- Batch 7 examples ---

Image 448:
  True class: cliff dwelling
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a man in a boat
    a photo containing: a photo containing the image of a man in a boat
    the object in the photo is: the object in the photo is a large rock
  CLIP confidence: 93.65%
  ICE confidence: 93.65%

Image 449:
  True class: snorkel
  CLIP prediction: snorkel
  ICE prediction: snorkel
  Captions:
    a photo of: a photo of a small boat in the water
    a photo containing: a photo containing with the image of a ufo - like object
    the object in the photo is: the object in the photo is a small white object
  CLIP confidence: 44.48%
  ICE confidence: 44.48%

Image 450:
  True class: desk
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a bed with a red and blue blanket
    a photo containing: a photo containing the image of a man in a suit
    the object in the

  6%|▌         | 9/157 [04:10<1:09:07, 28.02s/it]


--- Batch 8 examples ---

Image 512:
  True class: beacon
  CLIP prediction: beacon
  ICE prediction: beacon
  Captions:
    a photo of: a photo of a lighthouse on the beach
    a photo containing: a photo containing the lighthouse
    the object in the photo is: the object in the photo is a lighthouse
  CLIP confidence: 74.37%
  ICE confidence: 75.44%

Image 513:
  True class: cliff
  CLIP prediction: swimming trunks
  ICE prediction: seashore
  Captions:
    a photo of: a photo of a dog on the beach
    a photo containing: a photo containing of a dog on the beach
    the object in the photo is: the object in the photo is a bear
  CLIP confidence: 18.36%
  ICE confidence: 20.24%

Image 514:
  True class: scorpion
  CLIP prediction: scorpion
  ICE prediction: scorpion
  Captions:
    a photo of: a photo of a small lizard on a white surface
    a photo containing: a photo containing with the image of a lizard
    the object in the photo is: the object in the photo is a small lizard
  C

  6%|▋         | 10/157 [04:38<1:08:21, 27.90s/it]


--- Batch 9 examples ---

Image 576:
  True class: monarch
  CLIP prediction: monarch
  ICE prediction: monarch
  Captions:
    a photo of: a photo of a butterfly on a leaf
    a photo containing: a photo containing the monarch butterfly
    the object in the photo is: the object in the photo is a butterfly
  CLIP confidence: 98.63%
  ICE confidence: 104.10%

Image 577:
  True class: pay-phone
  CLIP prediction: pay-phone
  ICE prediction: pay-phone
  Captions:
    a photo of: a photo of a parking meter in a city
    a photo containing: a photo containing the location of the new parking meter
    the object in the photo is: the object in the photo is a phone
  CLIP confidence: 90.97%
  ICE confidence: 90.97%

Image 578:
  True class: pay-phone
  CLIP prediction: remote control
  ICE prediction: remote control
  Captions:
    a photo of: a photo of a group of socks with different colors
    a photo containing: a photo containing the christmas socks
    the object in the photo is: the o

  7%|▋         | 11/157 [05:05<1:07:30, 27.74s/it]


--- Batch 10 examples ---

Image 640:
  True class: police van
  CLIP prediction: police van
  ICE prediction: police van
  Captions:
    a photo of: a photo of a police van parked in a street
    a photo containing: a photo containing of the police van
    the object in the photo is: the object in the photo is a police van
  CLIP confidence: 99.76%
  ICE confidence: 105.47%

Image 641:
  True class: thatch
  CLIP prediction: thatch
  ICE prediction: thatch
  Captions:
    a photo of: a photo of a house in the middle of a field
    a photo containing: a photo containing the site of the first battle of the alam
    the object in the photo is: the object in the photo is a house
  CLIP confidence: 85.40%
  ICE confidence: 85.40%

Image 642:
  True class: walking stick
  CLIP prediction: mantis
  ICE prediction: mantis
  Captions:
    a photo of: a photo of a man on a skateboard
    a photo containing: a photo containing the image of a man on a skateboard
    the object in the photo is: t

  8%|▊         | 12/157 [05:33<1:07:18, 27.85s/it]


--- Batch 11 examples ---

Image 704:
  True class: boa constrictor
  CLIP prediction: boa constrictor
  ICE prediction: boa constrictor
  Captions:
    a photo of: a photo of a bird flying in the night sky
    a photo containing: a photo containing the image of a bird
    the object in the photo is: the object in the photo is a bird
  CLIP confidence: 92.63%
  ICE confidence: 92.63%

Image 705:
  True class: refrigerator
  CLIP prediction: refrigerator
  ICE prediction: refrigerator
  Captions:
    a photo of: a photo of a glass door with a sign
    a photo containing: a photo containing a glass door
    the object in the photo is: the object in the photo is a red object
  CLIP confidence: 88.62%
  ICE confidence: 88.62%

Image 706:
  True class: tabby
  CLIP prediction: Egyptian cat
  ICE prediction: Egyptian cat
  Captions:
    a photo of: a photo of a cat sitting on a table
    a photo containing: a photo containing of a cat in a library
    the object in the photo is: the object 

  8%|▊         | 13/157 [06:01<1:06:58, 27.91s/it]


--- Batch 12 examples ---

Image 768:
  True class: police van
  CLIP prediction: police van
  ICE prediction: police van
  Captions:
    a photo of: a photo of a police car driving down a road
    a photo containing: a photo containing of a police car
    the object in the photo is: the object in the photo is a police car
  CLIP confidence: 83.11%
  ICE confidence: 88.92%

Image 769:
  True class: sea slug
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a coral with a red coral in the background
    a photo containing: a photo containing with the image of a coral
    the object in the photo is: the object in the photo is a red ball
  CLIP confidence: 62.89%
  ICE confidence: 62.89%

Image 770:
  True class: candle
  CLIP prediction: candle
  ICE prediction: candle
  Captions:
    a photo of: a photo of a group of people sitting around a table
    a photo containing: a photo containing the people at a dinner table
    the object in

  9%|▉         | 14/157 [06:29<1:06:29, 27.90s/it]


--- Batch 13 examples ---

Image 832:
  True class: teapot
  CLIP prediction: teapot
  ICE prediction: teapot
  Captions:
    a photo of: a photo of a green vase on a table
    a photo containing: a photo containing of a green glass vase
    the object in the photo is: the object in the photo is a green vase
  CLIP confidence: 88.67%
  ICE confidence: 88.67%

Image 833:
  True class: bucket
  CLIP prediction: bucket
  ICE prediction: bucket
  Captions:
    a photo of: a photo of a cup of coffee
    a photo containing: a photo containing the logo of the university of texas
    the object in the photo is: the object in the photo is a cup
  CLIP confidence: 76.12%
  ICE confidence: 76.12%

Image 834:
  True class: dam
  CLIP prediction: dam
  ICE prediction: dam
  Captions:
    a photo of: a photo of a bridge over a river
    a photo containing: a photo containing the image of a bridge over a river
    the object in the photo is: the object in the photo is a boat
  CLIP confidence: 76.32

 10%|▉         | 15/157 [06:56<1:05:10, 27.54s/it]


--- Batch 14 examples ---

Image 896:
  True class: American alligator
  CLIP prediction: bullfrog
  ICE prediction: bullfrog
  Captions:
    a photo of: a photo of a small insect on the ground
    a photo containing: a photo containing of a black beetle
    the object in the photo is: the object in the photo is a small insect
  CLIP confidence: 65.77%
  ICE confidence: 65.77%

Image 897:
  True class: standard poodle
  CLIP prediction: standard poodle
  ICE prediction: standard poodle
  Captions:
    a photo of: a photo of a dog sitting in the grass
    a photo containing: a photo containing of a poo puppy
    the object in the photo is: the object in the photo is a brown poodle
  CLIP confidence: 19.85%
  ICE confidence: 19.85%

Image 898:
  True class: comic book
  CLIP prediction: basketball
  ICE prediction: basketball
  Captions:
    a photo of: a photo of a woman in a pink shirt
    a photo containing: a photo containing the cover of the album ' the girl '
    the object in the

 10%|█         | 16/157 [07:24<1:04:57, 27.64s/it]


--- Batch 15 examples ---

Image 960:
  True class: ice lolly
  CLIP prediction: ice lolly
  ICE prediction: ice lolly
  Captions:
    a photo of: a photo of a baby
    a photo containing: a photo containing of a baby in a high chair
    the object in the photo is: the object in the photo is a baby
  CLIP confidence: 53.42%
  ICE confidence: 53.42%

Image 961:
  True class: water tower
  CLIP prediction: water tower
  ICE prediction: water tower
  Captions:
    a photo of: a photo of a water tower in the middle of a lake
    a photo containing: a photo containing the water tower
    the object in the photo is: the object in the photo is a water tower
  CLIP confidence: 79.49%
  ICE confidence: 85.84%

Image 962:
  True class: goose
  CLIP prediction: goose
  ICE prediction: goose
  Captions:
    a photo of: a photo of a bird flying over a body of water
    a photo containing: a photo containing of a goose on the shore of a lake
    the object in the photo is: the object in the photo i

 11%|█         | 17/157 [07:52<1:04:58, 27.84s/it]


--- Batch 16 examples ---

Image 1024:
  True class: candle
  CLIP prediction: confectionery
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a man sitting at a table with a cake
    a photo containing: a photo containing of a man sitting at a table with a cake
    the object in the photo is: the object in the photo is a cake
  CLIP confidence: 23.69%
  ICE confidence: 23.80%

Image 1025:
  True class: frying pan
  CLIP prediction: frying pan
  ICE prediction: frying pan
  Captions:
    a photo of: a photo of a fried egg in a pan
    a photo containing: a photo containing the egg
    the object in the photo is: the object in the photo is a fried egg
  CLIP confidence: 86.38%
  ICE confidence: 88.48%

Image 1026:
  True class: potpie
  CLIP prediction: potpie
  ICE prediction: potpie
  Captions:
    a photo of: a photo of a baked pie
    a photo containing: a photo containing the recipe of the baked corn corn corn corn corn corn corn corn corn corn corn corn corn 

 11%|█▏        | 18/157 [08:20<1:04:49, 27.98s/it]


--- Batch 17 examples ---

Image 1088:
  True class: trilobite
  CLIP prediction: acorn
  ICE prediction: acorn
  Captions:
    a photo of: a photo of a rock in the ocean
    a photo containing: a photo containing with the image of a gold nugg
    the object in the photo is: the object in the photo is a large rock
  CLIP confidence: 64.11%
  ICE confidence: 64.11%

Image 1089:
  True class: pomegranate
  CLIP prediction: pomegranate
  ICE prediction: pomegranate
  Captions:
    a photo of: a photo of a red flower in the grass
    a photo containing: a photo containing the red ball
    the object in the photo is: the object in the photo is a red ball
  CLIP confidence: 91.60%
  ICE confidence: 91.60%

Image 1090:
  True class: beaker
  CLIP prediction: beaker
  ICE prediction: beaker
  Captions:
    a photo of: a photo of three plastic beaks
    a photo containing: a photo containing with a phosphern
    the object in the photo is: the object in the photo is a plastic cup
  CLIP confid

 12%|█▏        | 19/157 [08:48<1:03:43, 27.71s/it]


--- Batch 18 examples ---

Image 1152:
  True class: teddy
  CLIP prediction: teddy
  ICE prediction: teddy
  Captions:
    a photo of: a photo of a man in a suit and tie
    a photo containing: a photo containing the image of a man with a dog
    the object in the photo is: the object in the photo is a teddy bear
  CLIP confidence: 21.96%
  ICE confidence: 29.30%

Image 1153:
  True class: cardigan
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a man in a suit and tie
    a photo containing: a photo containing of a man in a suit
    the object in the photo is: the object in the photo is a man with a hat and a jacket
  CLIP confidence: 63.92%
  ICE confidence: 63.92%

Image 1154:
  True class: sewing machine
  CLIP prediction: sewing machine
  ICE prediction: sewing machine
  Captions:
    a photo of: a photo of a sewing machine
    a photo containing: a photo containing of a sewing machine
    the object in the photo is: the object in th

 13%|█▎        | 20/157 [09:15<1:03:17, 27.72s/it]


--- Batch 19 examples ---

Image 1216:
  True class: punching bag
  CLIP prediction: punching bag
  ICE prediction: punching bag
  Captions:
    a photo of: a photo of a man punching a punching punching punching punching
    a photo containing: a photo containing the image of a man punching a punching punching
    the object in the photo is: the object in the photo is a punching punching punching
  CLIP confidence: 73.68%
  ICE confidence: 75.59%

Image 1217:
  True class: lion
  CLIP prediction: lion
  ICE prediction: lion
  Captions:
    a photo of: a photo of a lion laying on the ground
    a photo containing: a photo containing of a lion
    the object in the photo is: the object in the photo is a lion
  CLIP confidence: 88.18%
  ICE confidence: 94.24%

Image 1218:
  True class: brain coral
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a fish swimming in the ocean
    a photo containing: a photo containing from the new book, 

 13%|█▎        | 21/157 [09:43<1:03:07, 27.85s/it]


--- Batch 20 examples ---

Image 1280:
  True class: pill bottle
  CLIP prediction: pill bottle
  ICE prediction: pill bottle
  Captions:
    a photo of: a photo of a person laying on the floor
    a photo containing: a photo containing a person ' s hand and a bottle of pills
    the object in the photo is: the object in the photo is a bottle of pills
  CLIP confidence: 99.37%
  ICE confidence: 99.66%

Image 1281:
  True class: potpie
  CLIP prediction: potpie
  ICE prediction: potpie
  Captions:
    a photo of: a photo of a plate of food with a fork
    a photo containing: a photo containing of a plate of food
    the object in the photo is: the object in the photo is a fish
  CLIP confidence: 70.80%
  ICE confidence: 70.80%

Image 1282:
  True class: refrigerator
  CLIP prediction: refrigerator
  ICE prediction: refrigerator
  Captions:
    a photo of: a photo of a refrigerator and a refrigerator freezer
    a photo containing: a photo containing of a refrigerator and a refrigerator

 14%|█▍        | 22/157 [10:12<1:03:05, 28.04s/it]


--- Batch 21 examples ---

Image 1344:
  True class: tarantula
  CLIP prediction: thatch
  ICE prediction: thatch
  Captions:
    a photo of: a photo of a mountain with a blue sky
    a photo containing: a photo containing the site of the site of the first bomb
    the object in the photo is: the object in the photo is a small boat
  CLIP confidence: 26.42%
  ICE confidence: 26.42%

Image 1345:
  True class: confectionery
  CLIP prediction: confectionery
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a gold bar top
    a photo containing: a photo containing the gold and silver found in the gold mine
    the object in the photo is: the object in the photo is a gold leaf
  CLIP confidence: 95.26%
  ICE confidence: 95.51%

Image 1346:
  True class: lawn mower
  CLIP prediction: lawn mower
  ICE prediction: lawn mower
  Captions:
    a photo of: a photo of a field with sheep and a man
    a photo containing: a photo containing the image of a man in a field of sheep

 15%|█▍        | 23/157 [10:39<1:02:06, 27.81s/it]


--- Batch 22 examples ---

Image 1408:
  True class: sea slug
  CLIP prediction: slug
  ICE prediction: slug
  Captions:
    a photo of: a photo of a snake with its mouth open
    a photo containing: a photo containing with the image of a snake
    the object in the photo is: the object in the photo is a snake
  CLIP confidence: 37.40%
  ICE confidence: 37.45%

Image 1409:
  True class: brown bear
  CLIP prediction: baboon
  ICE prediction: baboon
  Captions:
    a photo of: a photo of a bear on a rock
    a photo containing: a photo containing of a bear on a cliff
    the object in the photo is: the object in the photo is a bear
  CLIP confidence: 48.12%
  ICE confidence: 48.12%

Image 1410:
  True class: bell pepper
  CLIP prediction: bell pepper
  ICE prediction: bell pepper
  Captions:
    a photo of: a photo of a plate of tomatoes and peppers
    a photo containing: a photo containing the image of a pepper
    the object in the photo is: the object in the photo is a bell bell bel

 15%|█▌        | 24/157 [11:05<1:00:14, 27.18s/it]


--- Batch 23 examples ---

Image 1472:
  True class: Arabian camel
  CLIP prediction: Arabian camel
  ICE prediction: Arabian camel
  Captions:
    a photo of: a photo of a group of people riding camels in the desert
    a photo containing: a photo containing the camel race in the sahara desert
    the object in the photo is: the object in the photo is a camel
  CLIP confidence: 83.74%
  ICE confidence: 89.94%

Image 1473:
  True class: cannon
  CLIP prediction: flagpole
  ICE prediction: flagpole
  Captions:
    a photo of: a photo of a statue of a man with a flag
    a photo containing: a photo containing the canadian flag
    the object in the photo is: the object in the photo is a cannon
  CLIP confidence: 96.63%
  ICE confidence: 99.61%

Image 1474:
  True class: butcher shop
  CLIP prediction: butcher shop
  ICE prediction: butcher shop
  Captions:
    a photo of: a photo of a person holding a bunch of strawberries
    a photo containing: a photo containing of a red and white st

 16%|█▌        | 25/157 [11:32<59:41, 27.13s/it]  


--- Batch 24 examples ---

Image 1536:
  True class: barn
  CLIP prediction: barn
  ICE prediction: barn
  Captions:
    a photo of: a photo of a red barn in the snow
    a photo containing: a photo containing the red barn
    the object in the photo is: the object in the photo is a red barn
  CLIP confidence: 93.46%
  ICE confidence: 99.32%

Image 1537:
  True class: nail
  CLIP prediction: abacus
  ICE prediction: abacus
  Captions:
    a photo of: a photo of a gold ring
    a photo containing: a photo containing the gold ring
    the object in the photo is: the object in the photo is a gold ring
  CLIP confidence: 44.95%
  ICE confidence: 44.95%

Image 1538:
  True class: trilobite
  CLIP prediction: trilobite
  ICE prediction: trilobite
  Captions:
    a photo of: a photo of a man in a suit
    a photo containing: a photo containing of a man in a suit
    the object in the photo is: the object in the photo is a black object
  CLIP confidence: 56.93%
  ICE confidence: 56.93%

Image

 17%|█▋        | 26/157 [12:00<1:00:03, 27.51s/it]


--- Batch 25 examples ---

Image 1600:
  True class: cliff dwelling
  CLIP prediction: Egyptian cat
  ICE prediction: Egyptian cat
  Captions:
    a photo of: a photo of a man in a suit and tie
    a photo containing: a photo containing the image of a man in a suit
    the object in the photo is: the object in the photo is a large rock
  CLIP confidence: 43.31%
  ICE confidence: 43.31%

Image 1601:
  True class: sewing machine
  CLIP prediction: chest
  ICE prediction: chest
  Captions:
    a photo of: a photo of a man in a suit and tie
    a photo containing: a photo containing from the video of the video of the death in the philippines
    the object in the photo is: the object in the photo is a small object
  CLIP confidence: 16.96%
  ICE confidence: 16.96%

Image 1602:
  True class: espresso
  CLIP prediction: espresso
  ICE prediction: espresso
  Captions:
    a photo of: a photo of a cup of coffee and a plate of food
    a photo containing: a photo containing of a cup of coffee


 17%|█▋        | 27/157 [12:28<59:30, 27.47s/it]  


--- Batch 26 examples ---

Image 1664:
  True class: scoreboard
  CLIP prediction: scoreboard
  ICE prediction: scoreboard
  Captions:
    a photo of: a photo of a stadium with a large screen
    a photo containing: a photo containing the video of the game
    the object in the photo is: the object in the photo is a large screen
  CLIP confidence: 97.46%
  ICE confidence: 102.25%

Image 1665:
  True class: police van
  CLIP prediction: police van
  ICE prediction: police van
  Captions:
    a photo of: a photo of a van parked in a driveway
    a photo containing: a photo containing of a van parked in a driveway
    the object in the photo is: the object in the photo is a van
  CLIP confidence: 99.17%
  ICE confidence: 101.27%

Image 1666:
  True class: vestment
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a city at night
    a photo containing: a photo containing the cover of the album ' the last days '
    the object in the photo is: t

 18%|█▊        | 28/157 [12:57<1:00:02, 27.93s/it]


--- Batch 27 examples ---

Image 1728:
  True class: grasshopper
  CLIP prediction: grasshopper
  ICE prediction: grasshopper
  Captions:
    a photo of: a photo of a pink dress with a green flower on it
    a photo containing: a photo containing with a green lizard
    the object in the photo is: the object in the photo is a green lizard
  CLIP confidence: 50.63%
  ICE confidence: 50.73%

Image 1729:
  True class: goose
  CLIP prediction: goose
  ICE prediction: goose
  Captions:
    a photo of: a photo of a swan
    a photo containing: a photo containing the image of a swan
    the object in the photo is: the object in the photo is a duck
  CLIP confidence: 56.88%
  ICE confidence: 63.82%

Image 1730:
  True class: stopwatch
  CLIP prediction: stopwatch
  ICE prediction: stopwatch
  Captions:
    a photo of: a photo of a person holding a clock
    a photo containing: a photo containing the image of a person holding a clock
    the object in the photo is: the object in the photo is a

 18%|█▊        | 29/157 [13:25<1:00:00, 28.13s/it]


--- Batch 28 examples ---

Image 1792:
  True class: tractor
  CLIP prediction: tractor
  ICE prediction: tractor
  Captions:
    a photo of: a photo of a tractor with a trailer behind it
    a photo containing: a photo containing the new tractor
    the object in the photo is: the object in the photo is a tractor
  CLIP confidence: 95.80%
  ICE confidence: 101.56%

Image 1793:
  True class: academic gown
  CLIP prediction: academic gown
  ICE prediction: academic gown
  Captions:
    a photo of: a photo of a woman in a police uniform
    a photo containing: a photo containing of a woman in a police uniform
    the object in the photo is: the object in the photo is a large yellow ball
  CLIP confidence: 71.92%
  ICE confidence: 71.92%

Image 1794:
  True class: cliff dwelling
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a house in the mountains
    a photo containing: a photo containing the red rock house
    the object in t

 19%|█▉        | 30/157 [13:53<59:26, 28.08s/it]  


--- Batch 29 examples ---

Image 1856:
  True class: broom
  CLIP prediction: lawn mower
  ICE prediction: lawn mower
  Captions:
    a photo of: a photo of a group of people riding on a bike
    a photo containing: a photo containing of a group of people in a park
    the object in the photo is: the object in the photo is a car
  CLIP confidence: 78.27%
  ICE confidence: 78.27%

Image 1857:
  True class: crane
  CLIP prediction: seashore
  ICE prediction: seashore
  Captions:
    a photo of: a photo of a sunset
    a photo containing: a photo containing a sunset
    the object in the photo is: the object in the photo is a ship
  CLIP confidence: 28.22%
  ICE confidence: 31.15%

Image 1858:
  True class: standard poodle
  CLIP prediction: standard poodle
  ICE prediction: standard poodle
  Captions:
    a photo of: a photo of two white pup sitting on a table
    a photo containing: a photo containing of two white poodle pup
    the object in the photo is: the object in the photo is a 

 20%|█▉        | 31/157 [14:21<58:58, 28.08s/it]


--- Batch 30 examples ---

Image 1920:
  True class: teapot
  CLIP prediction: confectionery
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a red object on a table
    a photo containing: a photo containing the red plastic object
    the object in the photo is: the object in the photo is a red object
  CLIP confidence: 30.37%
  ICE confidence: 30.54%

Image 1921:
  True class: refrigerator
  CLIP prediction: pay-phone
  ICE prediction: pay-phone
  Captions:
    a photo of: a photo of a man in a suit and tie
    a photo containing: a photo containing the video of the incident
    the object in the photo is: the object in the photo is a small white object
  CLIP confidence: 7.46%
  ICE confidence: 7.46%

Image 1922:
  True class: bell pepper
  CLIP prediction: bell pepper
  ICE prediction: bell pepper
  Captions:
    a photo of: a photo of a bunch of oranges
    a photo containing: a photo containing of a lemon tree
    the object in the photo is: the object in t

 20%|██        | 32/157 [14:49<58:19, 28.00s/it]


--- Batch 31 examples ---

Image 1984:
  True class: frying pan
  CLIP prediction: frying pan
  ICE prediction: frying pan
  Captions:
    a photo of: a photo of a coffee maker with a cup of coffee
    a photo containing: a photo containing the two - way - to - go coffee maker
    the object in the photo is: the object in the photo is a small object
  CLIP confidence: 48.24%
  ICE confidence: 48.24%

Image 1985:
  True class: punching bag
  CLIP prediction: punching bag
  ICE prediction: punching bag
  Captions:
    a photo of: a photo of a man with a punching punching punching punching punching punching punching punching punching punching punching punching punching punching
    a photo containing: a photo containing the new product, the new product, is shown in this image
    the object in the photo is: the object in the photo is a man with a baseball bat
  CLIP confidence: 60.99%
  ICE confidence: 60.99%

Image 1986:
  True class: monarch
  CLIP prediction: monarch
  ICE prediction:

 21%|██        | 33/157 [15:16<56:56, 27.55s/it]


--- Batch 32 examples ---

Image 2048:
  True class: beaker
  CLIP prediction: beaker
  ICE prediction: beaker
  Captions:
    a photo of: a photo of a yellow liquid being poured into a beakle
    a photo containing: a photo containing with a yellow liquid
    the object in the photo is: the object in the photo is a spoon and a piece of paper
  CLIP confidence: 41.21%
  ICE confidence: 41.21%

Image 2049:
  True class: rugby ball
  CLIP prediction: rugby ball
  ICE prediction: rugby ball
  Captions:
    a photo of: a photo of a person playing soccer
    a photo containing: a photo containing a woman playing soccer
    the object in the photo is: the object in the photo is a soccer ball
  CLIP confidence: 94.14%
  ICE confidence: 94.29%

Image 2050:
  True class: ice cream
  CLIP prediction: mashed potato
  ICE prediction: mashed potato
  Captions:
    a photo of: a photo of a plate of food with a fork
    a photo containing: a photo containing a plate of food
    the object in the pho

 22%|██▏       | 34/157 [15:43<56:23, 27.51s/it]


--- Batch 33 examples ---

Image 2112:
  True class: pay-phone
  CLIP prediction: Christmas stocking
  ICE prediction: Christmas stocking
  Captions:
    a photo of: a photo of a woman in a dress
    a photo containing: a photo containing of a woman in a red dress
    the object in the photo is: the object in the photo is a red and white bag
  CLIP confidence: 34.74%
  ICE confidence: 34.74%

Image 2113:
  True class: birdhouse
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a black and white cat
    a photo containing: a photo containing the image of a black and white cat
    the object in the photo is: the object in the photo is a black and white image
  CLIP confidence: 11.69%
  ICE confidence: 11.69%

Image 2114:
  True class: trolleybus
  CLIP prediction: trolleybus
  ICE prediction: trolleybus
  Captions:
    a photo of: a photo of a bus parked in a parking
    a photo containing: a photo containing of the bus crash scene
    the obj

 22%|██▏       | 35/157 [16:10<55:42, 27.40s/it]


--- Batch 34 examples ---

Image 2176:
  True class: butcher shop
  CLIP prediction: butcher shop
  ICE prediction: butcher shop
  Captions:
    a photo of: a photo of a bar with a neon sign
    a photo containing: a photo containing the video of the video of the death of the family of the late president
    the object in the photo is: the object in the photo is a red light
  CLIP confidence: 98.39%
  ICE confidence: 98.39%

Image 2177:
  True class: goose
  CLIP prediction: goose
  ICE prediction: goose
  Captions:
    a photo of: a photo of a dog running in the grass
    a photo containing: a photo containing of a dog chasing a bird
    the object in the photo is: the object in the photo is a bird
  CLIP confidence: 98.19%
  ICE confidence: 100.78%

Image 2178:
  True class: cliff
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a black and white cat
    a photo containing: a photo containing the image of a man in a cave
    t

 23%|██▎       | 36/157 [16:37<54:45, 27.15s/it]


--- Batch 35 examples ---

Image 2240:
  True class: grasshopper
  CLIP prediction: dragonfly
  ICE prediction: dragonfly
  Captions:
    a photo of: a photo of a pink flower
    a photo containing: a photo containing the pink flowers of the zinna
    the object in the photo is: the object in the photo is a pink flower
  CLIP confidence: 16.50%
  ICE confidence: 16.50%

Image 2241:
  True class: mashed potato
  CLIP prediction: plate
  ICE prediction: plate
  Captions:
    a photo of: a photo of a plate of food with a fork
    a photo containing: a photo containing of a plate of food
    the object in the photo is: the object in the photo is a plate of food
  CLIP confidence: 50.68%
  ICE confidence: 58.01%

Image 2242:
  True class: snail
  CLIP prediction: snail
  ICE prediction: snail
  Captions:
    a photo of: a photo of a snail on a leaf
    a photo containing: a photo containing with a snail
    the object in the photo is: the object in the photo is a snail
  CLIP confidence: 9

 24%|██▎       | 37/157 [17:04<54:21, 27.18s/it]


--- Batch 36 examples ---

Image 2304:
  True class: projectile
  CLIP prediction: obelisk
  ICE prediction: obelisk
  Captions:
    a photo of: a photo of a city with a lot of buildings
    a photo containing: a photo containing the proposed tower
    the object in the photo is: the object in the photo is a large white object
  CLIP confidence: 12.37%
  ICE confidence: 12.37%

Image 2305:
  True class: backpack
  CLIP prediction: backpack
  ICE prediction: backpack
  Captions:
    a photo of: a photo of a black bag with a red and white logo
    a photo containing: a photo containing the bag
    the object in the photo is: the object in the photo is a black bag
  CLIP confidence: 38.01%
  ICE confidence: 45.31%

Image 2306:
  True class: brass
  CLIP prediction: fountain
  ICE prediction: fountain
  Captions:
    a photo of: a photo of a large stone statue
    a photo containing: a photo containing the statue of the late king
    the object in the photo is: the object in the photo is 

 24%|██▍       | 38/157 [17:33<54:52, 27.67s/it]


--- Batch 37 examples ---

Image 2368:
  True class: picket fence
  CLIP prediction: picket fence
  ICE prediction: picket fence
  Captions:
    a photo of: a photo of a white picket fence
    a photo containing: a photo containing the white picket fence
    the object in the photo is: the object in the photo is a white picket fence
  CLIP confidence: 94.53%
  ICE confidence: 100.39%

Image 2369:
  True class: snail
  CLIP prediction: snail
  ICE prediction: snail
  Captions:
    a photo of: a photo of a squirrel walking on the road
    a photo containing: a photo containing of a squirrel
    the object in the photo is: the object in the photo is a ball of grass
  CLIP confidence: 57.28%
  ICE confidence: 58.84%

Image 2370:
  True class: Yorkshire terrier
  CLIP prediction: standard poodle
  ICE prediction: standard poodle
  Captions:
    a photo of: a photo of a dog
    a photo containing: a photo containing of a dog
    the object in the photo is: the object in the photo is a dog
 

 25%|██▍       | 39/157 [18:01<54:34, 27.75s/it]


--- Batch 38 examples ---

Image 2432:
  True class: coral reef
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a sea turtle swimming in the ocean
    a photo containing: a photo containing with the image of a sea turtle
    the object in the photo is: the object in the photo is a small fish
  CLIP confidence: 63.53%
  ICE confidence: 63.53%

Image 2433:
  True class: poncho
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a man in a white shirt
    a photo containing: a photo containing the image of a man in a white shirt
    the object in the photo is: the object in the photo is a man
  CLIP confidence: 54.59%
  ICE confidence: 54.59%

Image 2434:
  True class: bison
  CLIP prediction: bison
  ICE prediction: bison
  Captions:
    a photo of: a photo of a bison in the grass
    a photo containing: a photo containing of a bison
    the object in the photo is: the object in the photo is 

 25%|██▌       | 40/157 [18:28<53:33, 27.46s/it]


--- Batch 39 examples ---

Image 2496:
  True class: orange
  CLIP prediction: lemon
  ICE prediction: lemon
  Captions:
    a photo of: a photo of a lemon
    a photo containing: a photo containing a lemon
    the object in the photo is: the object in the photo is a lemon
  CLIP confidence: 85.69%
  ICE confidence: 91.85%

Image 2497:
  True class: bullet train
  CLIP prediction: bullet train
  ICE prediction: bullet train
  Captions:
    a photo of: a photo of a street with cars parked on it
    a photo containing: a photo containing the image of a car on a street
    the object in the photo is: the object in the photo is a car
  CLIP confidence: 83.20%
  ICE confidence: 83.20%

Image 2498:
  True class: altar
  CLIP prediction: vestment
  ICE prediction: altar
  Captions:
    a photo of: a photo of a church with a cross in the middle
    a photo containing: a photo containing the interior of a church
    the object in the photo is: the object in the photo is a cross
  CLIP confiden

 26%|██▌       | 41/157 [18:55<53:10, 27.50s/it]


--- Batch 40 examples ---

Image 2560:
  True class: dragonfly
  CLIP prediction: dragonfly
  ICE prediction: dragonfly
  Captions:
    a photo of: a photo of a bug on a green leaf
    a photo containing: a photo containing the image of a dragonfly
    the object in the photo is: the object in the photo is a bug
  CLIP confidence: 85.64%
  ICE confidence: 86.96%

Image 2561:
  True class: boa constrictor
  CLIP prediction: Persian cat
  ICE prediction: Persian cat
  Captions:
    a photo of: a photo of a man in a suit and tie
    a photo containing: a photo containing of the site of the ancient tomb of the pharaoh
    the object in the photo is: the object in the photo is a white object
  CLIP confidence: 15.17%
  ICE confidence: 15.17%

Image 2562:
  True class: water tower
  CLIP prediction: water tower
  ICE prediction: water tower
  Captions:
    a photo of: a photo of a water tower in the fog
    a photo containing: a photo containing the water tower in the fog
    the object in 

 27%|██▋       | 42/157 [19:22<52:23, 27.33s/it]


--- Batch 41 examples ---

Image 2624:
  True class: altar
  CLIP prediction: altar
  ICE prediction: altar
  Captions:
    a photo of: a photo of a christmas tree with candles
    a photo containing: a photo containing the christmas tree
    the object in the photo is: the object in the photo is a christmas tree
  CLIP confidence: 62.94%
  ICE confidence: 62.94%

Image 2625:
  True class: ladybug
  CLIP prediction: ladybug
  ICE prediction: ladybug
  Captions:
    a photo of: a photo of a yellow cake with a ladybug on top
    a photo containing: a photo containing the yellow cake
    the object in the photo is: the object in the photo is a yellow bird
  CLIP confidence: 87.99%
  ICE confidence: 87.99%

Image 2626:
  True class: brain coral
  CLIP prediction: brain coral
  ICE prediction: brain coral
  Captions:
    a photo of: a photo of a man in a blue shirt
    a photo containing: a photo containing with the image of a man in a diving suit
    the object in the photo is: the object

 27%|██▋       | 43/157 [19:50<52:30, 27.64s/it]


--- Batch 42 examples ---

Image 2688:
  True class: bison
  CLIP prediction: ox
  ICE prediction: ox
  Captions:
    a photo of: a photo of a bear that is laying down
    a photo containing: a photo containing of a bear that was found in the wild
    the object in the photo is: the object in the photo is a bear
  CLIP confidence: 27.15%
  ICE confidence: 27.15%

Image 2689:
  True class: basketball
  CLIP prediction: basketball
  ICE prediction: basketball
  Captions:
    a photo of: a photo of a basketball game with the ball in the air
    a photo containing: a photo containing the nba all - star ' s game against the nba all - star ' s team
    the object in the photo is: the object in the photo is a basketball player shooting the ball
  CLIP confidence: 80.62%
  ICE confidence: 86.82%

Image 2690:
  True class: picket fence
  CLIP prediction: picket fence
  ICE prediction: picket fence
  Captions:
    a photo of: a photo of a fence in the fog
    a photo containing: a photo contain

 28%|██▊       | 44/157 [20:19<52:18, 27.78s/it]


--- Batch 43 examples ---

Image 2752:
  True class: lifeboat
  CLIP prediction: lifeboat
  ICE prediction: lifeboat
  Captions:
    a photo of: a photo of a boat in the water
    a photo containing: a photo containing of a boat in the water
    the object in the photo is: the object in the photo is a boat
  CLIP confidence: 99.85%
  ICE confidence: 99.85%

Image 2753:
  True class: African elephant
  CLIP prediction: African elephant
  ICE prediction: African elephant
  Captions:
    a photo of: a photo of a small village with a small house
    a photo containing: a photo containing of a baby elephant in a village in the outskirts of the city of kash
    the object in the photo is: the object in the photo is a baby elephant
  CLIP confidence: 92.72%
  ICE confidence: 98.00%

Image 2754:
  True class: beer bottle
  CLIP prediction: beer bottle
  ICE prediction: beer bottle
  Captions:
    a photo of: a photo of a dog
    a photo containing: a photo containing of a dog
    the object i

 29%|██▊       | 45/157 [20:46<51:53, 27.80s/it]


--- Batch 44 examples ---

Image 2816:
  True class: torch
  CLIP prediction: torch
  ICE prediction: torch
  Captions:
    a photo of: a photo of a man holding a frc
    a photo containing: a photo containing a man holding a frc
    the object in the photo is: the object in the photo is a kite
  CLIP confidence: 47.73%
  ICE confidence: 47.73%

Image 2817:
  True class: golden retriever
  CLIP prediction: meat loaf
  ICE prediction: meat loaf
  Captions:
    a photo of: a photo of a dog playing with a toy
    a photo containing: a photo containing of a dog playing with a toy
    the object in the photo is: the object in the photo is a dog
  CLIP confidence: 15.97%
  ICE confidence: 15.97%

Image 2818:
  True class: bannister
  CLIP prediction: bannister
  ICE prediction: bannister
  Captions:
    a photo of: a photo of a building with a large window
    a photo containing: a photo containing with the image of the space shuttle
    the object in the photo is: the object in the photo i

 29%|██▉       | 46/157 [21:13<50:56, 27.53s/it]


--- Batch 45 examples ---

Image 2880:
  True class: brain coral
  CLIP prediction: acorn
  ICE prediction: acorn
  Captions:
    a photo of: a photo of a man in a suit
    a photo containing: a photo containing of a red - throated sea turtle
    the object in the photo is: the object in the photo is a small white object
  CLIP confidence: 58.64%
  ICE confidence: 58.64%

Image 2881:
  True class: gasmask
  CLIP prediction: neck brace
  ICE prediction: neck brace
  Captions:
    a photo of: a photo of a man with a blue hat
    a photo containing: a photo containing of a man with a blue mask
    the object in the photo is: the object in the photo is a blue object
  CLIP confidence: 40.62%
  ICE confidence: 40.62%

Image 2882:
  True class: scoreboard
  CLIP prediction: scoreboard
  ICE prediction: scoreboard
  Captions:
    a photo of: a photo of a baseball field with a ball in the middle
    a photo containing: a photo containing the field at the baseball field
    the object in the p

 30%|██▉       | 47/157 [21:41<50:47, 27.70s/it]


--- Batch 46 examples ---

Image 2944:
  True class: espresso
  CLIP prediction: espresso
  ICE prediction: espresso
  Captions:
    a photo of: a photo of a cup of coffee on a table
    a photo containing: a photo containing of a cup of coffee
    the object in the photo is: the object in the photo is a cup of tea
  CLIP confidence: 99.80%
  ICE confidence: 103.52%

Image 2945:
  True class: bee
  CLIP prediction: bee
  ICE prediction: bee
  Captions:
    a photo of: a photo of a spider on a flower
    a photo containing: a photo containing the image of a bee
    the object in the photo is: the object in the photo is a spider
  CLIP confidence: 27.76%
  ICE confidence: 35.30%

Image 2946:
  True class: goldfish
  CLIP prediction: goldfish
  ICE prediction: goldfish
  Captions:
    a photo of: a photo of a fire in the middle of a forest
    a photo containing: a photo containing the image of a fire
    the object in the photo is: the object in the photo is a fire
  CLIP confidence: 28

 31%|███       | 48/157 [22:09<50:08, 27.60s/it]


--- Batch 47 examples ---

Image 3008:
  True class: miniskirt
  CLIP prediction: miniskirt
  ICE prediction: miniskirt
  Captions:
    a photo of: a photo of a woman sitting on a chair
    a photo containing: a photo containing a woman sitting on a chair
    the object in the photo is: the object in the photo is a chair
  CLIP confidence: 89.70%
  ICE confidence: 89.70%

Image 3009:
  True class: bighorn
  CLIP prediction: Labrador retriever
  ICE prediction: Labrador retriever
  Captions:
    a photo of: a photo of a horse in a field
    a photo containing: a photo containing the image of a horse
    the object in the photo is: the object in the photo is a horse
  CLIP confidence: 32.25%
  ICE confidence: 32.25%

Image 3010:
  True class: space heater
  CLIP prediction: refrigerator
  ICE prediction: refrigerator
  Captions:
    a photo of: a photo of a red and white train
    a photo containing: a photo containing of a red and white train
    the object in the photo is: the object 

 31%|███       | 49/157 [22:37<49:49, 27.68s/it]


--- Batch 48 examples ---

Image 3072:
  True class: organ
  CLIP prediction: organ
  ICE prediction: organ
  Captions:
    a photo of: a photo of a woman in a white dress
    a photo containing: a photo containing the new york - area of the new york - area of the new york - area of the new york - area of the
    the object in the photo is: the object in the photo is a large, circular object
  CLIP confidence: 57.76%
  ICE confidence: 57.76%

Image 3073:
  True class: drumstick
  CLIP prediction: bucket
  ICE prediction: bucket
  Captions:
    a photo of: a photo of a man playing a drum
    a photo containing: a photo containing the drum lessons
    the object in the photo is: the object in the photo is a drum
  CLIP confidence: 25.42%
  ICE confidence: 25.81%

Image 3074:
  True class: black widow
  CLIP prediction: fly
  ICE prediction: fly
  Captions:
    a photo of: a photo of a spider crawling on the beach
    a photo containing: a photo containing of a spider on the beach
    th

 32%|███▏      | 50/157 [23:04<49:04, 27.51s/it]


--- Batch 49 examples ---

Image 3136:
  True class: mushroom
  CLIP prediction: mushroom
  ICE prediction: mushroom
  Captions:
    a photo of: a photo of a field with a small patch of grass
    a photo containing: a photo containing with the image of a man in a field
    the object in the photo is: the object in the photo is a small white object
  CLIP confidence: 10.80%
  ICE confidence: 10.80%

Image 3137:
  True class: basketball
  CLIP prediction: volleyball
  ICE prediction: volleyball
  Captions:
    a photo of: a photo of a basketball game
    a photo containing: a photo containing the basketball game
    the object in the photo is: the object in the photo is a basketball player
  CLIP confidence: 90.92%
  ICE confidence: 90.97%

Image 3138:
  True class: rocking chair
  CLIP prediction: rocking chair
  ICE prediction: rocking chair
  Captions:
    a photo of: a photo of a chair with a clock on it
    a photo containing: a photo containing the new chair
    the object in the 

 32%|███▏      | 51/157 [23:32<49:03, 27.77s/it]


--- Batch 50 examples ---

Image 3200:
  True class: water jug
  CLIP prediction: Christmas stocking
  ICE prediction: Christmas stocking
  Captions:
    a photo of: a photo of a white and red flower
    a photo containing: a photo containing the cover of the book the secret of the secret garden
    the object in the photo is: the object in the photo is a white object with a red and yellow flower
  CLIP confidence: 18.73%
  ICE confidence: 18.73%

Image 3201:
  True class: maypole
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a group of people walking through a forest
    a photo containing: a photo containing the video of the video of the video of the video of the video of the video
    the object in the photo is: the object in the photo is a large green tree
  CLIP confidence: 97.46%
  ICE confidence: 97.46%

Image 3202:
  True class: koala
  CLIP prediction: hog
  ICE prediction: hog
  Captions:
    a photo of: a photo of a wolf laying 

 33%|███▎      | 52/157 [23:59<48:19, 27.61s/it]


--- Batch 51 examples ---

Image 3264:
  True class: lakeside
  CLIP prediction: ox
  ICE prediction: ox
  Captions:
    a photo of: a photo of a field with a blue sky
    a photo containing: a photo containing the site of the proposed gold mine in the kimberley
    the object in the photo is: the object in the photo is a white dog
  CLIP confidence: 20.19%
  ICE confidence: 20.19%

Image 3265:
  True class: golden retriever
  CLIP prediction: standard poodle
  ICE prediction: standard poodle
  Captions:
    a photo of: a photo of a dog standing in the water
    a photo containing: a photo containing of a dog in the water
    the object in the photo is: the object in the photo is a brown dog
  CLIP confidence: 69.48%
  ICE confidence: 69.48%

Image 3266:
  True class: dugong
  CLIP prediction: dugong
  ICE prediction: dugong
  Captions:
    a photo of: a photo of a dolphin swimming in the ocean
    a photo containing: a photo containing of a dolphin swimming in the ocean
    the objec

 34%|███▍      | 53/157 [24:27<47:43, 27.53s/it]


--- Batch 52 examples ---

Image 3328:
  True class: bow tie
  CLIP prediction: neck brace
  ICE prediction: neck brace
  Captions:
    a photo of: a photo of a man in a green shirt
    a photo containing: a photo containing the image of a man in a green shirt
    the object in the photo is: the object in the photo is a cat
  CLIP confidence: 38.28%
  ICE confidence: 38.28%

Image 3329:
  True class: ox
  CLIP prediction: bison
  ICE prediction: bison
  Captions:
    a photo of: a photo of a black bear in a field
    a photo containing: a photo containing of a bear and a bear cub
    the object in the photo is: the object in the photo is a bear
  CLIP confidence: 43.73%
  ICE confidence: 43.77%

Image 3330:
  True class: Egyptian cat
  CLIP prediction: Egyptian cat
  ICE prediction: Egyptian cat
  Captions:
    a photo of: a photo of a black cat looking out the window
    a photo containing: a photo containing of a black cat
    the object in the photo is: the object in the photo is a

 34%|███▍      | 54/157 [24:54<47:08, 27.46s/it]


--- Batch 53 examples ---

Image 3392:
  True class: trilobite
  CLIP prediction: thatch
  ICE prediction: thatch
  Captions:
    a photo of: a photo of a brown substance
    a photo containing: a photo containing the texture of a sand dune
    the object in the photo is: the object in the photo is a large rock
  CLIP confidence: 16.54%
  ICE confidence: 16.54%

Image 3393:
  True class: cliff dwelling
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a building with a large window
    a photo containing: a photo containing of the site of the ancient tomb of the pharaoh
    the object in the photo is: the object in the photo is a small boat
  CLIP confidence: 60.64%
  ICE confidence: 60.64%

Image 3394:
  True class: water tower
  CLIP prediction: water tower
  ICE prediction: water tower
  Captions:
    a photo of: a photo of a street with a water tower in the background
    a photo containing: a photo containing the sky
    the

 35%|███▌      | 55/157 [25:22<46:52, 27.57s/it]


--- Batch 54 examples ---

Image 3456:
  True class: moving van
  CLIP prediction: moving van
  ICE prediction: moving van
  Captions:
    a photo of: a photo of a man walking in front of a truck
    a photo containing: a photo containing of a truck parked in a parking
    the object in the photo is: the object in the photo is a truck
  CLIP confidence: 77.29%
  ICE confidence: 81.10%

Image 3457:
  True class: albatross
  CLIP prediction: albatross
  ICE prediction: albatross
  Captions:
    a photo of: a photo of a white cat on a boat
    a photo containing: a photo containing of a polar bear on a boat
    the object in the photo is: the object in the photo is a white bird
  CLIP confidence: 99.07%
  ICE confidence: 99.07%

Image 3458:
  True class: stopwatch
  CLIP prediction: projectile
  ICE prediction: projectile
  Captions:
    a photo of: a photo of a man holding a blue and white object
    a photo containing: a photo containing a man holding a blue and white object
    the ob

 36%|███▌      | 56/157 [25:49<46:16, 27.49s/it]


--- Batch 55 examples ---

Image 3520:
  True class: Labrador retriever
  CLIP prediction: Labrador retriever
  ICE prediction: Labrador retriever
  Captions:
    a photo of: a photo of a dog on the beach
    a photo containing: a photo containing of a dog on the beach
    the object in the photo is: the object in the photo is a dog
  CLIP confidence: 35.13%
  ICE confidence: 35.13%

Image 3521:
  True class: jellyfish
  CLIP prediction: jellyfish
  ICE prediction: jellyfish
  Captions:
    a photo of: a photo of a large cloud in the sky
    a photo containing: a photo containing the image of a planet
    the object in the photo is: the object in the photo is a large white object
  CLIP confidence: 19.43%
  ICE confidence: 19.43%

Image 3522:
  True class: ox
  CLIP prediction: sombrero
  ICE prediction: sombrero
  Captions:
    a photo of: a photo of a group of people standing in front of a christmas tree
    a photo containing: a photo containing of the family of the missing man
   

 36%|███▋      | 57/157 [26:17<45:54, 27.54s/it]


--- Batch 56 examples ---

Image 3584:
  True class: kimono
  CLIP prediction: kimono
  ICE prediction: kimono
  Captions:
    a photo of: a photo of a woman in a pink dress
    a photo containing: a photo containing of a barbie doll
    the object in the photo is: the object in the photo is a pink dress
  CLIP confidence: 73.93%
  ICE confidence: 73.93%

Image 3585:
  True class: plate
  CLIP prediction: plate
  ICE prediction: plate
  Captions:
    a photo of: a photo of a bowl of food on a table
    a photo containing: a photo containing of a bowl of food
    the object in the photo is: the object in the photo is a bowl of food
  CLIP confidence: 46.14%
  ICE confidence: 52.98%

Image 3586:
  True class: water tower
  CLIP prediction: obelisk
  ICE prediction: obelisk
  Captions:
    a photo of: a photo of a field with a tree in the background
    a photo containing: a photo containing of a field with a tree in the background
    the object in the photo is: the object in the photo 

 37%|███▋      | 58/157 [26:45<45:36, 27.64s/it]


--- Batch 57 examples ---

Image 3648:
  True class: hog
  CLIP prediction: hog
  ICE prediction: hog
  Captions:
    a photo of: a photo of a bear drinking water from a pond
    a photo containing: a photo containing of a bear cub
    the object in the photo is: the object in the photo is a bear
  CLIP confidence: 83.98%
  ICE confidence: 83.98%

Image 3649:
  True class: pill bottle
  CLIP prediction: pill bottle
  ICE prediction: pill bottle
  Captions:
    a photo of: a photo of a yellow and white cup
    a photo containing: a photo containing the words ' i love you '
    the object in the photo is: the object in the photo is a canister
  CLIP confidence: 98.44%
  ICE confidence: 98.44%

Image 3650:
  True class: sewing machine
  CLIP prediction: sewing machine
  ICE prediction: sewing machine
  Captions:
    a photo of: a photo of a woman in a dress
    a photo containing: a photo containing the image of a woman in a dress
    the object in the photo is: the object in the photo i

 38%|███▊      | 59/157 [27:13<45:19, 27.75s/it]


--- Batch 58 examples ---

Image 3712:
  True class: dumbbell
  CLIP prediction: dumbbell
  ICE prediction: dumbbell
  Captions:
    a photo of: a photo of a blue and white cat
    a photo containing: a photo containing of a blue and white diamond ring
    the object in the photo is: the object in the photo is a blue ball
  CLIP confidence: 44.09%
  ICE confidence: 44.09%

Image 3713:
  True class: trolleybus
  CLIP prediction: trolleybus
  ICE prediction: trolleybus
  Captions:
    a photo of: a photo of a bus driving down a street
    a photo containing: a photo containing of a bus on a city street
    the object in the photo is: the object in the photo is a bus
  CLIP confidence: 95.56%
  ICE confidence: 95.75%

Image 3714:
  True class: academic gown
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a group of people in a field
    a photo containing: a photo containing the image of the students in the school
    the object in the photo is

 38%|███▊      | 60/157 [27:39<44:21, 27.44s/it]


--- Batch 59 examples ---

Image 3776:
  True class: viaduct
  CLIP prediction: lakeside
  ICE prediction: lakeside
  Captions:
    a photo of: a photo of a lake with a boat in the water
    a photo containing: a photo containing the image of a lake
    the object in the photo is: the object in the photo is a boat
  CLIP confidence: 37.11%
  ICE confidence: 44.73%

Image 3777:
  True class: orangutan
  CLIP prediction: fur coat
  ICE prediction: fur coat
  Captions:
    a photo of: a photo of a cat in a cage
    a photo containing: a photo containing the image of a rat
    the object in the photo is: the object in the photo is a red and white object
  CLIP confidence: 23.50%
  ICE confidence: 23.50%

Image 3778:
  True class: convertible
  CLIP prediction: convertible
  ICE prediction: convertible
  Captions:
    a photo of: a photo of a car with a trailer in the background
    a photo containing: a photo containing of the body of a car that was found in the crash
    the object in th

 39%|███▉      | 61/157 [28:09<44:46, 27.99s/it]


--- Batch 60 examples ---

Image 3840:
  True class: alp
  CLIP prediction: alp
  ICE prediction: alp
  Captions:
    a photo of: a photo of a group of people walking in the snow
    a photo containing: a photo containing of the snow - covered mount everest
    the object in the photo is: the object in the photo is a snow - covered mountain
  CLIP confidence: 68.36%
  ICE confidence: 68.36%

Image 3841:
  True class: cardigan
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a woman in a white dress
    a photo containing: a photo containing of a woman in a white dress
    the object in the photo is: the object in the photo is a woman
  CLIP confidence: 17.93%
  ICE confidence: 17.93%

Image 3842:
  True class: limousine
  CLIP prediction: limousine
  ICE prediction: limousine
  Captions:
    a photo of: a photo of a person walking on a street
    a photo containing: a photo containing the image of a person walking on a street
    the object

 39%|███▉      | 62/157 [28:37<44:25, 28.06s/it]


--- Batch 61 examples ---

Image 3904:
  True class: barrel
  CLIP prediction: barrel
  ICE prediction: barrel
  Captions:
    a photo of: a photo of a group of people in a crowd
    a photo containing: a photo containing the image of a group of people in a crowd
    the object in the photo is: the object in the photo is a red object
  CLIP confidence: 39.04%
  ICE confidence: 39.04%

Image 3905:
  True class: school bus
  CLIP prediction: school bus
  ICE prediction: school bus
  Captions:
    a photo of: a photo of a large truck with a man on it
    a photo containing: a photo containing of a large yellow truck
    the object in the photo is: the object in the photo is a large orange truck
  CLIP confidence: 25.59%
  ICE confidence: 32.54%

Image 3906:
  True class: space heater
  CLIP prediction: space heater
  ICE prediction: space heater
  Captions:
    a photo of: a photo of a cube with a small square on top
    a photo containing: a photo containing the gold cube
    the object

 40%|████      | 63/157 [29:04<43:17, 27.63s/it]


--- Batch 62 examples ---

Image 3968:
  True class: albatross
  CLIP prediction: albatross
  ICE prediction: albatross
  Captions:
    a photo of: a photo of a bird in the water
    a photo containing: a photo containing the image of a seagul
    the object in the photo is: the object in the photo is a seagul
  CLIP confidence: 99.85%
  ICE confidence: 99.85%

Image 3969:
  True class: barn
  CLIP prediction: alp
  ICE prediction: alp
  Captions:
    a photo of: a photo of a city at night
    a photo containing: a photo containing the night sky
    the object in the photo is: the object in the photo is a mountain
  CLIP confidence: 27.88%
  ICE confidence: 27.88%

Image 3970:
  True class: seashore
  CLIP prediction: obelisk
  ICE prediction: seashore
  Captions:
    a photo of: a photo of a lighthouse on a rocky shore
    a photo containing: a photo containing the ocean
    the object in the photo is: the object in the photo is a lighthouse
  CLIP confidence: 27.64%
  ICE confidence

 41%|████      | 64/157 [29:32<43:06, 27.81s/it]


--- Batch 63 examples ---

Image 4032:
  True class: police van
  CLIP prediction: police van
  ICE prediction: police van
  Captions:
    a photo of: a photo of a white van driving down a street
    a photo containing: a photo containing of a van on a road
    the object in the photo is: the object in the photo is a white van
  CLIP confidence: 57.32%
  ICE confidence: 58.98%

Image 4033:
  True class: spiny lobster
  CLIP prediction: spiny lobster
  ICE prediction: spiny lobster
  Captions:
    a photo of: a photo of a green and yellow field
    a photo containing: a photo containing the image of the city of london
    the object in the photo is: the object in the photo is a green and yellow object
  CLIP confidence: 15.21%
  ICE confidence: 15.21%

Image 4034:
  True class: teddy
  CLIP prediction: teddy
  ICE prediction: teddy
  Captions:
    a photo of: a photo of a teddy bear sitting on a blue blanket
    a photo containing: a photo containing of a teddy bear
    the object in t

 41%|████▏     | 65/157 [29:59<42:27, 27.69s/it]


--- Batch 64 examples ---

Image 4096:
  True class: broom
  CLIP prediction: broom
  ICE prediction: broom
  Captions:
    a photo of: a photo of a mouse mouse with a red nose
    a photo containing: a photo containing the image of a mouse mouse
    the object in the photo is: the object in the photo is a wooden stick with a red and yellow flower on it
  CLIP confidence: 91.50%
  ICE confidence: 91.50%

Image 4097:
  True class: projectile
  CLIP prediction: water tower
  ICE prediction: water tower
  Captions:
    a photo of: a photo of a small hole in the ground
    a photo containing: a photo containing the image of a yellow - spotted object
    the object in the photo is: the object in the photo is a small hole
  CLIP confidence: 14.14%
  ICE confidence: 14.14%

Image 4098:
  True class: meat loaf
  CLIP prediction: meat loaf
  ICE prediction: meat loaf
  Captions:
    a photo of: a photo of a kitchen with a cake on the counter
    a photo containing: a photo containing the kitch

 42%|████▏     | 66/157 [30:27<42:03, 27.73s/it]


--- Batch 65 examples ---

Image 4160:
  True class: Labrador retriever
  CLIP prediction: Labrador retriever
  ICE prediction: Labrador retriever
  Captions:
    a photo of: a photo of a white dog sitting in the grass
    a photo containing: a photo containing of a white labie puppy
    the object in the photo is: the object in the photo is a white dog
  CLIP confidence: 80.81%
  ICE confidence: 80.81%

Image 4161:
  True class: sea slug
  CLIP prediction: spiny lobster
  ICE prediction: spiny lobster
  Captions:
    a photo of: a photo of a small white flower
    a photo containing: a photo containing the image of a star
    the object in the photo is: the object in the photo is a small white object
  CLIP confidence: 64.75%
  ICE confidence: 64.75%

Image 4162:
  True class: frying pan
  CLIP prediction: wok
  ICE prediction: frying pan
  Captions:
    a photo of: a photo of a frying pan with a frying pan
    a photo containing: a photo containing the image of a frying pan of fried

 43%|████▎     | 67/157 [30:54<41:03, 27.37s/it]


--- Batch 66 examples ---

Image 4224:
  True class: black stork
  CLIP prediction: black stork
  ICE prediction: black stork
  Captions:
    a photo of: a photo of an eagle flying in the sky
    a photo containing: a photo containing the image of an eagle soaring through the sky
    the object in the photo is: the object in the photo is a bird
  CLIP confidence: 54.69%
  ICE confidence: 54.69%

Image 4225:
  True class: lemon
  CLIP prediction: orange
  ICE prediction: orange
  Captions:
    a photo of: a photo of a slice of orange
    a photo containing: a photo containing the orange juice
    the object in the photo is: the object in the photo is a lemon
  CLIP confidence: 33.11%
  ICE confidence: 40.62%

Image 4226:
  True class: sunglasses
  CLIP prediction: sunglasses
  ICE prediction: sunglasses
  Captions:
    a photo of: a photo of a little girl in sunglasses
    a photo containing: a photo containing of a baby girl
    the object in the photo is: the object in the photo is a

 43%|████▎     | 68/157 [31:21<40:38, 27.40s/it]


--- Batch 67 examples ---

Image 4288:
  True class: umbrella
  CLIP prediction: trolleybus
  ICE prediction: trolleybus
  Captions:
    a photo of: a photo of a fire truck and a fire truck
    a photo containing: a photo containing of the scene in the scene
    the object in the photo is: the object in the photo is a red object
  CLIP confidence: 22.88%
  ICE confidence: 22.88%

Image 4289:
  True class: freight car
  CLIP prediction: freight car
  ICE prediction: freight car
  Captions:
    a photo of: a photo of a boat in the water
    a photo containing: a photo containing the ship ' s name
    the object in the photo is: the object in the photo is a boat
  CLIP confidence: 68.99%
  ICE confidence: 68.99%

Image 4290:
  True class: cliff
  CLIP prediction: cliff
  ICE prediction: cliff
  Captions:
    a photo of: a photo of a mountain with a blue sky
    a photo containing: a photo containing the image of a mountain
    the object in the photo is: the object in the photo is a rock

 44%|████▍     | 69/157 [31:48<40:03, 27.31s/it]


--- Batch 68 examples ---

Image 4352:
  True class: golden retriever
  CLIP prediction: Labrador retriever
  ICE prediction: golden retriever
  Captions:
    a photo of: a photo of a dog laying on the ground
    a photo containing: a photo containing of a golden retrieve
    the object in the photo is: the object in the photo is a golden retrieve
  CLIP confidence: 41.82%
  ICE confidence: 45.39%

Image 4353:
  True class: butcher shop
  CLIP prediction: confectionery
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a boat with flowers on it
    a photo containing: a photo containing the image of a boat full of flowers
    the object in the photo is: the object in the photo is a boat
  CLIP confidence: 17.55%
  ICE confidence: 17.61%

Image 4354:
  True class: apron
  CLIP prediction: apron
  ICE prediction: apron
  Captions:
    a photo of: a photo of a woman in a dress
    a photo containing: a photo containing of a christmas tree
    the object in the photo i

 45%|████▍     | 70/157 [32:15<39:36, 27.31s/it]


--- Batch 69 examples ---

Image 4416:
  True class: beacon
  CLIP prediction: beacon
  ICE prediction: beacon
  Captions:
    a photo of: a photo of a lighthouse on a hill
    a photo containing: a photo containing the lighthouse at cape point
    the object in the photo is: the object in the photo is a lighthouse
  CLIP confidence: 69.04%
  ICE confidence: 73.83%

Image 4417:
  True class: pop bottle
  CLIP prediction: jinrikisha
  ICE prediction: jinrikisha
  Captions:
    a photo of: a photo of a bottle of wine and a bottle of wine
    a photo containing: a photo containing the bottle of the bottle
    the object in the photo is: the object in the photo is a bottle of wine
  CLIP confidence: 15.21%
  ICE confidence: 15.21%

Image 4418:
  True class: cliff dwelling
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a mountain with trees and bushes
    a photo containing: a photo containing of a man on a mountain
    the object 

 45%|████▌     | 71/157 [32:43<39:18, 27.42s/it]


--- Batch 70 examples ---

Image 4480:
  True class: American lobster
  CLIP prediction: plate
  ICE prediction: plate
  Captions:
    a photo of: a photo of a plate of food with a glass of wine
    a photo containing: a photo containing the image of a plate of food
    the object in the photo is: the object in the photo is a plate of food
  CLIP confidence: 37.74%
  ICE confidence: 45.39%

Image 4481:
  True class: volleyball
  CLIP prediction: volleyball
  ICE prediction: volleyball
  Captions:
    a photo of: a photo of a football game with the ball in the air
    a photo containing: a photo containing the action of a football game
    the object in the photo is: the object in the photo is a ball
  CLIP confidence: 98.78%
  ICE confidence: 99.80%

Image 4482:
  True class: sea slug
  CLIP prediction: goldfish
  ICE prediction: goldfish
  Captions:
    a photo of: a photo of a nebula with a red nebula in the background
    a photo containing: a photo containing the image of a star
 

 46%|████▌     | 72/157 [33:09<38:23, 27.10s/it]


--- Batch 71 examples ---

Image 4544:
  True class: bullfrog
  CLIP prediction: bullfrog
  ICE prediction: bullfrog
  Captions:
    a photo of: a photo of a man and woman dancing
    a photo containing: a photo containing the image of a frog
    the object in the photo is: the object in the photo is a green object
  CLIP confidence: 54.93%
  ICE confidence: 54.93%

Image 4545:
  True class: academic gown
  CLIP prediction: academic gown
  ICE prediction: academic gown
  Captions:
    a photo of: a photo of a suit and tie hanging on a rack
    a photo containing: a photo containing of a suit and tie
    the object in the photo is: the object in the photo is a suit
  CLIP confidence: 89.55%
  ICE confidence: 89.55%

Image 4546:
  True class: obelisk
  CLIP prediction: obelisk
  ICE prediction: obelisk
  Captions:
    a photo of: a photo of a tall tower in a city
    a photo containing: a photo containing the statue of liberty
    the object in the photo is: the object in the photo is a

 46%|████▋     | 73/157 [33:37<38:19, 27.38s/it]


--- Batch 72 examples ---

Image 4608:
  True class: pole
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a building with a sky background
    a photo containing: a photo containing the pyramid of the pyramids of git
    the object in the photo is: the object in the photo is a pyramid
  CLIP confidence: 47.02%
  ICE confidence: 47.02%

Image 4609:
  True class: basketball
  CLIP prediction: basketball
  ICE prediction: basketball
  Captions:
    a photo of: a photo of a basketball player in action
    a photo containing: a photo containing the basketball game between the two teams
    the object in the photo is: the object in the photo is a basketball player
  CLIP confidence: 80.66%
  ICE confidence: 86.91%

Image 4610:
  True class: drumstick
  CLIP prediction: drumstick
  ICE prediction: drumstick
  Captions:
    a photo of: a photo of a drum
    a photo containing: a photo containing the drum
    the object in the photo is: the object in

 47%|████▋     | 74/157 [34:06<38:28, 27.81s/it]


--- Batch 73 examples ---

Image 4672:
  True class: water jug
  CLIP prediction: water jug
  ICE prediction: water jug
  Captions:
    a photo of: a photo of a blue glass
    a photo containing: a photo containing the blue diamond
    the object in the photo is: the object in the photo is a blue glass
  CLIP confidence: 33.59%
  ICE confidence: 33.67%

Image 4673:
  True class: seashore
  CLIP prediction: seashore
  ICE prediction: seashore
  Captions:
    a photo of: a photo of a beach with a rock in the water
    a photo containing: a photo containing the ocean
    the object in the photo is: the object in the photo is a rock
  CLIP confidence: 25.66%
  ICE confidence: 33.30%

Image 4674:
  True class: maypole
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a dance class
    a photo containing: a photo containing of a dance class
    the object in the photo is: the object in the photo is a white object
  CLIP confidence: 81.98%
  ICE conf

 48%|████▊     | 75/157 [34:34<37:47, 27.65s/it]


--- Batch 74 examples ---

Image 4736:
  True class: cliff dwelling
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a brick wall with a hole in it
    a photo containing: a photo containing the site of the ancient tomb of the pharaoh
    the object in the photo is: the object in the photo is a large rock
  CLIP confidence: 99.46%
  ICE confidence: 99.46%

Image 4737:
  True class: oboe
  CLIP prediction: oboe
  ICE prediction: oboe
  Captions:
    a photo of: a photo of a classroom
    a photo containing: a photo containing a classroom
    the object in the photo is: the object in the photo is a white object
  CLIP confidence: 21.84%
  ICE confidence: 21.84%

Image 4738:
  True class: sewing machine
  CLIP prediction: sewing machine
  ICE prediction: sewing machine
  Captions:
    a photo of: a photo of a table with a bunch of flowers
    a photo containing: a photo containing of a table with a bunch of flowers
    the object i

 48%|████▊     | 76/157 [35:01<37:15, 27.60s/it]


--- Batch 75 examples ---

Image 4800:
  True class: mantis
  CLIP prediction: mantis
  ICE prediction: mantis
  Captions:
    a photo of: a photo of a praying praying praying praying praying praying praying praying praying praying praying praying praying praying praying praying praying
    a photo containing: a photo containing with the image of a praying mantis
    the object in the photo is: the object in the photo is a praying praying praying praying praying praying praying praying praying praying praying praying praying praying
  CLIP confidence: 87.84%
  ICE confidence: 87.89%

Image 4801:
  True class: teddy
  CLIP prediction: confectionery
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a cake with many different colors
    a photo containing: a photo containing the image of a woman ' s face
    the object in the photo is: the object in the photo is a teddy bear
  CLIP confidence: 36.55%
  ICE confidence: 39.40%

Image 4802:
  True class: beer bottle
  C

 49%|████▉     | 77/157 [35:29<36:57, 27.71s/it]


--- Batch 76 examples ---

Image 4864:
  True class: monarch
  CLIP prediction: monarch
  ICE prediction: monarch
  Captions:
    a photo of: a photo of a butterfly on a flower
    a photo containing: a photo containing the butterfly effect
    the object in the photo is: the object in the photo is a butterfly
  CLIP confidence: 58.84%
  ICE confidence: 60.06%

Image 4865:
  True class: birdhouse
  CLIP prediction: birdhouse
  ICE prediction: birdhouse
  Captions:
    a photo of: a photo of a kayak in the water
    a photo containing: a photo containing the image of a kayak
    the object in the photo is: the object in the photo is a boat
  CLIP confidence: 99.41%
  ICE confidence: 99.41%

Image 4866:
  True class: cockroach
  CLIP prediction: acorn
  ICE prediction: acorn
  Captions:
    a photo of: a photo of a bug on a table
    a photo containing: a photo containing of a bug on a table
    the object in the photo is: the object in the photo is a bug
  CLIP confidence: 57.13%
  ICE

 50%|████▉     | 78/157 [35:57<36:45, 27.91s/it]


--- Batch 77 examples ---

Image 4928:
  True class: fur coat
  CLIP prediction: fur coat
  ICE prediction: fur coat
  Captions:
    a photo of: a photo of a woman in a fur coat
    a photo containing: a photo containing of a woman in a fur coat
    the object in the photo is: the object in the photo is a woman in a black dress
  CLIP confidence: 78.61%
  ICE confidence: 84.96%

Image 4929:
  True class: plate
  CLIP prediction: meat loaf
  ICE prediction: meat loaf
  Captions:
    a photo of: a photo of a plate of food with a fork
    a photo containing: a photo containing the recipe of the chicken and rice cass
    the object in the photo is: the object in the photo is a plate of food
  CLIP confidence: 44.63%
  ICE confidence: 44.63%

Image 4930:
  True class: tailed frog
  CLIP prediction: tailed frog
  ICE prediction: tailed frog
  Captions:
    a photo of: a photo of a mouse on a blue background
    a photo containing: a photo containing the image of a mouse
    the object in th

 50%|█████     | 79/157 [36:25<36:10, 27.83s/it]


--- Batch 78 examples ---

Image 4992:
  True class: jinrikisha
  CLIP prediction: turnstile
  ICE prediction: turnstile
  Captions:
    a photo of: a photo of a horse drawn carriage
    a photo containing: a photo containing of a bug buggy
    the object in the photo is: the object in the photo is a bug bug
  CLIP confidence: 21.29%
  ICE confidence: 21.29%

Image 4993:
  True class: black stork
  CLIP prediction: black stork
  ICE prediction: black stork
  Captions:
    a photo of: a photo of a man in a boat
    a photo containing: a photo containing the image of a man in a boat
    the object in the photo is: the object in the photo is a bird
  CLIP confidence: 88.28%
  ICE confidence: 88.28%

Image 4994:
  True class: bathtub
  CLIP prediction: punching bag
  ICE prediction: punching bag
  Captions:
    a photo of: a photo of a man on a ladder
    a photo containing: a photo containing of a man climbing a ladder
    the object in the photo is: the object in the photo is a ladder
 

 51%|█████     | 80/157 [36:53<35:45, 27.86s/it]


--- Batch 79 examples ---

Image 5056:
  True class: king penguin
  CLIP prediction: king penguin
  ICE prediction: king penguin
  Captions:
    a photo of: a photo of a penguin walking on the beach
    a photo containing: a photo containing of a penguin
    the object in the photo is: the object in the photo is a penguin
  CLIP confidence: 99.27%
  ICE confidence: 104.39%

Image 5057:
  True class: dining table
  CLIP prediction: dining table
  ICE prediction: dining table
  Captions:
    a photo of: a photo of a living room with a couch and a television
    a photo containing: a photo containing of a living room with a couch and a table
    the object in the photo is: the object in the photo is a clock
  CLIP confidence: 93.21%
  ICE confidence: 95.56%

Image 5058:
  True class: scorpion
  CLIP prediction: scorpion
  ICE prediction: scorpion
  Captions:
    a photo of: a photo of a small dog on the ground
    a photo containing: a photo containing of a small black dog
    the object

 52%|█████▏    | 81/157 [37:21<35:09, 27.76s/it]


--- Batch 80 examples ---

Image 5120:
  True class: triumphal arch
  CLIP prediction: triumphal arch
  ICE prediction: triumphal arch
  Captions:
    a photo of: a photo of a group of people walking in front of a building
    a photo containing: a photo containing the ruins of the great wall of china
    the object in the photo is: the object in the photo is a stone wall
  CLIP confidence: 98.14%
  ICE confidence: 98.14%

Image 5121:
  True class: pretzel
  CLIP prediction: oboe
  ICE prediction: oboe
  Captions:
    a photo of: a photo of a man sitting at a table
    a photo containing: a photo containing the image of a man sitting at a table
    the object in the photo is: the object in the photo is a cat
  CLIP confidence: 7.15%
  ICE confidence: 7.15%

Image 5122:
  True class: thatch
  CLIP prediction: thatch
  ICE prediction: thatch
  Captions:
    a photo of: a photo of a house with a thatched roof
    a photo containing: a photo containing of the house of the late king of the

 52%|█████▏    | 82/157 [37:48<34:30, 27.61s/it]


--- Batch 81 examples ---

Image 5184:
  True class: sea cucumber
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a small animal in the grass
    a photo containing: a photo containing of a red - spotted hawk
    the object in the photo is: the object in the photo is a small, black object
  CLIP confidence: 92.87%
  ICE confidence: 92.87%

Image 5185:
  True class: guacamole
  CLIP prediction: pizza
  ICE prediction: plate
  Captions:
    a photo of: a photo of a plate of food with a green sauce
    a photo containing: a photo containing of a plate of food
    the object in the photo is: the object in the photo is a fish
  CLIP confidence: 33.18%
  ICE confidence: 33.89%

Image 5186:
  True class: Persian cat
  CLIP prediction: Persian cat
  ICE prediction: Persian cat
  Captions:
    a photo of: a photo of a cat in a blue box
    a photo containing: a photo containing of a kitten
    the object in the photo is: the object in the p

 53%|█████▎    | 83/157 [38:17<34:29, 27.97s/it]


--- Batch 82 examples ---

Image 5248:
  True class: apron
  CLIP prediction: jinrikisha
  ICE prediction: jinrikisha
  Captions:
    a photo of: a photo of a woman in a white dress
    a photo containing: a photo containing of a woman in a white dress
    the object in the photo is: the object in the photo is a blue and white object
  CLIP confidence: 30.05%
  ICE confidence: 30.05%

Image 5249:
  True class: suspension bridge
  CLIP prediction: triumphal arch
  ICE prediction: triumphal arch
  Captions:
    a photo of: a photo of a castle in the distance
    a photo containing: a photo containing of the site of the former castle of the city of khari
    the object in the photo is: the object in the photo is a large tree
  CLIP confidence: 36.35%
  ICE confidence: 36.35%

Image 5250:
  True class: jellyfish
  CLIP prediction: coral reef
  ICE prediction: coral reef
  Captions:
    a photo of: a photo of a woman on a television screen
    a photo containing: a photo containing the ima

 54%|█████▎    | 84/157 [38:44<33:57, 27.92s/it]


--- Batch 83 examples ---

Image 5312:
  True class: limousine
  CLIP prediction: limousine
  ICE prediction: limousine
  Captions:
    a photo of: a photo of a car with a blue background
    a photo containing: a photo containing of a car with a blue background
    the object in the photo is: the object in the photo is a car
  CLIP confidence: 99.37%
  ICE confidence: 99.37%

Image 5313:
  True class: frying pan
  CLIP prediction: wok
  ICE prediction: wok
  Captions:
    a photo of: a photo of a pan of food with vegetables
    a photo containing: a photo containing the image of a pan of food
    the object in the photo is: the object in the photo is a large piece of food
  CLIP confidence: 38.48%
  ICE confidence: 38.48%

Image 5314:
  True class: obelisk
  CLIP prediction: obelisk
  ICE prediction: obelisk
  Captions:
    a photo of: a photo of a tower with a clock on top
    a photo containing: a photo containing the pyramid of the pyramid of gi gi gi gi gi gi gi gi gi gi gi gi gi

 54%|█████▍    | 85/157 [39:12<33:13, 27.68s/it]


--- Batch 84 examples ---

Image 5376:
  True class: CD player
  CLIP prediction: CD player
  ICE prediction: CD player
  Captions:
    a photo of: a photo of a black and red oven
    a photo containing: a photo containing the image of a black and red fire
    the object in the photo is: the object in the photo is a red light
  CLIP confidence: 33.81%
  ICE confidence: 33.81%

Image 5377:
  True class: Chihuahua
  CLIP prediction: Chihuahua
  ICE prediction: Chihuahua
  Captions:
    a photo of: a photo of a dog laying on a blanket
    a photo containing: a photo containing of a dog ' s head
    the object in the photo is: the object in the photo is a dog
  CLIP confidence: 33.69%
  ICE confidence: 33.81%

Image 5378:
  True class: torch
  CLIP prediction: torch
  ICE prediction: torch
  Captions:
    a photo of: a photo of a fire hydra in the night
    a photo containing: a photo containing a fire hydrator
    the object in the photo is: the object in the photo is a fire hydra
  CLIP

 55%|█████▍    | 86/157 [39:40<32:58, 27.86s/it]


--- Batch 85 examples ---

Image 5440:
  True class: Yorkshire terrier
  CLIP prediction: Yorkshire terrier
  ICE prediction: Yorkshire terrier
  Captions:
    a photo of: a photo of a small dog in the grass
    a photo containing: a photo containing of a miniature scr puppy
    the object in the photo is: the object in the photo is a small dog
  CLIP confidence: 86.08%
  ICE confidence: 86.08%

Image 5441:
  True class: pill bottle
  CLIP prediction: iPod
  ICE prediction: iPod
  Captions:
    a photo of: a photo of a bunch of books on a table
    a photo containing: a photo containing of a pile of books
    the object in the photo is: the object in the photo is a pile of books
  CLIP confidence: 19.54%
  ICE confidence: 19.54%

Image 5442:
  True class: dumbbell
  CLIP prediction: dumbbell
  ICE prediction: dumbbell
  Captions:
    a photo of: a photo of a man in a suit and tie
    a photo containing: a photo containing the image of a man in a suit
    the object in the photo is: th

 55%|█████▌    | 87/157 [40:08<32:36, 27.94s/it]


--- Batch 86 examples ---

Image 5504:
  True class: Egyptian cat
  CLIP prediction: Persian cat
  ICE prediction: Persian cat
  Captions:
    a photo of: a photo of a group of sheeps in a pen
    a photo containing: a photo containing of a group of sheep
    the object in the photo is: the object in the photo is a dog
  CLIP confidence: 49.61%
  ICE confidence: 49.61%

Image 5505:
  True class: neck brace
  CLIP prediction: miniskirt
  ICE prediction: miniskirt
  Captions:
    a photo of: a photo of a man on a stage
    a photo containing: a photo containing of a man on stage
    the object in the photo is: the object in the photo is a man
  CLIP confidence: 23.34%
  ICE confidence: 23.34%

Image 5506:
  True class: orangutan
  CLIP prediction: orangutan
  ICE prediction: orangutan
  Captions:
    a photo of: a photo of a forest with a bird in the middle
    a photo containing: a photo containing of a forest in the amazon rainforest
    the object in the photo is: the object in the p

 56%|█████▌    | 88/157 [40:35<31:52, 27.72s/it]


--- Batch 87 examples ---

Image 5568:
  True class: organ
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a large building with a tower
    a photo containing: a photo containing the image of the statue of liberty
    the object in the photo is: the object in the photo is a large white object
  CLIP confidence: 19.47%
  ICE confidence: 19.47%

Image 5569:
  True class: punching bag
  CLIP prediction: punching bag
  ICE prediction: punching bag
  Captions:
    a photo of: a photo of a punching punching punching punching punching punching punching punching punching punching punching punching punching punching punching punching punching
    a photo containing: a photo containing the punching punching punching punching punching punching punching punching punching punching punching punching punching punching punching punching
    the object in the photo is: the object in the photo is a woman doing a kick
  CLIP confidence: 99.66%
  ICE confidenc

 57%|█████▋    | 89/157 [41:02<31:14, 27.56s/it]


--- Batch 88 examples ---

Image 5632:
  True class: vestment
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a man in a white shirt
    a photo containing: a photo containing of a man in a white shirt
    the object in the photo is: the object in the photo is a white object
  CLIP confidence: 54.98%
  ICE confidence: 54.98%

Image 5633:
  True class: sulphur butterfly
  CLIP prediction: sulphur butterfly
  ICE prediction: sulphur butterfly
  Captions:
    a photo of: a photo of a yellow leaf
    a photo containing: a photo containing the yellow leaf
    the object in the photo is: the object in the photo is a yellow leaf
  CLIP confidence: 61.91%
  ICE confidence: 61.91%

Image 5634:
  True class: Yorkshire terrier
  CLIP prediction: guinea pig
  ICE prediction: guinea pig
  Captions:
    a photo of: a photo of a bird in the grass
    a photo containing: a photo containing the image of a squirrel in a field
    the object in the photo is:

 57%|█████▋    | 90/157 [41:30<30:53, 27.66s/it]


--- Batch 89 examples ---

Image 5696:
  True class: dumbbell
  CLIP prediction: punching bag
  ICE prediction: punching bag
  Captions:
    a photo of: a photo of a man sitting in a chair
    a photo containing: a photo containing the person ' s chair
    the object in the photo is: the object in the photo is a chair
  CLIP confidence: 22.05%
  ICE confidence: 22.05%

Image 5697:
  True class: turnstile
  CLIP prediction: turnstile
  ICE prediction: turnstile
  Captions:
    a photo of: a photo of a lobby with a large screen
    a photo containing: a photo containing the lobby of a hotel
    the object in the photo is: the object in the photo is a white object
  CLIP confidence: 66.94%
  ICE confidence: 66.94%

Image 5698:
  True class: desk
  CLIP prediction: sewing machine
  ICE prediction: sewing machine
  Captions:
    a photo of: a photo of a room with a desk and a computer
    a photo containing: a photo containing of a room with a desk and a computer
    the object in the phot

 58%|█████▊    | 91/157 [42:00<30:58, 28.15s/it]


--- Batch 90 examples ---

Image 5760:
  True class: bell pepper
  CLIP prediction: bell pepper
  ICE prediction: bell pepper
  Captions:
    a photo of: a photo of three peppers on a table
    a photo containing: a photo containing the image of peppers
    the object in the photo is: the object in the photo is a bell pepper
  CLIP confidence: 96.92%
  ICE confidence: 102.64%

Image 5761:
  True class: monarch
  CLIP prediction: monarch
  ICE prediction: monarch
  Captions:
    a photo of: a photo of a butterfly in a field
    a photo containing: a photo containing the butterfly effect
    the object in the photo is: the object in the photo is a butterfly
  CLIP confidence: 77.15%
  ICE confidence: 78.76%

Image 5762:
  True class: computer keyboard
  CLIP prediction: computer keyboard
  ICE prediction: computer keyboard
  Captions:
    a photo of: a photo of a small white dog
    a photo containing: a photo containing the damage of a computer
    the object in the photo is: the objec

 59%|█████▊    | 92/157 [42:26<30:05, 27.78s/it]


--- Batch 91 examples ---

Image 5824:
  True class: bison
  CLIP prediction: bison
  ICE prediction: bison
  Captions:
    a photo of: a photo of a herd of buffalo grazing in a field
    a photo containing: a photo containing the wild boars
    the object in the photo is: the object in the photo is a bison
  CLIP confidence: 96.09%
  ICE confidence: 101.27%

Image 5825:
  True class: meat loaf
  CLIP prediction: meat loaf
  ICE prediction: meat loaf
  Captions:
    a photo of: a photo of a plate of food on a table
    a photo containing: a photo containing a plate of food
    the object in the photo is: the object in the photo is a hot dog
  CLIP confidence: 22.39%
  ICE confidence: 22.39%

Image 5826:
  True class: space heater
  CLIP prediction: abacus
  ICE prediction: abacus
  Captions:
    a photo of: a photo of a computer with a keyboard
    a photo containing: a photo containing the new york transit system
    the object in the photo is: the object in the photo is a large meta

 59%|█████▉    | 93/157 [42:54<29:38, 27.78s/it]


--- Batch 92 examples ---

Image 5888:
  True class: desk
  CLIP prediction: dining table
  ICE prediction: dining table
  Captions:
    a photo of: a photo of a table with a bunch of flowers
    a photo containing: a photo containing the color of a blue and white
    the object in the photo is: the object in the photo is a cat
  CLIP confidence: 26.56%
  ICE confidence: 30.18%

Image 5889:
  True class: potpie
  CLIP prediction: potpie
  ICE prediction: potpie
  Captions:
    a photo of: a photo of a plate of food with chicken
    a photo containing: a photo containing the recipe of a chicken pot pie
    the object in the photo is: the object in the photo is a plate of food
  CLIP confidence: 99.85%
  ICE confidence: 99.85%

Image 5890:
  True class: guinea pig
  CLIP prediction: guinea pig
  ICE prediction: guinea pig
  Captions:
    a photo of: a photo of a white rat
    a photo containing: a photo containing of a rat
    the object in the photo is: the object in the photo is a ham

 60%|█████▉    | 94/157 [43:22<29:08, 27.75s/it]


--- Batch 93 examples ---

Image 5952:
  True class: walking stick
  CLIP prediction: nail
  ICE prediction: nail
  Captions:
    a photo of: a photo of a person playing a piano
    a photo containing: a photo containing of a person ' s hand
    the object in the photo is: the object in the photo is a hand
  CLIP confidence: 14.92%
  ICE confidence: 14.92%

Image 5953:
  True class: sock
  CLIP prediction: sock
  ICE prediction: sock
  Captions:
    a photo of: a photo of a red star on a concrete surface
    a photo containing: a photo containing the star of the day
    the object in the photo is: the object in the photo is a red star
  CLIP confidence: 57.67%
  ICE confidence: 57.67%

Image 5954:
  True class: brain coral
  CLIP prediction: brain coral
  ICE prediction: brain coral
  Captions:
    a photo of: a photo of a large ball of sand
    a photo containing: a photo containing the image of a giant jellyfish
    the object in the photo is: the object in the photo is a ball
  CLI

 61%|██████    | 95/157 [43:50<28:42, 27.78s/it]


--- Batch 94 examples ---

Image 6016:
  True class: military uniform
  CLIP prediction: military uniform
  ICE prediction: military uniform
  Captions:
    a photo of: a photo of a group of men in uniform
    a photo containing: a photo containing the royal wedding of prince william and kate
    the object in the photo is: the object in the photo is a black and white uniform
  CLIP confidence: 64.06%
  ICE confidence: 70.61%

Image 6017:
  True class: dragonfly
  CLIP prediction: dragonfly
  ICE prediction: dragonfly
  Captions:
    a photo of: a photo of a bird sitting on a plant
    a photo containing: a photo containing the image of a butterfly
    the object in the photo is: the object in the photo is a butterfly
  CLIP confidence: 69.53%
  ICE confidence: 69.53%

Image 6018:
  True class: reel
  CLIP prediction: reel
  ICE prediction: reel
  Captions:
    a photo of: a photo of a spinning spinning machine
    a photo containing: a photo containing of a spinning rod
    the objec

 61%|██████    | 96/157 [44:17<28:11, 27.73s/it]


--- Batch 95 examples ---

Image 6080:
  True class: triumphal arch
  CLIP prediction: triumphal arch
  ICE prediction: triumphal arch
  Captions:
    a photo of: a photo of a large ship in the water
    a photo containing: a photo containing with the flag of the united states is seen on a wall in the east side of the pentagon pentagon
    the object in the photo is: the object in the photo is a large object
  CLIP confidence: 57.76%
  ICE confidence: 57.76%

Image 6081:
  True class: bell pepper
  CLIP prediction: bell pepper
  ICE prediction: bell pepper
  Captions:
    a photo of: a photo of a plant in a pot
    a photo containing: a photo containing of a plant
    the object in the photo is: the object in the photo is a plant
  CLIP confidence: 16.08%
  ICE confidence: 16.08%

Image 6082:
  True class: grasshopper
  CLIP prediction: grasshopper
  ICE prediction: grasshopper
  Captions:
    a photo of: a photo of a lizard on a leaf
    a photo containing: a photo containing of a li

 62%|██████▏   | 97/157 [44:45<27:34, 27.57s/it]


--- Batch 96 examples ---

Image 6144:
  True class: teddy
  CLIP prediction: teddy
  ICE prediction: teddy
  Captions:
    a photo of: a photo of a teddy bear sitting on a table
    a photo containing: a photo containing of a teddy bear
    the object in the photo is: the object in the photo is a teddy bear
  CLIP confidence: 94.82%
  ICE confidence: 100.68%

Image 6145:
  True class: backpack
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a black and white dog
    a photo containing: a photo containing of a black and white dog
    the object in the photo is: the object in the photo is a black and white dog
  CLIP confidence: 28.37%
  ICE confidence: 28.37%

Image 6146:
  True class: ice lolly
  CLIP prediction: standard poodle
  ICE prediction: standard poodle
  Captions:
    a photo of: a photo of a dog with a tennis ball
    a photo containing: a photo containing of a dog with a tennis ball
    the object in the photo is: the object i

 62%|██████▏   | 98/157 [45:13<27:29, 27.96s/it]


--- Batch 97 examples ---

Image 6208:
  True class: gazelle
  CLIP prediction: gazelle
  ICE prediction: gazelle
  Captions:
    a photo of: a photo of a deer in the woods
    a photo containing: a photo containing of a deer in the woods
    the object in the photo is: the object in the photo is a deer
  CLIP confidence: 46.61%
  ICE confidence: 46.63%

Image 6209:
  True class: rocking chair
  CLIP prediction: rocking chair
  ICE prediction: rocking chair
  Captions:
    a photo of: a photo of a living room with a couch and a chair
    a photo containing: a photo containing of a room with a couch and a chair
    the object in the photo is: the object in the photo is a red chair
  CLIP confidence: 98.63%
  ICE confidence: 104.00%

Image 6210:
  True class: CD player
  CLIP prediction: CD player
  ICE prediction: CD player
  Captions:
    a photo of: a photo of a radio in a car
    a photo containing: a photo containing of the radio system
    the object in the photo is: the object in

 63%|██████▎   | 99/157 [45:41<26:48, 27.73s/it]


--- Batch 98 examples ---

Image 6272:
  True class: umbrella
  CLIP prediction: umbrella
  ICE prediction: umbrella
  Captions:
    a photo of: a photo of a flooded street with a blue umbrella
    a photo containing: a photo containing the flood
    the object in the photo is: the object in the photo is a blue umbrella
  CLIP confidence: 93.31%
  ICE confidence: 99.12%

Image 6273:
  True class: brass
  CLIP prediction: obelisk
  ICE prediction: obelisk
  Captions:
    a photo of: a photo of a building with a large tower
    a photo containing: a photo containing the image of the pyramid of git
    the object in the photo is: the object in the photo is a large, triangular shaped object
  CLIP confidence: 22.07%
  ICE confidence: 23.02%

Image 6274:
  True class: bison
  CLIP prediction: bison
  ICE prediction: bison
  Captions:
    a photo of: a photo of a bison in a field
    a photo containing: a photo containing of a bison in a field
    the object in the photo is: the object in t

 64%|██████▎   | 100/157 [46:08<26:21, 27.74s/it]


--- Batch 99 examples ---

Image 6336:
  True class: rugby ball
  CLIP prediction: lakeside
  ICE prediction: lakeside
  Captions:
    a photo of: a photo of a person wadi in a lake
    a photo containing: a photo containing of a man wadiing in a lake
    the object in the photo is: the object in the photo is a man in a red shirt
  CLIP confidence: 26.59%
  ICE confidence: 34.28%

Image 6337:
  True class: cash machine
  CLIP prediction: pay-phone
  ICE prediction: pay-phone
  Captions:
    a photo of: a photo of a store with a sign on the front
    a photo containing: a photo containing the video of a man running a marathon
    the object in the photo is: the object in the photo is a fire hydra
  CLIP confidence: 23.65%
  ICE confidence: 23.65%

Image 6338:
  True class: dam
  CLIP prediction: suspension bridge
  ICE prediction: suspension bridge
  Captions:
    a photo of: a photo of a road with a car driving down it
    a photo containing: a photo containing the road
    the object

 64%|██████▍   | 101/157 [46:37<26:07, 27.99s/it]


--- Batch 100 examples ---

Image 6400:
  True class: ox
  CLIP prediction: go-kart
  ICE prediction: go-kart
  Captions:
    a photo of: a photo of a crowd of people walking down a street
    a photo containing: a photo containing of the scene in the scene of the attack
    the object in the photo is: the object in the photo is a red car
  CLIP confidence: 9.71%
  ICE confidence: 9.71%

Image 6401:
  True class: snorkel
  CLIP prediction: dugong
  ICE prediction: dugong
  Captions:
    a photo of: a photo of a green nebula
    a photo containing: a photo containing the milky
    the object in the photo is: the object in the photo is a black hole
  CLIP confidence: 45.17%
  ICE confidence: 45.17%

Image 6402:
  True class: spider web
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a large black hole in the sky
    a photo containing: a photo containing with the image of the comet
    the object in the photo is: the object in the ph

 65%|██████▍   | 102/157 [47:05<25:33, 27.87s/it]


--- Batch 101 examples ---

Image 6464:
  True class: ox
  CLIP prediction: ox
  ICE prediction: ox
  Captions:
    a photo of: a photo of a brown dog running through the grass
    a photo containing: a photo containing of a bull
    the object in the photo is: the object in the photo is a brown cow
  CLIP confidence: 69.68%
  ICE confidence: 72.71%

Image 6465:
  True class: orangutan
  CLIP prediction: orangutan
  ICE prediction: orangutan
  Captions:
    a photo of: a photo of a man with a monkey on his shoulder
    a photo containing: a photo containing of a monkey
    the object in the photo is: the object in the photo is a monkey
  CLIP confidence: 96.29%
  ICE confidence: 96.34%

Image 6466:
  True class: bucket
  CLIP prediction: bucket
  ICE prediction: bucket
  Captions:
    a photo of: a photo of a white plastic trash can
    a photo containing: a photo containing thermic of a plastic cup
    the object in the photo is: the object in the photo is a white plastic container
 

 66%|██████▌   | 103/157 [47:32<24:54, 27.68s/it]


--- Batch 102 examples ---

Image 6528:
  True class: slug
  CLIP prediction: bee
  ICE prediction: bee
  Captions:
    a photo of: a photo of a man in a suit and tie
    a photo containing: a photo containing the fire
    the object in the photo is: the object in the photo is a green object
  CLIP confidence: 41.41%
  ICE confidence: 41.41%

Image 6529:
  True class: lawn mower
  CLIP prediction: lawn mower
  ICE prediction: lawn mower
  Captions:
    a photo of: a photo of a man riding a horse
    a photo containing: a photo containing of a man in a park
    the object in the photo is: the object in the photo is a red car
  CLIP confidence: 57.81%
  ICE confidence: 57.81%

Image 6530:
  True class: thatch
  CLIP prediction: thatch
  ICE prediction: thatch
  Captions:
    a photo of: a photo of the ruins of the castle
    a photo containing: a photo containing the site of the former person ' s house in the village of person
    the object in the photo is: the object in the photo is a

 66%|██████▌   | 104/157 [48:00<24:35, 27.85s/it]


--- Batch 103 examples ---

Image 6592:
  True class: koala
  CLIP prediction: koala
  ICE prediction: koala
  Captions:
    a photo of: a photo of a koloa in a tree
    a photo containing: a photo containing the image of a koloa
    the object in the photo is: the object in the photo is a squirrel
  CLIP confidence: 32.64%
  ICE confidence: 32.69%

Image 6593:
  True class: vestment
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a man in a suit and tie
    a photo containing: a photo containing with the chinese flag
    the object in the photo is: the object in the photo is a red and blue object
  CLIP confidence: 89.21%
  ICE confidence: 89.21%

Image 6594:
  True class: snorkel
  CLIP prediction: swimming trunks
  ICE prediction: swimming trunks
  Captions:
    a photo of: a photo of a blue sky with stars
    a photo containing: a photo containing the image of a comet
    the object in the photo is: the object in the photo is a small w

 67%|██████▋   | 105/157 [48:27<23:59, 27.68s/it]


--- Batch 104 examples ---

Image 6656:
  True class: wooden spoon
  CLIP prediction: neck brace
  ICE prediction: neck brace
  Captions:
    a photo of: a photo of a man playing drums
    a photo containing: a photo containing the image of a drummer playing the drums
    the object in the photo is: the object in the photo is a drum player
  CLIP confidence: 32.62%
  ICE confidence: 32.62%

Image 6657:
  True class: Egyptian cat
  CLIP prediction: academic gown
  ICE prediction: academic gown
  Captions:
    a photo of: a photo of a woman sitting on a bed
    a photo containing: a photo containing of a woman sitting on a bed
    the object in the photo is: the object in the photo is a cat
  CLIP confidence: 31.10%
  ICE confidence: 31.10%

Image 6658:
  True class: banana
  CLIP prediction: orange
  ICE prediction: orange
  Captions:
    a photo of: a photo of a bunch of oranges and bananas
    a photo containing: a photo containing a banana and a banana
    the object in the photo is

 68%|██████▊   | 106/157 [48:54<23:14, 27.34s/it]


--- Batch 105 examples ---

Image 6720:
  True class: birdhouse
  CLIP prediction: birdhouse
  ICE prediction: birdhouse
  Captions:
    a photo of: a photo of a bird house in the snow
    a photo containing: a photo containing the snow storm in the area
    the object in the photo is: the object in the photo is a bird house
  CLIP confidence: 99.85%
  ICE confidence: 105.47%

Image 6721:
  True class: plunger
  CLIP prediction: go-kart
  ICE prediction: go-kart
  Captions:
    a photo of: a photo of a man in a baseball uniform
    a photo containing: a photo containing of a man throwing a ball
    the object in the photo is: the object in the photo is a red balloon
  CLIP confidence: 20.43%
  ICE confidence: 20.43%

Image 6722:
  True class: bee
  CLIP prediction: bee
  ICE prediction: bee
  Captions:
    a photo of: a photo of yellow flowers
    a photo containing: a photo containing the yellow flowers of the chlosa
    the object in the photo is: the object in the photo is a bee
  

 68%|██████▊   | 107/157 [49:22<22:59, 27.59s/it]


--- Batch 106 examples ---

Image 6784:
  True class: scorpion
  CLIP prediction: chain
  ICE prediction: chain
  Captions:
    a photo of: a photo of a small lizard on the sidewalk
    a photo containing: a photo containing of a small lizard on the sidewalk
    the object in the photo is: the object in the photo is a small lizard
  CLIP confidence: 28.39%
  ICE confidence: 28.39%

Image 6785:
  True class: picket fence
  CLIP prediction: beacon
  ICE prediction: beacon
  Captions:
    a photo of: a photo of a lighthouse on the beach
    a photo containing: a photo containing the lighthouse
    the object in the photo is: the object in the photo is a lighthouse
  CLIP confidence: 58.74%
  ICE confidence: 60.74%

Image 6786:
  True class: cash machine
  CLIP prediction: refrigerator
  ICE prediction: refrigerator
  Captions:
    a photo of: a photo of a man standing in front of a whiteboard
    a photo containing: a photo containing the image of a man in a lab
    the object in the pho

 69%|██████▉   | 108/157 [49:49<22:28, 27.53s/it]


--- Batch 107 examples ---

Image 6848:
  True class: obelisk
  CLIP prediction: obelisk
  ICE prediction: obelisk
  Captions:
    a photo of: a photo of the washington monument
    a photo containing: a photo containing the monument
    the object in the photo is: the object in the photo is a cross
  CLIP confidence: 88.87%
  ICE confidence: 88.96%

Image 6849:
  True class: lemon
  CLIP prediction: lemon
  ICE prediction: lemon
  Captions:
    a photo of: a photo of a tree with green leaves
    a photo containing: a photo containing the image of a tree
    the object in the photo is: the object in the photo is a ball
  CLIP confidence: 57.62%
  ICE confidence: 57.62%

Image 6850:
  True class: sulphur butterfly
  CLIP prediction: sulphur butterfly
  ICE prediction: sulphur butterfly
  Captions:
    a photo of: a photo of a butterfly on a flower
    a photo containing: a photo containing the yellow butterfly
    the object in the photo is: the object in the photo is a butterfly
  CLI

 69%|██████▉   | 109/157 [50:16<21:53, 27.37s/it]


--- Batch 108 examples ---

Image 6912:
  True class: neck brace
  CLIP prediction: neck brace
  ICE prediction: neck brace
  Captions:
    a photo of: a photo of a woman doing yoga
    a photo containing: a photo containing the image of a woman doing yoga
    the object in the photo is: the object in the photo is a woman doing yoga
  CLIP confidence: 61.67%
  ICE confidence: 61.67%

Image 6913:
  True class: sunglasses
  CLIP prediction: sunglasses
  ICE prediction: sunglasses
  Captions:
    a photo of: a photo of a woman with sunglasses on
    a photo containing: a photo containing of a woman with sunglasses
    the object in the photo is: the object in the photo is a cat
  CLIP confidence: 96.48%
  ICE confidence: 102.25%

Image 6914:
  True class: mantis
  CLIP prediction: mantis
  ICE prediction: mantis
  Captions:
    a photo of: a photo of a person on a skateboard
    a photo containing: a photo containing of a person on a skateboard
    the object in the photo is: the object 

 70%|███████   | 110/157 [50:44<21:26, 27.37s/it]


--- Batch 109 examples ---

Image 6976:
  True class: pizza
  CLIP prediction: pizza
  ICE prediction: pizza
  Captions:
    a photo of: a photo of a pizza with a pizza cutter
    a photo containing: a photo containing the pizza
    the object in the photo is: the object in the photo is a pizza
  CLIP confidence: 93.21%
  ICE confidence: 99.12%

Image 6977:
  True class: scoreboard
  CLIP prediction: scoreboard
  ICE prediction: scoreboard
  Captions:
    a photo of: a photo of a building with a sign on it
    a photo containing: a photo containing the site of the former person ' s house
    the object in the photo is: the object in the photo is a black and white object
  CLIP confidence: 99.66%
  ICE confidence: 99.66%

Image 6978:
  True class: mushroom
  CLIP prediction: mushroom
  ICE prediction: mushroom
  Captions:
    a photo of: a photo of a mushroom with a mushroom in the background
    a photo containing: a photo containing the image of a mushroom
    the object in the photo

 71%|███████   | 111/157 [51:14<21:33, 28.13s/it]


--- Batch 110 examples ---

Image 7040:
  True class: academic gown
  CLIP prediction: academic gown
  ICE prediction: academic gown
  Captions:
    a photo of: a photo of a man in a graduation gown
    a photo containing: a photo containing of a man in a graduation gown
    the object in the photo is: the object in the photo is a bear
  CLIP confidence: 99.85%
  ICE confidence: 105.47%

Image 7041:
  True class: volleyball
  CLIP prediction: volleyball
  ICE prediction: volleyball
  Captions:
    a photo of: a photo of a group of people jumping in the air
    a photo containing: a photo containing the beach volleyball team
    the object in the photo is: the object in the photo is a kite
  CLIP confidence: 90.33%
  ICE confidence: 94.58%

Image 7042:
  True class: birdhouse
  CLIP prediction: crane
  ICE prediction: crane
  Captions:
    a photo of: a photo of a street light with flowers in the background
    a photo containing: a photo containing the sky
    the object in the photo 

 71%|███████▏  | 112/157 [51:40<20:47, 27.71s/it]


--- Batch 111 examples ---

Image 7104:
  True class: dining table
  CLIP prediction: dining table
  ICE prediction: dining table
  Captions:
    a photo of: a photo of a living room with a couch and a table
    a photo containing: a photo containing of a dining area with a table and chairs
    the object in the photo is: the object in the photo is a table
  CLIP confidence: 62.60%
  ICE confidence: 69.38%

Image 7105:
  True class: grasshopper
  CLIP prediction: grasshopper
  ICE prediction: grasshopper
  Captions:
    a photo of: a photo of a person walking down a street
    a photo containing: a photo containing of a car and a person
    the object in the photo is: the object in the photo is a car
  CLIP confidence: 39.28%
  ICE confidence: 39.28%

Image 7106:
  True class: ice lolly
  CLIP prediction: ice lolly
  ICE prediction: ice lolly
  Captions:
    a photo of: a photo of a little boy eating a strawberry
    a photo containing: a photo containing of a child eating a strawberr

 72%|███████▏  | 113/157 [52:09<20:33, 28.04s/it]


--- Batch 112 examples ---

Image 7168:
  True class: stopwatch
  CLIP prediction: stopwatch
  ICE prediction: stopwatch
  Captions:
    a photo of: a photo of a bowl on a stove
    a photo containing: a photo containing the black and white image of a glass jar
    the object in the photo is: the object in the photo is a glass bowl
  CLIP confidence: 93.46%
  ICE confidence: 93.46%

Image 7169:
  True class: orange
  CLIP prediction: pomegranate
  ICE prediction: pomegranate
  Captions:
    a photo of: a photo of a blood orange on a black background
    a photo containing: a photo containing with a blood drop and blood
    the object in the photo is: the object in the photo is a blood drop
  CLIP confidence: 66.80%
  ICE confidence: 66.80%

Image 7170:
  True class: school bus
  CLIP prediction: school bus
  ICE prediction: school bus
  Captions:
    a photo of: a photo of a school bus on a highway
    a photo containing: a photo containing a school bus on a highway
    the object in 

 73%|███████▎  | 114/157 [52:35<19:39, 27.44s/it]


--- Batch 113 examples ---

Image 7232:
  True class: beer bottle
  CLIP prediction: beer bottle
  ICE prediction: beer bottle
  Captions:
    a photo of: a photo of a bear standing next to a bottle of beer
    a photo containing: a photo containing the bear and the bear cub
    the object in the photo is: the object in the photo is a bear
  CLIP confidence: 47.63%
  ICE confidence: 47.63%

Image 7233:
  True class: pole
  CLIP prediction: sandal
  ICE prediction: sandal
  Captions:
    a photo of: a photo of a cat on the sidewalk
    a photo containing: a photo containing of a cat on the sidewalk
    the object in the photo is: the object in the photo is a black cat
  CLIP confidence: 36.45%
  ICE confidence: 36.45%

Image 7234:
  True class: standard poodle
  CLIP prediction: standard poodle
  ICE prediction: standard poodle
  Captions:
    a photo of: a photo of a white dog sitting on the ground
    a photo containing: a photo containing of a white poodle puppy
    the object in th

 73%|███████▎  | 115/157 [53:04<19:28, 27.83s/it]


--- Batch 114 examples ---

Image 7296:
  True class: banana
  CLIP prediction: banana
  ICE prediction: banana
  Captions:
    a photo of: a photo of a banana
    a photo containing: a photo containing of a banana
    the object in the photo is: the object in the photo is a banana
  CLIP confidence: 97.31%
  ICE confidence: 103.03%

Image 7297:
  True class: European fire salamander
  CLIP prediction: European fire salamander
  ICE prediction: European fire salamander
  Captions:
    a photo of: a photo of a large, dark, brown area with a few green spots
    a photo containing: a photo containing the image of the earth
    the object in the photo is: the object in the photo is a large, bright, green, and yellow star
  CLIP confidence: 98.00%
  ICE confidence: 98.00%

Image 7298:
  True class: maypole
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a cemetery with a cross in the middle
    a photo containing: a photo containing the image of 

 74%|███████▍  | 116/157 [53:31<18:51, 27.60s/it]


--- Batch 115 examples ---

Image 7360:
  True class: candle
  CLIP prediction: candle
  ICE prediction: candle
  Captions:
    a photo of: a photo of a candle with a christmas tree in the background
    a photo containing: a photo containing the candle
    the object in the photo is: the object in the photo is a candle
  CLIP confidence: 94.68%
  ICE confidence: 100.49%

Image 7361:
  True class: potpie
  CLIP prediction: potpie
  ICE prediction: potpie
  Captions:
    a photo of: a photo of a pizza on a table
    a photo containing: a photo containing a pizza with a slice missing
    the object in the photo is: the object in the photo is a pizza
  CLIP confidence: 34.69%
  ICE confidence: 34.69%

Image 7362:
  True class: cougar
  CLIP prediction: fur coat
  ICE prediction: fur coat
  Captions:
    a photo of: a photo of a small mouse sitting on a table
    a photo containing: a photo containing of a mouse
    the object in the photo is: the object in the photo is a small mouse
  CL

 75%|███████▍  | 117/157 [53:58<18:15, 27.39s/it]


--- Batch 116 examples ---

Image 7424:
  True class: jellyfish
  CLIP prediction: rugby ball
  ICE prediction: rugby ball
  Captions:
    a photo of: a photo of a light in the water
    a photo containing: a photo containing with the image of a ufo - like object
    the object in the photo is: the object in the photo is a large white object
  CLIP confidence: 12.96%
  ICE confidence: 12.96%

Image 7425:
  True class: volleyball
  CLIP prediction: volleyball
  ICE prediction: volleyball
  Captions:
    a photo of: a photo of a group of people playing volleyball
    a photo containing: a photo containing the beach volleyball game
    the object in the photo is: the object in the photo is a volleyball ball
  CLIP confidence: 99.76%
  ICE confidence: 105.47%

Image 7426:
  True class: brain coral
  CLIP prediction: brain coral
  ICE prediction: brain coral
  Captions:
    a photo of: a photo of a black and white dog
    a photo containing: a photo containing with the image of a black hol

 75%|███████▌  | 118/157 [54:26<17:56, 27.61s/it]


--- Batch 117 examples ---

Image 7488:
  True class: pizza
  CLIP prediction: pizza
  ICE prediction: pizza
  Captions:
    a photo of: a photo of a pizza with a pizza in it
    a photo containing: a photo containing the recipe of the pizza
    the object in the photo is: the object in the photo is a pizza
  CLIP confidence: 80.81%
  ICE confidence: 87.16%

Image 7489:
  True class: Labrador retriever
  CLIP prediction: Labrador retriever
  ICE prediction: Labrador retriever
  Captions:
    a photo of: a photo of a white dog in the grass
    a photo containing: a photo containing of a white labie puppy
    the object in the photo is: the object in the photo is a white dog
  CLIP confidence: 86.67%
  ICE confidence: 86.67%

Image 7490:
  True class: black stork
  CLIP prediction: black stork
  ICE prediction: black stork
  Captions:
    a photo of: a photo of a bird in a field
    a photo containing: a photo containing of a bird in a field
    the object in the photo is: the object in

 76%|███████▌  | 119/157 [54:53<17:25, 27.52s/it]


--- Batch 118 examples ---

Image 7552:
  True class: dugong
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a jellyfish
    a photo containing: a photo containing the image of a jellyfish
    the object in the photo is: the object in the photo is a jelly
  CLIP confidence: 43.04%
  ICE confidence: 43.04%

Image 7553:
  True class: binoculars
  CLIP prediction: cannon
  ICE prediction: cannon
  Captions:
    a photo of: a photo of a bunch of green and white beads
    a photo containing: a photo containing the gold nugge
    the object in the photo is: the object in the photo is a gold plated object
  CLIP confidence: 19.02%
  ICE confidence: 19.03%

Image 7554:
  True class: water jug
  CLIP prediction: hog
  ICE prediction: hog
  Captions:
    a photo of: a photo of a group of people riding motorcycles
    a photo containing: a photo containing of a group of people riding motorcycles
    the object in the photo is: the object in t

 76%|███████▋  | 120/157 [55:21<16:57, 27.51s/it]


--- Batch 119 examples ---

Image 7616:
  True class: wooden spoon
  CLIP prediction: wooden spoon
  ICE prediction: wooden spoon
  Captions:
    a photo of: a photo of a frying pan with a frying pan full of chicken
    a photo containing: a photo containing the gold flakes
    the object in the photo is: the object in the photo is a frying pan
  CLIP confidence: 39.36%
  ICE confidence: 39.36%

Image 7617:
  True class: hourglass
  CLIP prediction: hourglass
  ICE prediction: hourglass
  Captions:
    a photo of: a photo of a clock with a clock face
    a photo containing: a photo containing the image of a gold - plated hourpiece
    the object in the photo is: the object in the photo is a gold object
  CLIP confidence: 92.53%
  ICE confidence: 92.53%

Image 7618:
  True class: dam
  CLIP prediction: dam
  ICE prediction: dam
  Captions:
    a photo of: a photo of a waterfall in the jungle
    a photo containing: a photo containing the site of the ancient tomb of the great buddha
   

 77%|███████▋  | 121/157 [55:48<16:30, 27.51s/it]


--- Batch 120 examples ---

Image 7680:
  True class: nail
  CLIP prediction: nail
  ICE prediction: nail
  Captions:
    a photo of: a photo of a cross on a stone wall
    a photo containing: a photo containing of a cross found in the area
    the object in the photo is: the object in the photo is a cross
  CLIP confidence: 31.79%
  ICE confidence: 31.79%

Image 7681:
  True class: grasshopper
  CLIP prediction: grasshopper
  ICE prediction: grasshopper
  Captions:
    a photo of: a photo of a bug on a wall
    a photo containing: a photo containing of a bug
    the object in the photo is: the object in the photo is a bug
  CLIP confidence: 82.91%
  ICE confidence: 82.96%

Image 7682:
  True class: crane
  CLIP prediction: triumphal arch
  ICE prediction: triumphal arch
  Captions:
    a photo of: a photo of a building with a clock on it
    a photo containing: a photo containing the image of a statue
    the object in the photo is: the object in the photo is a bird
  CLIP confidence

 78%|███████▊  | 122/157 [56:16<16:07, 27.64s/it]


--- Batch 121 examples ---

Image 7744:
  True class: lakeside
  CLIP prediction: dam
  ICE prediction: lakeside
  Captions:
    a photo of: a photo of a lake with trees and water
    a photo containing: a photo containing the image of a lake
    the object in the photo is: the object in the photo is a boat
  CLIP confidence: 40.82%
  ICE confidence: 43.87%

Image 7745:
  True class: reel
  CLIP prediction: punching bag
  ICE prediction: punching bag
  Captions:
    a photo of: a photo of a man walking down a street
    a photo containing: a photo containing a man walking in the street
    the object in the photo is: the object in the photo is a man
  CLIP confidence: 7.57%
  ICE confidence: 7.57%

Image 7746:
  True class: parking meter
  CLIP prediction: stopwatch
  ICE prediction: stopwatch
  Captions:
    a photo of: a photo of a car parked in front of a house
    a photo containing: a photo containing of a car with a number plate
    the object in the photo is: the object in the 

 78%|███████▊  | 123/157 [56:44<15:41, 27.68s/it]


--- Batch 122 examples ---

Image 7808:
  True class: pay-phone
  CLIP prediction: pay-phone
  ICE prediction: pay-phone
  Captions:
    a photo of: a photo of a yellow and blue building
    a photo containing: a photo containing the video of the video of the shooting incident
    the object in the photo is: the object in the photo is a yellow object
  CLIP confidence: 39.70%
  ICE confidence: 39.70%

Image 7809:
  True class: sea slug
  CLIP prediction: sea slug
  ICE prediction: sea slug
  Captions:
    a photo of: a photo of a white dog running through the grass
    a photo containing: a photo containing of a polar bear
    the object in the photo is: the object in the photo is a white dog
  CLIP confidence: 65.09%
  ICE confidence: 65.09%

Image 7810:
  True class: rugby ball
  CLIP prediction: rugby ball
  ICE prediction: rugby ball
  Captions:
    a photo of: a photo of a group of people playing soccer
    a photo containing: a photo containing the action of a rugby match
    th

 79%|███████▉  | 124/157 [57:13<15:22, 27.96s/it]


--- Batch 123 examples ---

Image 7872:
  True class: spider web
  CLIP prediction: tabby
  ICE prediction: tabby
  Captions:
    a photo of: a photo of a building with a tree in the fore
    a photo containing: a photo containing of the scene of the scene in the film
    the object in the photo is: the object in the photo is a tree
  CLIP confidence: 22.41%
  ICE confidence: 22.41%

Image 7873:
  True class: jinrikisha
  CLIP prediction: go-kart
  ICE prediction: go-kart
  Captions:
    a photo of: a photo of a group of people riding bikes
    a photo containing: a photo containing the image of a man on a bicycle
    the object in the photo is: the object in the photo is a bicycle
  CLIP confidence: 43.73%
  ICE confidence: 43.73%

Image 7874:
  True class: American alligator
  CLIP prediction: American alligator
  ICE prediction: American alligator
  Captions:
    a photo of: a photo of a bear in the woods
    a photo containing: a photo containing of a bear in the wild
    the obje

 80%|███████▉  | 125/157 [57:40<14:47, 27.73s/it]


--- Batch 124 examples ---

Image 7936:
  True class: centipede
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a small red flower on the ground
    a photo containing: a photo containing with the image of a red star
    the object in the photo is: the object in the photo is a small red object
  CLIP confidence: 24.49%
  ICE confidence: 24.49%

Image 7937:
  True class: school bus
  CLIP prediction: school bus
  ICE prediction: school bus
  Captions:
    a photo of: a photo of a yellow truck
    a photo containing: a photo containing the cover of the book
    the object in the photo is: the object in the photo is a yellow truck
  CLIP confidence: 82.86%
  ICE confidence: 87.99%

Image 7938:
  True class: slug
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a dog laying on the floor
    a photo containing: a photo containing the image of a dead fish
    the object in the photo is

 80%|████████  | 126/157 [58:08<14:23, 27.85s/it]


--- Batch 125 examples ---

Image 8000:
  True class: albatross
  CLIP prediction: albatross
  ICE prediction: albatross
  Captions:
    a photo of: a photo of a bald eagle
    a photo containing: a photo containing of a bald eagle
    the object in the photo is: the object in the photo is a bird
  CLIP confidence: 84.28%
  ICE confidence: 84.28%

Image 8001:
  True class: steel arch bridge
  CLIP prediction: steel arch bridge
  ICE prediction: steel arch bridge
  Captions:
    a photo of: a photo of a bridge
    a photo containing: a photo containing the bridge
    the object in the photo is: the object in the photo is a fish
  CLIP confidence: 91.60%
  ICE confidence: 91.75%

Image 8002:
  True class: tabby
  CLIP prediction: Egyptian cat
  ICE prediction: Egyptian cat
  Captions:
    a photo of: a photo of a cat sitting on a table
    a photo containing: a photo containing of a cat on a table
    the object in the photo is: the object in the photo is a cat
  CLIP confidence: 79.05%

 81%|████████  | 127/157 [58:35<13:51, 27.71s/it]


--- Batch 126 examples ---

Image 8064:
  True class: pop bottle
  CLIP prediction: beaker
  ICE prediction: beaker
  Captions:
    a photo of: a photo of a small dog with a blue shirt
    a photo containing: a photo containing of a dog with a blue vest
    the object in the photo is: the object in the photo is a toy
  CLIP confidence: 17.33%
  ICE confidence: 17.33%

Image 8065:
  True class: dragonfly
  CLIP prediction: dragonfly
  ICE prediction: dragonfly
  Captions:
    a photo of: a photo of a dragonfly
    a photo containing: a photo containing the dragonfly
    the object in the photo is: the object in the photo is a dragon
  CLIP confidence: 98.63%
  ICE confidence: 104.30%

Image 8066:
  True class: black stork
  CLIP prediction: black stork
  ICE prediction: black stork
  Captions:
    a photo of: a photo of a bird standing on a rock
    a photo containing: a photo containing the image of a bird on a rock
    the object in the photo is: the object in the photo is a bird
  C

 82%|████████▏ | 128/157 [59:01<13:09, 27.21s/it]


--- Batch 127 examples ---

Image 8128:
  True class: bee
  CLIP prediction: bee
  ICE prediction: bee
  Captions:
    a photo of: a photo of a white flower with yellow center
    a photo containing: a photo containing the white flower
    the object in the photo is: the object in the photo is a white flower
  CLIP confidence: 94.68%
  ICE confidence: 96.14%

Image 8129:
  True class: lemon
  CLIP prediction: lemon
  ICE prediction: lemon
  Captions:
    a photo of: a photo of three donuts
    a photo containing: a photo containing the three apples
    the object in the photo is: the object in the photo is a dont
  CLIP confidence: 40.43%
  ICE confidence: 40.43%

Image 8130:
  True class: cockroach
  CLIP prediction: cockroach
  ICE prediction: cockroach
  Captions:
    a photo of: a photo of a spider crawling on a concrete floor
    a photo containing: a photo containing with a picture of a spider
    the object in the photo is: the object in the photo is a spider
  CLIP confidence:

 82%|████████▏ | 129/157 [59:28<12:40, 27.15s/it]


--- Batch 128 examples ---

Image 8192:
  True class: cliff
  CLIP prediction: goldfish
  ICE prediction: goldfish
  Captions:
    a photo of: a photo of a field with a red soil
    a photo containing: a photo containing the red soil of the red soil
    the object in the photo is: the object in the photo is a red mound
  CLIP confidence: 11.27%
  ICE confidence: 11.27%

Image 8193:
  True class: steel arch bridge
  CLIP prediction: steel arch bridge
  ICE prediction: steel arch bridge
  Captions:
    a photo of: a photo of the bridge that is being built
    a photo containing: a photo containing the bridge that was built in the 1930s
    the object in the photo is: the object in the photo is a large arch
  CLIP confidence: 84.28%
  ICE confidence: 86.87%

Image 8194:
  True class: koala
  CLIP prediction: spiny lobster
  ICE prediction: spiny lobster
  Captions:
    a photo of: a photo of a man in a room
    a photo containing: a photo containing a person in a room
    the object in t

 83%|████████▎ | 130/157 [59:56<12:15, 27.23s/it]


--- Batch 129 examples ---

Image 8256:
  True class: Labrador retriever
  CLIP prediction: ox
  ICE prediction: ox
  Captions:
    a photo of: a photo of a black bear in the woods
    a photo containing: a photo containing of a bear in the wild
    the object in the photo is: the object in the photo is a black bear
  CLIP confidence: 9.92%
  ICE confidence: 9.92%

Image 8257:
  True class: abacus
  CLIP prediction: abacus
  ICE prediction: abacus
  Captions:
    a photo of: a photo of a man in a blue shirt
    a photo containing: a photo containing the first page of the book, the first page of the book
    the object in the photo is: the object in the photo is a blue background
  CLIP confidence: 31.05%
  ICE confidence: 31.05%

Image 8258:
  True class: desk
  CLIP prediction: desk
  ICE prediction: desk
  Captions:
    a photo of: a photo of a computer
    a photo containing: a photo containing the legos
    the object in the photo is: the object in the photo is a computer
  CLIP c

 83%|████████▎ | 131/157 [1:00:24<11:56, 27.57s/it]


--- Batch 130 examples ---

Image 8320:
  True class: pomegranate
  CLIP prediction: pomegranate
  ICE prediction: pomegranate
  Captions:
    a photo of: a photo of a heart made out of red roses
    a photo containing: a photo containing with a heart made of red roses
    the object in the photo is: the object in the photo is a heart shaped object
  CLIP confidence: 93.02%
  ICE confidence: 93.02%

Image 8321:
  True class: academic gown
  CLIP prediction: academic gown
  ICE prediction: academic gown
  Captions:
    a photo of: a photo of the team at the start of the race
    a photo containing: a photo containing the team of the men ' s team
    the object in the photo is: the object in the photo is a red and blue jacket
  CLIP confidence: 89.84%
  ICE confidence: 89.84%

Image 8322:
  True class: plate
  CLIP prediction: wooden spoon
  ICE prediction: wooden spoon
  Captions:
    a photo of: a photo of a watch with a brown dial
    a photo containing: a photo containing the watch


 84%|████████▍ | 132/157 [1:00:52<11:29, 27.60s/it]


--- Batch 131 examples ---

Image 8384:
  True class: sea cucumber
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a worm on the ground
    a photo containing: a photo containing of a black worm
    the object in the photo is: the object in the photo is a black worm
  CLIP confidence: 49.17%
  ICE confidence: 49.17%

Image 8385:
  True class: bucket
  CLIP prediction: bucket
  ICE prediction: bucket
  Captions:
    a photo of: a photo of a glass of water
    a photo containing: a photo containing the color of the orange
    the object in the photo is: the object in the photo is a glass
  CLIP confidence: 57.18%
  ICE confidence: 57.18%

Image 8386:
  True class: beaker
  CLIP prediction: beaker
  ICE prediction: beaker
  Captions:
    a photo of: a photo of a bottle of liquid
    a photo containing: a photo containing the chemical process
    the object in the photo is: the object in the photo is a bottle
  CLIP confidence: 45.21%


 85%|████████▍ | 133/157 [1:01:19<11:01, 27.58s/it]


--- Batch 132 examples ---

Image 8448:
  True class: dam
  CLIP prediction: dam
  ICE prediction: dam
  Captions:
    a photo of: a photo of a bridge over a river
    a photo containing: a photo containing the water
    the object in the photo is: the object in the photo is a large white object
  CLIP confidence: 98.78%
  ICE confidence: 99.02%

Image 8449:
  True class: chain
  CLIP prediction: chain
  ICE prediction: chain
  Captions:
    a photo of: a photo of a man in a suit and tie
    a photo containing: a photo containing the image of a man in a suit
    the object in the photo is: the object in the photo is a man with a beard
  CLIP confidence: 49.56%
  ICE confidence: 49.56%

Image 8450:
  True class: European fire salamander
  CLIP prediction: European fire salamander
  ICE prediction: European fire salamander
  Captions:
    a photo of: a photo of a bird in the woods
    a photo containing: a photo containing of a black bear in the wild
    the object in the photo is: the 

 85%|████████▌ | 134/157 [1:01:47<10:34, 27.60s/it]


--- Batch 133 examples ---

Image 8512:
  True class: bighorn
  CLIP prediction: lakeside
  ICE prediction: lakeside
  Captions:
    a photo of: a photo of a mountain range
    a photo containing: a photo containing the mountains
    the object in the photo is: the object in the photo is a moose
  CLIP confidence: 36.82%
  ICE confidence: 39.21%

Image 8513:
  True class: fountain
  CLIP prediction: fountain
  ICE prediction: fountain
  Captions:
    a photo of: a photo of a city at night
    a photo containing: a photo containing the night scene of a lake
    the object in the photo is: the object in the photo is a fountain
  CLIP confidence: 97.85%
  ICE confidence: 97.95%

Image 8514:
  True class: turnstile
  CLIP prediction: turnstile
  ICE prediction: turnstile
  Captions:
    a photo of: a photo of a lobby with a large screen
    a photo containing: a photo containing the lobby of the new york international airport
    the object in the photo is: the object in the photo is a wh

 86%|████████▌ | 135/157 [1:02:14<10:03, 27.42s/it]


--- Batch 134 examples ---

Image 8576:
  True class: cauliflower
  CLIP prediction: cauliflower
  ICE prediction: cauliflower
  Captions:
    a photo of: a photo of a tennis ball in the dark
    a photo containing: a photo containing the image of a tennis ball
    the object in the photo is: the object in the photo is a tennis ball
  CLIP confidence: 62.30%
  ICE confidence: 62.30%

Image 8577:
  True class: goldfish
  CLIP prediction: goldfish
  ICE prediction: goldfish
  Captions:
    a photo of: a photo of a fish
    a photo containing: a photo containing the oranges
    the object in the photo is: the object in the photo is a fish
  CLIP confidence: 93.02%
  ICE confidence: 93.21%

Image 8578:
  True class: alp
  CLIP prediction: beacon
  ICE prediction: beacon
  Captions:
    a photo of: a photo of a person walking on a snowy path
    a photo containing: a photo containing the night sky
    the object in the photo is: the object in the photo is a plane
  CLIP confidence: 33.20%


 87%|████████▋ | 136/157 [1:02:41<09:29, 27.14s/it]


--- Batch 135 examples ---

Image 8640:
  True class: standard poodle
  CLIP prediction: albatross
  ICE prediction: albatross
  Captions:
    a photo of: a photo of a dog
    a photo containing: a photo containing of a white dog
    the object in the photo is: the object in the photo is a white dog
  CLIP confidence: 96.19%
  ICE confidence: 96.19%

Image 8641:
  True class: Arabian camel
  CLIP prediction: Arabian camel
  ICE prediction: Arabian camel
  Captions:
    a photo of: a photo of a camel and a dog
    a photo containing: a photo containing of a camel and a dog
    the object in the photo is: the object in the photo is a camel
  CLIP confidence: 99.32%
  ICE confidence: 104.69%

Image 8642:
  True class: pomegranate
  CLIP prediction: pomegranate
  ICE prediction: pomegranate
  Captions:
    a photo of: a photo of a red flower on a blue sky background
    a photo containing: a photo containing the poins
    the object in the photo is: the object in the photo is a red flower

 87%|████████▋ | 137/157 [1:03:10<09:15, 27.79s/it]


--- Batch 136 examples ---

Image 8704:
  True class: bikini
  CLIP prediction: bikini
  ICE prediction: bikini
  Captions:
    a photo of: a photo of a woman on a boat
    a photo containing: a photo containing of a woman on a boat
    the object in the photo is: the object in the photo is a sail
  CLIP confidence: 42.33%
  ICE confidence: 42.33%

Image 8705:
  True class: fur coat
  CLIP prediction: fur coat
  ICE prediction: fur coat
  Captions:
    a photo of: a photo of a man in a white coat
    a photo containing: a photo containing of a man in a white coat
    the object in the photo is: the object in the photo is a white coat
  CLIP confidence: 61.77%
  ICE confidence: 66.60%

Image 8706:
  True class: bikini
  CLIP prediction: bikini
  ICE prediction: bikini
  Captions:
    a photo of: a photo of a man standing on a baseball field
    a photo containing: a photo containing the person who was the first person to play in the game
    the object in the photo is: the object in th

 88%|████████▊ | 138/157 [1:03:40<08:58, 28.35s/it]


--- Batch 137 examples ---

Image 8768:
  True class: golden retriever
  CLIP prediction: golden retriever
  ICE prediction: golden retriever
  Captions:
    a photo of: a photo of a dog sitting on the grass
    a photo containing: a photo containing of a golden retrieve
    the object in the photo is: the object in the photo is a dog
  CLIP confidence: 79.10%
  ICE confidence: 85.30%

Image 8769:
  True class: plunger
  CLIP prediction: bannister
  ICE prediction: bannister
  Captions:
    a photo of: a photo of a room with a bed and a window
    a photo containing: a photo containing of a room with a bed and a window
    the object in the photo is: the object in the photo is a cat
  CLIP confidence: 25.83%
  ICE confidence: 25.83%

Image 8770:
  True class: cannon
  CLIP prediction: cannon
  ICE prediction: cannon
  Captions:
    a photo of: a photo of a group of people standing in a field
    a photo containing: a photo containing of a fire in a field
    the object in the photo is

 89%|████████▊ | 139/157 [1:04:07<08:23, 27.98s/it]


--- Batch 138 examples ---

Image 8832:
  True class: iPod
  CLIP prediction: pay-phone
  ICE prediction: pay-phone
  Captions:
    a photo of: a photo of a cell phone with a green and black cover
    a photo containing: a photo containing the new iphone 5
    the object in the photo is: the object in the photo is connected to the device
  CLIP confidence: 22.97%
  ICE confidence: 23.41%

Image 8833:
  True class: oboe
  CLIP prediction: oboe
  ICE prediction: oboe
  Captions:
    a photo of: a photo of a necklace with a black and white bea
    a photo containing: a photo containing the image of a person in a wheelchair
    the object in the photo is: the object in the photo is a small white object
  CLIP confidence: 93.36%
  ICE confidence: 93.36%

Image 8834:
  True class: brass
  CLIP prediction: computer keyboard
  ICE prediction: computer keyboard
  Captions:
    a photo of: a photo of a man in a boat
    a photo containing: a photo containing of the new york police station
    t

 89%|████████▉ | 140/157 [1:04:35<07:58, 28.17s/it]


--- Batch 139 examples ---

Image 8896:
  True class: tarantula
  CLIP prediction: tarantula
  ICE prediction: tarantula
  Captions:
    a photo of: a photo of a black and white spider
    a photo containing: a photo containing with the image of a black hole in the ground
    the object in the photo is: the object in the photo is a black hole
  CLIP confidence: 87.94%
  ICE confidence: 87.94%

Image 8897:
  True class: dining table
  CLIP prediction: sewing machine
  ICE prediction: sewing machine
  Captions:
    a photo of: a photo of a dining area with a table and chairs
    a photo containing: a photo containing of a room with a table and chairs
    the object in the photo is: the object in the photo is a large object
  CLIP confidence: 38.75%
  ICE confidence: 38.75%

Image 8898:
  True class: European fire salamander
  CLIP prediction: European fire salamander
  ICE prediction: European fire salamander
  Captions:
    a photo of: a photo of a field with a bird in the middle
    a

 90%|████████▉ | 141/157 [1:05:03<07:27, 27.96s/it]


--- Batch 140 examples ---

Image 8960:
  True class: scoreboard
  CLIP prediction: scoreboard
  ICE prediction: scoreboard
  Captions:
    a photo of: a photo of a train with a sign on it
    a photo containing: a photo containing of the scene of the scene in the film
    the object in the photo is: the object in the photo is a train
  CLIP confidence: 96.39%
  ICE confidence: 96.39%

Image 8961:
  True class: go-kart
  CLIP prediction: go-kart
  ICE prediction: go-kart
  Captions:
    a photo of: a photo of a man riding a motorcycle
    a photo containing: a photo containing of a man riding a motorcycle
    the object in the photo is: the object in the photo is a car
  CLIP confidence: 97.51%
  ICE confidence: 97.51%

Image 8962:
  True class: moving van
  CLIP prediction: police van
  ICE prediction: police van
  Captions:
    a photo of: a photo of a white truck parked in a parking lot
    a photo containing: a photo containing of a white truck with a white trailer
    the object 

 90%|█████████ | 142/157 [1:05:30<06:57, 27.81s/it]


--- Batch 141 examples ---

Image 9024:
  True class: pretzel
  CLIP prediction: beaker
  ICE prediction: beaker
  Captions:
    a photo of: a photo of a dog
    a photo containing: a photo containing of a dog in a boat
    the object in the photo is: the object in the photo is a cat
  CLIP confidence: 24.60%
  ICE confidence: 24.60%

Image 9025:
  True class: boa constrictor
  CLIP prediction: boa constrictor
  ICE prediction: boa constrictor
  Captions:
    a photo of: a photo of a woman with a snake on her neck
    a photo containing: a photo containing the gold ring found in the gold ring found in the gold ring found in the gold ring found in the gold ring found in the gold ring
    the object in the photo is: the object in the photo is a snake
  CLIP confidence: 97.02%
  ICE confidence: 97.02%

Image 9026:
  True class: brown bear
  CLIP prediction: brown bear
  ICE prediction: brown bear
  Captions:
    a photo of: a photo of a bear walking across a road
    a photo containing: 

 91%|█████████ | 143/157 [1:05:58<06:30, 27.87s/it]


--- Batch 142 examples ---

Image 9088:
  True class: orange
  CLIP prediction: orange
  ICE prediction: orange
  Captions:
    a photo of: a photo of a slice of orange
    a photo containing: a photo containing the orange
    the object in the photo is: the object in the photo is a orange
  CLIP confidence: 81.15%
  ICE confidence: 87.45%

Image 9089:
  True class: fountain
  CLIP prediction: fountain
  ICE prediction: fountain
  Captions:
    a photo of: a photo of a rainbow fountain in a park
    a photo containing: a photo containing a rainbow
    the object in the photo is: the object in the photo is a rainbow
  CLIP confidence: 63.04%
  ICE confidence: 68.51%

Image 9090:
  True class: sandal
  CLIP prediction: king penguin
  ICE prediction: king penguin
  Captions:
    a photo of: a photo of two penguins on a yellow field
    a photo containing: a photo containing the cover of the album ' the best of the best '
    the object in the photo is: the object in the photo is a black 

 92%|█████████▏| 144/157 [1:06:27<06:07, 28.26s/it]


--- Batch 143 examples ---

Image 9152:
  True class: parking meter
  CLIP prediction: turnstile
  ICE prediction: turnstile
  Captions:
    a photo of: a photo of a blue and white building
    a photo containing: a photo containing the image of a man in a suit
    the object in the photo is: the object in the photo is a blue vase
  CLIP confidence: 50.29%
  ICE confidence: 50.29%

Image 9153:
  True class: wok
  CLIP prediction: wok
  ICE prediction: wok
  Captions:
    a photo of: a photo of a skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill
    a photo containing: a photo containing the recipe of the chicken and rice cass
    the object in the photo is: the object in the photo is a pan of food
  CLIP confidence: 78.52%
  ICE confidence: 78.52%

Image 9154:
  True class: German shepherd
  CLIP prediction: German shepherd
  ICE prediction: German shepherd
  Captions:
    a photo of: a photo of a dog playing with a ball
    a photo 

 92%|█████████▏| 145/157 [1:06:54<05:33, 27.80s/it]


--- Batch 144 examples ---

Image 9216:
  True class: hog
  CLIP prediction: hog
  ICE prediction: hog
  Captions:
    a photo of: a photo of a wolf in the woods
    a photo containing: a photo containing of a wolf
    the object in the photo is: the object in the photo is a wolf
  CLIP confidence: 59.62%
  ICE confidence: 59.67%

Image 9217:
  True class: go-kart
  CLIP prediction: go-kart
  ICE prediction: go-kart
  Captions:
    a photo of: a photo of a white and black atv
    a photo containing: a photo containing of a polarcat atv
    the object in the photo is: the object in the photo is a small white car
  CLIP confidence: 64.01%
  ICE confidence: 64.01%

Image 9218:
  True class: sock
  CLIP prediction: sandal
  ICE prediction: sandal
  Captions:
    a photo of: a photo of a table with a bunch of flowers
    a photo containing: a photo containing the same image
    the object in the photo is: the object in the photo is a cat
  CLIP confidence: 19.96%
  ICE confidence: 19.96%



 93%|█████████▎| 146/157 [1:07:21<05:04, 27.65s/it]


--- Batch 145 examples ---

Image 9280:
  True class: cash machine
  CLIP prediction: cash machine
  ICE prediction: cash machine
  Captions:
    a photo of: a photo of a red and white bathroom
    a photo containing: a photo containing the red and white color of a kitchen
    the object in the photo is: the object in the photo is a red object
  CLIP confidence: 43.99%
  ICE confidence: 43.99%

Image 9281:
  True class: lion
  CLIP prediction: Egyptian cat
  ICE prediction: Egyptian cat
  Captions:
    a photo of: a photo of a dog playing with a ball
    a photo containing: a photo containing of a lion cub
    the object in the photo is: the object in the photo is a dog
  CLIP confidence: 33.08%
  ICE confidence: 33.08%

Image 9282:
  True class: guacamole
  CLIP prediction: guacamole
  ICE prediction: guacamole
  Captions:
    a photo of: a photo of a plate of food with brocco
    a photo containing: a photo containing of a plate of fried fried green vegetables
    the object in the 

 94%|█████████▎| 147/157 [1:07:50<04:38, 27.82s/it]


--- Batch 146 examples ---

Image 9344:
  True class: jinrikisha
  CLIP prediction: sandal
  ICE prediction: sandal
  Captions:
    a photo of: a photo of a man pushing a cart
    a photo containing: a photo containing of a man pushing a cart
    the object in the photo is: the object in the photo is a man pushing a cart
  CLIP confidence: 36.60%
  ICE confidence: 36.60%

Image 9345:
  True class: lemon
  CLIP prediction: confectionery
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a stack of cookies on a table
    a photo containing: a photo containing the image of a stack of oranges
    the object in the photo is: the object in the photo is a cake
  CLIP confidence: 18.91%
  ICE confidence: 24.69%

Image 9346:
  True class: bighorn
  CLIP prediction: bighorn
  ICE prediction: bighorn
  Captions:
    a photo of: a photo of a herd of sheep in the snow
    a photo containing: a photo containing the image of a herd of sheep
    the object in the photo is: the obj

 94%|█████████▍| 148/157 [1:08:18<04:10, 27.88s/it]


--- Batch 147 examples ---

Image 9408:
  True class: gondola
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a street with a car parked in the middle
    a photo containing: a photo containing the cover of the album ' the best of the best '
    the object in the photo is: the object in the photo is a car
  CLIP confidence: 42.75%
  ICE confidence: 42.75%

Image 9409:
  True class: brain coral
  CLIP prediction: brain coral
  ICE prediction: brain coral
  Captions:
    a photo of: a photo of a small white bird in the woods
    a photo containing: a photo containing with the image of a man in a suit
    the object in the photo is: the object in the photo is a small white object
  CLIP confidence: 45.09%
  ICE confidence: 45.09%

Image 9410:
  True class: dining table
  CLIP prediction: dining table
  ICE prediction: dining table
  Captions:
    a photo of: a photo of a restaurant with a table and chairs
    a photo containing: a photo conta

 95%|█████████▍| 149/157 [1:08:45<03:40, 27.58s/it]


--- Batch 148 examples ---

Image 9472:
  True class: mushroom
  CLIP prediction: mushroom
  ICE prediction: mushroom
  Captions:
    a photo of: a photo of a tree with a green background
    a photo containing: a photo containing the cover of the book, the garden of the dead
    the object in the photo is: the object in the photo is a tree
  CLIP confidence: 85.89%
  ICE confidence: 85.89%

Image 9473:
  True class: plate
  CLIP prediction: guacamole
  ICE prediction: guacamole
  Captions:
    a photo of: a photo of a table with plates and bowls
    a photo containing: a photo containing the food
    the object in the photo is: the object in the photo is a plate
  CLIP confidence: 52.78%
  ICE confidence: 52.78%

Image 9474:
  True class: police van
  CLIP prediction: police van
  ICE prediction: police van
  Captions:
    a photo of: a photo of a police van and a police car
    a photo containing: a photo containing of the crash scene at the scene of the crash
    the object in the 

 96%|█████████▌| 150/157 [1:09:12<03:13, 27.64s/it]


--- Batch 149 examples ---

Image 9536:
  True class: sock
  CLIP prediction: swimming trunks
  ICE prediction: swimming trunks
  Captions:
    a photo of: a photo of a man in a red shirt and blue shorts
    a photo containing: a photo containing of a man in a red shirt and blue shorts
    the object in the photo is: the object in the photo is a soccer ball
  CLIP confidence: 30.03%
  ICE confidence: 36.01%

Image 9537:
  True class: cliff dwelling
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a castle with a dark sky in the background
    a photo containing: a photo containing the image of the ruins of the ancient city of ural
    the object in the photo is: the object in the photo is a black and white image of a building
  CLIP confidence: 39.09%
  ICE confidence: 39.09%

Image 9538:
  True class: cockroach
  CLIP prediction: centipede
  ICE prediction: centipede
  Captions:
    a photo of: a photo of a bee
    a photo cont

 96%|█████████▌| 151/157 [1:09:40<02:46, 27.76s/it]


--- Batch 150 examples ---

Image 9600:
  True class: bison
  CLIP prediction: bison
  ICE prediction: bison
  Captions:
    a photo of: a photo of a bison grazing in a field
    a photo containing: a photo containing of a bison grazing in a field
    the object in the photo is: the object in the photo is a bison
  CLIP confidence: 90.04%
  ICE confidence: 96.04%

Image 9601:
  True class: freight car
  CLIP prediction: freight car
  ICE prediction: freight car
  Captions:
    a photo of: a photo of a truck with a trailer in the background
    a photo containing: a photo containing of a truck with a trailer behind it
    the object in the photo is: the object in the photo is a blue truck
  CLIP confidence: 84.08%
  ICE confidence: 84.13%

Image 9602:
  True class: viaduct
  CLIP prediction: viaduct
  ICE prediction: viaduct
  Captions:
    a photo of: a photo of a mountain with a lake
    a photo containing: a photo containing the view of the mountains
    the object in the photo is: 

 97%|█████████▋| 152/157 [1:10:08<02:18, 27.61s/it]


--- Batch 151 examples ---

Image 9664:
  True class: koala
  CLIP prediction: koala
  ICE prediction: koala
  Captions:
    a photo of: a photo of a dog running in the dirt
    a photo containing: a photo containing of a dog in a yard
    the object in the photo is: the object in the photo is a white dog
  CLIP confidence: 44.82%
  ICE confidence: 44.82%

Image 9665:
  True class: torch
  CLIP prediction: limousine
  ICE prediction: limousine
  Captions:
    a photo of: a photo of a man in a red and white outfit
    a photo containing: a photo containing the red cross
    the object in the photo is: the object in the photo is a car
  CLIP confidence: 51.56%
  ICE confidence: 51.56%

Image 9666:
  True class: bullet train
  CLIP prediction: bullet train
  ICE prediction: bullet train
  Captions:
    a photo of: a photo of a boat in the water
    a photo containing: a photo containing a white and blue boat
    the object in the photo is: the object in the photo is a boat
  CLIP confide

 97%|█████████▋| 153/157 [1:10:35<01:49, 27.45s/it]


--- Batch 152 examples ---

Image 9728:
  True class: crane
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a statue in the middle of a park
    a photo containing: a photo containing the statue of liberty
    the object in the photo is: the object in the photo is a large blue object
  CLIP confidence: 38.09%
  ICE confidence: 38.09%

Image 9729:
  True class: teapot
  CLIP prediction: acorn
  ICE prediction: acorn
  Captions:
    a photo of: a photo of a teapot with a handle
    a photo containing: a photo containing the teapot
    the object in the photo is: the object in the photo is a teapot
  CLIP confidence: 41.31%
  ICE confidence: 41.31%

Image 9730:
  True class: lakeside
  CLIP prediction: lakeside
  ICE prediction: lakeside
  Captions:
    a photo of: a photo of a tree in the middle of a field
    a photo containing: a photo containing the image of a tree in the middle of a lake
    the object in the photo is: the object in the ph

 98%|█████████▊| 154/157 [1:11:01<01:21, 27.16s/it]


--- Batch 153 examples ---

Image 9792:
  True class: American alligator
  CLIP prediction: lawn mower
  ICE prediction: lawn mower
  Captions:
    a photo of: a photo of a bird in the grass
    a photo containing: a photo containing of a bear in the wild
    the object in the photo is: the object in the photo is a bird
  CLIP confidence: 9.06%
  ICE confidence: 9.06%

Image 9793:
  True class: computer keyboard
  CLIP prediction: computer keyboard
  ICE prediction: computer keyboard
  Captions:
    a photo of: a photo of a keyboard with a keyboard key
    a photo containing: a photo containing the new keyboard
    the object in the photo is: the object in the photo is a keyboard
  CLIP confidence: 95.85%
  ICE confidence: 101.66%

Image 9794:
  True class: goose
  CLIP prediction: goose
  ICE prediction: goose
  Captions:
    a photo of: a photo of a swan swimming in the water
    a photo containing: a photo containing of a swan and a swan
    the object in the photo is: the object i

 99%|█████████▊| 155/157 [1:11:30<00:55, 27.71s/it]


--- Batch 154 examples ---

Image 9856:
  True class: oboe
  CLIP prediction: oboe
  ICE prediction: oboe
  Captions:
    a photo of: a photo of a woman playing a drum
    a photo containing: a photo containing of a woman playing a drum
    the object in the photo is: the object in the photo is a red object
  CLIP confidence: 54.10%
  ICE confidence: 54.10%

Image 9857:
  True class: spider web
  CLIP prediction: punching bag
  ICE prediction: punching bag
  Captions:
    a photo of: a photo of a black and white image of a black and white image of a black and white image of a black and white image of a
    a photo containing: a photo containing the image of the moon
    the object in the photo is: the object in the photo is a black and white image
  CLIP confidence: 7.38%
  ICE confidence: 7.38%

Image 9858:
  True class: computer keyboard
  CLIP prediction: computer keyboard
  ICE prediction: computer keyboard
  Captions:
    a photo of: a photo of a city at night
    a photo contain

 99%|█████████▉| 156/157 [1:11:58<00:27, 27.85s/it]


--- Batch 155 examples ---

Image 9920:
  True class: flagpole
  CLIP prediction: flagpole
  ICE prediction: flagpole
  Captions:
    a photo of: a photo of a kite flying in the sky
    a photo containing: a photo containing the sun and a kite
    the object in the photo is: the object in the photo is a kite
  CLIP confidence: 89.60%
  ICE confidence: 89.60%

Image 9921:
  True class: black widow
  CLIP prediction: black widow
  ICE prediction: black widow
  Captions:
    a photo of: a photo of a black and white dog
    a photo containing: a photo containing of a black - footed hawk
    the object in the photo is: the object in the photo is a small bird
  CLIP confidence: 77.20%
  ICE confidence: 77.20%

Image 9922:
  True class: steel arch bridge
  CLIP prediction: steel arch bridge
  ICE prediction: steel arch bridge
  Captions:
    a photo of: a photo of a bridge over a body of water
    a photo containing: a photo containing the bridge over the river
    the object in the photo is

100%|██████████| 157/157 [1:12:05<00:00, 27.55s/it]


--- Batch 156 examples ---

Image 9984:
  True class: chain
  CLIP prediction: boa constrictor
  ICE prediction: boa constrictor
  Captions:
    a photo of: a photo of a man in a wet suit
    a photo containing: a photo containing of a man in a swimming suit
    the object in the photo is: the object in the photo is a shark
  CLIP confidence: 15.65%
  ICE confidence: 15.65%

Image 9985:
  True class: candle
  CLIP prediction: candle
  ICE prediction: candle
  Captions:
    a photo of: a photo of a cake with candles on it
    a photo containing: a photo containing the cover of the book the dark side
    the object in the photo is: the object in the photo is a candle
  CLIP confidence: 74.02%
  ICE confidence: 80.42%

Image 9986:
  True class: jellyfish
  CLIP prediction: jellyfish
  ICE prediction: jellyfish
  Captions:
    a photo of: a photo of a blue jellyfish in the ocean
    a photo containing: a photo containing with the image of a blue jellyfish
    the object in the photo is: t


