In [2]:
!pip install git+https://github.com/openai/CLIP.git



Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-6n9lekho
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-6n9lekho
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting ftfy (from clip==1.0)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->clip==1.0)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->clip==1.0)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->clip==1.0)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (

In [16]:
def fine_tune_clip(clip_model, train_loader, val_loader, classnames, num_epochs=10):
    # 1. Model Preparation
    # Freeze all layers except the projection layers
    for name, param in clip_model.named_parameters():
        if 'visual.proj' not in name and 'text_projection' not in name:
            param.requires_grad = False
        else:
            print(f"Training layer: {name}")

    # 2. Optimizer Configuration
    optimizer = torch.optim.AdamW(
        filter(lambda p: p.requires_grad, clip_model.parameters()),
        lr=1e-6,  # Very conservative learning rate
        betas=(0.9, 0.98),
        eps=1e-6,
        weight_decay=0.01
    )
    
    # 3. Learning Rate Scheduling with Warmup
    warmup_steps = 500
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lambda step: min(1.0, step / warmup_steps)
    )
    
    criterion = torch.nn.CrossEntropyLoss()
    
    # 4. Text Input Preparation
    with torch.no_grad():
        text_inputs = torch.cat([clip.tokenize(f"a photo of a {c}") for c in classnames]).to(device)
        text_features = clip_model.encode_text(text_inputs)
        text_features = text_features / text_features.norm(dim=1, keepdim=True)
    
    # 5. CLIP Normalization Parameters
    clip_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073], device=device).view(1, 3, 1, 1)
    clip_std = torch.tensor([0.26862954, 0.26130258, 0.27577711], device=device).view(1, 3, 1, 1)
    
    # 6. Training Loop with Stability Checks
    best_acc = 0
    
    for epoch in range(num_epochs):
        clip_model.train()
        train_loss = 0
        grad_norms = []
        
        for batch_idx, (images, labels) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}")):
            images, labels = images.to(device), labels.to(device)
            
            # Apply CLIP normalization with numerical stability checks
            images = (images - clip_mean) / clip_std
            if torch.isnan(images).any():
                print("NaN detected in images after normalization!")
                continue
            
            # Forward pass with stability checks
            optimizer.zero_grad()
            
            image_features = clip_model.encode_image(images)
            if torch.isnan(image_features).any():
                print("NaN in image features!")
                continue
                
            image_features = image_features / image_features.norm(dim=1, keepdim=True)
            
            # Use pre-computed text features
            logit_scale = clip_model.logit_scale.exp().clamp(max=100)  # Clamp to prevent overflow
            logits = logit_scale * image_features @ text_features.T
            
            if torch.isnan(logits).any():
                print(f"NaN in logits! Scale: {logit_scale.item()}")
                continue
                
            loss = criterion(logits, labels)
            
            if torch.isnan(loss):
                print("NaN loss detected!")
                continue
                
            # Backward pass with gradient monitoring
            loss.backward()
            
            # Gradient clipping and monitoring
            grad_norm = torch.nn.utils.clip_grad_norm_(
                filter(lambda p: p.requires_grad, clip_model.parameters()),
                max_norm=1.0
            )
            grad_norms.append(grad_norm.item())
            
            if torch.isnan(grad_norm):
                print("NaN gradients detected!")
                optimizer.zero_grad()
                continue
                
            optimizer.step()
            scheduler.step()
            
            train_loss += loss.item()
            
            # Debug prints every 100 batches
            if batch_idx % 100 == 0:
                print(f"\nBatch {batch_idx}:")
                print(f"  Loss: {loss.item():.4f}")
                print(f"  Grad norm: {grad_norm:.4f}")
                print(f"  Logit scale: {logit_scale.item():.4f}")
                print(f"  Max logit: {logits.max().item():.4f}")
                print(f"  Min logit: {logits.min().item():.4f}")
        
        avg_train_loss = train_loss / len(train_loader)
        avg_grad_norm = sum(grad_norms) / len(grad_norms)
        print(f"\nEpoch {epoch+1}:")
        print(f"  Avg Train Loss: {avg_train_loss:.4f}")
        print(f"  Avg Grad Norm: {avg_grad_norm:.4f}")
        
        # Validation
        val_acc = validate_clip(clip_model, val_loader, text_features)
        print(f"  Val Acc: {val_acc:.2f}%")
        
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(clip_model.state_dict(), "best_clip_model.pth")
    
    return clip_model

def validate_clip(clip_model, val_loader, text_features):
    clip_model.eval()
    correct = 0
    total = 0
    
    clip_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073], device=device).view(1, 3, 1, 1)
    clip_std = torch.tensor([0.26862954, 0.26130258, 0.27577711], device=device).view(1, 3, 1, 1)
    
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            images = (images - clip_mean) / clip_std
            
            image_features = clip_model.encode_image(images)
            image_features = image_features / image_features.norm(dim=1, keepdim=True)
            
            logit_scale = clip_model.logit_scale.exp().clamp(max=100)
            logits = logit_scale * image_features @ text_features.T
            
            _, predicted = torch.max(logits, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
    
    return 100 * correct / total

In [9]:
def fine_tune_blip(blip_model, blip_processor, train_loader, num_epochs=3, classnames):
    optimizer = torch.optim.AdamW(blip_model.parameters(), lr=5e-5)
    best_loss = float('inf')
    
    for epoch in range(num_epochs):
        print("Blip epoch:", epoch, num_epochs)
        blip_model.train()
        epoch_loss = 0
        
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            images = images.to(device)
            
            # Convert to PIL images
            pil_images = []
            for img in images:
                np_img = img.cpu().numpy().transpose(1, 2, 0)
                np_img = np.clip(np_img, 0, 1)
                pil_img = Image.fromarray((np_img * 255).astype(np.uint8))
                pil_images.append(pil_img)
            
            # Create inputs - using classnames as prompts
            inputs = blip_processor(
                images=pil_images, 
                text=[f"a photo of a {classnames[l]}" for l in labels],
                return_tensors="pt",
                padding=True,
                truncation=True
            ).to(device)
            
            # Forward pass
            outputs = blip_model(**inputs)
            loss = outputs.loss
            
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            epoch_loss += loss.item()
        
        avg_loss = epoch_loss / len(train_loader)
        print(f"Epoch {epoch+1}: Loss = {avg_loss:.4f}")
        
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(blip_model.state_dict(), "best_blip_model.pth")
    
    return blip_model

In [17]:
import os
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import clip
from transformers import BlipProcessor, BlipForConditionalGeneration
import numpy as np
from tqdm import tqdm

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

class TinyImageNet(Dataset):
    def __init__(self, root, split='val', transform=None):
        self.root = os.path.expanduser(root)
        self.transform = transform
        self.split = split
        
        if split == 'train':
            self.image_paths = []
            self.labels = []
            
            # Process train directory which has subdirectories by class
            train_dir = os.path.join(self.root, 'train')
            self.classes = sorted(os.listdir(train_dir))
            self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}
            
            for class_name in self.classes:
                class_dir = os.path.join(train_dir, class_name, 'images')
                if not os.path.isdir(class_dir):
                    continue
                    
                for img_name in os.listdir(class_dir):
                    if img_name.endswith('.JPEG'):
                        self.image_paths.append(os.path.join(class_dir, img_name))
                        self.labels.append(self.class_to_idx[class_name])
                        
        elif split == 'val':
            self.image_paths = []
            self.labels = []
            
            # Process val directory
            val_dir = os.path.join(self.root, 'val')
            images_dir = os.path.join(val_dir, 'images')
            
            # Read val annotations
            val_annotations_file = os.path.join(val_dir, 'val_annotations.txt')
            self.classes = []
            self.class_to_idx = {}
            
            with open(val_annotations_file, 'r') as f:
                for line in f:
                    parts = line.strip().split('\t')
                    img_name, class_id = parts[0], parts[1]
                    
                    if class_id not in self.class_to_idx:
                        self.classes.append(class_id)
                        self.class_to_idx[class_id] = len(self.classes) - 1
                    
                    self.image_paths.append(os.path.join(images_dir, img_name))
                    self.labels.append(self.class_to_idx[class_id])

    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        
        img = Image.open(img_path).convert('RGB')
        
        if self.transform:
            img = self.transform(img)
            
        return img, label

def get_classnames(dataset, words_file_path):
    """Get formatted class names from dataset using WordNet mappings."""
    if hasattr(dataset, 'classes'):
        # Load WordNet ID to name mapping
        wordnet_map = {}
        with open(words_file_path, 'r') as f:
            for line in f:
                parts = line.strip().split('\t')
                if len(parts) == 2:
                    wordnet_id, names = parts
                    # Take the first name if there are multiple comma-separated ones
                    name = names.split(',')[0].strip()
                    wordnet_map[wordnet_id] = name
        
        # Replace WordNet IDs with human-readable names
        classnames = []
        for c in dataset.classes:
            if c in wordnet_map:
                classnames.append(wordnet_map[c])
            else:
                # Try without 'n' prefix if not found directly
                if c.startswith('n') and c[1:] in wordnet_map:
                    classnames.append(wordnet_map[c[1:]])
                else:
                    # Fallback to the original ID
                    classnames.append(c)
        
        return classnames
    return None

def prepare_dataset(root_dir):
    """Prepare and return the Tiny ImageNet dataset with appropriate transforms."""
    # Basic transform without normalization for dataset loading
    basic_transform = transforms.Compose([
        transforms.Resize(224, interpolation=transforms.InterpolationMode.BICUBIC),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])
    
    # Create dataset instance
    val_dataset = TinyImageNet(root_dir, split='val', transform=basic_transform)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)
    
    return val_dataset, val_loader
    
def generate_captions(blip_model, blip_processor, images, prompts):
    """Generate captions for the given images using BLIP model."""
    all_captions = []
    
    # Create PIL images from tensors
    pil_images = []
    for img in images:
        # Convert tensor to PIL image
        # Ensure values are within 0-1 range
        np_img = img.cpu().numpy().transpose(1, 2, 0)
        np_img = np.clip(np_img, 0, 1)
        pil_img = Image.fromarray((np_img * 255).astype(np.uint8))
        pil_images.append(pil_img)
    
    # Process images with BLIP
    batch_size = len(pil_images)
    with torch.no_grad():
        for prompt in prompts:
            batch_captions = []
            
            # Process each image individually
            for i in range(batch_size):
                inputs = blip_processor(images=pil_images[i], text=prompt, return_tensors="pt").to(device)
                generated_ids = blip_model.generate(**inputs, max_length=50)
                caption = blip_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
                batch_captions.append(caption)
                
            all_captions.append(batch_captions)
    
    return all_captions


def compute_image_embeddings(clip_model, images):
    """Compute CLIP image embeddings with proper normalization."""
    # Apply CLIP-specific normalization
    clip_mean = torch.tensor((0.48145466, 0.4578275, 0.40821073), device=images.device).view(1, 3, 1, 1)
    clip_std = torch.tensor((0.26862954, 0.26130258, 0.27577711), device=images.device).view(1, 3, 1, 1)
    normalized_images = (images - clip_mean) / clip_std
    
    with torch.no_grad():
        image_features = clip_model.encode_image(normalized_images)
        image_features = image_features / image_features.norm(dim=1, keepdim=True)
    return image_features

def compute_text_embeddings(clip_model, texts):
    """Compute CLIP text embeddings."""
    with torch.no_grad():
        text_features = clip_model.encode_text(texts)
        text_features = text_features / text_features.norm(dim=1, keepdim=True)
    return text_features

def image_caption_encoding(image_probs, caption_probs, K=5, xi=0.08, epsilon=1e-12):
    """
    Implement ICE method as described in the paper.
    
    Args:
        image_probs: Probability distribution from image embeddings
        caption_probs: Probability distribution from caption embeddings
        K: Number of top classes to consider
        xi: Scaling factor for lambda
        epsilon: Small constant to avoid division by zero
        
    Returns:
        Final probability distribution after ICE
    """
    # Find top K classes based on image probabilities
    topk_values, topk_indices = torch.topk(image_probs, k=K, dim=1)
    
    # Extract probabilities for top K classes
    image_topk_probs = torch.gather(image_probs, 1, topk_indices)
    caption_topk_probs = torch.gather(caption_probs, 1, topk_indices)
    
    # Compute standard deviation for confidence selection
    image_std = torch.std(image_topk_probs, dim=1, keepdim=True)
    caption_std = torch.std(caption_topk_probs, dim=1, keepdim=True)
    
    # Compute lambda based on equation 3 in the paper
    norm = torch.maximum(torch.norm(torch.cat([image_std, caption_std], dim=1), dim=1, keepdim=True), 
                         torch.tensor(epsilon, device=device))
    
    lambda_values = xi * (caption_std / norm)
    
    # Combine image and caption probabilities
    combined_topk_probs = image_topk_probs + lambda_values * caption_topk_probs
    
    # Create new probability distribution
    output_probs = torch.zeros_like(image_probs)
    output_probs.scatter_(1, topk_indices, combined_topk_probs)
    
    return output_probs

def evaluate_model(clip_model, blip_model, blip_processor, val_loader, classnames, caption_prompts):
    """Evaluate the model on the validation set."""
    text_inputs = torch.cat([clip.tokenize(f"a photo of a {c}") for c in classnames]).to(device)
    text_features = compute_text_embeddings(clip_model, text_inputs)
    
    total = 0
    correct_base = 0
    correct_ice = 0
    
    # Print sample outputs for the first few batches only
    print_samples = 200
    sample_count = 0
    
    for batch_idx, (images, labels) in enumerate(tqdm(val_loader)):
        images = images.to(device)
        labels = labels.to(device)
        batch_size = images.shape[0]
        total += batch_size
        
        # Compute image embeddings and probabilities
        image_features = compute_image_embeddings(clip_model, images)
        image_logits = 100.0 * image_features @ text_features.T
        image_probs = F.softmax(image_logits, dim=1)
        
        # Generate captions using BLIP
        batch_captions = generate_captions(blip_model, blip_processor, images, caption_prompts)
        
        # Process captions
        caption_features_list = []
        for prompt_captions in batch_captions:
            caption_tokens = clip.tokenize(prompt_captions).to(device)
            caption_features = compute_text_embeddings(clip_model, caption_tokens)
            caption_features_list.append(caption_features)
        
        # Compute centroid of caption embeddings
        caption_features = torch.stack(caption_features_list).mean(dim=0)
        
        # Compute caption probabilities
        caption_logits = 100.0 * caption_features @ text_features.T
        caption_probs = F.softmax(caption_logits, dim=1)
        
        # Apply ICE
        ice_probs = image_caption_encoding(image_probs, caption_probs)
        
        # Compute predictions
        _, base_preds = torch.max(image_probs, 1)
        _, ice_preds = torch.max(ice_probs, 1)
        
        # Update metrics
        correct_base += (base_preds == labels).sum().item()
        correct_ice += (ice_preds == labels).sum().item()
        
        # Print examples for debugging
        if batch_idx < print_samples:
            print(f"\n--- Batch {batch_idx} examples ---")
            # Print top 5 examples from the batch
            for i in range(min(5, batch_size)):
                true_label = labels[i].item()
                base_pred = base_preds[i].item()
                ice_pred = ice_preds[i].item()
                
                print(f"\nImage {sample_count + i}:")
                print(f"  True class: {classnames[true_label]}")
                print(f"  CLIP prediction: {classnames[base_pred]}")
                print(f"  ICE prediction: {classnames[ice_pred]}")
                
                # Print captions from all prompts
                print("  Captions:")
                for j, prompt in enumerate(caption_prompts):
                    print(f"    {prompt}: {batch_captions[j][i]}")
                
                # Print confidence scores for predictions
                base_conf = image_probs[i, base_pred].item() * 100
                ice_conf = ice_probs[i, ice_pred].item() * 100
                print(f"  CLIP confidence: {base_conf:.2f}%")
                print(f"  ICE confidence: {ice_conf:.2f}%")
            
            sample_count += batch_size
    
    base_acc = 100 * correct_base / total
    ice_acc = 100 * correct_ice / total
    
    return base_acc, ice_acc

def main():
    tiny_imagenet_path = '/kaggle/input/tiny-imagenet/tiny-imagenet-200'

    
    # Prepare datasets
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    
    val_transform = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])
    
    train_dataset = TinyImageNet(tiny_imagenet_path, split='train', transform=train_transform)
    val_dataset = TinyImageNet(tiny_imagenet_path, split='val', transform=val_transform)
    
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)
        
    # Load models
    clip_model, _ = clip.load("ViT-B/32", device=device)
    blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
    classnames = get_classnames(val_dataset, '/kaggle/input/tiny-imagenet/tiny-imagenet-200/words.txt')

    # Fine-tune models
    print("Fine-tuning CLIP...")
    clip_model = fine_tune_clip(clip_model, train_loader, val_loader, classnames)
    
    # Evaluate with fine-tuned models
    caption_prompts = # Dataset-specific prompts for BLIP
caption_prompts = [
    "Describe the specific object in this image for fine-grained classification",
    "What is the main object in this image? Include details like color, shape, and texture",
    "Identify this object precisely"
]
    
    print("\nEvaluating with fine-tuned models...")
    base_acc, ice_acc = evaluate_model(
        clip_model, blip_model, blip_processor, val_loader, classnames, caption_prompts
    )
    
    print(f"Base CLIP Accuracy: {base_acc:.2f}%")
    print(f"ICE Accuracy: {ice_acc:.2f}%")
    print(f"Improvement: {ice_acc - base_acc:.2f}%")
if __name__ == "__main__":
    main()

Using device: cuda
Fine-tuning CLIP...
Training layer: text_projection
Training layer: visual.proj


Epoch 1:   0%|          | 0/1563 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 


Batch 0:
  Loss: 8.2031
  Grad norm: 24.5781
  Logit scale: 100.0000
  Max logit: 32.6875
  Min logit: 9.2578


Epoch 1:   7%|▋         | 103/1563 [00:09<02:03, 11.85it/s]


Batch 100:
  Loss: 8.2188
  Grad norm: 24.8750
  Logit scale: 100.0000
  Max logit: 31.8750
  Min logit: 9.6953


Epoch 1:  13%|█▎        | 203/1563 [00:18<01:54, 11.85it/s]


Batch 200:
  Loss: 7.7500
  Grad norm: 21.7188
  Logit scale: 100.0000
  Max logit: 32.0938
  Min logit: 10.5391


Epoch 1:  19%|█▉        | 303/1563 [00:27<01:44, 12.00it/s]


Batch 300:
  Loss: 7.9570
  Grad norm: 22.2812
  Logit scale: 100.0000
  Max logit: 33.0625
  Min logit: 10.6328


Epoch 1:  26%|██▌       | 403/1563 [00:35<01:37, 11.93it/s]


Batch 400:
  Loss: 8.2734
  Grad norm: 23.3906
  Logit scale: 100.0000
  Max logit: 30.8750
  Min logit: 7.4648


Epoch 1:  32%|███▏      | 503/1563 [00:43<01:30, 11.76it/s]


Batch 500:
  Loss: 8.2188
  Grad norm: 23.5156
  Logit scale: 100.0000
  Max logit: 31.6562
  Min logit: 8.3281


Epoch 1:  39%|███▊      | 603/1563 [00:52<01:18, 12.20it/s]


Batch 600:
  Loss: 7.4648
  Grad norm: 20.8438
  Logit scale: 100.0000
  Max logit: 33.7500
  Min logit: 10.8672


Epoch 1:  45%|████▍     | 703/1563 [01:01<01:12, 11.80it/s]


Batch 700:
  Loss: 8.0938
  Grad norm: 22.5156
  Logit scale: 100.0000
  Max logit: 31.4375
  Min logit: 8.5391


Epoch 1:  51%|█████▏    | 803/1563 [01:10<01:02, 12.14it/s]


Batch 800:
  Loss: 7.8828
  Grad norm: 21.5469
  Logit scale: 100.0000
  Max logit: 32.2188
  Min logit: 8.8281


Epoch 1:  58%|█████▊    | 903/1563 [01:19<00:54, 12.16it/s]


Batch 900:
  Loss: 7.9531
  Grad norm: 20.1250
  Logit scale: 100.0000
  Max logit: 34.0312
  Min logit: 9.3906


Epoch 1:  64%|██████▍   | 1003/1563 [01:27<00:46, 12.14it/s]


Batch 1000:
  Loss: 7.6250
  Grad norm: 20.4844
  Logit scale: 100.0000
  Max logit: 32.2812
  Min logit: 10.4688


Epoch 1:  71%|███████   | 1103/1563 [01:36<00:38, 11.93it/s]


Batch 1100:
  Loss: 7.9570
  Grad norm: 20.4062
  Logit scale: 100.0000
  Max logit: 34.3438
  Min logit: 8.8672


Epoch 1:  77%|███████▋  | 1203/1563 [01:45<00:31, 11.29it/s]


Batch 1200:
  Loss: 7.6602
  Grad norm: 23.2812
  Logit scale: 100.0000
  Max logit: 32.5938
  Min logit: 10.7500


Epoch 1:  83%|████████▎ | 1303/1563 [01:54<00:26,  9.79it/s]


Batch 1300:
  Loss: 7.7109
  Grad norm: 21.1719
  Logit scale: 100.0000
  Max logit: 30.7500
  Min logit: 10.0234


Epoch 1:  90%|████████▉ | 1403/1563 [02:03<00:13, 11.69it/s]


Batch 1400:
  Loss: 7.5273
  Grad norm: 21.7188
  Logit scale: 100.0000
  Max logit: 30.2812
  Min logit: 6.7266


Epoch 1:  96%|█████████▌| 1503/1563 [02:12<00:05, 11.55it/s]


Batch 1500:
  Loss: 7.9102
  Grad norm: 21.0000
  Logit scale: 100.0000
  Max logit: 30.8594
  Min logit: 6.7773


Epoch 1: 100%|██████████| 1563/1563 [02:17<00:00, 11.38it/s]


Epoch 1:
  Avg Train Loss: 7.8429
  Avg Grad Norm: 22.3771



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- A

  Val Acc: 62.62%


Epoch 2:   0%|          | 0/1563 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 


Batch 0:
  Loss: 7.9570
  Grad norm: 22.3750
  Logit scale: 100.0000
  Max logit: 32.4062
  Min logit: 10.2656


Epoch 2:   7%|▋         | 103/1563 [00:08<01:57, 12.43it/s]


Batch 100:
  Loss: 7.8594
  Grad norm: 21.8906
  Logit scale: 100.0000
  Max logit: 31.0156
  Min logit: 9.1172


Epoch 2:  13%|█▎        | 203/1563 [00:16<01:48, 12.49it/s]


Batch 200:
  Loss: 7.7539
  Grad norm: 25.2188
  Logit scale: 100.0000
  Max logit: 30.8906
  Min logit: 8.7500


Epoch 2:  19%|█▉        | 303/1563 [00:25<01:58, 10.67it/s]


Batch 300:
  Loss: 7.0430
  Grad norm: 20.3750
  Logit scale: 100.0000
  Max logit: 30.6719
  Min logit: 11.3281


Epoch 2:  26%|██▌       | 403/1563 [00:33<01:32, 12.59it/s]


Batch 400:
  Loss: 7.5156
  Grad norm: 19.8594
  Logit scale: 100.0000
  Max logit: 30.6875
  Min logit: 9.2812


Epoch 2:  32%|███▏      | 503/1563 [00:41<01:24, 12.51it/s]


Batch 500:
  Loss: 7.4609
  Grad norm: 20.5000
  Logit scale: 100.0000
  Max logit: 30.7031
  Min logit: 9.4453


Epoch 2:  39%|███▊      | 603/1563 [00:49<01:14, 12.87it/s]


Batch 600:
  Loss: 7.6094
  Grad norm: 21.4062
  Logit scale: 100.0000
  Max logit: 31.6719
  Min logit: 7.5820


Epoch 2:  45%|████▍     | 703/1563 [00:57<01:25, 10.04it/s]


Batch 700:
  Loss: 8.2109
  Grad norm: 21.4531
  Logit scale: 100.0000
  Max logit: 33.0938
  Min logit: 9.0234


Epoch 2:  51%|█████▏    | 803/1563 [01:05<00:59, 12.85it/s]


Batch 800:
  Loss: 7.6719
  Grad norm: 20.2969
  Logit scale: 100.0000
  Max logit: 31.0625
  Min logit: 9.2109


Epoch 2:  58%|█████▊    | 903/1563 [01:13<00:51, 12.81it/s]


Batch 900:
  Loss: 7.4648
  Grad norm: 22.1250
  Logit scale: 100.0000
  Max logit: 31.7812
  Min logit: 9.8984


Epoch 2:  64%|██████▍   | 1003/1563 [01:21<00:45, 12.22it/s]


Batch 1000:
  Loss: 7.5273
  Grad norm: 20.2188
  Logit scale: 100.0000
  Max logit: 30.7500
  Min logit: 9.5234


Epoch 2:  71%|███████   | 1103/1563 [01:30<00:43, 10.57it/s]


Batch 1100:
  Loss: 7.2461
  Grad norm: 21.4531
  Logit scale: 100.0000
  Max logit: 33.1875
  Min logit: 6.2148


Epoch 2:  77%|███████▋  | 1203/1563 [01:38<00:28, 12.74it/s]


Batch 1200:
  Loss: 7.6758
  Grad norm: 24.3750
  Logit scale: 100.0000
  Max logit: 32.0625
  Min logit: 7.1562


Epoch 2:  83%|████████▎ | 1303/1563 [01:46<00:22, 11.80it/s]


Batch 1300:
  Loss: 7.4492
  Grad norm: 21.8438
  Logit scale: 100.0000
  Max logit: 31.7969
  Min logit: 9.2812


Epoch 2:  90%|████████▉ | 1403/1563 [01:55<00:13, 11.99it/s]


Batch 1400:
  Loss: 7.3906
  Grad norm: 21.4844
  Logit scale: 100.0000
  Max logit: 31.9375
  Min logit: 8.1172


Epoch 2:  96%|█████████▌| 1502/1563 [02:03<00:05, 10.59it/s]


Batch 1500:
  Loss: 7.2578
  Grad norm: 20.7500
  Logit scale: 100.0000
  Max logit: 30.8750
  Min logit: 10.3125


Epoch 2: 100%|██████████| 1563/1563 [02:08<00:00, 12.15it/s]


Epoch 2:
  Avg Train Loss: 7.5918
  Avg Grad Norm: 21.4418



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- A

  Val Acc: 62.97%


Epoch 3:   0%|          | 0/1563 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 


Batch 0:
  Loss: 7.8984
  Grad norm: 25.0469
  Logit scale: 100.0000
  Max logit: 30.8125
  Min logit: 9.3281


Epoch 3:   7%|▋         | 103/1563 [00:08<01:55, 12.69it/s]


Batch 100:
  Loss: 7.5820
  Grad norm: 20.4062
  Logit scale: 100.0000
  Max logit: 31.4844
  Min logit: 10.6094


Epoch 3:  13%|█▎        | 203/1563 [00:17<01:47, 12.68it/s]


Batch 200:
  Loss: 7.2539
  Grad norm: 20.8125
  Logit scale: 100.0000
  Max logit: 31.5000
  Min logit: 11.4688


Epoch 3:  19%|█▉        | 303/1563 [00:25<01:38, 12.75it/s]


Batch 300:
  Loss: 6.8828
  Grad norm: 19.9375
  Logit scale: 100.0000
  Max logit: 31.2188
  Min logit: 10.6016


Epoch 3:  26%|██▌       | 403/1563 [00:32<01:29, 13.00it/s]


Batch 400:
  Loss: 7.4102
  Grad norm: 20.6562
  Logit scale: 100.0000
  Max logit: 31.4844
  Min logit: 10.0938


Epoch 3:  32%|███▏      | 503/1563 [00:40<01:25, 12.45it/s]


Batch 500:
  Loss: 7.3008
  Grad norm: 19.0000
  Logit scale: 100.0000
  Max logit: 32.1562
  Min logit: 10.0938


Epoch 3:  39%|███▊      | 603/1563 [00:48<01:16, 12.55it/s]


Batch 600:
  Loss: 8.0625
  Grad norm: 23.4062
  Logit scale: 100.0000
  Max logit: 32.2188
  Min logit: 9.3203


Epoch 3:  45%|████▍     | 703/1563 [00:56<01:07, 12.73it/s]


Batch 700:
  Loss: 7.2617
  Grad norm: 19.8438
  Logit scale: 100.0000
  Max logit: 31.7656
  Min logit: 9.5078


Epoch 3:  51%|█████▏    | 803/1563 [01:04<01:03, 11.94it/s]


Batch 800:
  Loss: 7.3750
  Grad norm: 19.5781
  Logit scale: 100.0000
  Max logit: 30.8750
  Min logit: 9.0391


Epoch 3:  58%|█████▊    | 903/1563 [01:12<00:53, 12.33it/s]


Batch 900:
  Loss: 7.4297
  Grad norm: 20.0000
  Logit scale: 100.0000
  Max logit: 31.6094
  Min logit: 10.0391


Epoch 3:  64%|██████▍   | 1003/1563 [01:21<00:45, 12.29it/s]


Batch 1000:
  Loss: 7.8477
  Grad norm: 22.7031
  Logit scale: 100.0000
  Max logit: 32.0625
  Min logit: 10.8281


Epoch 3:  71%|███████   | 1103/1563 [01:29<00:36, 12.62it/s]


Batch 1100:
  Loss: 7.1992
  Grad norm: 19.2500
  Logit scale: 100.0000
  Max logit: 31.3125
  Min logit: 10.3828


Epoch 3:  77%|███████▋  | 1203/1563 [01:37<00:29, 12.35it/s]


Batch 1200:
  Loss: 7.3789
  Grad norm: 25.7812
  Logit scale: 100.0000
  Max logit: 32.0625
  Min logit: 8.6172


Epoch 3:  83%|████████▎ | 1303/1563 [01:45<00:20, 12.74it/s]


Batch 1300:
  Loss: 7.3984
  Grad norm: 23.1562
  Logit scale: 100.0000
  Max logit: 30.8906
  Min logit: 9.5078


Epoch 3:  90%|████████▉ | 1403/1563 [01:54<00:14, 11.10it/s]


Batch 1400:
  Loss: 7.2383
  Grad norm: 21.9375
  Logit scale: 100.0000
  Max logit: 29.5938
  Min logit: 10.5781


Epoch 3:  96%|█████████▌| 1501/1563 [02:02<00:05, 11.69it/s]


Batch 1500:
  Loss: 7.1992
  Grad norm: 20.4062
  Logit scale: 100.0000
  Max logit: 32.1250
  Min logit: 9.1953


Epoch 3: 100%|██████████| 1563/1563 [02:07<00:00, 12.22it/s]


Epoch 3:
  Avg Train Loss: 7.4302
  Avg Grad Norm: 21.0458



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- A

  Val Acc: 63.17%


Epoch 4:   0%|          | 0/1563 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 


Batch 0:
  Loss: 7.6523
  Grad norm: 21.5000
  Logit scale: 100.0000
  Max logit: 31.3594
  Min logit: 7.8867


Epoch 4:   7%|▋         | 103/1563 [00:09<01:56, 12.58it/s]


Batch 100:
  Loss: 7.1602
  Grad norm: 21.6250
  Logit scale: 100.0000
  Max logit: 32.7188
  Min logit: 9.6484


Epoch 4:  13%|█▎        | 203/1563 [00:17<01:46, 12.74it/s]


Batch 200:
  Loss: 7.1641
  Grad norm: 19.7500
  Logit scale: 100.0000
  Max logit: 31.7344
  Min logit: 11.7656


Epoch 4:  19%|█▉        | 303/1563 [00:25<01:37, 12.95it/s]


Batch 300:
  Loss: 7.7773
  Grad norm: 20.8281
  Logit scale: 100.0000
  Max logit: 30.2656
  Min logit: 9.9297


Epoch 4:  26%|██▌       | 401/1563 [00:33<01:51, 10.38it/s]


Batch 400:
  Loss: 7.3711
  Grad norm: 19.2656
  Logit scale: 100.0000
  Max logit: 32.1562
  Min logit: 8.0234


Epoch 4:  32%|███▏      | 503/1563 [00:41<01:25, 12.41it/s]


Batch 500:
  Loss: 7.5039
  Grad norm: 19.2969
  Logit scale: 100.0000
  Max logit: 31.2031
  Min logit: 11.5156


Epoch 4:  39%|███▊      | 603/1563 [00:49<01:14, 12.85it/s]


Batch 600:
  Loss: 7.2578
  Grad norm: 20.3281
  Logit scale: 100.0000
  Max logit: 31.5781
  Min logit: 9.5859


Epoch 4:  45%|████▍     | 703/1563 [00:57<01:09, 12.45it/s]


Batch 700:
  Loss: 6.8477
  Grad norm: 21.6875
  Logit scale: 100.0000
  Max logit: 31.9844
  Min logit: 9.2500


Epoch 4:  51%|█████▏    | 803/1563 [01:05<01:06, 11.43it/s]


Batch 800:
  Loss: 7.5039
  Grad norm: 20.0938
  Logit scale: 100.0000
  Max logit: 31.6094
  Min logit: 9.5547


Epoch 4:  58%|█████▊    | 903/1563 [01:13<00:52, 12.54it/s]


Batch 900:
  Loss: 7.3906
  Grad norm: 22.3594
  Logit scale: 100.0000
  Max logit: 31.5000
  Min logit: 10.9609


Epoch 4:  64%|██████▍   | 1003/1563 [01:22<00:43, 12.79it/s]


Batch 1000:
  Loss: 7.1797
  Grad norm: 19.5469
  Logit scale: 100.0000
  Max logit: 31.2969
  Min logit: 9.0469


Epoch 4:  71%|███████   | 1103/1563 [01:30<00:39, 11.77it/s]


Batch 1100:
  Loss: 7.0469
  Grad norm: 22.1562
  Logit scale: 100.0000
  Max logit: 30.1719
  Min logit: 11.1875


Epoch 4:  77%|███████▋  | 1203/1563 [01:38<00:29, 12.17it/s]


Batch 1200:
  Loss: 7.2305
  Grad norm: 20.3906
  Logit scale: 100.0000
  Max logit: 31.5000
  Min logit: 10.1484


Epoch 4:  83%|████████▎ | 1303/1563 [01:47<00:20, 12.49it/s]


Batch 1300:
  Loss: 7.3945
  Grad norm: 21.1875
  Logit scale: 100.0000
  Max logit: 31.3281
  Min logit: 10.8594


Epoch 4:  90%|████████▉ | 1403/1563 [01:55<00:13, 11.64it/s]


Batch 1400:
  Loss: 6.6602
  Grad norm: 20.3281
  Logit scale: 100.0000
  Max logit: 31.1406
  Min logit: 9.9141


Epoch 4:  96%|█████████▌| 1503/1563 [02:03<00:05, 11.72it/s]


Batch 1500:
  Loss: 6.8164
  Grad norm: 19.7031
  Logit scale: 100.0000
  Max logit: 31.3125
  Min logit: 9.4766


Epoch 4: 100%|██████████| 1563/1563 [02:09<00:00, 12.10it/s]


Epoch 4:
  Avg Train Loss: 7.3195
  Avg Grad Norm: 20.6991



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- A

  Val Acc: 63.09%


Epoch 5:   0%|          | 0/1563 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 


Batch 0:
  Loss: 7.5820
  Grad norm: 19.5938
  Logit scale: 100.0000
  Max logit: 32.1875
  Min logit: 9.6328


Epoch 5:   7%|▋         | 103/1563 [00:08<01:58, 12.36it/s]


Batch 100:
  Loss: 7.1094
  Grad norm: 19.9688
  Logit scale: 100.0000
  Max logit: 30.0000
  Min logit: 6.7070


Epoch 5:  13%|█▎        | 203/1563 [00:16<01:46, 12.73it/s]


Batch 200:
  Loss: 7.7578
  Grad norm: 20.8906
  Logit scale: 100.0000
  Max logit: 31.6406
  Min logit: 8.5078


Epoch 5:  19%|█▉        | 303/1563 [00:24<01:37, 12.89it/s]


Batch 300:
  Loss: 7.5273
  Grad norm: 20.4844
  Logit scale: 100.0000
  Max logit: 32.4688
  Min logit: 11.1250


Epoch 5:  26%|██▌       | 403/1563 [00:32<01:31, 12.65it/s]


Batch 400:
  Loss: 7.4883
  Grad norm: 23.1094
  Logit scale: 100.0000
  Max logit: 32.9688
  Min logit: 9.8281


Epoch 5:  32%|███▏      | 503/1563 [00:40<01:22, 12.90it/s]


Batch 500:
  Loss: 7.2539
  Grad norm: 19.4688
  Logit scale: 100.0000
  Max logit: 30.9219
  Min logit: 7.4219


Epoch 5:  39%|███▊      | 603/1563 [00:48<01:13, 12.99it/s]


Batch 600:
  Loss: 7.2852
  Grad norm: 21.3594
  Logit scale: 100.0000
  Max logit: 30.9219
  Min logit: 10.7031


Epoch 5:  45%|████▍     | 703/1563 [00:56<01:09, 12.37it/s]


Batch 700:
  Loss: 7.7734
  Grad norm: 21.7188
  Logit scale: 100.0000
  Max logit: 30.2031
  Min logit: 9.2578


Epoch 5:  51%|█████▏    | 803/1563 [01:04<01:00, 12.64it/s]


Batch 800:
  Loss: 7.6719
  Grad norm: 22.5625
  Logit scale: 100.0000
  Max logit: 31.2188
  Min logit: 10.5547


Epoch 5:  58%|█████▊    | 903/1563 [01:12<00:52, 12.62it/s]


Batch 900:
  Loss: 7.1797
  Grad norm: 21.3125
  Logit scale: 100.0000
  Max logit: 31.1875
  Min logit: 11.0781


Epoch 5:  64%|██████▍   | 1003/1563 [01:20<00:44, 12.49it/s]


Batch 1000:
  Loss: 6.3828
  Grad norm: 18.9844
  Logit scale: 100.0000
  Max logit: 29.6719
  Min logit: 10.6953


Epoch 5:  71%|███████   | 1103/1563 [01:28<00:37, 12.28it/s]


Batch 1100:
  Loss: 7.3633
  Grad norm: 22.4531
  Logit scale: 100.0000
  Max logit: 31.5000
  Min logit: 11.2812


Epoch 5:  77%|███████▋  | 1203/1563 [01:36<00:28, 12.62it/s]


Batch 1200:
  Loss: 6.6797
  Grad norm: 19.1250
  Logit scale: 100.0000
  Max logit: 28.9531
  Min logit: 9.5859


Epoch 5:  83%|████████▎ | 1303/1563 [01:44<00:20, 12.73it/s]


Batch 1300:
  Loss: 7.0312
  Grad norm: 21.9531
  Logit scale: 100.0000
  Max logit: 30.3438
  Min logit: 9.8281


Epoch 5:  90%|████████▉ | 1403/1563 [01:52<00:12, 12.56it/s]


Batch 1400:
  Loss: 7.1992
  Grad norm: 19.5312
  Logit scale: 100.0000
  Max logit: 30.5469
  Min logit: 10.9453


Epoch 5:  96%|█████████▌| 1503/1563 [02:01<00:04, 12.74it/s]


Batch 1500:
  Loss: 7.3633
  Grad norm: 21.3594
  Logit scale: 100.0000
  Max logit: 32.0938
  Min logit: 6.9141


Epoch 5: 100%|██████████| 1563/1563 [02:06<00:00, 12.38it/s]


Epoch 5:
  Avg Train Loss: 7.2535
  Avg Grad Norm: 20.6416



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- A

  Val Acc: 62.99%


Epoch 6:   0%|          | 0/1563 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 


Batch 0:
  Loss: 7.1523
  Grad norm: 19.8125
  Logit scale: 100.0000
  Max logit: 31.7656
  Min logit: 10.3281


Epoch 6:   7%|▋         | 103/1563 [00:08<01:54, 12.79it/s]


Batch 100:
  Loss: 7.3945
  Grad norm: 22.0469
  Logit scale: 100.0000
  Max logit: 31.0469
  Min logit: 8.5234


Epoch 6:  13%|█▎        | 203/1563 [00:17<01:45, 12.87it/s]


Batch 200:
  Loss: 6.8594
  Grad norm: 18.8750
  Logit scale: 100.0000
  Max logit: 30.5000
  Min logit: 12.8203


Epoch 6:  19%|█▉        | 303/1563 [00:24<01:39, 12.65it/s]


Batch 300:
  Loss: 7.0938
  Grad norm: 19.9375
  Logit scale: 100.0000
  Max logit: 29.8594
  Min logit: 8.4141


Epoch 6:  26%|██▌       | 403/1563 [00:32<01:31, 12.67it/s]


Batch 400:
  Loss: 7.2930
  Grad norm: 19.4375
  Logit scale: 100.0000
  Max logit: 30.5469
  Min logit: 7.5000


Epoch 6:  32%|███▏      | 503/1563 [00:40<01:25, 12.38it/s]


Batch 500:
  Loss: 7.4688
  Grad norm: 24.2188
  Logit scale: 100.0000
  Max logit: 29.7969
  Min logit: 9.5703


Epoch 6:  39%|███▊      | 603/1563 [00:48<01:15, 12.77it/s]


Batch 600:
  Loss: 7.1445
  Grad norm: 20.8906
  Logit scale: 100.0000
  Max logit: 30.3594
  Min logit: 9.4141


Epoch 6:  45%|████▍     | 703/1563 [00:56<01:06, 12.92it/s]


Batch 700:
  Loss: 7.4375
  Grad norm: 21.7188
  Logit scale: 100.0000
  Max logit: 31.5938
  Min logit: 9.1484


Epoch 6:  51%|█████▏    | 803/1563 [01:04<01:00, 12.46it/s]


Batch 800:
  Loss: 6.9141
  Grad norm: 19.0625
  Logit scale: 100.0000
  Max logit: 29.5312
  Min logit: 10.1875


Epoch 6:  58%|█████▊    | 903/1563 [01:12<00:51, 12.76it/s]


Batch 900:
  Loss: 7.6289
  Grad norm: 20.8906
  Logit scale: 100.0000
  Max logit: 31.2188
  Min logit: 10.4062


Epoch 6:  64%|██████▍   | 1003/1563 [01:20<00:44, 12.53it/s]


Batch 1000:
  Loss: 7.0117
  Grad norm: 21.2969
  Logit scale: 100.0000
  Max logit: 30.7188
  Min logit: 9.4609


Epoch 6:  71%|███████   | 1103/1563 [01:28<00:36, 12.72it/s]


Batch 1100:
  Loss: 7.1992
  Grad norm: 25.4531
  Logit scale: 100.0000
  Max logit: 32.5625
  Min logit: 9.9062


Epoch 6:  77%|███████▋  | 1203/1563 [01:36<00:28, 12.82it/s]


Batch 1200:
  Loss: 6.8008
  Grad norm: 19.0156
  Logit scale: 100.0000
  Max logit: 30.4219
  Min logit: 9.4453


Epoch 6:  83%|████████▎ | 1303/1563 [01:44<00:20, 12.56it/s]


Batch 1300:
  Loss: 7.4297
  Grad norm: 20.5000
  Logit scale: 100.0000
  Max logit: 29.7344
  Min logit: 9.3906


Epoch 6:  90%|████████▉ | 1401/1563 [01:52<00:13, 12.33it/s]


Batch 1400:
  Loss: 7.3711
  Grad norm: 20.9062
  Logit scale: 100.0000
  Max logit: 31.2812
  Min logit: 10.7266


Epoch 6:  96%|█████████▌| 1501/1563 [02:01<00:05, 12.39it/s]


Batch 1500:
  Loss: 7.2383
  Grad norm: 20.0781
  Logit scale: 100.0000
  Max logit: 33.5312
  Min logit: 10.0625


Epoch 6: 100%|██████████| 1563/1563 [02:06<00:00, 12.38it/s]


Epoch 6:
  Avg Train Loss: 7.1794
  Avg Grad Norm: 20.3724



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- A

  Val Acc: 62.94%


Epoch 7:   0%|          | 0/1563 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 


Batch 0:
  Loss: 7.3711
  Grad norm: 20.8750
  Logit scale: 100.0000
  Max logit: 32.1875
  Min logit: 9.4609


Epoch 7:   7%|▋         | 103/1563 [00:09<01:54, 12.81it/s]


Batch 100:
  Loss: 7.3867
  Grad norm: 21.0000
  Logit scale: 100.0000
  Max logit: 31.2500
  Min logit: 6.7344


Epoch 7:  13%|█▎        | 203/1563 [00:16<01:47, 12.67it/s]


Batch 200:
  Loss: 7.0234
  Grad norm: 20.2812
  Logit scale: 100.0000
  Max logit: 30.6875
  Min logit: 9.5156


Epoch 7:  19%|█▉        | 303/1563 [00:24<01:39, 12.72it/s]


Batch 300:
  Loss: 6.9062
  Grad norm: 19.6250
  Logit scale: 100.0000
  Max logit: 31.6875
  Min logit: 9.9844


Epoch 7:  26%|██▌       | 403/1563 [00:32<01:39, 11.69it/s]


Batch 400:
  Loss: 7.3047
  Grad norm: 22.7656
  Logit scale: 100.0000
  Max logit: 30.3906
  Min logit: 10.2344


Epoch 7:  32%|███▏      | 503/1563 [00:40<01:25, 12.44it/s]


Batch 500:
  Loss: 7.1602
  Grad norm: 19.6094
  Logit scale: 100.0000
  Max logit: 33.9062
  Min logit: 9.2891


Epoch 7:  39%|███▊      | 603/1563 [00:48<01:16, 12.47it/s]


Batch 600:
  Loss: 7.5938
  Grad norm: 22.7344
  Logit scale: 100.0000
  Max logit: 31.1406
  Min logit: 7.7656


Epoch 7:  45%|████▍     | 703/1563 [00:56<01:07, 12.65it/s]


Batch 700:
  Loss: 7.1133
  Grad norm: 20.4688
  Logit scale: 100.0000
  Max logit: 31.5469
  Min logit: 10.9609


Epoch 7:  51%|█████▏    | 803/1563 [01:04<01:01, 12.32it/s]


Batch 800:
  Loss: 7.1445
  Grad norm: 19.7344
  Logit scale: 100.0000
  Max logit: 30.7656
  Min logit: 10.7656


Epoch 7:  58%|█████▊    | 903/1563 [01:12<00:53, 12.31it/s]


Batch 900:
  Loss: 7.0586
  Grad norm: 20.1875
  Logit scale: 100.0000
  Max logit: 32.9688
  Min logit: 10.1484


Epoch 7:  64%|██████▍   | 1003/1563 [01:20<00:44, 12.59it/s]


Batch 1000:
  Loss: 7.4141
  Grad norm: 20.5625
  Logit scale: 100.0000
  Max logit: 32.3438
  Min logit: 10.8438


Epoch 7:  71%|███████   | 1103/1563 [01:28<00:36, 12.72it/s]


Batch 1100:
  Loss: 7.2344
  Grad norm: 20.2188
  Logit scale: 100.0000
  Max logit: 30.9219
  Min logit: 8.4453


Epoch 7:  77%|███████▋  | 1201/1563 [01:36<00:29, 12.44it/s]


Batch 1200:
  Loss: 7.2656
  Grad norm: 22.2344
  Logit scale: 100.0000
  Max logit: 31.3906
  Min logit: 7.2383


Epoch 7:  83%|████████▎ | 1303/1563 [01:45<00:21, 11.90it/s]


Batch 1300:
  Loss: 7.2656
  Grad norm: 19.7656
  Logit scale: 100.0000
  Max logit: 31.1094
  Min logit: 9.6641


Epoch 7:  90%|████████▉ | 1403/1563 [01:53<00:12, 12.41it/s]


Batch 1400:
  Loss: 7.2227
  Grad norm: 26.0938
  Logit scale: 100.0000
  Max logit: 31.0625
  Min logit: 10.4297


Epoch 7:  96%|█████████▌| 1501/1563 [02:01<00:05, 12.09it/s]


Batch 1500:
  Loss: 7.1055
  Grad norm: 19.7344
  Logit scale: 100.0000
  Max logit: 31.1719
  Min logit: 9.8047


Epoch 7: 100%|██████████| 1563/1563 [02:07<00:00, 12.31it/s]


Epoch 7:
  Avg Train Loss: 7.1371
  Avg Grad Norm: 20.2860



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- A

  Val Acc: 62.90%


Epoch 8:   0%|          | 0/1563 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 


Batch 0:
  Loss: 7.4102
  Grad norm: 17.9062
  Logit scale: 100.0000
  Max logit: 31.8750
  Min logit: 7.4258


Epoch 8:   7%|▋         | 103/1563 [00:08<01:54, 12.79it/s]


Batch 100:
  Loss: 6.7969
  Grad norm: 19.8906
  Logit scale: 100.0000
  Max logit: 30.9219
  Min logit: 11.7188


Epoch 8:  13%|█▎        | 203/1563 [00:16<01:45, 12.91it/s]


Batch 200:
  Loss: 6.7539
  Grad norm: 19.2500
  Logit scale: 100.0000
  Max logit: 30.0781
  Min logit: 10.4141


Epoch 8:  19%|█▉        | 303/1563 [00:24<01:42, 12.31it/s]


Batch 300:
  Loss: 7.1328
  Grad norm: 20.2500
  Logit scale: 100.0000
  Max logit: 31.2031
  Min logit: 10.5703


Epoch 8:  26%|██▌       | 403/1563 [00:32<01:32, 12.49it/s]


Batch 400:
  Loss: 7.0156
  Grad norm: 19.7031
  Logit scale: 100.0000
  Max logit: 32.4375
  Min logit: 9.5156


Epoch 8:  32%|███▏      | 503/1563 [00:40<01:22, 12.90it/s]


Batch 500:
  Loss: 7.3047
  Grad norm: 21.8906
  Logit scale: 100.0000
  Max logit: 31.9844
  Min logit: 11.5859


Epoch 8:  39%|███▊      | 603/1563 [00:48<01:15, 12.69it/s]


Batch 600:
  Loss: 6.8750
  Grad norm: 19.2344
  Logit scale: 100.0000
  Max logit: 31.1094
  Min logit: 10.9609


Epoch 8:  45%|████▍     | 703/1563 [00:56<01:12, 11.82it/s]


Batch 700:
  Loss: 7.0078
  Grad norm: 18.4844
  Logit scale: 100.0000
  Max logit: 31.9844
  Min logit: 8.6719


Epoch 8:  51%|█████▏    | 803/1563 [01:04<01:00, 12.65it/s]


Batch 800:
  Loss: 7.0391
  Grad norm: 19.5000
  Logit scale: 100.0000
  Max logit: 33.6875
  Min logit: 9.6641


Epoch 8:  58%|█████▊    | 903/1563 [01:11<00:50, 12.99it/s]


Batch 900:
  Loss: 7.0312
  Grad norm: 19.4219
  Logit scale: 100.0000
  Max logit: 30.8281
  Min logit: 10.3906


Epoch 8:  64%|██████▍   | 1003/1563 [01:19<00:43, 12.92it/s]


Batch 1000:
  Loss: 7.1953
  Grad norm: 19.0938
  Logit scale: 100.0000
  Max logit: 31.7812
  Min logit: 10.0469


Epoch 8:  70%|███████   | 1101/1563 [01:28<00:45, 10.15it/s]


Batch 1100:
  Loss: 7.3945
  Grad norm: 20.1406
  Logit scale: 100.0000
  Max logit: 30.3281
  Min logit: 8.3906


Epoch 8:  77%|███████▋  | 1203/1563 [01:36<00:29, 12.40it/s]


Batch 1200:
  Loss: 7.1914
  Grad norm: 20.2031
  Logit scale: 100.0000
  Max logit: 31.7188
  Min logit: 10.8906


Epoch 8:  83%|████████▎ | 1303/1563 [01:44<00:20, 12.59it/s]


Batch 1300:
  Loss: 7.3125
  Grad norm: 20.1406
  Logit scale: 100.0000
  Max logit: 33.5312
  Min logit: 9.3047


Epoch 8:  90%|████████▉ | 1403/1563 [01:52<00:12, 12.50it/s]


Batch 1400:
  Loss: 6.7734
  Grad norm: 19.6719
  Logit scale: 100.0000
  Max logit: 31.3750
  Min logit: 9.0703


Epoch 8:  96%|█████████▌| 1503/1563 [02:00<00:05, 10.79it/s]


Batch 1500:
  Loss: 6.9688
  Grad norm: 21.7031
  Logit scale: 100.0000
  Max logit: 30.2969
  Min logit: 9.6172


Epoch 8: 100%|██████████| 1563/1563 [02:05<00:00, 12.46it/s]


Epoch 8:
  Avg Train Loss: 7.1040
  Avg Grad Norm: 20.2125



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- A

  Val Acc: 62.83%


Epoch 9:   0%|          | 0/1563 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 


Batch 0:
  Loss: 6.7383
  Grad norm: 19.3906
  Logit scale: 100.0000
  Max logit: 30.4219
  Min logit: 12.7734


Epoch 9:   7%|▋         | 103/1563 [00:08<01:55, 12.67it/s]


Batch 100:
  Loss: 6.6367
  Grad norm: 18.6094
  Logit scale: 100.0000
  Max logit: 30.8594
  Min logit: 8.0547


Epoch 9:  13%|█▎        | 203/1563 [00:16<01:49, 12.39it/s]


Batch 200:
  Loss: 7.0742
  Grad norm: 21.0156
  Logit scale: 100.0000
  Max logit: 31.2969
  Min logit: 9.8516


Epoch 9:  19%|█▉        | 303/1563 [00:24<01:40, 12.50it/s]


Batch 300:
  Loss: 6.8789
  Grad norm: 21.1406
  Logit scale: 100.0000
  Max logit: 31.6562
  Min logit: 11.9766


Epoch 9:  26%|██▌       | 403/1563 [00:32<01:31, 12.68it/s]


Batch 400:
  Loss: 6.8125
  Grad norm: 20.4375
  Logit scale: 100.0000
  Max logit: 29.5938
  Min logit: 8.1406


Epoch 9:  32%|███▏      | 503/1563 [00:40<01:23, 12.76it/s]


Batch 500:
  Loss: 6.6758
  Grad norm: 18.9219
  Logit scale: 100.0000
  Max logit: 30.6875
  Min logit: 10.3438


Epoch 9:  39%|███▊      | 603/1563 [00:48<01:17, 12.35it/s]


Batch 600:
  Loss: 7.2852
  Grad norm: 20.6562
  Logit scale: 100.0000
  Max logit: 32.6562
  Min logit: 10.2500


Epoch 9:  45%|████▍     | 703/1563 [00:56<01:06, 12.84it/s]


Batch 700:
  Loss: 7.1992
  Grad norm: 20.5938
  Logit scale: 100.0000
  Max logit: 31.6250
  Min logit: 11.1641


Epoch 9:  51%|█████▏    | 803/1563 [01:04<00:59, 12.85it/s]


Batch 800:
  Loss: 7.4219
  Grad norm: 20.5156
  Logit scale: 100.0000
  Max logit: 31.0469
  Min logit: 10.8438


Epoch 9:  58%|█████▊    | 903/1563 [01:12<00:51, 12.71it/s]


Batch 900:
  Loss: 7.0664
  Grad norm: 17.9375
  Logit scale: 100.0000
  Max logit: 31.7500
  Min logit: 8.8828


Epoch 9:  64%|██████▍   | 1003/1563 [01:20<00:47, 11.69it/s]


Batch 1000:
  Loss: 6.8164
  Grad norm: 17.9219
  Logit scale: 100.0000
  Max logit: 31.4062
  Min logit: 11.8906


Epoch 9:  71%|███████   | 1103/1563 [01:28<00:36, 12.76it/s]


Batch 1100:
  Loss: 7.4219
  Grad norm: 21.3281
  Logit scale: 100.0000
  Max logit: 30.8438
  Min logit: 10.4531


Epoch 9:  77%|███████▋  | 1203/1563 [01:36<00:28, 12.79it/s]


Batch 1200:
  Loss: 6.7500
  Grad norm: 19.5000
  Logit scale: 100.0000
  Max logit: 32.0000
  Min logit: 7.6172


Epoch 9:  83%|████████▎ | 1303/1563 [01:44<00:20, 12.55it/s]


Batch 1300:
  Loss: 7.1758
  Grad norm: 20.0312
  Logit scale: 100.0000
  Max logit: 29.7656
  Min logit: 10.6094


Epoch 9:  90%|████████▉ | 1403/1563 [01:52<00:13, 11.46it/s]


Batch 1400:
  Loss: 7.0703
  Grad norm: 22.2969
  Logit scale: 100.0000
  Max logit: 31.0000
  Min logit: 10.7422


Epoch 9:  96%|█████████▌| 1503/1563 [02:00<00:04, 12.22it/s]


Batch 1500:
  Loss: 6.9336
  Grad norm: 18.7656
  Logit scale: 100.0000
  Max logit: 32.6875
  Min logit: 10.4844


Epoch 9: 100%|██████████| 1563/1563 [02:05<00:00, 12.48it/s]


Epoch 9:
  Avg Train Loss: 7.0754
  Avg Grad Norm: 20.0863



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- A

  Val Acc: 62.78%


Epoch 10:   0%|          | 0/1563 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used.


Batch 0:
  Loss: 6.9531
  Grad norm: 18.8906
  Logit scale: 100.0000
  Max logit: 30.1094
  Min logit: 10.1016


Epoch 10:   7%|▋         | 103/1563 [00:09<01:58, 12.32it/s]


Batch 100:
  Loss: 7.5312
  Grad norm: 21.5000
  Logit scale: 100.0000
  Max logit: 31.1562
  Min logit: 9.1094


Epoch 10:  13%|█▎        | 203/1563 [00:17<01:47, 12.70it/s]


Batch 200:
  Loss: 7.3750
  Grad norm: 20.0938
  Logit scale: 100.0000
  Max logit: 29.7969
  Min logit: 10.1797


Epoch 10:  19%|█▉        | 303/1563 [00:25<01:37, 12.92it/s]


Batch 300:
  Loss: 7.3633
  Grad norm: 20.5625
  Logit scale: 100.0000
  Max logit: 29.9531
  Min logit: 12.7422


Epoch 10:  26%|██▌       | 403/1563 [00:32<01:32, 12.53it/s]


Batch 400:
  Loss: 6.8281
  Grad norm: 18.7656
  Logit scale: 100.0000
  Max logit: 30.6719
  Min logit: 10.2422


Epoch 10:  32%|███▏      | 503/1563 [00:41<01:26, 12.32it/s]


Batch 500:
  Loss: 7.0938
  Grad norm: 20.2656
  Logit scale: 100.0000
  Max logit: 30.5781
  Min logit: 10.5312


Epoch 10:  39%|███▊      | 603/1563 [00:49<01:17, 12.39it/s]


Batch 600:
  Loss: 7.0078
  Grad norm: 19.2969
  Logit scale: 100.0000
  Max logit: 30.9844
  Min logit: 11.0156


Epoch 10:  45%|████▍     | 703/1563 [00:57<01:08, 12.56it/s]


Batch 700:
  Loss: 7.0430
  Grad norm: 19.6562
  Logit scale: 100.0000
  Max logit: 32.3438
  Min logit: 10.8984


Epoch 10:  51%|█████▏    | 803/1563 [01:05<01:00, 12.53it/s]


Batch 800:
  Loss: 7.3789
  Grad norm: 20.5781
  Logit scale: 100.0000
  Max logit: 30.4062
  Min logit: 10.7031


Epoch 10:  58%|█████▊    | 903/1563 [01:13<00:52, 12.47it/s]


Batch 900:
  Loss: 7.7578
  Grad norm: 20.8125
  Logit scale: 100.0000
  Max logit: 31.1250
  Min logit: 10.5781


Epoch 10:  64%|██████▍   | 1003/1563 [01:21<00:44, 12.63it/s]


Batch 1000:
  Loss: 6.9141
  Grad norm: 18.7812
  Logit scale: 100.0000
  Max logit: 33.5000
  Min logit: 11.4453


Epoch 10:  71%|███████   | 1103/1563 [01:29<00:36, 12.61it/s]


Batch 1100:
  Loss: 6.8594
  Grad norm: 19.6875
  Logit scale: 100.0000
  Max logit: 29.0625
  Min logit: 10.9922


Epoch 10:  77%|███████▋  | 1203/1563 [01:37<00:29, 12.40it/s]


Batch 1200:
  Loss: 6.8438
  Grad norm: 18.6562
  Logit scale: 100.0000
  Max logit: 30.7500
  Min logit: 10.7422


Epoch 10:  83%|████████▎ | 1303/1563 [01:46<00:20, 12.40it/s]


Batch 1300:
  Loss: 7.0234
  Grad norm: 20.9062
  Logit scale: 100.0000
  Max logit: 31.8906
  Min logit: 9.3672


Epoch 10:  90%|████████▉ | 1403/1563 [01:54<00:12, 12.38it/s]


Batch 1400:
  Loss: 6.9375
  Grad norm: 19.8438
  Logit scale: 100.0000
  Max logit: 29.7344
  Min logit: 10.3672


Epoch 10:  96%|█████████▌| 1503/1563 [02:02<00:04, 12.39it/s]


Batch 1500:
  Loss: 7.2109
  Grad norm: 19.3438
  Logit scale: 100.0000
  Max logit: 30.7031
  Min logit: 10.5938


Epoch 10: 100%|██████████| 1563/1563 [02:07<00:00, 12.22it/s]


Epoch 10:
  Avg Train Loss: 7.0461
  Avg Grad Norm: 20.0548



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- A

  Val Acc: 62.77%

Evaluating with fine-tuned models...


  0%|          | 0/157 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling 


--- Batch 0 examples ---

Image 0:
  True class: go-kart
  CLIP prediction: go-kart
  ICE prediction: go-kart
  Captions:
    a photo of: a photo of a group of people sitting on the ground
    describe specifically: describe specifically, the police are not allowed
    be concise: be concisement of the police
  CLIP confidence: 87.89%
  ICE confidence: 87.89%

Image 1:
  True class: reel
  CLIP prediction: seashore
  ICE prediction: seashore
  Captions:
    a photo of: a photo of a sheep
    describe specifically: describe specifically, the sheep are not the same breed
    be concise: be concisely, the sheep is a very large animal
  CLIP confidence: 48.27%
  ICE confidence: 48.27%

Image 2:
  True class: refrigerator
  CLIP prediction: beacon
  ICE prediction: beacon
  Captions:
    a photo of: a photo of a city with a clock
    describe specifically: describe specifically, the image is not a blur
    be concise: be concisely, the best of all time
  CLIP confidence: 9.64%
  ICE confid

  1%|▏         | 2/157 [01:11<1:31:56, 35.59s/it]


--- Batch 1 examples ---

Image 64:
  True class: fountain
  CLIP prediction: beacon
  ICE prediction: fountain
  Captions:
    a photo of: a photo of the fountain at night
    describe specifically: describe specifically, the fountain is a symbol of the city of paris
    be concise: be concise - the fountain of light
  CLIP confidence: 32.86%
  ICE confidence: 34.94%

Image 65:
  True class: water tower
  CLIP prediction: water tower
  ICE prediction: water tower
  Captions:
    a photo of: a photo of a field with trees and a building in the background
    describe specifically: describe specifically, the land is a lot of land that is not a lot of land, but the land is a lot of land that is
    be concise: be concise - real estate for sale in san
  CLIP confidence: 57.23%
  ICE confidence: 57.23%

Image 66:
  True class: maypole
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a tent with people standing around it
    describe specifically: 

  2%|▏         | 3/157 [01:48<1:33:05, 36.27s/it]


--- Batch 2 examples ---

Image 128:
  True class: reel
  CLIP prediction: lakeside
  ICE prediction: lakeside
  Captions:
    a photo of: a photo of a man fishing on a lake
    describe specifically: describe specifically, the fly is a fly, but the fly is a fly
    be concise: be concisely, the best fly for trout
  CLIP confidence: 11.19%
  ICE confidence: 12.82%

Image 129:
  True class: brass
  CLIP prediction: cannon
  ICE prediction: cannon
  Captions:
    a photo of: a photo of a cat in the woods
    describe specifically: describe specifically, the cat is a white cat
    be concise: be concisely, the cat is a white cat
  CLIP confidence: 56.64%
  ICE confidence: 56.64%

Image 130:
  True class: bullfrog
  CLIP prediction: tailed frog
  ICE prediction: tailed frog
  Captions:
    a photo of: a photo of a lizard on a rock
    describe specifically: describe specifically, the frog is a member of the family of the family
    be concise: be concisement - a new species of the common 

  3%|▎         | 4/157 [02:26<1:33:46, 36.77s/it]


--- Batch 3 examples ---

Image 192:
  True class: black widow
  CLIP prediction: black widow
  ICE prediction: black widow
  Captions:
    a photo of: a photo of a spider on a piece of wood
    describe specifically: describe specifically, the spider is a type of spider
    be concise: be concisement of the spider
  CLIP confidence: 86.77%
  ICE confidence: 86.82%

Image 193:
  True class: sombrero
  CLIP prediction: sombrero
  ICE prediction: sombrero
  Captions:
    a photo of: a photo of two people posing for a picture
    describe specifically: describe specifically, the children are not the same person
    be concise: be concise - latin band / latin entertainment in los, california
  CLIP confidence: 58.89%
  ICE confidence: 58.89%

Image 194:
  True class: candle
  CLIP prediction: candle
  ICE prediction: candle
  Captions:
    a photo of: a photo of a candle
    describe specifically: describe specifically, the candle is a candle that is a candle that is a candle that is a ca

  3%|▎         | 5/157 [03:06<1:35:59, 37.89s/it]


--- Batch 4 examples ---

Image 256:
  True class: beach wagon
  CLIP prediction: limousine
  ICE prediction: limousine
  Captions:
    a photo of: a photo of a blue car driving on a bridge
    describe specifically: describe specifically, the vehicle is a vehicle that is not a vehicle
    be concise: be concisely with the new ford
  CLIP confidence: 48.68%
  ICE confidence: 48.71%

Image 257:
  True class: lion
  CLIP prediction: lion
  ICE prediction: lion
  Captions:
    a photo of: a photo of a lion in the wild
    describe specifically: describe specifically, the elephant is a lion
    be concise: be concisely, the elephant is a very large animal
  CLIP confidence: 70.46%
  ICE confidence: 76.95%

Image 258:
  True class: freight car
  CLIP prediction: freight car
  ICE prediction: freight car
  Captions:
    a photo of: a photo of a train on the tracks
    describe specifically: describe specifically, the name of the company is the name of the company
    be concise: be concise 

  4%|▍         | 6/157 [03:43<1:34:37, 37.60s/it]


--- Batch 5 examples ---

Image 320:
  True class: hourglass
  CLIP prediction: torch
  ICE prediction: torch
  Captions:
    a photo of: a photo of a hour hour hour hour hour hour hour hour hour hour hour hour hour hour hour hour hour
    describe specifically: describe specifically, the time is right for you to be
    be concise: be concisement - the best of the best
  CLIP confidence: 30.79%
  ICE confidence: 30.79%

Image 321:
  True class: sandal
  CLIP prediction: sandal
  ICE prediction: sandal
  Captions:
    a photo of: a photo of a light fixture
    describe specifically: describe specifically by the number of the bulbs
    be concise: be concise leds
  CLIP confidence: 43.70%
  ICE confidence: 43.70%

Image 322:
  True class: bullet train
  CLIP prediction: bullet train
  ICE prediction: bullet train
  Captions:
    a photo of: a photo of a train on the tracks
    describe specifically: describe specifically, the train is a train
    be concise: be concise train
  CLIP conf

  4%|▍         | 7/157 [04:22<1:35:38, 38.26s/it]


--- Batch 6 examples ---

Image 384:
  True class: jellyfish
  CLIP prediction: jellyfish
  ICE prediction: jellyfish
  Captions:
    a photo of: a photo of a guitar with a blue background
    describe specifically: describe specifically, the guitar is a very important instrument
    be concise: be concise - the best of the best
  CLIP confidence: 37.30%
  ICE confidence: 37.30%

Image 385:
  True class: orangutan
  CLIP prediction: orangutan
  ICE prediction: orangutan
  Captions:
    a photo of: a photo of a baby gorilla in the wild
    describe specifically: describe specifically, the gorilla is a type of primate
    be concise: be concisely, be concisely
  CLIP confidence: 98.14%
  ICE confidence: 98.14%

Image 386:
  True class: water jug
  CLIP prediction: bucket
  ICE prediction: bucket
  Captions:
    a photo of: a photo of a group of baskets
    describe specifically: describe specifically by the size of the baskets
    be concise: be concisel baskets
  CLIP confidence: 37.89

  5%|▌         | 8/157 [05:00<1:34:14, 37.95s/it]


--- Batch 7 examples ---

Image 448:
  True class: cliff dwelling
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a man in a boat
    describe specifically: describe specifically, the water is the same as the land
    be concise: be concise - the best of the best
  CLIP confidence: 86.43%
  ICE confidence: 86.43%

Image 449:
  True class: snorkel
  CLIP prediction: snorkel
  ICE prediction: snorkel
  Captions:
    a photo of: a photo of a small boat in the water
    describe specifically: describe specifically, the light is the same as the light
    be concise: be concisement of the light
  CLIP confidence: 47.39%
  ICE confidence: 47.39%

Image 450:
  True class: desk
  CLIP prediction: altar
  ICE prediction: altar
  Captions:
    a photo of: a photo of a bed with a red and blue blanket
    describe specifically: describe specifically, the color of the quilt is red, white, and blue
    be concise: be concise - the best of the

  6%|▌         | 9/157 [05:37<1:33:29, 37.90s/it]


--- Batch 8 examples ---

Image 512:
  True class: beacon
  CLIP prediction: beacon
  ICE prediction: beacon
  Captions:
    a photo of: a photo of a lighthouse on the beach
    describe specifically: describe specifically, the lighthouse is a symbol of the lighthouses of the outer and inner states
    be concise: be concise lighthouse
  CLIP confidence: 58.69%
  ICE confidence: 62.94%

Image 513:
  True class: cliff
  CLIP prediction: seashore
  ICE prediction: seashore
  Captions:
    a photo of: a photo of a dog on the beach
    describe specifically: describe specifically, the dog is a dog
    be concise: be concisely, be concisely, be concisely, be concisely be conly, be conly be con
  CLIP confidence: 14.53%
  ICE confidence: 18.10%

Image 514:
  True class: scorpion
  CLIP prediction: scorpion
  ICE prediction: scorpion
  Captions:
    a photo of: a photo of a small lizard on a white surface
    describe specifically: describe specifically, the number of the two species of the 

  6%|▋         | 10/157 [06:13<1:30:45, 37.04s/it]


--- Batch 9 examples ---

Image 576:
  True class: monarch
  CLIP prediction: monarch
  ICE prediction: monarch
  Captions:
    a photo of: a photo of a butterfly on a leaf
    describe specifically: describe specifically, the butterfly is a member of the family
    be concise: be concisely, be concisely
  CLIP confidence: 97.17%
  ICE confidence: 97.31%

Image 577:
  True class: pay-phone
  CLIP prediction: pay-phone
  ICE prediction: pay-phone
  Captions:
    a photo of: a photo of a parking meter in a city
    describe specifically: describe specifically, the public parking meter is a common feature in the city
    be concise: be concisely, the new way to use the public parking meter
  CLIP confidence: 88.77%
  ICE confidence: 88.77%

Image 578:
  True class: pay-phone
  CLIP prediction: remote control
  ICE prediction: remote control
  Captions:
    a photo of: a photo of a group of socks with different colors
    describe specifically: describe specifically, the number of the num

  7%|▋         | 11/157 [06:49<1:29:34, 36.81s/it]


--- Batch 10 examples ---

Image 640:
  True class: police van
  CLIP prediction: police van
  ICE prediction: police van
  Captions:
    a photo of: a photo of a police van parked in a street
    describe specifically: describe specifically, the police are responsible and responsible
    be concise: be concise police vehicle
  CLIP confidence: 99.27%
  ICE confidence: 104.98%

Image 641:
  True class: thatch
  CLIP prediction: thatch
  ICE prediction: thatch
  Captions:
    a photo of: a photo of a house in the middle of a field
    describe specifically: describe specifically, the land of the first settlers of the united states, and the land of the first settlers of the united states
    be concise: be concise - the becond house
  CLIP confidence: 73.83%
  ICE confidence: 73.83%

Image 642:
  True class: walking stick
  CLIP prediction: mantis
  ICE prediction: mantis
  Captions:
    a photo of: a photo of a man on a skateboard
    describe specifically: describe specifically by the

  8%|▊         | 12/157 [07:25<1:28:37, 36.67s/it]


--- Batch 11 examples ---

Image 704:
  True class: boa constrictor
  CLIP prediction: boa constrictor
  ICE prediction: boa constrictor
  Captions:
    a photo of: a photo of a bird flying in the night sky
    describe specifically: describe specifically, the bird is a bird
    be concise: be concise - the best of the best
  CLIP confidence: 81.93%
  ICE confidence: 81.93%

Image 705:
  True class: refrigerator
  CLIP prediction: refrigerator
  ICE prediction: refrigerator
  Captions:
    a photo of: a photo of a glass door with a sign
    describe specifically: describe specifically, the glass is a very clear material
    be concise: be concisely, the glass is clear
  CLIP confidence: 85.60%
  ICE confidence: 85.60%

Image 706:
  True class: tabby
  CLIP prediction: Egyptian cat
  ICE prediction: tabby
  Captions:
    a photo of: a photo of a cat sitting on a table
    describe specifically: describe specifically, this is a cat
    be concise: be concisely a very large orange tabby


  8%|▊         | 13/157 [08:02<1:28:21, 36.81s/it]


--- Batch 12 examples ---

Image 768:
  True class: police van
  CLIP prediction: police van
  ICE prediction: police van
  Captions:
    a photo of: a photo of a police car driving down a road
    describe specifically: describe specifically, the police car is a police car
    be concise: be concise police car
  CLIP confidence: 72.90%
  ICE confidence: 79.35%

Image 769:
  True class: sea slug
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a coral with a red coral in the background
    describe specifically: describe specifically, the coral is a coral that is a coral that is a coral that is a coral that is a coral that is a coral
    be concise: be concise - the best of the best
  CLIP confidence: 32.47%
  ICE confidence: 32.47%

Image 770:
  True class: candle
  CLIP prediction: candle
  ICE prediction: candle
  Captions:
    a photo of: a photo of a group of people sitting around a table
    describe specifically: describe spe

  9%|▉         | 14/157 [08:40<1:28:10, 37.00s/it]


--- Batch 13 examples ---

Image 832:
  True class: teapot
  CLIP prediction: teapot
  ICE prediction: teapot
  Captions:
    a photo of: a photo of a green vase on a table
    describe specifically: describe specifically, the glass is a very important material
    be concise: be concise green glass vase
  CLIP confidence: 83.54%
  ICE confidence: 83.54%

Image 833:
  True class: bucket
  CLIP prediction: bucket
  ICE prediction: bucket
  Captions:
    a photo of: a photo of a cup of coffee
    describe specifically: describe specifically by the cup
    be concise: be concise - white cup
  CLIP confidence: 77.54%
  ICE confidence: 77.59%

Image 834:
  True class: dam
  CLIP prediction: dam
  ICE prediction: dam
  Captions:
    a photo of: a photo of a bridge over a river
    describe specifically: describe specifically, the water is very clear
    be concise: be concise - the bridge
  CLIP confidence: 77.59%
  ICE confidence: 77.69%

Image 835:
  True class: barbershop
  CLIP predicti

 10%|▉         | 15/157 [09:18<1:28:42, 37.48s/it]


--- Batch 14 examples ---

Image 896:
  True class: American alligator
  CLIP prediction: bullfrog
  ICE prediction: bullfrog
  Captions:
    a photo of: a photo of a small insect on the ground
    describe specifically: describe specifically, the insects are not related to the species
    be concise: be concisement of a black beetle
  CLIP confidence: 43.29%
  ICE confidence: 43.29%

Image 897:
  True class: standard poodle
  CLIP prediction: Yorkshire terrier
  ICE prediction: Yorkshire terrier
  Captions:
    a photo of: a photo of a dog sitting in the grass
    describe specifically: describe specifically poo poo poo poo poo poo poo poo poo poo poo poo poo poo poo poo poo poo poo poo poo poo poo po
    be concise: be concisely, a miniature poodle poo
  CLIP confidence: 11.32%
  ICE confidence: 11.32%

Image 898:
  True class: comic book
  CLIP prediction: basketball
  ICE prediction: basketball
  Captions:
    a photo of: a photo of a woman in a pink shirt
    describe specificall

 10%|█         | 16/157 [09:54<1:27:04, 37.05s/it]


--- Batch 15 examples ---

Image 960:
  True class: ice lolly
  CLIP prediction: ice lolly
  ICE prediction: ice lolly
  Captions:
    a photo of: a photo of a baby
    describe specifically: describe specifically, the baby is a baby
    be concise: be concisely, baby is in a high chair
  CLIP confidence: 43.38%
  ICE confidence: 43.38%

Image 961:
  True class: water tower
  CLIP prediction: water tower
  ICE prediction: water tower
  Captions:
    a photo of: a photo of a water tower in the middle of a lake
    describe specifically: describe specifically, the water is the most important to the environment
    be concise: be concise - the water tower
  CLIP confidence: 68.51%
  ICE confidence: 75.20%

Image 962:
  True class: goose
  CLIP prediction: goose
  ICE prediction: goose
  Captions:
    a photo of: a photo of a bird flying over a body of water
    describe specifically: describe specifically, the geese are the most common birds
    be concise: be concisely, the geese are a 

 11%|█         | 17/157 [10:34<1:28:09, 37.78s/it]


--- Batch 16 examples ---

Image 1024:
  True class: candle
  CLIP prediction: confectionery
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a man sitting at a table with a cake
    describe specifically: describe specifically, the cake is a cake
    be concise: be concisely, the cake is a little bit of red
  CLIP confidence: 16.80%
  ICE confidence: 23.90%

Image 1025:
  True class: frying pan
  CLIP prediction: frying pan
  ICE prediction: frying pan
  Captions:
    a photo of: a photo of a fried egg in a pan
    describe specifically: describe specifically, the egg is the only egg in the egg
    be concise: be concisely, the light is a bit of the light
  CLIP confidence: 75.00%
  ICE confidence: 75.93%

Image 1026:
  True class: potpie
  CLIP prediction: potpie
  ICE prediction: potpie
  Captions:
    a photo of: a photo of a baked pie
    describe specifically: describe specifically, the bread is a good thing to eat
    be concise: be concisely with this del

 11%|█▏        | 18/157 [11:12<1:27:29, 37.77s/it]


--- Batch 17 examples ---

Image 1088:
  True class: trilobite
  CLIP prediction: acorn
  ICE prediction: acorn
  Captions:
    a photo of: a photo of a rock in the ocean
    describe specifically: describe specifically, the earth is a planet that has a very large mass of matter
    be concise: be concisement - the golden nugg
  CLIP confidence: 42.65%
  ICE confidence: 42.65%

Image 1089:
  True class: pomegranate
  CLIP prediction: pomegranate
  ICE prediction: pomegranate
  Captions:
    a photo of: a photo of a red flower in the grass
    describe specifically: describe specifically, the red ball is a symbol of the red ball
    be concise: be concisement - red ball
  CLIP confidence: 80.66%
  ICE confidence: 80.66%

Image 1090:
  True class: beaker
  CLIP prediction: beaker
  ICE prediction: beaker
  Captions:
    a photo of: a photo of three plastic beaks
    describe specifically: describe specifically for the use of the glass
    be concise: be concisement test tube
  CLIP conf

 12%|█▏        | 19/157 [11:48<1:25:46, 37.29s/it]


--- Batch 18 examples ---

Image 1152:
  True class: teddy
  CLIP prediction: Arabian camel
  ICE prediction: Arabian camel
  Captions:
    a photo of: a photo of a man in a suit and tie
    describe specifically: describe specifically, the following the following the following the following the following the following the following the following the following
    be concise: be concise - the best of the best
  CLIP confidence: 16.02%
  ICE confidence: 16.02%

Image 1153:
  True class: cardigan
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a man in a suit and tie
    describe specifically: describe specifically, the jacket is a good fit for the body
    be concise: be concise - the best of the best
  CLIP confidence: 43.46%
  ICE confidence: 43.51%

Image 1154:
  True class: sewing machine
  CLIP prediction: sewing machine
  ICE prediction: sewing machine
  Captions:
    a photo of: a photo of a sewing machine
    describe specifically: 

 13%|█▎        | 20/157 [12:28<1:27:18, 38.24s/it]


--- Batch 19 examples ---

Image 1216:
  True class: punching bag
  CLIP prediction: punching bag
  ICE prediction: punching bag
  Captions:
    a photo of: a photo of a man punching a punching punching punching punching
    describe specifically: describe specifically, the punching technique is a key component of the fight
    be concise: be conciser punching punching punching punching punching punching punching punching punching punching punching punching punching punching
  CLIP confidence: 52.93%
  ICE confidence: 57.03%

Image 1217:
  True class: lion
  CLIP prediction: lion
  ICE prediction: lion
  Captions:
    a photo of: a photo of a lion laying on the ground
    describe specifically: describe specifically, the lion is a male
    be concise: be concisely, be concisely, be concisely, be concisely, be concisely be concisely be concisely be be be be be
  CLIP confidence: 83.50%
  ICE confidence: 89.65%

Image 1218:
  True class: brain coral
  CLIP prediction: sea cucumber
  ICE

 13%|█▎        | 21/157 [13:05<1:25:40, 37.80s/it]


--- Batch 20 examples ---

Image 1280:
  True class: pill bottle
  CLIP prediction: pill bottle
  ICE prediction: pill bottle
  Captions:
    a photo of: a photo of a person laying on the floor
    describe specifically: describe specifically, the person is a person
    be concise: be concisely, the person is lying on the floor
  CLIP confidence: 98.88%
  ICE confidence: 98.88%

Image 1281:
  True class: potpie
  CLIP prediction: potpie
  ICE prediction: potpie
  Captions:
    a photo of: a photo of a plate of food with a fork
    describe specifically: describe specifically, the chicken is a very good meal
    be concise: be concisely, the best chicken dish
  CLIP confidence: 67.72%
  ICE confidence: 67.72%

Image 1282:
  True class: refrigerator
  CLIP prediction: refrigerator
  ICE prediction: refrigerator
  Captions:
    a photo of: a photo of a refrigerator and a refrigerator freezer
    describe specifically: describe specifically, the refrigerator is a refrigerator
    be conci

 14%|█▍        | 22/157 [13:41<1:24:07, 37.39s/it]


--- Batch 21 examples ---

Image 1344:
  True class: tarantula
  CLIP prediction: thatch
  ICE prediction: thatch
  Captions:
    a photo of: a photo of a mountain with a blue sky
    describe specifically: describe specifically, the land is a land that is not a land, but the land is a land that is not a land
    be concise: be concise - the best way to get to the beach
  CLIP confidence: 14.20%
  ICE confidence: 14.20%

Image 1345:
  True class: confectionery
  CLIP prediction: confectionery
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a gold bar top
    describe specifically: describe specifically, the gold is the most precious
    be concise: be concise gold leaf table
  CLIP confidence: 92.04%
  ICE confidence: 92.14%

Image 1346:
  True class: lawn mower
  CLIP prediction: lawn mower
  ICE prediction: lawn mower
  Captions:
    a photo of: a photo of a field with sheep and a man
    describe specifically: describe specifically, the grass is the most impo

 15%|█▍        | 23/157 [14:19<1:23:51, 37.55s/it]


--- Batch 22 examples ---

Image 1408:
  True class: sea slug
  CLIP prediction: slug
  ICE prediction: slug
  Captions:
    a photo of: a photo of a snake with its mouth open
    describe specifically: describe specifically, the snake is a very large, long - necked, venom - like creature
    be concise: be concisement of the corn corn
  CLIP confidence: 30.10%
  ICE confidence: 30.15%

Image 1409:
  True class: brown bear
  CLIP prediction: baboon
  ICE prediction: baboon
  Captions:
    a photo of: a photo of a bear on a rock
    describe specifically: describe specifically, the bear is a member of the family
    be concise: be concise - bear on the rocks
  CLIP confidence: 31.69%
  ICE confidence: 31.69%

Image 1410:
  True class: bell pepper
  CLIP prediction: bell pepper
  ICE prediction: bell pepper
  Captions:
    a photo of: a photo of a plate of tomatoes and peppers
    describe specifically: describe specifically, the pepper is a pepper pepper pepper pepper pepper pepper pep

 15%|█▌        | 24/157 [14:56<1:22:43, 37.32s/it]


--- Batch 23 examples ---

Image 1472:
  True class: Arabian camel
  CLIP prediction: Arabian camel
  ICE prediction: Arabian camel
  Captions:
    a photo of: a photo of a group of people riding camels in the desert
    describe specifically: describe specifically, the camels are not the only animals
    be concise: be concisely, the camels of the desert
  CLIP confidence: 72.46%
  ICE confidence: 77.05%

Image 1473:
  True class: cannon
  CLIP prediction: flagpole
  ICE prediction: flagpole
  Captions:
    a photo of: a photo of a statue of a man with a flag
    describe specifically: describe specifically, the canadian flag is a symbol of the canadian culture
    be concise: be concised by the canadian flag
  CLIP confidence: 91.36%
  ICE confidence: 96.83%

Image 1474:
  True class: butcher shop
  CLIP prediction: butcher shop
  ICE prediction: butcher shop
  Captions:
    a photo of: a photo of a person holding a bunch of strawberries
    describe specifically: describe specifica

 16%|█▌        | 25/157 [15:31<1:20:16, 36.49s/it]


--- Batch 24 examples ---

Image 1536:
  True class: barn
  CLIP prediction: barn
  ICE prediction: barn
  Captions:
    a photo of: a photo of a red barn in the snow
    describe specifically: describe specifically, the red barn is a great place to stay
    be concise: be concise farm bed and breakfast
  CLIP confidence: 92.82%
  ICE confidence: 98.63%

Image 1537:
  True class: nail
  CLIP prediction: abacus
  ICE prediction: abacus
  Captions:
    a photo of: a photo of a gold ring
    describe specifically: describe specifically in the form of a ring
    be concise: be concisement ring - gold
  CLIP confidence: 29.83%
  ICE confidence: 29.83%

Image 1538:
  True class: trilobite
  CLIP prediction: trilobite
  ICE prediction: trilobite
  Captions:
    a photo of: a photo of a man in a suit
    describe specifically: describe specifically on the front of the vehicle
    be concise: be concisement of the head
  CLIP confidence: 71.58%
  ICE confidence: 71.58%

Image 1539:
  True clas

 17%|█▋        | 26/157 [16:09<1:20:54, 37.06s/it]


--- Batch 25 examples ---

Image 1600:
  True class: cliff dwelling
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a man in a suit and tie
    describe specifically: describe specifically, the soil is the soil of the soil
    be concise: be concise - the best of the best
  CLIP confidence: 48.34%
  ICE confidence: 48.34%

Image 1601:
  True class: sewing machine
  CLIP prediction: sewing machine
  ICE prediction: sewing machine
  Captions:
    a photo of: a photo of a man in a suit and tie
    describe specifically: describe specifically, the following is the same as the following
    be concise: be concisement - a - a - a - b - c - c - c - c - c - c - c - c
  CLIP confidence: 15.80%
  ICE confidence: 15.80%

Image 1602:
  True class: espresso
  CLIP prediction: espresso
  ICE prediction: espresso
  Captions:
    a photo of: a photo of a cup of coffee and a plate of food
    describe specifically: describe specifically, the co

 17%|█▋        | 27/157 [16:46<1:20:25, 37.12s/it]


--- Batch 26 examples ---

Image 1664:
  True class: scoreboard
  CLIP prediction: scoreboard
  ICE prediction: scoreboard
  Captions:
    a photo of: a photo of a stadium with a large screen
    describe specifically: describe specifically, the stadium is a place where the players are allowed
    be concise: be concise - the best of the best
  CLIP confidence: 92.63%
  ICE confidence: 92.77%

Image 1665:
  True class: police van
  CLIP prediction: police van
  ICE prediction: police van
  Captions:
    a photo of: a photo of a van parked in a driveway
    describe specifically: describe specifically, the vehicle is a van
    be concise: be concise van
  CLIP confidence: 98.44%
  ICE confidence: 100.29%

Image 1666:
  True class: vestment
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a city at night
    describe specifically: describe specifically, the city is a place where the city is a place where the city is a place where the city is 

 18%|█▊        | 28/157 [17:25<1:20:42, 37.54s/it]


--- Batch 27 examples ---

Image 1728:
  True class: grasshopper
  CLIP prediction: mantis
  ICE prediction: mantis
  Captions:
    a photo of: a photo of a pink dress with a green flower on it
    describe specifically: describe specifically, the green lizard is a very large lizard
    be concise: be concisely, be concisely, be concisely be concisely be concisely be concisely be con
  CLIP confidence: 48.34%
  ICE confidence: 48.34%

Image 1729:
  True class: goose
  CLIP prediction: goose
  ICE prediction: goose
  Captions:
    a photo of: a photo of a swan
    describe specifically: describe specifically, the birds are not in the water
    be concise: be concisely, the swan
  CLIP confidence: 51.03%
  ICE confidence: 58.01%

Image 1730:
  True class: stopwatch
  CLIP prediction: stopwatch
  ICE prediction: stopwatch
  Captions:
    a photo of: a photo of a person holding a clock
    describe specifically: describe specifically, the time is the most important part of the day
    be 

 18%|█▊        | 29/157 [18:01<1:19:18, 37.17s/it]


--- Batch 28 examples ---

Image 1792:
  True class: tractor
  CLIP prediction: tractor
  ICE prediction: tractor
  Captions:
    a photo of: a photo of a tractor with a trailer behind it
    describe specifically: describe specifically, the tractor is a good choice for the tractor
    be concise: be concise tractor for sale
  CLIP confidence: 93.90%
  ICE confidence: 99.76%

Image 1793:
  True class: academic gown
  CLIP prediction: academic gown
  ICE prediction: academic gown
  Captions:
    a photo of: a photo of a woman in a police uniform
    describe specifically: describe specifically, the police officer is a good person
    be concise: be concisement of the police
  CLIP confidence: 52.39%
  ICE confidence: 52.39%

Image 1794:
  True class: cliff dwelling
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a house in the mountains
    describe specifically: describe specifically, the red rock is a symbol of the red rock fo

 19%|█▉        | 30/157 [18:39<1:18:57, 37.30s/it]


--- Batch 29 examples ---

Image 1856:
  True class: broom
  CLIP prediction: lawn mower
  ICE prediction: lawn mower
  Captions:
    a photo of: a photo of a group of people riding on a bike
    describe specifically: describe specifically, the car is a vehicle
    be concise: be concisely, the green color of the car
  CLIP confidence: 68.90%
  ICE confidence: 68.90%

Image 1857:
  True class: crane
  CLIP prediction: seashore
  ICE prediction: seashore
  Captions:
    a photo of: a photo of a sunset
    describe specifically: describe specifically, the water is calm
    be concise: be concisely, the sky is pink
  CLIP confidence: 27.81%
  ICE confidence: 29.42%

Image 1858:
  True class: standard poodle
  CLIP prediction: standard poodle
  ICE prediction: standard poodle
  Captions:
    a photo of: a photo of two white pup sitting on a table
    describe specifically: describe specifically, the poodle poodle poodle poodle poodle poodle poodle poodle poodle poodle poodle poodle
    b

 20%|█▉        | 31/157 [19:17<1:18:54, 37.58s/it]


--- Batch 30 examples ---

Image 1920:
  True class: teapot
  CLIP prediction: confectionery
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a red object on a table
    describe specifically: describe specifically in the following the red color
    be concise: be concisement red
  CLIP confidence: 23.97%
  ICE confidence: 27.20%

Image 1921:
  True class: refrigerator
  CLIP prediction: pay-phone
  ICE prediction: pay-phone
  Captions:
    a photo of: a photo of a man in a suit and tie
    describe specifically: describe specifically, the number of the cars is the number of the cars
    be concise: be concise - the best of the best
  CLIP confidence: 6.35%
  ICE confidence: 6.35%

Image 1922:
  True class: bell pepper
  CLIP prediction: bell pepper
  ICE prediction: bell pepper
  Captions:
    a photo of: a photo of a bunch of oranges
    describe specifically: describe specifically, the fruit tree is a fruit tree that grows on the ground
    be concise: be conc

 20%|██        | 32/157 [19:56<1:19:11, 38.01s/it]


--- Batch 31 examples ---

Image 1984:
  True class: frying pan
  CLIP prediction: wok
  ICE prediction: wok
  Captions:
    a photo of: a photo of a coffee maker with a cup of coffee
    describe specifically: describe specifically in the following the following the following
    be concise: be concisement cup
  CLIP confidence: 50.78%
  ICE confidence: 50.78%

Image 1985:
  True class: punching bag
  CLIP prediction: punching bag
  ICE prediction: punching bag
  Captions:
    a photo of: a photo of a man with a punching punching punching punching punching punching punching punching punching punching punching punching punching punching
    describe specifically: describe specifically, the product is a product that is not a product, but the product is a product that is a product that is a product
    be concise: be concisement training kit
  CLIP confidence: 47.22%
  ICE confidence: 47.22%

Image 1986:
  True class: monarch
  CLIP prediction: monarch
  ICE prediction: monarch
  Captio

 21%|██        | 33/157 [20:35<1:19:00, 38.23s/it]


--- Batch 32 examples ---

Image 2048:
  True class: beaker
  CLIP prediction: beaker
  ICE prediction: beaker
  Captions:
    a photo of: a photo of a yellow liquid being poured into a beakle
    describe specifically: describe specifically with the following of the yellow - green - green - green - green - green - green - green - green - green - green - green - green - green - - - - - - - - - - - - - - -
    be concise: be concisement for the use of therm
  CLIP confidence: 31.10%
  ICE confidence: 31.10%

Image 2049:
  True class: rugby ball
  CLIP prediction: rugby ball
  ICE prediction: rugby ball
  Captions:
    a photo of: a photo of a person playing soccer
    describe specifically: describe specifically, the player is a player
    be concise: be concisely, the player is running
  CLIP confidence: 90.72%
  ICE confidence: 90.72%

Image 2050:
  True class: ice cream
  CLIP prediction: mashed potato
  ICE prediction: mashed potato
  Captions:
    a photo of: a photo of a plate of

 22%|██▏       | 34/157 [21:15<1:19:44, 38.90s/it]


--- Batch 33 examples ---

Image 2112:
  True class: pay-phone
  CLIP prediction: Christmas stocking
  ICE prediction: Christmas stocking
  Captions:
    a photo of: a photo of a woman in a dress
    describe specifically: describe specifically, the dress is a little red dress with a heart pattern
    be concise: be concisely, be concisely, be concisely, be concisely be concisely be concisely be
  CLIP confidence: 19.35%
  ICE confidence: 19.35%

Image 2113:
  True class: birdhouse
  CLIP prediction: birdhouse
  ICE prediction: birdhouse
  Captions:
    a photo of: a photo of a black and white cat
    describe specifically: describe specifically, the shape of the triangle is the shape of the triangle
    be concise: be concise - the best of be con
  CLIP confidence: 7.84%
  ICE confidence: 7.84%

Image 2114:
  True class: trolleybus
  CLIP prediction: trolleybus
  ICE prediction: trolleybus
  Captions:
    a photo of: a photo of a bus parked in a parking
    describe specifically: des

 22%|██▏       | 35/157 [21:52<1:17:43, 38.23s/it]


--- Batch 34 examples ---

Image 2176:
  True class: butcher shop
  CLIP prediction: butcher shop
  ICE prediction: butcher shop
  Captions:
    a photo of: a photo of a bar with a neon sign
    describe specifically: describe specifically, the audience is the most important part of the audience
    be concise: be concise - live at the red rocks
  CLIP confidence: 96.19%
  ICE confidence: 96.19%

Image 2177:
  True class: goose
  CLIP prediction: goose
  ICE prediction: goose
  Captions:
    a photo of: a photo of a dog running in the grass
    describe specifically: describe specifically, the birds are not the same
    be concise: be concisely, the dog is a great companion for the sheep
  CLIP confidence: 96.39%
  ICE confidence: 97.12%

Image 2178:
  True class: cliff
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a black and white cat
    describe specifically: describe specifically, the term term term term term term term t

 23%|██▎       | 36/157 [22:27<1:15:15, 37.31s/it]


--- Batch 35 examples ---

Image 2240:
  True class: grasshopper
  CLIP prediction: dragonfly
  ICE prediction: dragonfly
  Captions:
    a photo of: a photo of a pink flower
    describe specifically: describe specifically, the flowers are pink and white
    be concise: be concise - pink - zinna
  CLIP confidence: 19.18%
  ICE confidence: 19.18%

Image 2241:
  True class: mashed potato
  CLIP prediction: plate
  ICE prediction: plate
  Captions:
    a photo of: a photo of a plate of food with a fork
    describe specifically: describe specifically, the food is not the same
    be concise: be concisely, the best way to eat
  CLIP confidence: 43.41%
  ICE confidence: 48.90%

Image 2242:
  True class: snail
  CLIP prediction: snail
  ICE prediction: snail
  Captions:
    a photo of: a photo of a snail on a leaf
    describe specifically: describe specifically, the snail is a member of the family
    be concise: be concise snail
  CLIP confidence: 94.53%
  ICE confidence: 100.39%

Image 

 24%|██▎       | 37/157 [23:03<1:13:48, 36.90s/it]


--- Batch 36 examples ---

Image 2304:
  True class: projectile
  CLIP prediction: turnstile
  ICE prediction: turnstile
  Captions:
    a photo of: a photo of a city with a lot of buildings
    describe specifically: describe specifically, the building is a complex of the city
    be concise: be concise - the new york
  CLIP confidence: 9.92%
  ICE confidence: 9.92%

Image 2305:
  True class: backpack
  CLIP prediction: backpack
  ICE prediction: backpack
  Captions:
    a photo of: a photo of a black bag with a red and white logo
    describe specifically: describe specifically in the corrective of the bag
    be concise: be concise bag
  CLIP confidence: 52.10%
  ICE confidence: 59.18%

Image 2306:
  True class: brass
  CLIP prediction: fountain
  ICE prediction: fountain
  Captions:
    a photo of: a photo of a large stone statue
    describe specifically: describe specifically, the stone is a very large, rectangular shape
    be concise: be concise - the monument
  CLIP confidenc

 24%|██▍       | 38/157 [23:38<1:12:17, 36.45s/it]


--- Batch 37 examples ---

Image 2368:
  True class: picket fence
  CLIP prediction: picket fence
  ICE prediction: picket fence
  Captions:
    a photo of: a photo of a white picket fence
    describe specifically: describe specifically, the bridge is a symbol of the city
    be concise: be concise the first of the new american fence
  CLIP confidence: 82.57%
  ICE confidence: 88.77%

Image 2369:
  True class: snail
  CLIP prediction: snail
  ICE prediction: snail
  Captions:
    a photo of: a photo of a squirrel walking on the road
    describe specifically: describe specifically, the tornt is a tornt
    be concise: be concisely, be concisely, be concisely, be concisely be concisely be concisely be
  CLIP confidence: 64.65%
  ICE confidence: 64.65%

Image 2370:
  True class: Yorkshire terrier
  CLIP prediction: standard poodle
  ICE prediction: standard poodle
  Captions:
    a photo of: a photo of a dog
    describe specifically: describe specifically, the dog is a golden retrieve

 25%|██▍       | 39/157 [24:15<1:11:47, 36.51s/it]


--- Batch 38 examples ---

Image 2432:
  True class: coral reef
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a sea turtle swimming in the ocean
    describe specifically: describe specifically, the water is the most important to the environment
    be concise: be concisely, the new book by john r brown
  CLIP confidence: 30.74%
  ICE confidence: 30.74%

Image 2433:
  True class: poncho
  CLIP prediction: poncho
  ICE prediction: poncho
  Captions:
    a photo of: a photo of a man in a white shirt
    describe specifically: describe specifically, the person is a person
    be concise: be concise - the best of the best
  CLIP confidence: 58.79%
  ICE confidence: 58.79%

Image 2434:
  True class: bison
  CLIP prediction: bison
  ICE prediction: bison
  Captions:
    a photo of: a photo of a bison in the grass
    describe specifically: describe specifically, the bison is a very large, furry animal
    be concise: be concisely, the 

 25%|██▌       | 40/157 [24:50<1:10:08, 35.97s/it]


--- Batch 39 examples ---

Image 2496:
  True class: orange
  CLIP prediction: lemon
  ICE prediction: lemon
  Captions:
    a photo of: a photo of a lemon
    describe specifically: describe specifically, the lemon is the most important ingredient
    be concise: be concisely, be concisely
  CLIP confidence: 83.74%
  ICE confidence: 89.65%

Image 2497:
  True class: bullet train
  CLIP prediction: bullet train
  ICE prediction: bullet train
  Captions:
    a photo of: a photo of a street with cars parked on it
    describe specifically: describe specifically, the car is a car
    be concise: be concisement - the best way to get a car
  CLIP confidence: 81.25%
  ICE confidence: 81.25%

Image 2498:
  True class: altar
  CLIP prediction: altar
  ICE prediction: altar
  Captions:
    a photo of: a photo of a church with a cross in the middle
    describe specifically: describe specifically, the church is a place where the congregation is located
    be concise: be concisely, the church o

 26%|██▌       | 41/157 [25:25<1:09:16, 35.83s/it]


--- Batch 40 examples ---

Image 2560:
  True class: dragonfly
  CLIP prediction: dragonfly
  ICE prediction: dragonfly
  Captions:
    a photo of: a photo of a bug on a green leaf
    describe specifically: describe specifically, the insects are not related
    be concise: be concisely, the dragonfly is a species of the dragonfly family
  CLIP confidence: 83.15%
  ICE confidence: 85.55%

Image 2561:
  True class: boa constrictor
  CLIP prediction: Persian cat
  ICE prediction: Persian cat
  Captions:
    a photo of: a photo of a man in a suit and tie
    describe specifically: describe specifically, the tree is a tree that is a tree that is a tree that is a tree that is a tree that is a tree
    be concise: be concise - the best of the best
  CLIP confidence: 8.47%
  ICE confidence: 8.47%

Image 2562:
  True class: water tower
  CLIP prediction: water tower
  ICE prediction: water tower
  Captions:
    a photo of: a photo of a water tower in the fog
    describe specifically: describ

 27%|██▋       | 42/157 [26:02<1:09:20, 36.18s/it]


--- Batch 41 examples ---

Image 2624:
  True class: altar
  CLIP prediction: altar
  ICE prediction: altar
  Captions:
    a photo of: a photo of a christmas tree with candles
    describe specifically: describe specifically, the christmas tree is a symbol of the holiday season
    be concise: be concise christmas
  CLIP confidence: 72.75%
  ICE confidence: 72.75%

Image 2625:
  True class: ladybug
  CLIP prediction: ladybug
  ICE prediction: ladybug
  Captions:
    a photo of: a photo of a yellow cake with a ladybug on top
    describe specifically: describe specifically, the yellow bird is a bird
    be concise: be concisely, the yellow cake
  CLIP confidence: 85.94%
  ICE confidence: 85.94%

Image 2626:
  True class: brain coral
  CLIP prediction: brain coral
  ICE prediction: brain coral
  Captions:
    a photo of: a photo of a man in a blue shirt
    describe specifically: describe specifically, the water is the most important to the environment
    be concise: be concise - the 

 27%|██▋       | 43/157 [26:42<1:10:29, 37.10s/it]


--- Batch 42 examples ---

Image 2688:
  True class: bison
  CLIP prediction: ox
  ICE prediction: ox
  Captions:
    a photo of: a photo of a bear that is laying down
    describe specifically: describe specifically, the bear is a very large, furry animal
    be concise: be concise - a bear
  CLIP confidence: 32.84%
  ICE confidence: 32.84%

Image 2689:
  True class: basketball
  CLIP prediction: basketball
  ICE prediction: basketball
  Captions:
    a photo of: a photo of a basketball game with the ball in the air
    describe specifically: describe specifically, the nba is a great sport
    be concise: be concisement, unicadoo de los clippers
  CLIP confidence: 73.05%
  ICE confidence: 79.44%

Image 2690:
  True class: picket fence
  CLIP prediction: picket fence
  ICE prediction: picket fence
  Captions:
    a photo of: a photo of a fence in the fog
    describe specifically: describe specifically, the fence is a good idea
    be concise: be concisely, be concisely
  CLIP confide

 28%|██▊       | 44/157 [27:18<1:09:15, 36.77s/it]


--- Batch 43 examples ---

Image 2752:
  True class: lifeboat
  CLIP prediction: lifeboat
  ICE prediction: lifeboat
  Captions:
    a photo of: a photo of a boat in the water
    describe specifically: describe specifically, the boat is a small boat
    be concise: be concise - the best infloor
  CLIP confidence: 99.61%
  ICE confidence: 99.61%

Image 2753:
  True class: African elephant
  CLIP prediction: African elephant
  ICE prediction: African elephant
  Captions:
    a photo of: a photo of a small village with a small house
    describe specifically: describe specifically, the elephant is a very large animal
    be concise: be concisely, the elephant
  CLIP confidence: 91.31%
  ICE confidence: 93.65%

Image 2754:
  True class: beer bottle
  CLIP prediction: beer bottle
  ICE prediction: beer bottle
  Captions:
    a photo of: a photo of a dog
    describe specifically: describe specifically, the dog is a black and white color
    be concise: be concisely, the cat is a very good

 29%|██▊       | 45/157 [27:52<1:07:15, 36.03s/it]


--- Batch 44 examples ---

Image 2816:
  True class: torch
  CLIP prediction: torch
  ICE prediction: torch
  Captions:
    a photo of: a photo of a man holding a frc
    describe specifically: describe specifically, the person is a person
    be concise: be concisely, the man is running in the street
  CLIP confidence: 40.36%
  ICE confidence: 40.36%

Image 2817:
  True class: golden retriever
  CLIP prediction: meat loaf
  ICE prediction: meat loaf
  Captions:
    a photo of: a photo of a dog playing with a toy
    describe specifically: describe specifically, the dog is a golden retrieve
    be concise: be concisely, a golden retrieve
  CLIP confidence: 10.57%
  ICE confidence: 10.57%

Image 2818:
  True class: bannister
  CLIP prediction: spider web
  ICE prediction: spider web
  Captions:
    a photo of: a photo of a building with a large window
    describe specifically: describe specifically, the two - dimensional plane is the same
    be concise: be concisement - a new era
  C

 29%|██▉       | 46/157 [28:27<1:05:55, 35.64s/it]


--- Batch 45 examples ---

Image 2880:
  True class: brain coral
  CLIP prediction: acorn
  ICE prediction: acorn
  Captions:
    a photo of: a photo of a man in a suit
    describe specifically: describe specifically, the most common phocant is the phocyst phocant
    be concise: be concisely, be concisely
  CLIP confidence: 42.26%
  ICE confidence: 42.26%

Image 2881:
  True class: gasmask
  CLIP prediction: neck brace
  ICE prediction: neck brace
  Captions:
    a photo of: a photo of a man with a blue hat
    describe specifically: describe specifically, the two men are fighting
    be concise: be concisely, the man is wearing a blue hat
  CLIP confidence: 30.96%
  ICE confidence: 30.96%

Image 2882:
  True class: scoreboard
  CLIP prediction: scoreboard
  ICE prediction: scoreboard
  Captions:
    a photo of: a photo of a baseball field with a ball in the middle
    describe specifically: describe specifically, the field is a good place to play
    be concise: be concisely, the b

 30%|██▉       | 47/157 [29:01<1:04:47, 35.34s/it]


--- Batch 46 examples ---

Image 2944:
  True class: espresso
  CLIP prediction: espresso
  ICE prediction: espresso
  Captions:
    a photo of: a photo of a cup of coffee on a table
    describe specifically: describe specifically, the coffee is a little bit of tea
    be concise: be concisely, the coffee is a little bit of tea
  CLIP confidence: 99.76%
  ICE confidence: 102.05%

Image 2945:
  True class: bee
  CLIP prediction: bee
  ICE prediction: bee
  Captions:
    a photo of: a photo of a spider on a flower
    describe specifically: describe specifically, the insect is a member of the family
    be concise: be concisement of a bee
  CLIP confidence: 21.75%
  ICE confidence: 29.30%

Image 2946:
  True class: goldfish
  CLIP prediction: goldfish
  ICE prediction: goldfish
  Captions:
    a photo of: a photo of a fire in the middle of a forest
    describe specifically: describe specifically, the color of the image is the same
    be concise: be concisely, the color of the sky is 

 31%|███       | 48/157 [29:38<1:05:06, 35.84s/it]


--- Batch 47 examples ---

Image 3008:
  True class: miniskirt
  CLIP prediction: miniskirt
  ICE prediction: miniskirt
  Captions:
    a photo of: a photo of a woman sitting on a chair
    describe specifically: describe specifically, the chair is not in use
    be concise: be concisely, be concisely
  CLIP confidence: 78.47%
  ICE confidence: 78.47%

Image 3009:
  True class: bighorn
  CLIP prediction: Labrador retriever
  ICE prediction: Labrador retriever
  Captions:
    a photo of: a photo of a horse in a field
    describe specifically: describe specifically, the horse is a very large, black horse
    be concise: be concisely, be concisely
  CLIP confidence: 26.17%
  ICE confidence: 26.17%

Image 3010:
  True class: space heater
  CLIP prediction: refrigerator
  ICE prediction: refrigerator
  Captions:
    a photo of: a photo of a red and white train
    describe specifically: describe specifically, the product is the product of the product
    be concise: be concise - the best 

 31%|███       | 49/157 [30:14<1:04:25, 35.79s/it]


--- Batch 48 examples ---

Image 3072:
  True class: organ
  CLIP prediction: organ
  ICE prediction: organ
  Captions:
    a photo of: a photo of a woman in a white dress
    describe specifically: describe specifically, the number of the number of the number of the number of the number of the number of the number of the number of the
    be concise: be concise - the best of the best
  CLIP confidence: 72.31%
  ICE confidence: 72.31%

Image 3073:
  True class: drumstick
  CLIP prediction: bucket
  ICE prediction: bucket
  Captions:
    a photo of: a photo of a man playing a drum
    describe specifically: describe specifically by the drummer
    be concise: be concise drum
  CLIP confidence: 27.76%
  ICE confidence: 28.27%

Image 3074:
  True class: black widow
  CLIP prediction: black widow
  ICE prediction: black widow
  Captions:
    a photo of: a photo of a spider crawling on the beach
    describe specifically: describe specifically, the spider is a member of the family of the s

 32%|███▏      | 50/157 [30:49<1:03:26, 35.57s/it]


--- Batch 49 examples ---

Image 3136:
  True class: mushroom
  CLIP prediction: mushroom
  ICE prediction: mushroom
  Captions:
    a photo of: a photo of a field with a small patch of grass
    describe specifically: describe specifically, the earth ' s surface is a flat surface
    be concise: be concisement of the moon
  CLIP confidence: 11.05%
  ICE confidence: 11.05%

Image 3137:
  True class: basketball
  CLIP prediction: volleyball
  ICE prediction: volleyball
  Captions:
    a photo of: a photo of a basketball game
    describe specifically: describe specifically, the basketball players are not the same
    be concise: be concisely, the best of the best
  CLIP confidence: 82.18%
  ICE confidence: 82.28%

Image 3138:
  True class: rocking chair
  CLIP prediction: rocking chair
  ICE prediction: rocking chair
  Captions:
    a photo of: a photo of a chair with a clock on it
    describe specifically: describe specifically, the chair is a chair that can be used for a variety of 

 32%|███▏      | 51/157 [31:24<1:02:45, 35.53s/it]


--- Batch 50 examples ---

Image 3200:
  True class: water jug
  CLIP prediction: kimono
  ICE prediction: kimono
  Captions:
    a photo of: a photo of a white and red flower
    describe specifically: describe specifically, the number of the flowers is the number of the flowers
    be concise: be concise - white flowers
  CLIP confidence: 19.01%
  ICE confidence: 19.01%

Image 3201:
  True class: maypole
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a group of people walking through a forest
    describe specifically: describe specifically, the tree is a tree that is a tree that is a tree that is a tree that is a tree that is a tree
    be concise: be concise - the best of the best
  CLIP confidence: 91.11%
  ICE confidence: 91.11%

Image 3202:
  True class: koala
  CLIP prediction: hog
  ICE prediction: hog
  Captions:
    a photo of: a photo of a wolf laying on the ground
    describe specifically: describe specifically, the wolf is a 

 33%|███▎      | 52/157 [31:59<1:01:56, 35.40s/it]


--- Batch 51 examples ---

Image 3264:
  True class: lakeside
  CLIP prediction: ox
  ICE prediction: ox
  Captions:
    a photo of: a photo of a field with a blue sky
    describe specifically: describe specifically, the land is a land that is not a land
    be concise: be concise - the beach
  CLIP confidence: 18.21%
  ICE confidence: 18.21%

Image 3265:
  True class: golden retriever
  CLIP prediction: standard poodle
  ICE prediction: standard poodle
  Captions:
    a photo of: a photo of a dog standing in the water
    describe specifically: describe specifically, the poo is a very large breed
    be concise: be concise golden retrieve
  CLIP confidence: 46.68%
  ICE confidence: 46.68%

Image 3266:
  True class: dugong
  CLIP prediction: dugong
  ICE prediction: dugong
  Captions:
    a photo of: a photo of a dolphin swimming in the ocean
    describe specifically: describe specifically, the dolphin is a type of fish
    be concise: be concisely, the dolphin
  CLIP confidence: 99

 34%|███▍      | 53/157 [32:36<1:01:43, 35.61s/it]


--- Batch 52 examples ---

Image 3328:
  True class: bow tie
  CLIP prediction: sunglasses
  ICE prediction: sunglasses
  Captions:
    a photo of: a photo of a man in a green shirt
    describe specifically: describe specifically, the person is a person
    be concise: be concisely, be concisely
  CLIP confidence: 39.89%
  ICE confidence: 39.89%

Image 3329:
  True class: ox
  CLIP prediction: bison
  ICE prediction: bison
  Captions:
    a photo of: a photo of a black bear in a field
    describe specifically: describe specifically, the bear is a bear
    be concise: be concisely, the black bear
  CLIP confidence: 36.79%
  ICE confidence: 36.96%

Image 3330:
  True class: Egyptian cat
  CLIP prediction: Egyptian cat
  ICE prediction: Egyptian cat
  Captions:
    a photo of: a photo of a black cat looking out the window
    describe specifically: describe specifically, this is a black cat
    be concise: be concisely adopt a domestic domestic
  CLIP confidence: 44.41%
  ICE confidenc

 34%|███▍      | 54/157 [33:13<1:02:07, 36.19s/it]


--- Batch 53 examples ---

Image 3392:
  True class: trilobite
  CLIP prediction: thatch
  ICE prediction: thatch
  Captions:
    a photo of: a photo of a brown substance
    describe specifically: describe specifically, the soil is a mixture of sand and sand
    be concise: be concise - brown - 1 5 oz
  CLIP confidence: 8.69%
  ICE confidence: 8.69%

Image 3393:
  True class: cliff dwelling
  CLIP prediction: dam
  ICE prediction: dam
  Captions:
    a photo of: a photo of a building with a large window
    describe specifically: describe specifically, the water is the same
    be concise: be concise - the best of the best
  CLIP confidence: 40.84%
  ICE confidence: 40.84%

Image 3394:
  True class: water tower
  CLIP prediction: water tower
  ICE prediction: water tower
  Captions:
    a photo of: a photo of a street with a water tower in the background
    describe specifically: describe specifically, the water tower is a great place to see the water
    be concise: be concise - th

 35%|███▌      | 55/157 [33:50<1:01:44, 36.32s/it]


--- Batch 54 examples ---

Image 3456:
  True class: moving van
  CLIP prediction: moving van
  ICE prediction: moving van
  Captions:
    a photo of: a photo of a man walking in front of a truck
    describe specifically: describe specifically, the trailer is a small, portable, portable, and portable
    be concise: be concise trailer
  CLIP confidence: 73.49%
  ICE confidence: 76.07%

Image 3457:
  True class: albatross
  CLIP prediction: albatross
  ICE prediction: albatross
  Captions:
    a photo of: a photo of a white cat on a boat
    describe specifically: describe specifically, the polar bear is a polar bear
    be concise: be concisely, be concisely, be concisely, be concisely be concisely be concisely be
  CLIP confidence: 96.73%
  ICE confidence: 96.73%

Image 3458:
  True class: stopwatch
  CLIP prediction: magnetic compass
  ICE prediction: magnetic compass
  Captions:
    a photo of: a photo of a man holding a blue and white object
    describe specifically: describe sp

 36%|███▌      | 56/157 [34:26<1:00:57, 36.21s/it]


--- Batch 55 examples ---

Image 3520:
  True class: Labrador retriever
  CLIP prediction: Labrador retriever
  ICE prediction: Labrador retriever
  Captions:
    a photo of: a photo of a dog on the beach
    describe specifically: describe specifically, this is a labrad
    be concise: be concise golden retrieve
  CLIP confidence: 29.22%
  ICE confidence: 29.22%

Image 3521:
  True class: jellyfish
  CLIP prediction: jellyfish
  ICE prediction: jellyfish
  Captions:
    a photo of: a photo of a large cloud in the sky
    describe specifically: describe specifically, the earth is a planet
    be concise: be concisement - the world of the universe
  CLIP confidence: 33.25%
  ICE confidence: 33.25%

Image 3522:
  True class: ox
  CLIP prediction: sombrero
  ICE prediction: sombrero
  Captions:
    a photo of: a photo of a group of people standing in front of a christmas tree
    describe specifically: describe specifically, the two men are the same person
    be concise: be concise - ch

 36%|███▋      | 57/157 [35:04<1:01:23, 36.83s/it]


--- Batch 56 examples ---

Image 3584:
  True class: kimono
  CLIP prediction: kimono
  ICE prediction: kimono
  Captions:
    a photo of: a photo of a woman in a pink dress
    describe specifically: describe specifically, the dress is a little girl ' s dream
    be concise: be concisely, be concisely, be concisely be concisely be concisely be concisely be con
  CLIP confidence: 70.17%
  ICE confidence: 70.17%

Image 3585:
  True class: plate
  CLIP prediction: plate
  ICE prediction: plate
  Captions:
    a photo of: a photo of a bowl of food on a table
    describe specifically: describe specifically, the food is a little bit of the food
    be concise: be concise - beef and pork stew
  CLIP confidence: 36.87%
  ICE confidence: 40.14%

Image 3586:
  True class: water tower
  CLIP prediction: obelisk
  ICE prediction: obelisk
  Captions:
    a photo of: a photo of a field with a tree in the background
    describe specifically: describe specifically, the land is a land that is not a

 37%|███▋      | 58/157 [35:41<1:00:42, 36.80s/it]


--- Batch 57 examples ---

Image 3648:
  True class: hog
  CLIP prediction: hog
  ICE prediction: hog
  Captions:
    a photo of: a photo of a bear drinking water from a pond
    describe specifically: describe specifically, the bear is a very large, furry animal
    be concise: be concisely, the world ' s largest and most - known animal
  CLIP confidence: 85.25%
  ICE confidence: 85.35%

Image 3649:
  True class: pill bottle
  CLIP prediction: pill bottle
  ICE prediction: pill bottle
  Captions:
    a photo of: a photo of a yellow and white cup
    describe specifically: describe specifically, the number of the numbers is the number of the numbers
    be concise: be concisely, be concisely
  CLIP confidence: 97.41%
  ICE confidence: 97.41%

Image 3650:
  True class: sewing machine
  CLIP prediction: sewing machine
  ICE prediction: sewing machine
  Captions:
    a photo of: a photo of a woman in a dress
    describe specifically: describe specifically, the person is a person
    be 

 38%|███▊      | 59/157 [36:15<58:57, 36.10s/it]  


--- Batch 58 examples ---

Image 3712:
  True class: dumbbell
  CLIP prediction: dumbbell
  ICE prediction: dumbbell
  Captions:
    a photo of: a photo of a blue and white cat
    describe specifically: describe specifically, the blue is the color of the car
    be concise: be concise blue sapphire ring
  CLIP confidence: 35.99%
  ICE confidence: 35.99%

Image 3713:
  True class: trolleybus
  CLIP prediction: trolleybus
  ICE prediction: trolleybus
  Captions:
    a photo of: a photo of a bus driving down a street
    describe specifically: describe specifically, the bus is a bus
    be concise: be concisely, the bus is a bus
  CLIP confidence: 90.67%
  ICE confidence: 90.72%

Image 3714:
  True class: academic gown
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a group of people in a field
    describe specifically: describe specifically, the students are not the same person
    be concise: be concise - irish band / celtic band in new yor

 38%|███▊      | 60/157 [36:51<58:17, 36.05s/it]


--- Batch 59 examples ---

Image 3776:
  True class: viaduct
  CLIP prediction: lakeside
  ICE prediction: lakeside
  Captions:
    a photo of: a photo of a lake with a boat in the water
    describe specifically: describe specifically, the water is very calm
    be concise: be concisely, the lake
  CLIP confidence: 26.66%
  ICE confidence: 34.50%

Image 3777:
  True class: orangutan
  CLIP prediction: cougar
  ICE prediction: cougar
  Captions:
    a photo of: a photo of a cat in a cage
    describe specifically: describe specifically, and easily, to understand the difference between the two species of the same species
    be concise: be concise - red - 100 % polyester - 100g
  CLIP confidence: 14.14%
  ICE confidence: 14.14%

Image 3778:
  True class: convertible
  CLIP prediction: convertible
  ICE prediction: convertible
  Captions:
    a photo of: a photo of a car with a trailer in the background
    describe specifically: describe specifically, the car is a very good looking veh

 39%|███▉      | 61/157 [37:31<59:33, 37.23s/it]


--- Batch 60 examples ---

Image 3840:
  True class: alp
  CLIP prediction: alp
  ICE prediction: alp
  Captions:
    a photo of: a photo of a group of people walking in the snow
    describe specifically: describe specifically, the snow is the most important to the weather
    be concise: be concise - the best of the best of the best of the best of the best of the best of the best
  CLIP confidence: 60.01%
  ICE confidence: 60.01%

Image 3841:
  True class: cardigan
  CLIP prediction: neck brace
  ICE prediction: neck brace
  Captions:
    a photo of: a photo of a woman in a white dress
    describe specifically: describe specifically, the person who is the person who is the person who is the person who is the person who is the person who is the
    be concise: be concisely, be concisely, be conly, be conly, be conly, be conly, be con
  CLIP confidence: 11.59%
  ICE confidence: 11.59%

Image 3842:
  True class: limousine
  CLIP prediction: limousine
  ICE prediction: limousine
  Capt

 39%|███▉      | 62/157 [38:10<59:31, 37.60s/it]


--- Batch 61 examples ---

Image 3904:
  True class: barrel
  CLIP prediction: barrel
  ICE prediction: barrel
  Captions:
    a photo of: a photo of a group of people in a crowd
    describe specifically: describe specifically, the number of the people in the world is the number of the people in the world is the number of the people in the
    be concise: be concise - the best of the best
  CLIP confidence: 41.33%
  ICE confidence: 41.33%

Image 3905:
  True class: school bus
  CLIP prediction: school bus
  ICE prediction: school bus
  Captions:
    a photo of: a photo of a large truck with a man on it
    describe specifically: describe specifically for the use of the tractor
    be concise: be concisely, the new truck for the game
  CLIP confidence: 20.39%
  ICE confidence: 20.64%

Image 3906:
  True class: space heater
  CLIP prediction: space heater
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a cube with a small square on top
    describe specifically: 

 40%|████      | 63/157 [38:47<58:43, 37.49s/it]


--- Batch 62 examples ---

Image 3968:
  True class: albatross
  CLIP prediction: albatross
  ICE prediction: albatross
  Captions:
    a photo of: a photo of a bird in the water
    describe specifically: describe specifically, the birds are not the same species
    be concise: be concisely, the largest of the seabirds
  CLIP confidence: 99.61%
  ICE confidence: 99.61%

Image 3969:
  True class: barn
  CLIP prediction: alp
  ICE prediction: alp
  Captions:
    a photo of: a photo of a city at night
    describe specifically: describe specifically, the city of the future
    be concise: be concise - the view from the top of the mountain
  CLIP confidence: 23.63%
  ICE confidence: 23.66%

Image 3970:
  True class: seashore
  CLIP prediction: seashore
  ICE prediction: seashore
  Captions:
    a photo of: a photo of a lighthouse on a rocky shore
    describe specifically: describe specifically, the sky is blue
    be concise: be concisely, the sky is blue
  CLIP confidence: 36.60%
  ICE

 41%|████      | 64/157 [39:24<57:58, 37.40s/it]


--- Batch 63 examples ---

Image 4032:
  True class: police van
  CLIP prediction: police van
  ICE prediction: police van
  Captions:
    a photo of: a photo of a white van driving down a street
    describe specifically: describe specifically, the vehicle is a van
    be concise: be concisel van
  CLIP confidence: 44.51%
  ICE confidence: 45.78%

Image 4033:
  True class: spiny lobster
  CLIP prediction: spiny lobster
  ICE prediction: spiny lobster
  Captions:
    a photo of: a photo of a green and yellow field
    describe specifically: describe specifically, and interpret, and interpret the world of the universe
    be concise: be concise - the best of the best
  CLIP confidence: 21.94%
  ICE confidence: 21.94%

Image 4034:
  True class: teddy
  CLIP prediction: teddy
  ICE prediction: teddy
  Captions:
    a photo of: a photo of a teddy bear sitting on a blue blanket
    describe specifically: describe specifically, this is a teddy bear
    be concise: be concise teddy bear
  CL

 45%|████▍     | 70/157 [42:56<51:04, 35.23s/it]


--- Batch 69 examples ---

Image 4416:
  True class: beacon
  CLIP prediction: beacon
  ICE prediction: beacon
  Captions:
    a photo of: a photo of a lighthouse on a hill
    describe specifically: describe specifically, the lighthouses are not in the same location
    be concise: be concise lighthouse
  CLIP confidence: 50.24%
  ICE confidence: 57.08%

Image 4417:
  True class: pop bottle
  CLIP prediction: black widow
  ICE prediction: black widow
  Captions:
    a photo of: a photo of a bottle of wine and a bottle of wine
    describe specifically: describe specifically, the bottle is a bottle of wine
    be concise: be concise - the bottle
  CLIP confidence: 20.34%
  ICE confidence: 20.34%

Image 4418:
  True class: cliff dwelling
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a mountain with trees and bushes
    describe specifically: describe specifically, the area is a natural area, and the area is a natural area
    

 45%|████▌     | 71/157 [43:32<50:48, 35.45s/it]


--- Batch 70 examples ---

Image 4480:
  True class: American lobster
  CLIP prediction: American lobster
  ICE prediction: American lobster
  Captions:
    a photo of: a photo of a plate of food with a glass of wine
    describe specifically: describe specifically, the food of the world is the most important to the world
    be concise: be concise - mexican food
  CLIP confidence: 38.92%
  ICE confidence: 38.92%

Image 4481:
  True class: volleyball
  CLIP prediction: volleyball
  ICE prediction: volleyball
  Captions:
    a photo of: a photo of a football game with the ball in the air
    describe specifically: describe specifically, the game is a game that is played by a player
    be concise: be concisely, the best way to play football
  CLIP confidence: 96.14%
  ICE confidence: 96.53%

Image 4482:
  True class: sea slug
  CLIP prediction: goldfish
  ICE prediction: goldfish
  Captions:
    a photo of: a photo of a nebula with a red nebula in the background
    describe specifical

 46%|████▌     | 72/157 [44:10<51:00, 36.01s/it]


--- Batch 71 examples ---

Image 4544:
  True class: bullfrog
  CLIP prediction: bullfrog
  ICE prediction: bullfrog
  Captions:
    a photo of: a photo of a man and woman dancing
    describe specifically: describe specifically, the two main types of the human body are the human body, the human body, and the body
    be concise: be concise - the best of the best
  CLIP confidence: 49.68%
  ICE confidence: 49.68%

Image 4545:
  True class: academic gown
  CLIP prediction: academic gown
  ICE prediction: academic gown
  Captions:
    a photo of: a photo of a suit and tie hanging on a rack
    describe specifically: describe specifically, the suit is a suit that is a suit, a suit that is a suit, a suit that is a suit that is
    be concise: be concisely, the suit is a suit
  CLIP confidence: 86.57%
  ICE confidence: 86.62%

Image 4546:
  True class: obelisk
  CLIP prediction: obelisk
  ICE prediction: obelisk
  Captions:
    a photo of: a photo of a tall tower in a city
    describe spe

 46%|████▋     | 73/157 [44:46<50:37, 36.16s/it]


--- Batch 72 examples ---

Image 4608:
  True class: pole
  CLIP prediction: thatch
  ICE prediction: thatch
  Captions:
    a photo of: a photo of a building with a sky background
    describe specifically: describe specifically, the pyramids are not the same
    be concise: be concise - the pyramid
  CLIP confidence: 25.27%
  ICE confidence: 25.27%

Image 4609:
  True class: basketball
  CLIP prediction: basketball
  ICE prediction: basketball
  Captions:
    a photo of: a photo of a basketball player in action
    describe specifically: describe specifically, the basketball players are not the same
    be concise: be concisely, but, that ' s the way to go
  CLIP confidence: 64.94%
  ICE confidence: 70.02%

Image 4610:
  True class: drumstick
  CLIP prediction: drumstick
  ICE prediction: drumstick
  Captions:
    a photo of: a photo of a drum
    describe specifically: describe specifically by the drummer ' s name
    be concise: be concisely, the drum is a great instrument
  CLIP 

 47%|████▋     | 74/157 [45:22<50:03, 36.19s/it]


--- Batch 73 examples ---

Image 4672:
  True class: water jug
  CLIP prediction: water jug
  ICE prediction: water jug
  Captions:
    a photo of: a photo of a blue glass
    describe specifically: describe specifically with the following of the blue sapphire
    be concise: be concisement crystal
  CLIP confidence: 40.92%
  ICE confidence: 40.92%

Image 4673:
  True class: seashore
  CLIP prediction: seashore
  ICE prediction: seashore
  Captions:
    a photo of: a photo of a beach with a rock in the water
    describe specifically: describe specifically, the ocean is the most important place to the ocean
    be concise: be concisely, the ocean is a beautiful place to be
  CLIP confidence: 22.36%
  ICE confidence: 30.08%

Image 4674:
  True class: maypole
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a dance class
    describe specifically: describe specifically, the dance is a very important part of the dance
    be concise: be concise 

 48%|████▊     | 75/157 [46:00<50:04, 36.64s/it]


--- Batch 74 examples ---

Image 4736:
  True class: cliff dwelling
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a brick wall with a hole in it
    describe specifically: describe specifically, the soil is a mixture of soil and soil is a mixture of soil and soil
    be concise: be concise - the best of the best
  CLIP confidence: 98.68%
  ICE confidence: 98.68%

Image 4737:
  True class: oboe
  CLIP prediction: oboe
  ICE prediction: oboe
  Captions:
    a photo of: a photo of a classroom
    describe specifically: describe specifically, the audience is a person who is not a person
    be concise: be concisely, be concisely, be concisely, be concisely, be concisely be concisely be concisely be concisely be con
  CLIP confidence: 10.63%
  ICE confidence: 10.63%

Image 4738:
  True class: sewing machine
  CLIP prediction: sewing machine
  ICE prediction: sewing machine
  Captions:
    a photo of: a photo of a table with a bunc

 48%|████▊     | 76/157 [46:37<49:31, 36.69s/it]


--- Batch 75 examples ---

Image 4800:
  True class: mantis
  CLIP prediction: mantis
  ICE prediction: mantis
  Captions:
    a photo of: a photo of a praying praying praying praying praying praying praying praying praying praying praying praying praying praying praying praying praying
    describe specifically: describe specifically, the green mantis is a species of the mantis family
    be concise: be concisement - praying mantis
  CLIP confidence: 87.84%
  ICE confidence: 93.90%

Image 4801:
  True class: teddy
  CLIP prediction: confectionery
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a cake with many different colors
    describe specifically: describe specifically, the number of the flowers is the number of the flowers
    be concise: be concise - red rose
  CLIP confidence: 28.15%
  ICE confidence: 35.28%

Image 4802:
  True class: beer bottle
  CLIP prediction: beer bottle
  ICE prediction: beer bottle
  Captions:
    a photo of: a photo of a black

 49%|████▉     | 77/157 [47:13<48:45, 36.57s/it]


--- Batch 76 examples ---

Image 4864:
  True class: monarch
  CLIP prediction: monarch
  ICE prediction: monarch
  Captions:
    a photo of: a photo of a butterfly on a flower
    describe specifically: describe specifically, the butterfly is a member of the family
    be concise: be concisement - the butterfly
  CLIP confidence: 52.59%
  ICE confidence: 54.35%

Image 4865:
  True class: birdhouse
  CLIP prediction: birdhouse
  ICE prediction: birdhouse
  Captions:
    a photo of: a photo of a kayak in the water
    describe specifically: describe specifically, the kayak is a great way to get out of the cold weather
    be concise: be concise kayak kayak
  CLIP confidence: 99.12%
  ICE confidence: 99.12%

Image 4866:
  True class: cockroach
  CLIP prediction: acorn
  ICE prediction: acorn
  Captions:
    a photo of: a photo of a bug on a table
    describe specifically: describe specifically, the spider is a member of the family
    be concise: be concisely, the best way to get a goo

 50%|████▉     | 78/157 [47:49<47:54, 36.39s/it]


--- Batch 77 examples ---

Image 4928:
  True class: fur coat
  CLIP prediction: fur coat
  ICE prediction: fur coat
  Captions:
    a photo of: a photo of a woman in a fur coat
    describe specifically: describe specifically, the woman is a real person
    be concise: be concisely, be the best
  CLIP confidence: 62.11%
  ICE confidence: 63.62%

Image 4929:
  True class: plate
  CLIP prediction: meat loaf
  ICE prediction: plate
  Captions:
    a photo of: a photo of a plate of food with a fork
    describe specifically: describe specifically, the food is not the same
    be concise: be concise - the best italian restaurant in the world
  CLIP confidence: 30.37%
  ICE confidence: 32.64%

Image 4930:
  True class: tailed frog
  CLIP prediction: tailed frog
  ICE prediction: tailed frog
  Captions:
    a photo of: a photo of a mouse on a blue background
    describe specifically: describe specifically, the mouse is a mouse
    be concise: be concise - gold - filled mouse mouse
  CLIP c

 50%|█████     | 79/157 [48:25<47:15, 36.35s/it]


--- Batch 78 examples ---

Image 4992:
  True class: jinrikisha
  CLIP prediction: turnstile
  ICE prediction: turnstile
  Captions:
    a photo of: a photo of a horse drawn carriage
    describe specifically: describe specifically, the bug is a bug that is not a bug
    be concise: be concise buggy
  CLIP confidence: 16.49%
  ICE confidence: 16.49%

Image 4993:
  True class: black stork
  CLIP prediction: black stork
  ICE prediction: black stork
  Captions:
    a photo of: a photo of a man in a boat
    describe specifically: describe specifically, the water is a little green
    be concise: be concisely, the water is green
  CLIP confidence: 78.32%
  ICE confidence: 78.32%

Image 4994:
  True class: bathtub
  CLIP prediction: turnstile
  ICE prediction: turnstile
  Captions:
    a photo of: a photo of a man on a ladder
    describe specifically: describe specifically, the ladder is a ladder
    be concise: be concisel ladder
  CLIP confidence: 8.61%
  ICE confidence: 8.61%

Image 4

 51%|█████     | 80/157 [49:05<47:48, 37.25s/it]


--- Batch 79 examples ---

Image 5056:
  True class: king penguin
  CLIP prediction: king penguin
  ICE prediction: king penguin
  Captions:
    a photo of: a photo of a penguin walking on the beach
    describe specifically: describe specifically, the penguin is a penguin
    be concise: be concisely, be consice, beje, beje, beje, beje, beje, beje, beje, beje, beje, beje, beje, beje, beje,
  CLIP confidence: 97.07%
  ICE confidence: 97.07%

Image 5057:
  True class: dining table
  CLIP prediction: dining table
  ICE prediction: dining table
  Captions:
    a photo of: a photo of a living room with a couch and a television
    describe specifically: describe specifically, the table is a table, the table is a table
    be concise: be concisely, the dining area is a great place to sit and enjoy
  CLIP confidence: 84.52%
  ICE confidence: 90.38%

Image 5058:
  True class: scorpion
  CLIP prediction: scorpion
  ICE prediction: scorpion
  Captions:
    a photo of: a photo of a small dog on

 52%|█████▏    | 81/157 [49:42<46:59, 37.10s/it]


--- Batch 80 examples ---

Image 5120:
  True class: triumphal arch
  CLIP prediction: triumphal arch
  ICE prediction: triumphal arch
  Captions:
    a photo of: a photo of a group of people walking in front of a building
    describe specifically: describe specifically, the ancient monuments of india
    be concise: be concise - the great wall of china
  CLIP confidence: 95.41%
  ICE confidence: 95.41%

Image 5121:
  True class: pretzel
  CLIP prediction: sewing machine
  ICE prediction: sewing machine
  Captions:
    a photo of: a photo of a man sitting at a table
    describe specifically: describe specifically, the person is a person
    be concise: be concisely, be concisely
  CLIP confidence: 7.75%
  ICE confidence: 7.75%

Image 5122:
  True class: thatch
  CLIP prediction: thatch
  ICE prediction: thatch
  Captions:
    a photo of: a photo of a house with a thatched roof
    describe specifically: describe specifically, the house is a typical example of the traditional archite

 52%|█████▏    | 82/157 [50:18<46:11, 36.95s/it]


--- Batch 81 examples ---

Image 5184:
  True class: sea cucumber
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a small animal in the grass
    describe specifically: describe specifically, the soil is a mixture of soil and soil is a mixture of soil and soil
    be concise: be concise - the best of the best
  CLIP confidence: 79.79%
  ICE confidence: 79.79%

Image 5185:
  True class: guacamole
  CLIP prediction: pizza
  ICE prediction: pizza
  Captions:
    a photo of: a photo of a plate of food with a green sauce
    describe specifically: describe specifically, the food is not the same
    be concise: be concisely, the best mexican food in the world
  CLIP confidence: 29.98%
  ICE confidence: 30.40%

Image 5186:
  True class: Persian cat
  CLIP prediction: Persian cat
  ICE prediction: Persian cat
  Captions:
    a photo of: a photo of a cat in a blue box
    describe specifically: describe specifically, this is a kitten
    be

 53%|█████▎    | 83/157 [50:55<45:31, 36.92s/it]


--- Batch 82 examples ---

Image 5248:
  True class: apron
  CLIP prediction: jinrikisha
  ICE prediction: jinrikisha
  Captions:
    a photo of: a photo of a woman in a white dress
    describe specifically: describe specifically, the person is a person
    be concise: be concisement of the greek dance
  CLIP confidence: 36.08%
  ICE confidence: 36.08%

Image 5249:
  True class: suspension bridge
  CLIP prediction: triumphal arch
  ICE prediction: triumphal arch
  Captions:
    a photo of: a photo of a castle in the distance
    describe specifically: describe specifically, the castle is a great place to visit
    be concise: be concise - a great place to enjoy
  CLIP confidence: 18.93%
  ICE confidence: 18.93%

Image 5250:
  True class: jellyfish
  CLIP prediction: coral reef
  ICE prediction: coral reef
  Captions:
    a photo of: a photo of a woman on a television screen
    describe specifically: describe specifically, the tv is a very important part of the show
    be concise: b

 54%|█████▎    | 84/157 [51:31<44:39, 36.71s/it]


--- Batch 83 examples ---

Image 5312:
  True class: limousine
  CLIP prediction: limousine
  ICE prediction: limousine
  Captions:
    a photo of: a photo of a car with a blue background
    describe specifically: describe specifically for the vehicle
    be concise: be concisely, the new generation of the ford
  CLIP confidence: 97.90%
  ICE confidence: 97.90%

Image 5313:
  True class: frying pan
  CLIP prediction: wok
  ICE prediction: wok
  Captions:
    a photo of: a photo of a pan of food with vegetables
    describe specifically: describe specifically, the food is not the same
    be concise: be concise - italian sauce
  CLIP confidence: 39.97%
  ICE confidence: 39.97%

Image 5314:
  True class: obelisk
  CLIP prediction: obelisk
  ICE prediction: obelisk
  Captions:
    a photo of: a photo of a tower with a clock on top
    describe specifically: describe specifically, the pyramids are not the same
    be concise: be concise - the tower of the cathedral of the holy trinity
  

 54%|█████▍    | 85/157 [52:06<43:16, 36.07s/it]


--- Batch 84 examples ---

Image 5376:
  True class: CD player
  CLIP prediction: CD player
  ICE prediction: CD player
  Captions:
    a photo of: a photo of a black and red oven
    describe specifically: describe specifically in the following the following the following
    be concise: be concisement oven
  CLIP confidence: 28.52%
  ICE confidence: 28.52%

Image 5377:
  True class: Chihuahua
  CLIP prediction: Chihuahua
  ICE prediction: Chihuahua
  Captions:
    a photo of: a photo of a dog laying on a blanket
    describe specifically: describe specifically, the dog is a dog breed
    be concise: be concisely with these dog
  CLIP confidence: 23.50%
  ICE confidence: 23.55%

Image 5378:
  True class: torch
  CLIP prediction: torch
  ICE prediction: torch
  Captions:
    a photo of: a photo of a fire hydra in the night
    describe specifically: describe specifically, the fire is not burning
    be concise: be concise fire hydra
  CLIP confidence: 75.20%
  ICE confidence: 75.29%



 55%|█████▍    | 86/157 [52:43<43:01, 36.35s/it]


--- Batch 85 examples ---

Image 5440:
  True class: Yorkshire terrier
  CLIP prediction: Yorkshire terrier
  ICE prediction: Yorkshire terrier
  Captions:
    a photo of: a photo of a small dog in the grass
    describe specifically: describe specifically miniature scr scr scr scr scr scr scr scr scr scr scr scr sc
    be concise: be concise miniature scr scr scr scr scr scr scr scr scr scr scr scr scr scr scr scr scr scr scr scr scr scr sc
  CLIP confidence: 83.74%
  ICE confidence: 83.74%

Image 5441:
  True class: pill bottle
  CLIP prediction: iPod
  ICE prediction: iPod
  Captions:
    a photo of: a photo of a bunch of books on a table
    describe specifically: describe specifically, the number of the items is the number of the items
    be concise: be concisely, be concisely, be concisely be concisely be concisely be concisely be con
  CLIP confidence: 20.40%
  ICE confidence: 20.40%

Image 5442:
  True class: dumbbell
  CLIP prediction: dumbbell
  ICE prediction: dumbbell
  C

 55%|█████▌    | 87/157 [53:20<42:50, 36.72s/it]


--- Batch 86 examples ---

Image 5504:
  True class: Egyptian cat
  CLIP prediction: Persian cat
  ICE prediction: Persian cat
  Captions:
    a photo of: a photo of a group of sheeps in a pen
    describe specifically: describe specifically, the dog is a domestic breed
    be concise: be concisely, the best dog for you
  CLIP confidence: 41.06%
  ICE confidence: 41.06%

Image 5505:
  True class: neck brace
  CLIP prediction: miniskirt
  ICE prediction: miniskirt
  Captions:
    a photo of: a photo of a man on a stage
    describe specifically: describe specifically, the audience is a little bit of the audience
    be concise: be concise live at the royal albert theatre
  CLIP confidence: 16.59%
  ICE confidence: 16.59%

Image 5506:
  True class: orangutan
  CLIP prediction: orangutan
  ICE prediction: orangutan
  Captions:
    a photo of: a photo of a forest with a bird in the middle
    describe specifically: describe specifically, the tree is a tree that is a tree that is a tree th

 56%|█████▌    | 88/157 [53:58<42:29, 36.95s/it]


--- Batch 87 examples ---

Image 5568:
  True class: organ
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a large building with a tower
    describe specifically: describe specifically, the building is a very complex
    be concise: be concise - the best of the best
  CLIP confidence: 7.10%
  ICE confidence: 7.10%

Image 5569:
  True class: punching bag
  CLIP prediction: punching bag
  ICE prediction: punching bag
  Captions:
    a photo of: a photo of a punching punching punching punching punching punching punching punching punching punching punching punching punching punching punching punching punching
    describe specifically: describe specifically, the punching punching punching is a key component of the fight
    be concise: be concise boxing punching punching punching punching punching punching punching punching punching punching punching punching punching punching
  CLIP confidence: 98.88%
  ICE confidence: 99.32%

Image 5570:
  Tr

 57%|█████▋    | 89/157 [54:33<41:12, 36.36s/it]


--- Batch 88 examples ---

Image 5632:
  True class: vestment
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a man in a white shirt
    describe specifically: describe specifically, the person is a person
    be concise: be concisely, be concisely, be concisely, be concisely be con
  CLIP confidence: 26.71%
  ICE confidence: 26.71%

Image 5633:
  True class: sulphur butterfly
  CLIP prediction: sulphur butterfly
  ICE prediction: sulphur butterfly
  Captions:
    a photo of: a photo of a yellow leaf
    describe specifically: describe specifically, the yellow leaf is a common plant in the forest
    be concise: be concise - yellow leaf
  CLIP confidence: 63.62%
  ICE confidence: 63.62%

Image 5634:
  True class: Yorkshire terrier
  CLIP prediction: guinea pig
  ICE prediction: guinea pig
  Captions:
    a photo of: a photo of a bird in the grass
    describe specifically: describe specifically, the squirrel is a little bit of a squirrel
 

 57%|█████▋    | 90/157 [55:12<41:35, 37.24s/it]


--- Batch 89 examples ---

Image 5696:
  True class: dumbbell
  CLIP prediction: turnstile
  ICE prediction: turnstile
  Captions:
    a photo of: a photo of a man sitting in a chair
    describe specifically: describe specifically, the chair is a chair that is a chair that is a chair that is a chair that is a chair that is a chair
    be concise: be concise chair, c c
  CLIP confidence: 17.79%
  ICE confidence: 17.79%

Image 5697:
  True class: turnstile
  CLIP prediction: turnstile
  ICE prediction: turnstile
  Captions:
    a photo of: a photo of a lobby with a large screen
    describe specifically: describe specifically, the floor is a very clean
    be concise: be concisely, the lobby is a great place to work
  CLIP confidence: 58.50%
  ICE confidence: 58.50%

Image 5698:
  True class: desk
  CLIP prediction: sewing machine
  ICE prediction: sewing machine
  Captions:
    a photo of: a photo of a room with a desk and a computer
    describe specifically: describe specifically, t

 58%|█████▊    | 91/157 [55:47<40:10, 36.53s/it]


--- Batch 90 examples ---

Image 5760:
  True class: bell pepper
  CLIP prediction: bell pepper
  ICE prediction: bell pepper
  Captions:
    a photo of: a photo of three peppers on a table
    describe specifically: describe specifically, the pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper
    be concise: be concisely, the pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper pepper
  CLIP confidence: 92.29%
  ICE confidence: 98.14%

Image 5761:
  True class: monarch
  CLIP prediction: monarch
  ICE prediction: monarch
  Captions:
    a photo of: a photo of a butterfly in a field
    describe specifically: describe specifically, the butterfly is a member of the family
    be concise: be concisely, be concisely
  CLIP confidence: 68.75%
  ICE confidence: 68.95%

Image 5762:
 

 59%|█████▊    | 92/157 [56:22<38:57, 35.95s/it]


--- Batch 91 examples ---

Image 5824:
  True class: bison
  CLIP prediction: bison
  ICE prediction: bison
  Captions:
    a photo of: a photo of a herd of buffalo grazing in a field
    describe specifically: describe specifically, the bison is a large, black, mammal
    be concise: be concisely, the bison is a very large, black, male bison
  CLIP confidence: 92.63%
  ICE confidence: 98.54%

Image 5825:
  True class: meat loaf
  CLIP prediction: centipede
  ICE prediction: centipede
  Captions:
    a photo of: a photo of a plate of food on a table
    describe specifically: describe specifically, the food is not the same
    be concise: be concisely delicious food
  CLIP confidence: 17.19%
  ICE confidence: 17.19%

Image 5826:
  True class: space heater
  CLIP prediction: abacus
  ICE prediction: abacus
  Captions:
    a photo of: a photo of a computer with a keyboard
    describe specifically: describe specifically in the following the following the following
    be concise: be con

 59%|█████▉    | 93/157 [56:58<38:28, 36.07s/it]


--- Batch 92 examples ---

Image 5888:
  True class: desk
  CLIP prediction: dining table
  ICE prediction: dining table
  Captions:
    a photo of: a photo of a table with a bunch of flowers
    describe specifically: describe specifically, the table is a table
    be concise: be concisely, the best way to get a job
  CLIP confidence: 18.75%
  ICE confidence: 26.05%

Image 5889:
  True class: potpie
  CLIP prediction: potpie
  ICE prediction: potpie
  Captions:
    a photo of: a photo of a plate of food with chicken
    describe specifically: describe specifically, the chicken is a very important ingredient
    be concise: be concisely with this delicious chicken pot pie
  CLIP confidence: 99.71%
  ICE confidence: 100.20%

Image 5890:
  True class: guinea pig
  CLIP prediction: guinea pig
  ICE prediction: guinea pig
  Captions:
    a photo of: a photo of a white rat
    describe specifically: describe specifically, the rat is a rat
    be concise: be concisely, the rat is a very int

 60%|█████▉    | 94/157 [57:34<37:44, 35.95s/it]


--- Batch 93 examples ---

Image 5952:
  True class: walking stick
  CLIP prediction: mantis
  ICE prediction: mantis
  Captions:
    a photo of: a photo of a person playing a piano
    describe specifically: describe specifically, the hands are the same
    be concise: be concisely, the hands of a woman
  CLIP confidence: 19.37%
  ICE confidence: 19.37%

Image 5953:
  True class: sock
  CLIP prediction: sock
  ICE prediction: sock
  Captions:
    a photo of: a photo of a red star on a concrete surface
    describe specifically: describe specifically, the star is a symbol of the universe
    be concise: be concise - the best way to be a star
  CLIP confidence: 48.90%
  ICE confidence: 48.90%

Image 5954:
  True class: brain coral
  CLIP prediction: brain coral
  ICE prediction: brain coral
  Captions:
    a photo of: a photo of a large ball of sand
    describe specifically: describe specifically, the water is the same as the surface
    be concise: be concisement - the great white sh

 61%|██████    | 95/157 [58:11<37:37, 36.41s/it]


--- Batch 94 examples ---

Image 6016:
  True class: military uniform
  CLIP prediction: military uniform
  ICE prediction: military uniform
  Captions:
    a photo of: a photo of a group of men in uniform
    describe specifically: describe specifically, the royal family is a family of four
    be concise: be concise - irish guards
  CLIP confidence: 68.95%
  ICE confidence: 73.58%

Image 6017:
  True class: dragonfly
  CLIP prediction: dragonfly
  ICE prediction: dragonfly
  Captions:
    a photo of: a photo of a bird sitting on a plant
    describe specifically: describe specifically, the insects are not related
    be concise: be concisely, be concisely, be concisely, be concisely, be concisely be concisely be concisely, be concisely be be be be be be be be be be be be
  CLIP confidence: 65.23%
  ICE confidence: 65.23%

Image 6018:
  True class: reel
  CLIP prediction: reel
  ICE prediction: reel
  Captions:
    a photo of: a photo of a spinning spinning machine
    describe speci

 61%|██████    | 96/157 [58:48<37:12, 36.60s/it]


--- Batch 95 examples ---

Image 6080:
  True class: triumphal arch
  CLIP prediction: triumphal arch
  ICE prediction: triumphal arch
  Captions:
    a photo of: a photo of a large ship in the water
    describe specifically: describe specifically, the us navy is a great place to work on the uss enterprise
    be concise: be concise - the battle of the nations
  CLIP confidence: 33.84%
  ICE confidence: 33.84%

Image 6081:
  True class: bell pepper
  CLIP prediction: bell pepper
  ICE prediction: bell pepper
  Captions:
    a photo of: a photo of a plant in a pot
    describe specifically: describe specifically, the plant is a plant
    be concise: be concise planter
  CLIP confidence: 10.13%
  ICE confidence: 10.13%

Image 6082:
  True class: grasshopper
  CLIP prediction: grasshopper
  ICE prediction: grasshopper
  Captions:
    a photo of: a photo of a lizard on a leaf
    describe specifically: describe specifically, the lizard is a reptacy
    be concise: be concisely, the most 

 62%|██████▏   | 97/157 [59:25<36:36, 36.61s/it]


--- Batch 96 examples ---

Image 6144:
  True class: teddy
  CLIP prediction: teddy
  ICE prediction: teddy
  Captions:
    a photo of: a photo of a teddy bear sitting on a table
    describe specifically: describe specifically, this is a teddy bear
    be concise: be concise teddy bear
  CLIP confidence: 93.07%
  ICE confidence: 98.93%

Image 6145:
  True class: backpack
  CLIP prediction: poncho
  ICE prediction: poncho
  Captions:
    a photo of: a photo of a black and white dog
    describe specifically: describe specifically, the dog is a black and white color
    be concise: be concise black and white dog coat for sale
  CLIP confidence: 23.21%
  ICE confidence: 23.21%

Image 6146:
  True class: ice lolly
  CLIP prediction: standard poodle
  ICE prediction: standard poodle
  Captions:
    a photo of: a photo of a dog with a tennis ball
    describe specifically: describe specifically, the dog is a toy
    be concise: be concisely, the dog is a little girl
  CLIP confidence: 13.6

 62%|██████▏   | 98/157 [59:59<35:10, 35.77s/it]


--- Batch 97 examples ---

Image 6208:
  True class: gazelle
  CLIP prediction: gazelle
  ICE prediction: gazelle
  Captions:
    a photo of: a photo of a deer in the woods
    describe specifically: describe specifically, the deer is a very large animal
    be concise: be concise - deer hunting
  CLIP confidence: 33.25%
  ICE confidence: 33.25%

Image 6209:
  True class: rocking chair
  CLIP prediction: rocking chair
  ICE prediction: rocking chair
  Captions:
    a photo of: a photo of a living room with a couch and a chair
    describe specifically: describe specifically, the chair is a chair
    be concise: be concisely, the chair is a chair
  CLIP confidence: 97.80%
  ICE confidence: 103.32%

Image 6210:
  True class: CD player
  CLIP prediction: CD player
  ICE prediction: CD player
  Captions:
    a photo of: a photo of a radio in a car
    describe specifically: describe specifically in the car radio
    be concise: be conciser radio
  CLIP confidence: 64.99%
  ICE confidence:

 63%|██████▎   | 99/157 [1:00:34<34:32, 35.73s/it]


--- Batch 98 examples ---

Image 6272:
  True class: umbrella
  CLIP prediction: umbrella
  ICE prediction: umbrella
  Captions:
    a photo of: a photo of a flooded street with a blue umbrella
    describe specifically: describe specifically, the water is not as high as the land
    be concise: be concisement of the water
  CLIP confidence: 92.19%
  ICE confidence: 92.19%

Image 6273:
  True class: brass
  CLIP prediction: obelisk
  ICE prediction: obelisk
  Captions:
    a photo of: a photo of a building with a large tower
    describe specifically: describe specifically, the number of the building is the number of the building
    be concise: be concise - the best of the best
  CLIP confidence: 11.03%
  ICE confidence: 11.03%

Image 6274:
  True class: bison
  CLIP prediction: bison
  ICE prediction: bison
  Captions:
    a photo of: a photo of a bison in a field
    describe specifically: describe specifically, the bison is a large, black, male
    be concise: be concisely, a biso

 64%|██████▎   | 100/157 [1:01:12<34:24, 36.22s/it]


--- Batch 99 examples ---

Image 6336:
  True class: rugby ball
  CLIP prediction: lakeside
  ICE prediction: lakeside
  Captions:
    a photo of: a photo of a person wadi in a lake
    describe specifically: describe specifically, the water is so high
    be concise: be concise - fly fishing
  CLIP confidence: 22.40%
  ICE confidence: 29.49%

Image 6337:
  True class: cash machine
  CLIP prediction: pay-phone
  ICE prediction: pay-phone
  Captions:
    a photo of: a photo of a store with a sign on the front
    describe specifically: describe specifically, the signs are not in english
    be concise: be concise - the best of the best
  CLIP confidence: 21.96%
  ICE confidence: 21.96%

Image 6338:
  True class: dam
  CLIP prediction: suspension bridge
  ICE prediction: suspension bridge
  Captions:
    a photo of: a photo of a road with a car driving down it
    describe specifically: describe specifically, the road is paved
    be concise: be concisely, the road is empty
  CLIP confi

 64%|██████▍   | 101/157 [1:01:51<34:36, 37.09s/it]


--- Batch 100 examples ---

Image 6400:
  True class: ox
  CLIP prediction: go-kart
  ICE prediction: go-kart
  Captions:
    a photo of: a photo of a crowd of people walking down a street
    describe specifically: describe specifically, the traffic is not always
    be concise: be concisely, the most of the many people in the city
  CLIP confidence: 11.74%
  ICE confidence: 11.74%

Image 6401:
  True class: snorkel
  CLIP prediction: swimming trunks
  ICE prediction: swimming trunks
  Captions:
    a photo of: a photo of a green nebula
    describe specifically: describe specifically, the stars are not visible
    be concise: be concisely, be concisely, be concisely, be concisely, be concisely be concisely be concisely be con
  CLIP confidence: 42.41%
  ICE confidence: 42.41%

Image 6402:
  True class: spider web
  CLIP prediction: spider web
  ICE prediction: spider web
  Captions:
    a photo of: a photo of a large black hole in the sky
    describe specifically: describe specific

 65%|██████▍   | 102/157 [1:02:27<33:54, 36.99s/it]


--- Batch 101 examples ---

Image 6464:
  True class: ox
  CLIP prediction: ox
  ICE prediction: ox
  Captions:
    a photo of: a photo of a brown dog running through the grass
    describe specifically: describe specifically, the bull is a very large, muscular, and very large, bull
    be concise: be concisely, a beautiful red highland highland
  CLIP confidence: 69.38%
  ICE confidence: 70.41%

Image 6465:
  True class: orangutan
  CLIP prediction: orangutan
  ICE prediction: orangutan
  Captions:
    a photo of: a photo of a man with a monkey on his shoulder
    describe specifically: describe specifically, the monkey is a monkey
    be concise: be concisely, be concisely
  CLIP confidence: 92.43%
  ICE confidence: 92.43%

Image 6466:
  True class: bucket
  CLIP prediction: bucket
  ICE prediction: bucket
  Captions:
    a photo of: a photo of a white plastic trash can
    describe specifically: describe specifically for the use of the plastic cup
    be concise: be concisel 1 5 li

 66%|██████▌   | 103/157 [1:03:02<32:43, 36.37s/it]


--- Batch 102 examples ---

Image 6528:
  True class: slug
  CLIP prediction: bee
  ICE prediction: bee
  Captions:
    a photo of: a photo of a man in a suit and tie
    describe specifically: describe specifically, the fire is a fire that is not burning
    be concise: be concise - the best of the best
  CLIP confidence: 31.47%
  ICE confidence: 31.47%

Image 6529:
  True class: lawn mower
  CLIP prediction: lawn mower
  ICE prediction: lawn mower
  Captions:
    a photo of: a photo of a man riding a horse
    describe specifically: describe specifically, the person is a person
    be concise: be concisely, the man is a very good person
  CLIP confidence: 45.70%
  ICE confidence: 45.70%

Image 6530:
  True class: thatch
  CLIP prediction: thatch
  ICE prediction: thatch
  Captions:
    a photo of: a photo of the ruins of the castle
    describe specifically: describe specifically, the castle is a very complex
    be concise: be concise, the castle
  CLIP confidence: 46.09%
  ICE con

 66%|██████▌   | 104/157 [1:03:39<32:11, 36.45s/it]


--- Batch 103 examples ---

Image 6592:
  True class: koala
  CLIP prediction: koala
  ICE prediction: koala
  Captions:
    a photo of: a photo of a koloa in a tree
    describe specifically: describe specifically, the kolos is a species of the kolos family
    be concise: be concisely, be concisely, be concisely, be concisely be concisely be concisely be
  CLIP confidence: 32.08%
  ICE confidence: 32.08%

Image 6593:
  True class: vestment
  CLIP prediction: vestment
  ICE prediction: vestment
  Captions:
    a photo of: a photo of a man in a suit and tie
    describe specifically: describe specifically, the flag of the chinese people is the chinese flag
    be concise: be concisement of the chinese flag
  CLIP confidence: 77.83%
  ICE confidence: 77.83%

Image 6594:
  True class: snorkel
  CLIP prediction: snorkel
  ICE prediction: snorkel
  Captions:
    a photo of: a photo of a blue sky with stars
    describe specifically: describe specifically, the stars are not visible
    be 

 67%|██████▋   | 105/157 [1:04:16<31:42, 36.59s/it]


--- Batch 104 examples ---

Image 6656:
  True class: wooden spoon
  CLIP prediction: sombrero
  ICE prediction: sombrero
  Captions:
    a photo of: a photo of a man playing drums
    describe specifically: describe specifically, the drummers are not the same
    be concise: be concise - drum player / percussion player in san, california
  CLIP confidence: 21.24%
  ICE confidence: 21.24%

Image 6657:
  True class: Egyptian cat
  CLIP prediction: academic gown
  ICE prediction: academic gown
  Captions:
    a photo of: a photo of a woman sitting on a bed
    describe specifically: describe specifically, the person is a person
    be concise: be concisely dressed in black
  CLIP confidence: 18.79%
  ICE confidence: 18.79%

Image 6658:
  True class: banana
  CLIP prediction: orange
  ICE prediction: orange
  Captions:
    a photo of: a photo of a bunch of oranges and bananas
    describe specifically: describe specifically, the fruit is ripe
    be concise: be concisely, the fruit is ri

 68%|██████▊   | 106/157 [1:04:55<31:36, 37.19s/it]


--- Batch 105 examples ---

Image 6720:
  True class: birdhouse
  CLIP prediction: birdhouse
  ICE prediction: birdhouse
  Captions:
    a photo of: a photo of a bird house in the snow
    describe specifically: describe specifically, the bird house is a birdhouse
    be concise: be concise - a collection of poems
  CLIP confidence: 99.76%
  ICE confidence: 105.37%

Image 6721:
  True class: plunger
  CLIP prediction: go-kart
  ICE prediction: go-kart
  Captions:
    a photo of: a photo of a man in a baseball uniform
    describe specifically: describe specifically, the following is a very important part of the story
    be concise: be concisely, be concisely
  CLIP confidence: 24.56%
  ICE confidence: 24.56%

Image 6722:
  True class: bee
  CLIP prediction: bee
  ICE prediction: bee
  Captions:
    a photo of: a photo of yellow flowers
    describe specifically: describe specifically, the chrys is a very, yellow - colored flower
    be concise: be concisely with these yellow chrys
  

 68%|██████▊   | 107/157 [1:05:31<30:54, 37.09s/it]


--- Batch 106 examples ---

Image 6784:
  True class: scorpion
  CLIP prediction: scorpion
  ICE prediction: scorpion
  Captions:
    a photo of: a photo of a small lizard on the sidewalk
    describe specifically: describe specifically, the term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term
    be concise: be concisement of the ground
  CLIP confidence: 33.01%
  ICE confidence: 33.01%

Image 6785:
  True class: picket fence
  CLIP prediction: beacon
  ICE prediction: beacon
  Captions:
    a photo of: a photo of a lighthouse on the beach
    describe specifically: describe specifically, the lighthouse is a symbol of the city
    be concise: be concise lighthouse
  CLIP confidence: 40.04%
  ICE confidence: 45.00%

Image 6786:
  True class: cash machine
  CLIP prediction: refrigerator
  ICE prediction: refrigerator

 69%|██████▉   | 108/157 [1:06:11<30:55, 37.86s/it]


--- Batch 107 examples ---

Image 6848:
  True class: obelisk
  CLIP prediction: obelisk
  ICE prediction: obelisk
  Captions:
    a photo of: a photo of the washington monument
    describe specifically: describe specifically, the monument is a monument
    be concise: be concise monument in madrid
  CLIP confidence: 75.78%
  ICE confidence: 76.37%

Image 6849:
  True class: lemon
  CLIP prediction: lemon
  ICE prediction: lemon
  Captions:
    a photo of: a photo of a tree with green leaves
    describe specifically: describe specifically, the tree is a tree that is a tree that is a tree that is a tree that is a tree that is a tree
    be concise: be concise - yellow - 10
  CLIP confidence: 45.31%
  ICE confidence: 45.36%

Image 6850:
  True class: sulphur butterfly
  CLIP prediction: sulphur butterfly
  ICE prediction: sulphur butterfly
  Captions:
    a photo of: a photo of a butterfly on a flower
    describe specifically: describe specifically, the butterfly is a yellow butterfl

 69%|██████▉   | 109/157 [1:06:49<30:12, 37.76s/it]


--- Batch 108 examples ---

Image 6912:
  True class: neck brace
  CLIP prediction: neck brace
  ICE prediction: neck brace
  Captions:
    a photo of: a photo of a woman doing yoga
    describe specifically: describe specifically, the body is a physical function of the body
    be concise: be concisely with these yoga poses
  CLIP confidence: 51.56%
  ICE confidence: 51.56%

Image 6913:
  True class: sunglasses
  CLIP prediction: sunglasses
  ICE prediction: sunglasses
  Captions:
    a photo of: a photo of a woman with sunglasses on
    describe specifically: describe specifically, the person in the picture is a person
    be concise: be concisely, i ' m ' m ' m ' m ' m ' m ' m ' m ' m ' m ' m
  CLIP confidence: 94.48%
  ICE confidence: 94.63%

Image 6914:
  True class: mantis
  CLIP prediction: mantis
  ICE prediction: mantis
  Captions:
    a photo of: a photo of a person on a skateboard
    describe specifically: describe specifically, the cat is a cat
    be concise: be concisel

 70%|███████   | 110/157 [1:07:22<28:38, 36.56s/it]


--- Batch 109 examples ---

Image 6976:
  True class: pizza
  CLIP prediction: pizza
  ICE prediction: pizza
  Captions:
    a photo of: a photo of a pizza with a pizza cutter
    describe specifically: describe specifically, the pizza is a pizza
    be concise: be concisely, this pizza is a great way to enjoy the season
  CLIP confidence: 93.02%
  ICE confidence: 98.93%

Image 6977:
  True class: scoreboard
  CLIP prediction: scoreboard
  ICE prediction: scoreboard
  Captions:
    a photo of: a photo of a building with a sign on it
    describe specifically: describe specifically, the building is a work of art
    be concise: be concise - the best of the best
  CLIP confidence: 99.12%
  ICE confidence: 99.12%

Image 6978:
  True class: mushroom
  CLIP prediction: mushroom
  ICE prediction: mushroom
  Captions:
    a photo of: a photo of a mushroom with a mushroom in the background
    describe specifically: describe specifically, the person who is the person who is the person who is 

 71%|███████   | 111/157 [1:07:59<28:09, 36.73s/it]


--- Batch 110 examples ---

Image 7040:
  True class: academic gown
  CLIP prediction: academic gown
  ICE prediction: academic gown
  Captions:
    a photo of: a photo of a man in a graduation gown
    describe specifically: describe specifically, the man is a judge
    be concise: be concise man
  CLIP confidence: 99.80%
  ICE confidence: 100.00%

Image 7041:
  True class: volleyball
  CLIP prediction: volleyball
  ICE prediction: volleyball
  Captions:
    a photo of: a photo of a group of people jumping in the air
    describe specifically: describe specifically, the beach is a place where the people are playing volleyball
    be concise: be concisely, the best beach volleyball game ever
  CLIP confidence: 80.22%
  ICE confidence: 86.52%

Image 7042:
  True class: birdhouse
  CLIP prediction: crane
  ICE prediction: crane
  Captions:
    a photo of: a photo of a street light with flowers in the background
    describe specifically: describe specifically, the bird is a bird
    be 

 71%|███████▏  | 112/157 [1:08:35<27:20, 36.45s/it]


--- Batch 111 examples ---

Image 7104:
  True class: dining table
  CLIP prediction: dining table
  ICE prediction: dining table
  Captions:
    a photo of: a photo of a living room with a couch and a table
    describe specifically: describe specifically, the table is a table
    be concise: be concisely, the best way to get a job
  CLIP confidence: 41.28%
  ICE confidence: 47.56%

Image 7105:
  True class: grasshopper
  CLIP prediction: grasshopper
  ICE prediction: grasshopper
  Captions:
    a photo of: a photo of a person walking down a street
    describe specifically: describe specifically, the car is a car
    be concise: be concisely, the car is parked on the street
  CLIP confidence: 38.13%
  ICE confidence: 38.13%

Image 7106:
  True class: ice lolly
  CLIP prediction: ice lolly
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a little boy eating a strawberry
    describe specifically: describe specifically, the child is a child
    be concise: be con

 72%|███████▏  | 113/157 [1:09:13<26:55, 36.71s/it]


--- Batch 112 examples ---

Image 7168:
  True class: stopwatch
  CLIP prediction: stopwatch
  ICE prediction: stopwatch
  Captions:
    a photo of: a photo of a bowl on a stove
    describe specifically: describe specifically, the water is the same as the water vapor
    be concise: be concisement in the kitchen
  CLIP confidence: 86.18%
  ICE confidence: 86.18%

Image 7169:
  True class: orange
  CLIP prediction: pomegranate
  ICE prediction: pomegranate
  Captions:
    a photo of: a photo of a blood orange on a black background
    describe specifically: describe specifically by the orange
    be concise: be concise - blood orange
  CLIP confidence: 51.51%
  ICE confidence: 51.51%

Image 7170:
  True class: school bus
  CLIP prediction: school bus
  ICE prediction: school bus
  Captions:
    a photo of: a photo of a school bus on a highway
    describe specifically: describe specifically, the bus is a school bus
    be concise: be concisely, the bus is driving on the road
  CLIP co

 73%|███████▎  | 114/157 [1:09:48<26:07, 36.45s/it]


--- Batch 113 examples ---

Image 7232:
  True class: beer bottle
  CLIP prediction: beer bottle
  ICE prediction: beer bottle
  Captions:
    a photo of: a photo of a bear standing next to a bottle of beer
    describe specifically: describe specifically, the bear is a symbol of the bear family
    be concise: be concise - the best beer in the world
  CLIP confidence: 36.21%
  ICE confidence: 38.01%

Image 7233:
  True class: pole
  CLIP prediction: sandal
  ICE prediction: sandal
  Captions:
    a photo of: a photo of a cat on the sidewalk
    describe specifically: describe specifically, the black cat is a black cat
    be concise: be concisely, the first of the three - legged, two - legged, three - legged, three - legged, three legged, three legged, three legged, three legged, three legged, three legged, three legged, three legged,
  CLIP confidence: 32.47%
  ICE confidence: 32.47%

Image 7234:
  True class: standard poodle
  CLIP prediction: standard poodle
  ICE prediction: stan

 73%|███████▎  | 115/157 [1:10:25<25:28, 36.39s/it]


--- Batch 114 examples ---

Image 7296:
  True class: banana
  CLIP prediction: banana
  ICE prediction: banana
  Captions:
    a photo of: a photo of a banana
    describe specifically: describe specifically, the banana is a banana
    be concise: be concisely, the banana is a fruit
  CLIP confidence: 94.63%
  ICE confidence: 100.49%

Image 7297:
  True class: European fire salamander
  CLIP prediction: European fire salamander
  ICE prediction: European fire salamander
  Captions:
    a photo of: a photo of a large, dark, brown area with a few green spots
    describe specifically: describe specifically, the earth is a planet
    be concise: be concise - the universe
  CLIP confidence: 95.26%
  ICE confidence: 95.26%

Image 7298:
  True class: maypole
  CLIP prediction: maypole
  ICE prediction: maypole
  Captions:
    a photo of: a photo of a cemetery with a cross in the middle
    describe specifically: describe specifically, the name of the monument is the name of the monument
  

 74%|███████▍  | 116/157 [1:11:01<24:54, 36.44s/it]


--- Batch 115 examples ---

Image 7360:
  True class: candle
  CLIP prediction: candle
  ICE prediction: candle
  Captions:
    a photo of: a photo of a candle with a christmas tree in the background
    describe specifically: describe specifically with the candle
    be concise: be concisely with this candle
  CLIP confidence: 94.29%
  ICE confidence: 100.10%

Image 7361:
  True class: potpie
  CLIP prediction: potpie
  ICE prediction: potpie
  Captions:
    a photo of: a photo of a pizza on a table
    describe specifically: describe specifically, the pizza is a pizza
    be concise: be concisely, the pizza is a bit of the pizza
  CLIP confidence: 29.98%
  ICE confidence: 29.98%

Image 7362:
  True class: cougar
  CLIP prediction: fur coat
  ICE prediction: fur coat
  Captions:
    a photo of: a photo of a small mouse sitting on a table
    describe specifically: describe specifically, the mouse is a mouse
    be concise: be concisely, a very, and very - loved, and very - loved, and

 75%|███████▍  | 117/157 [1:11:38<24:19, 36.49s/it]


--- Batch 116 examples ---

Image 7424:
  True class: jellyfish
  CLIP prediction: spiny lobster
  ICE prediction: spiny lobster
  Captions:
    a photo of: a photo of a light in the water
    describe specifically: describe specifically, the two most important features of the deep sea are the deep water, the deep sea, and the deep water
    be concise: be concisement - the dark side of the moon
  CLIP confidence: 13.51%
  ICE confidence: 13.51%

Image 7425:
  True class: volleyball
  CLIP prediction: volleyball
  ICE prediction: volleyball
  Captions:
    a photo of: a photo of a group of people playing volleyball
    describe specifically: describe specifically, the volleyball game is a game that is played by a group of young men
    be concise: be concisely, the beach volleyball game
  CLIP confidence: 99.32%
  ICE confidence: 104.98%

Image 7426:
  True class: brain coral
  CLIP prediction: brain coral
  ICE prediction: brain coral
  Captions:
    a photo of: a photo of a black an

 75%|███████▌  | 118/157 [1:12:14<23:41, 36.44s/it]


--- Batch 117 examples ---

Image 7488:
  True class: pizza
  CLIP prediction: pizza
  ICE prediction: pizza
  Captions:
    a photo of: a photo of a pizza with a pizza in it
    describe specifically: describe specifically, the pizza is a pizza that is a pizza that is a pizza that is a pizza that is a pizza that is a pizza
    be concise: be concise pizza
  CLIP confidence: 82.23%
  ICE confidence: 88.53%

Image 7489:
  True class: Labrador retriever
  CLIP prediction: Labrador retriever
  ICE prediction: Labrador retriever
  Captions:
    a photo of: a photo of a white dog in the grass
    describe specifically: describe specifically, a white labie
    be concise: be concisely, a white labie puppy
  CLIP confidence: 76.71%
  ICE confidence: 76.71%

Image 7490:
  True class: black stork
  CLIP prediction: black stork
  ICE prediction: black stork
  Captions:
    a photo of: a photo of a bird in a field
    describe specifically: describe specifically, the bird is a bird
    be concis

 76%|███████▌  | 119/157 [1:12:51<23:08, 36.53s/it]


--- Batch 118 examples ---

Image 7552:
  True class: dugong
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a jellyfish
    describe specifically: describe specifically, the sea is a place where the ocean is not
    be concise: be concisely, the sea lion
  CLIP confidence: 37.18%
  ICE confidence: 37.18%

Image 7553:
  True class: binoculars
  CLIP prediction: cannon
  ICE prediction: cannon
  Captions:
    a photo of: a photo of a bunch of green and white beads
    describe specifically: describe specifically in the following the following the following
    be concise: be concisel gold plated brass plated brass plated brass plated brass plated brass plated brass plated brass plated
  CLIP confidence: 15.15%
  ICE confidence: 15.15%

Image 7554:
  True class: water jug
  CLIP prediction: hog
  ICE prediction: hog
  Captions:
    a photo of: a photo of a group of people riding motorcycles
    describe specifically: describe specifi

 76%|███████▋  | 120/157 [1:13:28<22:36, 36.67s/it]


--- Batch 119 examples ---

Image 7616:
  True class: wooden spoon
  CLIP prediction: wooden spoon
  ICE prediction: wooden spoon
  Captions:
    a photo of: a photo of a frying pan with a frying pan full of chicken
    describe specifically: describe specifically, the ingredients are not as good as the ingredients
    be concise: be concisely with this delicious and easy chicken and rice skill
  CLIP confidence: 28.15%
  ICE confidence: 28.17%

Image 7617:
  True class: hourglass
  CLIP prediction: hourglass
  ICE prediction: hourglass
  Captions:
    a photo of: a photo of a clock with a clock face
    describe specifically: describe specifically, the time is right for the time
    be concise: be concise - the best of the best
  CLIP confidence: 83.79%
  ICE confidence: 83.79%

Image 7618:
  True class: dam
  CLIP prediction: dam
  ICE prediction: dam
  Captions:
    a photo of: a photo of a waterfall in the jungle
    describe specifically: describe specifically, the water is the m

 77%|███████▋  | 121/157 [1:14:04<21:57, 36.60s/it]


--- Batch 120 examples ---

Image 7680:
  True class: nail
  CLIP prediction: nail
  ICE prediction: nail
  Captions:
    a photo of: a photo of a cross on a stone wall
    describe specifically: describe specifically, the cross is a symbol of the cross
    be concise: be concise - the cross
  CLIP confidence: 30.52%
  ICE confidence: 30.52%

Image 7681:
  True class: grasshopper
  CLIP prediction: grasshopper
  ICE prediction: grasshopper
  Captions:
    a photo of: a photo of a bug on a wall
    describe specifically: describe specifically, the scorpion is a scorpion
    be concise: be concisely, the scorpion is a very good looking insect
  CLIP confidence: 73.97%
  ICE confidence: 73.97%

Image 7682:
  True class: crane
  CLIP prediction: Egyptian cat
  ICE prediction: Egyptian cat
  Captions:
    a photo of: a photo of a building with a clock on it
    describe specifically: describe specifically, the building is a very large, rectangular structure
    be concise: be concisely, th

 78%|███████▊  | 122/157 [1:14:41<21:24, 36.70s/it]


--- Batch 121 examples ---

Image 7744:
  True class: lakeside
  CLIP prediction: dam
  ICE prediction: dam
  Captions:
    a photo of: a photo of a lake with trees and water
    describe specifically: describe specifically, the water is a little bit of blue
    be concise: be concise - the lake
  CLIP confidence: 46.97%
  ICE confidence: 46.97%

Image 7745:
  True class: reel
  CLIP prediction: punching bag
  ICE prediction: punching bag
  Captions:
    a photo of: a photo of a man walking down a street
    describe specifically: describe specifically, the person is a person
    be concise: be concisely, the man is walking in the park
  CLIP confidence: 4.73%
  ICE confidence: 4.73%

Image 7746:
  True class: parking meter
  CLIP prediction: stopwatch
  ICE prediction: stopwatch
  Captions:
    a photo of: a photo of a car parked in front of a house
    describe specifically: describe specifically on the rear of the car
    be concise: be concisely, the rear of the car is a bit of th

 78%|███████▊  | 123/157 [1:15:17<20:36, 36.38s/it]


--- Batch 122 examples ---

Image 7808:
  True class: pay-phone
  CLIP prediction: pay-phone
  ICE prediction: pay-phone
  Captions:
    a photo of: a photo of a yellow and blue building
    describe specifically: describe specifically, the image is a blur of the image
    be concise: be concise - the best of the best
  CLIP confidence: 32.37%
  ICE confidence: 32.37%

Image 7809:
  True class: sea slug
  CLIP prediction: sea slug
  ICE prediction: sea slug
  Captions:
    a photo of: a photo of a white dog running through the grass
    describe specifically: describe specifically, the white dog is a white dog
    be concise: be concisely, the white dog
  CLIP confidence: 62.50%
  ICE confidence: 62.50%

Image 7810:
  True class: rugby ball
  CLIP prediction: rugby ball
  ICE prediction: rugby ball
  Captions:
    a photo of: a photo of a group of people playing soccer
    describe specifically: describe specifically, the players are not the same
    be concise: be concisely, the best

 79%|███████▉  | 124/157 [1:15:55<20:15, 36.82s/it]


--- Batch 123 examples ---

Image 7872:
  True class: spider web
  CLIP prediction: tabby
  ICE prediction: tabby
  Captions:
    a photo of: a photo of a building with a tree in the fore
    describe specifically: describe specifically, the trees are not in bloom, but the trees are in bloom
    be concise: be concise - a great place to stay in the heart of the city
  CLIP confidence: 16.10%
  ICE confidence: 16.10%

Image 7873:
  True class: jinrikisha
  CLIP prediction: go-kart
  ICE prediction: go-kart
  Captions:
    a photo of: a photo of a group of people riding bikes
    describe specifically: describe specifically, the person in the picture is a person
    be concise: be concisely, the best way to get a ride
  CLIP confidence: 47.27%
  ICE confidence: 47.27%

Image 7874:
  True class: American alligator
  CLIP prediction: American alligator
  ICE prediction: American alligator
  Captions:
    a photo of: a photo of a bear in the woods
    describe specifically: describe specif

 80%|███████▉  | 125/157 [1:16:30<19:26, 36.45s/it]


--- Batch 124 examples ---

Image 7936:
  True class: centipede
  CLIP prediction: spiny lobster
  ICE prediction: spiny lobster
  Captions:
    a photo of: a photo of a small red flower on the ground
    describe specifically: describe specifically, the earth is a flat surface
    be concise: be concisement - the best of the best
  CLIP confidence: 27.42%
  ICE confidence: 27.42%

Image 7937:
  True class: school bus
  CLIP prediction: school bus
  ICE prediction: school bus
  Captions:
    a photo of: a photo of a yellow truck
    describe specifically: describe specifically, the vehicle is a vehicle
    be concise: be concise - the best of the best
  CLIP confidence: 85.64%
  ICE confidence: 89.89%

Image 7938:
  True class: slug
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a dog laying on the floor
    describe specifically: describe specifically, the food is not the same
    be concise: be concisely, the best way to cook fi

 80%|████████  | 126/157 [1:17:06<18:43, 36.23s/it]


--- Batch 125 examples ---

Image 8000:
  True class: albatross
  CLIP prediction: albatross
  ICE prediction: albatross
  Captions:
    a photo of: a photo of a bald eagle
    describe specifically: describe specifically, the bald eagle is a bald eagle
    be concise: be concise - the great white - tailed eagle
  CLIP confidence: 74.51%
  ICE confidence: 74.66%

Image 8001:
  True class: steel arch bridge
  CLIP prediction: steel arch bridge
  ICE prediction: steel arch bridge
  Captions:
    a photo of: a photo of a bridge
    describe specifically: describe specifically, the bridge is a bridge
    be concise: be concise - the bridge
  CLIP confidence: 92.58%
  ICE confidence: 93.65%

Image 8002:
  True class: tabby
  CLIP prediction: Egyptian cat
  ICE prediction: Egyptian cat
  Captions:
    a photo of: a photo of a cat sitting on a table
    describe specifically: describe specifically, the cat is a cat
    be concise: be concisely, the cat is a very good friend
  CLIP confidence

 81%|████████  | 127/157 [1:17:41<17:58, 35.94s/it]


--- Batch 126 examples ---

Image 8064:
  True class: pop bottle
  CLIP prediction: pop bottle
  ICE prediction: pop bottle
  Captions:
    a photo of: a photo of a small dog with a blue shirt
    describe specifically: describe specifically poo dog breed
    be concise: be concisely, a beautiful, and adorable dog
  CLIP confidence: 18.16%
  ICE confidence: 18.16%

Image 8065:
  True class: dragonfly
  CLIP prediction: dragonfly
  ICE prediction: dragonfly
  Captions:
    a photo of: a photo of a dragonfly
    describe specifically: describe specifically, the dragonfly is a common insect
    be concise: be concise dragonfly
  CLIP confidence: 97.95%
  ICE confidence: 103.71%

Image 8066:
  True class: black stork
  CLIP prediction: crane
  ICE prediction: crane
  Captions:
    a photo of: a photo of a bird standing on a rock
    describe specifically: describe specifically, the birds are not in the water
    be concise: be concise - coucan coucan
  CLIP confidence: 41.33%
  ICE confid

 82%|████████▏ | 128/157 [1:18:16<17:07, 35.44s/it]


--- Batch 127 examples ---

Image 8128:
  True class: bee
  CLIP prediction: bee
  ICE prediction: bee
  Captions:
    a photo of: a photo of a white flower with yellow center
    describe specifically: describe specifically, the flower is a white flower with a yellow center
    be concise: be concisely, be concisely, be concisely, be concisely, be concisely be concisely be concisely, beciely beciely becieciecieciecieciecieciecie
  CLIP confidence: 89.79%
  ICE confidence: 90.28%

Image 8129:
  True class: lemon
  CLIP prediction: lemon
  ICE prediction: lemon
  Captions:
    a photo of: a photo of three donuts
    describe specifically: describe specifically, the apple is a fruit
    be concise: be concisely, the best way to eat
  CLIP confidence: 37.70%
  ICE confidence: 37.70%

Image 8130:
  True class: cockroach
  CLIP prediction: cockroach
  ICE prediction: cockroach
  Captions:
    a photo of: a photo of a spider crawling on a concrete floor
    describe specifically: describe s

 82%|████████▏ | 129/157 [1:18:50<16:25, 35.18s/it]


--- Batch 128 examples ---

Image 8192:
  True class: cliff
  CLIP prediction: goldfish
  ICE prediction: goldfish
  Captions:
    a photo of: a photo of a field with a red soil
    describe specifically: describe specifically, the soil is the soil of the soil
    be concise: be concise - the best of the best
  CLIP confidence: 14.59%
  ICE confidence: 14.59%

Image 8193:
  True class: steel arch bridge
  CLIP prediction: steel arch bridge
  ICE prediction: steel arch bridge
  Captions:
    a photo of: a photo of the bridge that is being built
    describe specifically: describe specifically, the bridge is a symbol of the city
    be concise: be concise - the bridge
  CLIP confidence: 88.53%
  ICE confidence: 90.33%

Image 8194:
  True class: koala
  CLIP prediction: spiny lobster
  ICE prediction: spiny lobster
  Captions:
    a photo of: a photo of a man in a room
    describe specifically: describe specifically, the person is a person
    be concise: be concisely in a classroom
  C

 83%|████████▎ | 130/157 [1:19:28<16:08, 35.88s/it]


--- Batch 129 examples ---

Image 8256:
  True class: Labrador retriever
  CLIP prediction: ox
  ICE prediction: ox
  Captions:
    a photo of: a photo of a black bear in the woods
    describe specifically: describe specifically, the bear is a type of bear
    be concise: be concise - the best of the best
  CLIP confidence: 7.46%
  ICE confidence: 8.11%

Image 8257:
  True class: abacus
  CLIP prediction: abacus
  ICE prediction: abacus
  Captions:
    a photo of: a photo of a man in a blue shirt
    describe specifically: describe specifically in the following the following the following
    be concise: be concise - the best of the best
  CLIP confidence: 19.71%
  ICE confidence: 19.71%

Image 8258:
  True class: desk
  CLIP prediction: desk
  ICE prediction: desk
  Captions:
    a photo of: a photo of a computer
    describe specifically: describe specifically, the computer is a computer
    be concise: be concisely, the computer desk is a great place to work
  CLIP confidence: 92.

 83%|████████▎ | 131/157 [1:20:04<15:32, 35.88s/it]


--- Batch 130 examples ---

Image 8320:
  True class: pomegranate
  CLIP prediction: pomegranate
  ICE prediction: pomegranate
  Captions:
    a photo of: a photo of a heart made out of red roses
    describe specifically: describe specifically with the following the following the following the following the following the following the following the following the following the following
    be concise: be concisement - red rose petals
  CLIP confidence: 83.69%
  ICE confidence: 83.69%

Image 8321:
  True class: academic gown
  CLIP prediction: academic gown
  ICE prediction: academic gown
  Captions:
    a photo of: a photo of the team at the start of the race
    describe specifically: describe specifically, the team is a member of the club
    be concise: be concise - the best team in the world
  CLIP confidence: 89.16%
  ICE confidence: 89.16%

Image 8322:
  True class: plate
  CLIP prediction: wooden spoon
  ICE prediction: wooden spoon
  Captions:
    a photo of: a photo of a wat

 84%|████████▍ | 132/157 [1:20:41<15:08, 36.32s/it]


--- Batch 131 examples ---

Image 8384:
  True class: sea cucumber
  CLIP prediction: slug
  ICE prediction: slug
  Captions:
    a photo of: a photo of a worm on the ground
    describe specifically: describe specifically, the worm is a very large, black worm
    be concise: be concise - the cater
  CLIP confidence: 39.38%
  ICE confidence: 40.36%

Image 8385:
  True class: bucket
  CLIP prediction: bucket
  ICE prediction: bucket
  Captions:
    a photo of: a photo of a glass of water
    describe specifically: describe specifically in the glass
    be concise: be concisement cup
  CLIP confidence: 56.30%
  ICE confidence: 56.30%

Image 8386:
  True class: beaker
  CLIP prediction: beaker
  ICE prediction: beaker
  Captions:
    a photo of: a photo of a bottle of liquid
    describe specifically: describe specifically, the liquid is the same as the liquid
    be concise: be concisement test
  CLIP confidence: 40.58%
  ICE confidence: 40.58%

Image 8387:
  True class: computer keyboa

 85%|████████▍ | 133/157 [1:21:15<14:16, 35.67s/it]


--- Batch 132 examples ---

Image 8448:
  True class: dam
  CLIP prediction: dam
  ICE prediction: dam
  Captions:
    a photo of: a photo of a bridge over a river
    describe specifically: describe specifically, the water is the same
    be concise: be concise - the bridge
  CLIP confidence: 98.78%
  ICE confidence: 98.93%

Image 8449:
  True class: chain
  CLIP prediction: chain
  ICE prediction: chain
  Captions:
    a photo of: a photo of a man in a suit and tie
    describe specifically: describe specifically, the number of the word is the number of the word
    be concise: be concise - the best of the best
  CLIP confidence: 32.18%
  ICE confidence: 32.18%

Image 8450:
  True class: European fire salamander
  CLIP prediction: European fire salamander
  ICE prediction: European fire salamander
  Captions:
    a photo of: a photo of a bird in the woods
    describe specifically: describe specifically, the black bear is a common predator
    be concise: be concise - the best of th

 85%|████████▌ | 134/157 [1:21:51<13:45, 35.88s/it]


--- Batch 133 examples ---

Image 8512:
  True class: bighorn
  CLIP prediction: lakeside
  ICE prediction: lakeside
  Captions:
    a photo of: a photo of a mountain range
    describe specifically: describe specifically, the moose is a mammal
    be concise: be concisely, the mountain is in the background
  CLIP confidence: 28.49%
  ICE confidence: 29.83%

Image 8513:
  True class: fountain
  CLIP prediction: fountain
  ICE prediction: fountain
  Captions:
    a photo of: a photo of a city at night
    describe specifically: describe specifically, the water is very dark
    be concise: be concisely, the water is calm
  CLIP confidence: 96.78%
  ICE confidence: 96.78%

Image 8514:
  True class: turnstile
  CLIP prediction: turnstile
  ICE prediction: turnstile
  Captions:
    a photo of: a photo of a lobby with a large screen
    describe specifically: describe specifically, the lobby is a place where the lobby is located
    be concise: be concisely, the best way to get a job in the

 86%|████████▌ | 135/157 [1:22:28<13:12, 36.00s/it]


--- Batch 134 examples ---

Image 8576:
  True class: cauliflower
  CLIP prediction: cauliflower
  ICE prediction: cauliflower
  Captions:
    a photo of: a photo of a tennis ball in the dark
    describe specifically: describe specifically, the ball is a ball that is a ball that is a ball that is a ball that is a ball that is a ball
    be concise: be concisement - the green apple
  CLIP confidence: 41.11%
  ICE confidence: 41.11%

Image 8577:
  True class: goldfish
  CLIP prediction: goldfish
  ICE prediction: goldfish
  Captions:
    a photo of: a photo of a fish
    describe specifically: describe specifically, the oranges are oranges
    be concise: be concisely, the oranges are orange
  CLIP confidence: 89.06%
  ICE confidence: 89.06%

Image 8578:
  True class: alp
  CLIP prediction: beacon
  ICE prediction: beacon
  Captions:
    a photo of: a photo of a person walking on a snowy path
    describe specifically: describe specifically, the snow is not the same
    be concise: be 

 87%|████████▋ | 136/157 [1:23:04<12:37, 36.07s/it]


--- Batch 135 examples ---

Image 8640:
  True class: standard poodle
  CLIP prediction: albatross
  ICE prediction: albatross
  Captions:
    a photo of: a photo of a dog
    describe specifically: describe specifically, the poo is a breed of the same breed
    be concise: be concisely, the poo is a very intelligent breed
  CLIP confidence: 91.94%
  ICE confidence: 91.94%

Image 8641:
  True class: Arabian camel
  CLIP prediction: Arabian camel
  ICE prediction: Arabian camel
  Captions:
    a photo of: a photo of a camel and a dog
    describe specifically: describe specifically, the camel is a camel
    be concise: be concise camels
  CLIP confidence: 98.54%
  ICE confidence: 103.61%

Image 8642:
  True class: pomegranate
  CLIP prediction: pomegranate
  ICE prediction: pomegranate
  Captions:
    a photo of: a photo of a red flower on a blue sky background
    describe specifically: describe specifically, the poins are a very good choice for the christmas tree
    be concise: be c

 87%|████████▋ | 137/157 [1:23:39<11:54, 35.75s/it]


--- Batch 136 examples ---

Image 8704:
  True class: bikini
  CLIP prediction: bikini
  ICE prediction: bikini
  Captions:
    a photo of: a photo of a woman on a boat
    describe specifically: describe specifically, the woman is a sailor
    be concise: be concisely, the best way to get out of the boat
  CLIP confidence: 48.63%
  ICE confidence: 48.63%

Image 8705:
  True class: fur coat
  CLIP prediction: fur coat
  ICE prediction: fur coat
  Captions:
    a photo of: a photo of a man in a white coat
    describe specifically: describe specifically, the child is a child
    be concise: be concisely, the man is wearing a white coat
  CLIP confidence: 41.67%
  ICE confidence: 41.70%

Image 8706:
  True class: bikini
  CLIP prediction: bikini
  ICE prediction: bikini
  Captions:
    a photo of: a photo of a man standing on a baseball field
    describe specifically: describe specifically, the person who was the first person to be a person
    be concise: be concise, the first female 

 88%|████████▊ | 138/157 [1:24:16<11:28, 36.24s/it]


--- Batch 137 examples ---

Image 8768:
  True class: golden retriever
  CLIP prediction: golden retriever
  ICE prediction: golden retriever
  Captions:
    a photo of: a photo of a dog sitting on the grass
    describe specifically: describe specifically, adopt a golden retrieve
    be concise: be concise golden retrieve
  CLIP confidence: 83.40%
  ICE confidence: 89.65%

Image 8769:
  True class: plunger
  CLIP prediction: bannister
  ICE prediction: bannister
  Captions:
    a photo of: a photo of a room with a bed and a window
    describe specifically: describe specifically, the room is a room with a view
    be concise: be concisely in the house
  CLIP confidence: 15.78%
  ICE confidence: 15.78%

Image 8770:
  True class: cannon
  CLIP prediction: cannon
  ICE prediction: cannon
  Captions:
    a photo of: a photo of a group of people standing in a field
    describe specifically: describe specifically, the fire is not the same
    be concise: be conciseing the fire
  CLIP conf

 89%|████████▊ | 139/157 [1:24:50<10:40, 35.57s/it]


--- Batch 138 examples ---

Image 8832:
  True class: iPod
  CLIP prediction: remote control
  ICE prediction: remote control
  Captions:
    a photo of: a photo of a cell phone with a green and black cover
    describe specifically: describe specifically with the use of the mobile
    be concise: be concisem - mobile phone stand
  CLIP confidence: 20.37%
  ICE confidence: 22.72%

Image 8833:
  True class: oboe
  CLIP prediction: oboe
  ICE prediction: oboe
  Captions:
    a photo of: a photo of a necklace with a black and white bea
    describe specifically: describe specifically, the number of the number of the number of the number of the number of the number of the number of the number of the
    be concise: be concisely with this necklace
  CLIP confidence: 87.40%
  ICE confidence: 87.40%

Image 8834:
  True class: brass
  CLIP prediction: computer keyboard
  ICE prediction: computer keyboard
  Captions:
    a photo of: a photo of a man in a boat
    describe specifically: describ

 89%|████████▉ | 140/157 [1:25:27<10:10, 35.91s/it]


--- Batch 139 examples ---

Image 8896:
  True class: tarantula
  CLIP prediction: tarantula
  ICE prediction: tarantula
  Captions:
    a photo of: a photo of a black and white spider
    describe specifically: describe specifically, the earth is a planet
    be concise: be concisement - the best of the best
  CLIP confidence: 83.89%
  ICE confidence: 83.89%

Image 8897:
  True class: dining table
  CLIP prediction: sewing machine
  ICE prediction: sewing machine
  Captions:
    a photo of: a photo of a dining area with a table and chairs
    describe specifically: describe specifically, the table is a table
    be concise: be concise - dining area
  CLIP confidence: 39.94%
  ICE confidence: 39.94%

Image 8898:
  True class: European fire salamander
  CLIP prediction: European fire salamander
  ICE prediction: European fire salamander
  Captions:
    a photo of: a photo of a field with a bird in the middle
    describe specifically: describe specifically, the soil is a mixture of soi

 90%|████████▉ | 141/157 [1:26:06<09:47, 36.70s/it]


--- Batch 140 examples ---

Image 8960:
  True class: scoreboard
  CLIP prediction: scoreboard
  ICE prediction: scoreboard
  Captions:
    a photo of: a photo of a train with a sign on it
    describe specifically: describe specifically, the number of the train is the number of the train
    be concise: be concise - the best of the best
  CLIP confidence: 93.12%
  ICE confidence: 93.12%

Image 8961:
  True class: go-kart
  CLIP prediction: go-kart
  ICE prediction: go-kart
  Captions:
    a photo of: a photo of a man riding a motorcycle
    describe specifically: describe specifically, the car is a car
    be concise: be concisely, the best of the best
  CLIP confidence: 96.48%
  ICE confidence: 96.48%

Image 8962:
  True class: moving van
  CLIP prediction: police van
  ICE prediction: police van
  Captions:
    a photo of: a photo of a white truck parked in a parking lot
    describe specifically: describe specifically, the truck is a good choice for the truck
    be concise: be co

 90%|█████████ | 142/157 [1:26:41<09:05, 36.34s/it]


--- Batch 141 examples ---

Image 9024:
  True class: pretzel
  CLIP prediction: beaker
  ICE prediction: beaker
  Captions:
    a photo of: a photo of a dog
    describe specifically: describe specifically, the cat is a cat
    be concise: be concisely, the cat is a very large cat
  CLIP confidence: 17.41%
  ICE confidence: 17.41%

Image 9025:
  True class: boa constrictor
  CLIP prediction: boa constrictor
  ICE prediction: boa constrictor
  Captions:
    a photo of: a photo of a woman with a snake on her neck
    describe specifically: describe specifically, the eye is the most important part of the eye
    be concise: be concisement - gold snake ring
  CLIP confidence: 90.82%
  ICE confidence: 91.26%

Image 9026:
  True class: brown bear
  CLIP prediction: brown bear
  ICE prediction: brown bear
  Captions:
    a photo of: a photo of a bear walking across a road
    describe specifically: describe specifically, the bear is a bear
    be concise: be concisely, the bear is a very la

 91%|█████████ | 143/157 [1:27:19<08:34, 36.73s/it]


--- Batch 142 examples ---

Image 9088:
  True class: orange
  CLIP prediction: orange
  ICE prediction: orange
  Captions:
    a photo of: a photo of a slice of orange
    describe specifically: describe specifically, the orange is a fruit that is a fruit that is a fruit that is a fruit that is a fruit that is a fruit
    be concise: be concise orange
  CLIP confidence: 66.31%
  ICE confidence: 73.10%

Image 9089:
  True class: fountain
  CLIP prediction: fountain
  ICE prediction: fountain
  Captions:
    a photo of: a photo of a rainbow fountain in a park
    describe specifically: describe specifically, the rainbow appears in the sky
    be concise: be concisely, the rainbow
  CLIP confidence: 64.31%
  ICE confidence: 66.75%

Image 9090:
  True class: sandal
  CLIP prediction: king penguin
  ICE prediction: king penguin
  Captions:
    a photo of: a photo of two penguins on a yellow field
    describe specifically: describe specifically, the penguins are the same
    be concise: b

 92%|█████████▏| 144/157 [1:27:56<07:57, 36.74s/it]


--- Batch 143 examples ---

Image 9152:
  True class: parking meter
  CLIP prediction: turnstile
  ICE prediction: turnstile
  Captions:
    a photo of: a photo of a blue and white building
    describe specifically: describe specifically, the light is not visible
    be concise: be concisely, the light is blue
  CLIP confidence: 47.44%
  ICE confidence: 47.44%

Image 9153:
  True class: wok
  CLIP prediction: wok
  ICE prediction: wok
  Captions:
    a photo of: a photo of a skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill
    describe specifically: describe specifically, the skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill skill
    be concise: be concisely with this delicious and easy skill skill skill skill skill skill skill skill skill skill skill skill
  CLIP confidence: 82.23%
  ICE confidence: 82.23%

Image 9154:
  True class: German she

 92%|█████████▏| 145/157 [1:28:32<07:19, 36.64s/it]


--- Batch 144 examples ---

Image 9216:
  True class: hog
  CLIP prediction: hog
  ICE prediction: hog
  Captions:
    a photo of: a photo of a wolf in the woods
    describe specifically: describe specifically, the wolf is a wolf
    be concise: be concisely, the wolf
  CLIP confidence: 58.11%
  ICE confidence: 58.50%

Image 9217:
  True class: go-kart
  CLIP prediction: go-kart
  ICE prediction: go-kart
  Captions:
    a photo of: a photo of a white and black atv
    describe specifically: describe specifically, the atv is a very powerful vehicle
    be concise: be concise atv quad quad quad quad quad quad quad quad quad quad quad quad quad quad quad quad quad quad quad quad quad quad quad quad
  CLIP confidence: 71.14%
  ICE confidence: 71.14%

Image 9218:
  True class: sock
  CLIP prediction: sandal
  ICE prediction: sandal
  Captions:
    a photo of: a photo of a table with a bunch of flowers
    describe specifically: describe specifically, the cat is a cat
    be concise: be co

 93%|█████████▎| 146/157 [1:29:08<06:42, 36.55s/it]


--- Batch 145 examples ---

Image 9280:
  True class: cash machine
  CLIP prediction: cash machine
  ICE prediction: cash machine
  Captions:
    a photo of: a photo of a red and white bathroom
    describe specifically: describe specifically, the product is the product of the product
    be concise: be concisely, the red and white color is the same
  CLIP confidence: 25.93%
  ICE confidence: 25.93%

Image 9281:
  True class: lion
  CLIP prediction: Egyptian cat
  ICE prediction: lion
  Captions:
    a photo of: a photo of a dog playing with a ball
    describe specifically: describe specifically, the dog is a lion
    be concise: be concisely, the dog that is a great companion
  CLIP confidence: 20.01%
  ICE confidence: 20.84%

Image 9282:
  True class: guacamole
  CLIP prediction: guacamole
  ICE prediction: guacamole
  Captions:
    a photo of: a photo of a plate of food with brocco
    describe specifically: describe specifically, the food of the world is not the same
    be conci

 94%|█████████▎| 147/157 [1:29:45<06:05, 36.59s/it]


--- Batch 146 examples ---

Image 9344:
  True class: jinrikisha
  CLIP prediction: sandal
  ICE prediction: sandal
  Captions:
    a photo of: a photo of a man pushing a cart
    describe specifically: describe specifically, the wheel is the most important part of the wheel
    be concise: be concisement of the wheelbar
  CLIP confidence: 33.69%
  ICE confidence: 33.69%

Image 9345:
  True class: lemon
  CLIP prediction: confectionery
  ICE prediction: confectionery
  Captions:
    a photo of: a photo of a stack of cookies on a table
    describe specifically: describe specifically, the oranges are the most important to the oranges
    be concise: be concisement of the holy
  CLIP confidence: 16.06%
  ICE confidence: 23.50%

Image 9346:
  True class: bighorn
  CLIP prediction: bighorn
  ICE prediction: bighorn
  Captions:
    a photo of: a photo of a herd of sheep in the snow
    describe specifically: describe specifically, the sheep are not the same breed
    be concise: be concise

 94%|█████████▍| 148/157 [1:30:21<05:28, 36.47s/it]


--- Batch 147 examples ---

Image 9408:
  True class: gondola
  CLIP prediction: gondola
  ICE prediction: gondola
  Captions:
    a photo of: a photo of a street with a car parked in the middle
    describe specifically: describe specifically, the car is a car
    be concise: be concise - the best of the best
  CLIP confidence: 24.94%
  ICE confidence: 24.94%

Image 9409:
  True class: brain coral
  CLIP prediction: brain coral
  ICE prediction: brain coral
  Captions:
    a photo of: a photo of a small white bird in the woods
    describe specifically: describe specifically, the earth is a flat surface
    be concise: be concisement - ayur - ayur - ayur - ayur - ayur - ayur - ayur - ayur - ayur - ayur - a - a -
  CLIP confidence: 48.97%
  ICE confidence: 48.97%

Image 9410:
  True class: dining table
  CLIP prediction: dining table
  ICE prediction: dining table
  Captions:
    a photo of: a photo of a restaurant with a table and chairs
    describe specifically: describe specifical

 95%|█████████▍| 149/157 [1:30:58<04:53, 36.66s/it]


--- Batch 148 examples ---

Image 9472:
  True class: mushroom
  CLIP prediction: mushroom
  ICE prediction: mushroom
  Captions:
    a photo of: a photo of a tree with a green background
    describe specifically: describe specifically, the soil is the soil of the soil
    be concise: be concise - organic seeds
  CLIP confidence: 81.93%
  ICE confidence: 81.93%

Image 9473:
  True class: plate
  CLIP prediction: guacamole
  ICE prediction: guacamole
  Captions:
    a photo of: a photo of a table with plates and bowls
    describe specifically: describe specifically, the food is not the same
    be concise: be concisely, the best way to make your own taste
  CLIP confidence: 65.97%
  ICE confidence: 65.97%

Image 9474:
  True class: police van
  CLIP prediction: police van
  ICE prediction: police van
  Captions:
    a photo of: a photo of a police van and a police car
    describe specifically: describe specifically, the police are not responsible
    be concise: be concise police ve

 96%|█████████▌| 150/157 [1:31:33<04:13, 36.14s/it]


--- Batch 149 examples ---

Image 9536:
  True class: sock
  CLIP prediction: swimming trunks
  ICE prediction: swimming trunks
  Captions:
    a photo of: a photo of a man in a red shirt and blue shorts
    describe specifically: describe specifically, the two men are wearing socks
    be concise: be concisely, be concisely, be concisely be contively be contively be contively be con
  CLIP confidence: 30.69%
  ICE confidence: 30.93%

Image 9537:
  True class: cliff dwelling
  CLIP prediction: cliff dwelling
  ICE prediction: cliff dwelling
  Captions:
    a photo of: a photo of a castle with a dark sky in the background
    describe specifically: describe specifically, the building is a very complex
    be concise: be concise - the best of the best
  CLIP confidence: 28.27%
  ICE confidence: 28.27%

Image 9538:
  True class: cockroach
  CLIP prediction: centipede
  ICE prediction: centipede
  Captions:
    a photo of: a photo of a bee
    describe specifically: describe specifically,

 96%|█████████▌| 151/157 [1:32:10<03:38, 36.39s/it]


--- Batch 150 examples ---

Image 9600:
  True class: bison
  CLIP prediction: bison
  ICE prediction: bison
  Captions:
    a photo of: a photo of a bison grazing in a field
    describe specifically: describe specifically, the bison is a large, black - headed, herb - grown, herb - grown, herb - grown, herb - grown
    be concise: be concisely, the bison is a great addition to any family
  CLIP confidence: 85.64%
  ICE confidence: 91.80%

Image 9601:
  True class: freight car
  CLIP prediction: freight car
  ICE prediction: freight car
  Captions:
    a photo of: a photo of a truck with a trailer in the background
    describe specifically: describe specifically for sale
    be concise: be concise trailer
  CLIP confidence: 65.53%
  ICE confidence: 65.53%

Image 9602:
  True class: viaduct
  CLIP prediction: viaduct
  ICE prediction: viaduct
  Captions:
    a photo of: a photo of a mountain with a lake
    describe specifically: describe specifically, the land is a land that is not a

 97%|█████████▋| 152/157 [1:32:46<03:01, 36.35s/it]


--- Batch 151 examples ---

Image 9664:
  True class: koala
  CLIP prediction: koala
  ICE prediction: koala
  Captions:
    a photo of: a photo of a dog running in the dirt
    describe specifically: describe specifically, the dog is a black and white color
    be concise: be concisely, the dog is a very good dog
  CLIP confidence: 55.18%
  ICE confidence: 55.18%

Image 9665:
  True class: torch
  CLIP prediction: limousine
  ICE prediction: limousine
  Captions:
    a photo of: a photo of a man in a red and white outfit
    describe specifically: describe specifically, the red flag is a symbol of the chinese revolution
    be concise: be concisement of the red flag
  CLIP confidence: 41.58%
  ICE confidence: 41.58%

Image 9666:
  True class: bullet train
  CLIP prediction: bullet train
  ICE prediction: bullet train
  Captions:
    a photo of: a photo of a boat in the water
    describe specifically: describe specifically, the boat is a boat
    be concise: be concisely, the white a

 97%|█████████▋| 153/157 [1:33:23<02:25, 36.33s/it]


--- Batch 152 examples ---

Image 9728:
  True class: crane
  CLIP prediction: crane
  ICE prediction: crane
  Captions:
    a photo of: a photo of a statue in the middle of a park
    describe specifically: describe specifically, the statue is a symbol of the city of london
    be concise: be concise - the statue of liberty
  CLIP confidence: 42.99%
  ICE confidence: 42.99%

Image 9729:
  True class: teapot
  CLIP prediction: acorn
  ICE prediction: teapot
  Captions:
    a photo of: a photo of a teapot with a handle
    describe specifically: describe specifically, the teapot is a teapot
    be concise: be concise teapot
  CLIP confidence: 24.82%
  ICE confidence: 25.17%

Image 9730:
  True class: lakeside
  CLIP prediction: lakeside
  ICE prediction: lakeside
  Captions:
    a photo of: a photo of a tree in the middle of a field
    describe specifically: describe specifically, the tree is a tree
    be concise: be concise - the best of the best
  CLIP confidence: 30.76%
  ICE conf

 98%|█████████▊| 154/157 [1:33:58<01:48, 36.11s/it]


--- Batch 153 examples ---

Image 9792:
  True class: American alligator
  CLIP prediction: lawn mower
  ICE prediction: lawn mower
  Captions:
    a photo of: a photo of a bird in the grass
    describe specifically: describe specifically, the water is a little bit of blue
    be concise: be concise - the best way to get your dog ' s attention
  CLIP confidence: 10.25%
  ICE confidence: 10.25%

Image 9793:
  True class: computer keyboard
  CLIP prediction: computer keyboard
  ICE prediction: computer keyboard
  Captions:
    a photo of: a photo of a keyboard with a keyboard key
    describe specifically: describe specifically on the keyboard
    be concise: be concisel - the best keyboard for your business
  CLIP confidence: 95.31%
  ICE confidence: 101.07%

Image 9794:
  True class: goose
  CLIP prediction: goose
  ICE prediction: goose
  Captions:
    a photo of: a photo of a swan swimming in the water
    describe specifically: describe specifically, the swan is a bird
    be conc

 99%|█████████▊| 155/157 [1:34:36<01:13, 36.62s/it]


--- Batch 154 examples ---

Image 9856:
  True class: oboe
  CLIP prediction: oboe
  ICE prediction: oboe
  Captions:
    a photo of: a photo of a woman playing a drum
    describe specifically: describe specifically, the person is a person
    be concise: be concise - live at the royal albert park, toronto
  CLIP confidence: 26.00%
  ICE confidence: 26.00%

Image 9857:
  True class: spider web
  CLIP prediction: fountain
  ICE prediction: fountain
  Captions:
    a photo of: a photo of a black and white image of a black and white image of a black and white image of a black and white image of a
    describe specifically: describe specifically, the stars are not in the same plane
    be concise: be concisement of the universe
  CLIP confidence: 4.14%
  ICE confidence: 4.14%

Image 9858:
  True class: computer keyboard
  CLIP prediction: computer keyboard
  ICE prediction: computer keyboard
  Captions:
    a photo of: a photo of a city at night
    describe specifically: describe specif

 99%|█████████▉| 156/157 [1:35:14<00:36, 36.97s/it]


--- Batch 155 examples ---

Image 9920:
  True class: flagpole
  CLIP prediction: flagpole
  ICE prediction: flagpole
  Captions:
    a photo of: a photo of a kite flying in the sky
    describe specifically: describe specifically, the wind is the wind
    be concise: be concisement of the sun
  CLIP confidence: 87.99%
  ICE confidence: 87.99%

Image 9921:
  True class: black widow
  CLIP prediction: black widow
  ICE prediction: black widow
  Captions:
    a photo of: a photo of a black and white dog
    describe specifically: describe specifically, the term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term term
    be concise: be concisement - a new study of the human brain
  CLIP confidence: 73.54%
  ICE confidence: 73.54%

Image 9922:
  True class: steel arch bridge
  CLIP prediction: steel arch bridge
  ICE prediction

100%|██████████| 157/157 [1:35:23<00:00, 36.46s/it]


--- Batch 156 examples ---

Image 9984:
  True class: chain
  CLIP prediction: sea cucumber
  ICE prediction: sea cucumber
  Captions:
    a photo of: a photo of a man in a wet suit
    describe specifically: describe specifically, the bear is a member of the bear family
    be concise: be concisely, the best way to get out of the water
  CLIP confidence: 11.71%
  ICE confidence: 11.71%

Image 9985:
  True class: candle
  CLIP prediction: candle
  ICE prediction: candle
  Captions:
    a photo of: a photo of a cake with candles on it
    describe specifically: describe specifically, the candle is a symbol of the candlelighter ' s life
    be concise: be concisement - a candle and a candle
  CLIP confidence: 71.88%
  ICE confidence: 78.47%

Image 9986:
  True class: jellyfish
  CLIP prediction: jellyfish
  ICE prediction: jellyfish
  Captions:
    a photo of: a photo of a blue jellyfish in the ocean
    describe specifically: describe specifically, the earth is a planet
    be concise:


