Prerequisites

In [None]:
!pip install datasets transformers torch Pillow seaborn pandas matplotlib tqdm

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

Evaluation of the contrast pairs: base CLIP vs. CCS probing

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import CLIPProcessor, CLIPModel
from datasets import load_dataset
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

class CCS(nn.Module):
    """Single-layer linear probe with a sigmoid output.

    The probe maps a feature vector of size ``input_dim`` to one scalar.
    ``forward`` returns either that raw scalar or its sigmoid.
    """

    def __init__(self, input_dim):
        """Create the linear layer (``input_dim`` -> 1) and the sigmoid."""
        super().__init__()
        self.linear = nn.Linear(input_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, return_logits=False):
        """Score ``x`` with the probe.

        Args:
            x: Input passed straight to the linear layer.
            return_logits: When true, return the linear output without
                applying the sigmoid.

        Returns:
            The linear output if ``return_logits`` is true, otherwise its
            sigmoid.
        """
        scores = self.linear(x)
        return scores if return_logits else self.sigmoid(scores)

def normalize_features(features_pos, features_neg):
    """Standardise the positive and negative feature sets independently.

    Each tensor is centred with its own mean over dim 0 and then divided by
    its own standard deviation over dim 0 (plus 1e-8 to avoid dividing by
    zero).

    Args:
        features_pos: Tensor of features for the "yes" prompts.
        features_neg: Tensor of features for the "no" prompts.

    Returns:
        Tuple ``(features_pos, features_neg)`` of standardised tensors.
    """
    def _standardize(feats):
        # Centre first; the spread is then measured on the centred values.
        centered = feats - feats.mean(dim=0, keepdim=True)
        spread = centered.std(dim=0, keepdim=True)
        return centered / (spread + 1e-8)

    return _standardize(features_pos), _standardize(features_neg)

def create_preprocessed_data(model_name="openai/clip-vit-base-patch32",
                             output_path='preprocessed_new.pt'):
    """Extract CLIP features for POPE yes/no contrast pairs and cache them.

    For every sample of the ``lmms-lab/POPE`` test split a "yes" prompt and a
    "no" prompt are built from the question, passed through CLIP together with
    the sample image, and the resulting text embeddings and image-text logits
    are collected. The collected data is written with ``torch.save``.

    Note: a later cell of this notebook defines another function with the
    same name (for LLaVA); whichever cell ran last is the one that is called.

    Args:
        model_name: Checkpoint id passed to ``CLIPModel.from_pretrained`` and
            ``CLIPProcessor.from_pretrained``.
        output_path: File the collected data is saved to. The default matches
            the default ``preprocessed_path`` of ``run_experiment``.

    Returns:
        dict with keys ``features_pos`` / ``features_neg`` (stacked text
        embeddings of the "yes" / "no" prompts), ``clip_logits`` (stacked
        ``logits_per_image`` rows, one per sample) and ``metadata``
        (questions, true answers, prompt strings, ids).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    print("Loading CLIP model...")
    model = CLIPModel.from_pretrained(model_name).to(device)
    processor = CLIPProcessor.from_pretrained(model_name)

    print("Loading POPE dataset...")
    dataset = load_dataset("lmms-lab/POPE", split='test')

    processed_data = {
        'features_pos': [],
        'features_neg': [],
        'clip_logits': [],
        'metadata': {
            'questions': [],
            'true_answers': [],
            'contrast_pairs': [],
            'ids': []
        }
    }

    print("Processing dataset...")
    for idx in tqdm(range(len(dataset))):
        sample = dataset[idx]
        question = sample['question'].lower().rstrip('?')

        # Create contrast pairs as per paper.
        # NOTE(review): the template wraps the whole lower-cased question in
        # "Is ... present in this image?". If POPE questions are already full
        # sentences the prompt becomes ungrammatical — verify against the
        # dataset before relying on these embeddings.
        contrast_pair_pos = f"Q: Is {question} present in this image? A: yes"
        contrast_pair_neg = f"Q: Is {question} present in this image? A: no"

        # One image and two texts per call: index 0 is "yes", index 1 is "no".
        inputs = processor(
            images=[sample['image']],
            text=[contrast_pair_pos, contrast_pair_neg],
            return_tensors="pt",
            padding=True
        )
        inputs = {k: v.to(device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)
            # NOTE(review): text_embeds presumably depend on the prompt text
            # only, so the probe features may carry no image information —
            # confirm this is intended.
            text_features = outputs.text_embeds.cpu()
            logits_per_image = outputs.logits_per_image

        processed_data['features_pos'].append(text_features[0])
        processed_data['features_neg'].append(text_features[1])
        # Row 0 is the single image; its entries are the (yes, no) logits.
        processed_data['clip_logits'].append(logits_per_image.cpu()[0])
        processed_data['metadata']['questions'].append(sample['question'])
        processed_data['metadata']['true_answers'].append(sample['answer'])
        processed_data['metadata']['contrast_pairs'].append({
            'positive': contrast_pair_pos,
            'negative': contrast_pair_neg
        })
        # Fall back to the running index when the sample has no 'id' field.
        processed_data['metadata']['ids'].append(sample.get('id', idx))

    # Convert lists to tensors
    processed_data['features_pos'] = torch.stack(processed_data['features_pos'])
    processed_data['features_neg'] = torch.stack(processed_data['features_neg'])
    processed_data['clip_logits'] = torch.stack(processed_data['clip_logits'])

    print("\nPreprocessed Data Summary:")
    print(f"Number of samples: {len(processed_data['metadata']['questions'])}")
    print(f"Features positive shape: {processed_data['features_pos'].shape}")
    print(f"Features negative shape: {processed_data['features_neg'].shape}")
    print(f"CLIP logits shape: {processed_data['clip_logits'].shape}")

    print("\nSaving preprocessed data...")
    torch.save(processed_data, output_path)

    return processed_data

def train_ccs(model, features_pos, features_neg, device, num_epochs=2000, lr=0.001):
    """Fit a probe with the unsupervised consistency + confidence loss.

    Every epoch is one full-batch AdamW step (weight decay 0.01). The loss is
    the mean of ``(p_pos - (1 - p_neg))**2`` plus the mean of
    ``min(p_pos, p_neg)**2``. The parameters that produced the lowest loss
    seen are restored into ``model`` before returning.

    Args:
        model: Probe whose forward pass returns probabilities.
        features_pos: Features of the "yes" prompts.
        features_neg: Features of the "no" prompts.
        device: Accepted for call compatibility; not used in the body.
        num_epochs: Number of optimisation steps.
        lr: AdamW learning rate.

    Returns:
        The lowest loss value observed (``inf`` if no epoch improved on it).
    """
    opt = optim.AdamW(model.parameters(), lr=lr, weight_decay=0.01)
    lowest_loss, snapshot = float('inf'), None

    for _ in range(num_epochs):
        opt.zero_grad()
        prob_yes = model(features_pos)
        prob_no = model(features_neg)

        # The two answers should sum to one, and one of them should be near zero.
        consistency = ((prob_yes - (1 - prob_no)) ** 2).mean()
        confidence = (torch.min(prob_yes, prob_no) ** 2).mean()
        total = consistency + confidence

        # The loss belongs to the parameters *before* this step, so the
        # snapshot is taken ahead of backward()/step().
        current = total.item()
        if current < lowest_loss:
            lowest_loss = current
            snapshot = {name: value.cpu().clone() for name, value in model.state_dict().items()}

        total.backward()
        opt.step()

    if snapshot is not None:
        model.load_state_dict(snapshot)
    return lowest_loss

def run_experiment(preprocessed_path='preprocessed_new.pt', output_dir='comparison_results_new', seed=None):
    """Train a CCS probe on cached CLIP features and compare it with CLIP logits.

    Loads the cached data, makes a random 80/20 train/test split, trains ten
    randomly initialised probes on the training rows, keeps the one with the
    lowest training loss, and saves the probe outputs and the CLIP baseline
    predictions for all rows. The printed accuracies are computed on the
    held-out test rows only.

    Args:
        preprocessed_path: File written by ``create_preprocessed_data``.
        output_dir: Directory the result files are written to (created if
            missing, together with a ``plots`` sub-directory).
        seed: Optional integer. When given, ``torch.manual_seed(seed)`` is
            called first so that the split and the probe initialisations are
            repeatable. ``None`` keeps the previous unseeded behaviour.

    Returns:
        Tuple ``(ccs_results, clip_results)`` of dicts, as saved to disk.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    if seed is not None:
        torch.manual_seed(seed)

    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'plots'), exist_ok=True)

    print("Loading preprocessed data...")
    # torch.load unpickles the file: only point this at caches you created.
    data = torch.load(preprocessed_path)
    features_pos = data['features_pos'].to(device)
    features_neg = data['features_neg'].to(device)
    clip_logits = data['clip_logits'].to(device)
    metadata = data['metadata']

    # Split into train/test
    num_samples = len(metadata['questions'])
    indices = torch.randperm(num_samples)
    split = int(0.8 * num_samples)
    train_idx, test_idx = indices[:split], indices[split:]

    # Normalize features (statistics are taken over all rows, train and test)
    features_pos_norm, features_neg_norm = normalize_features(features_pos, features_neg)

    # Train CCS model
    print("\nTraining CCS model...")
    input_dim = features_pos.shape[1]
    best_loss = float('inf')
    best_model = None

    for try_idx in range(10):
        print(f"Training run {try_idx + 1}/10")
        ccs_model = CCS(input_dim).to(device)
        loss = train_ccs(ccs_model, features_pos_norm[train_idx], features_neg_norm[train_idx], device)

        if loss < best_loss:
            best_loss = loss
            best_model = ccs_model.state_dict()

    # Evaluate final model
    final_model = CCS(input_dim).to(device)
    final_model.load_state_dict(best_model)
    final_model.eval()

    with torch.no_grad():
        # Get CCS logits and probabilities
        ccs_logits_pos = final_model(features_pos_norm, return_logits=True)
        ccs_logits_neg = final_model(features_neg_norm, return_logits=True)

        final_pos_probs = final_model(features_pos_norm)
        final_neg_probs = final_model(features_neg_norm)

        # Average the "yes" probability with the complement of the "no" one.
        avg_probs = 0.5 * (final_pos_probs + (1 - final_neg_probs))
        predictions = (avg_probs > 0.5).float()

    # Save results with logits
    ccs_results = {
        'logits_pos': ccs_logits_pos.cpu(),  # Raw logits before sigmoid
        'logits_neg': ccs_logits_neg.cpu(),
        'probs_pos': final_pos_probs.cpu(),
        'probs_neg': final_neg_probs.cpu(),
        'predictions': predictions.cpu(),
        'train_indices': train_idx.cpu(),
        'test_indices': test_idx.cpu(),
        'metadata': metadata
    }

    clip_results = {
        'logits': clip_logits.cpu(),  # Raw logits before softmax
        # Column 0 is the "yes" prompt, column 1 the "no" prompt.
        'predictions': (clip_logits[:, 0] > clip_logits[:, 1]).float().cpu(),
        'train_indices': train_idx.cpu(),
        'test_indices': test_idx.cpu()
    }

    torch.save(ccs_results, os.path.join(output_dir, 'ccs_logits_new.pt'))
    torch.save(clip_results, os.path.join(output_dir, 'clip_logits_new.pt'))

    # Print accuracies on the held-out rows only: the probe was fitted on
    # train_idx, so including those rows would not be a test accuracy.
    # NOTE(review): the probe is trained without labels, so its sign is
    # arbitrary; consider whether max(acc, 1 - acc) should be reported.
    true_answers = torch.tensor([1 if ans.lower() == 'yes' else 0 for ans in metadata['true_answers']])
    test_rows = test_idx.cpu()
    test_labels = true_answers[test_rows]
    ccs_acc = (predictions.cpu().flatten()[test_rows] == test_labels).float().mean()
    clip_acc = (clip_results['predictions'][test_rows] == test_labels).float().mean()

    print(f"\nTest Accuracies:")
    print(f"CCS Accuracy: {ccs_acc:.3f}")
    print(f"CLIP Accuracy: {clip_acc:.3f}")

    return ccs_results, clip_results

if __name__ == "__main__":
    # Build the CLIP feature cache only when it is missing; the file name
    # checked here is the one run_experiment() loads by default.
    if not os.path.exists('preprocessed_new.pt'):
        create_preprocessed_data()

    # Train the CCS probe on the cached features and print its accuracy next
    # to the CLIP logit baseline.
    ccs_results, clip_results = run_experiment()

Using device: cuda
Loading CLIP model...
Loading POPE dataset...
Processing dataset...


100%|██████████| 9000/9000 [03:52<00:00, 38.67it/s]



Preprocessed Data Summary:
Number of samples: 9000
Features positive shape: torch.Size([9000, 512])
Features negative shape: torch.Size([9000, 512])
CLIP logits shape: torch.Size([9000, 2])

Saving preprocessed data...
Using device: cuda
Loading preprocessed data...


  data = torch.load(preprocessed_path)



Training CCS model...
Training run 1/10
Training run 2/10
Training run 3/10
Training run 4/10
Training run 5/10
Training run 6/10
Training run 7/10
Training run 8/10
Training run 9/10
Training run 10/10

Test Accuracies:
CCS Accuracy: 0.538
CLIP Accuracy: 0.488


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import AutoProcessor, LlavaForConditionalGeneration
from datasets import load_dataset
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

def create_preprocessed_data(model_name="llava-hf/llava-1.5-7b-hf",
                             output_path='preprocessed_llava.pt'):
    """Extract per-layer LLaVA hidden states for POPE yes/no prompts and cache them.

    For every sample of the ``lmms-lab/POPE`` test split, a prompt ending in
    "Yes." and one ending in "No." are run through the model separately. For
    each entry of ``outputs.hidden_states`` the vector at the last sequence
    position is kept, and the model logits at the last position are kept for
    both prompts. Everything is written with ``torch.save``.

    Note: an earlier cell of this notebook defines another function with the
    same name (for CLIP); whichever cell ran last is the one that is called.

    Args:
        model_name: Checkpoint id passed to
            ``LlavaForConditionalGeneration.from_pretrained`` and
            ``AutoProcessor.from_pretrained``.
        output_path: File the collected data is saved to.

    Returns:
        dict with keys ``hidden_states`` (``{layer_idx: {'pos': tensor,
        'neg': tensor}}``), ``llava_logits`` (per sample, the stacked
        last-position logits of the "Yes." and "No." prompts), ``metadata``,
        and the unused empty lists ``features_pos`` / ``features_neg``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print("Loading LLaVA model...")
    model = LlavaForConditionalGeneration.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto"
    )
    processor = AutoProcessor.from_pretrained(model_name)
    processor.tokenizer.padding_side = "left"

    print("Loading POPE dataset...")
    dataset = load_dataset("lmms-lab/POPE", split='test')

    processed_data = {
        'features_pos': [],
        'features_neg': [],
        'llava_logits': [],
        'hidden_states': {},
        'metadata': {
            'questions': [],
            'true_answers': [],
            'contrast_pairs': [],
            'ids': []
        }
    }

    for idx in tqdm(range(len(dataset))):
        sample = dataset[idx]
        question = sample['question'].lower().rstrip('?')

        # Format positive and negative conversations.
        # NOTE(review): the template wraps the whole lower-cased question in
        # "Is ... present in this image?" — verify the resulting prompt reads
        # as intended for POPE questions.
        texts = [
            f"<image>\nUser: Is {question} present in this image?\nAssistant: Yes.",
            f"<image>\nUser: Is {question} present in this image?\nAssistant: No."
        ]

        # Process inputs (each prompt separately, so no padding is involved)
        inputs_pos = processor(
            images=sample['image'],
            text=texts[0],
            return_tensors="pt"
        )
        inputs_neg = processor(
            images=sample['image'],
            text=texts[1],
            return_tensors="pt"
        )

        inputs_pos = {k: v.to(device) for k, v in inputs_pos.items()}
        inputs_neg = {k: v.to(device) for k, v in inputs_neg.items()}

        with torch.no_grad():
            outputs_pos = model(**inputs_pos, output_hidden_states=True)
            outputs_neg = model(**inputs_neg, output_hidden_states=True)

            # Store hidden states from each layer
            for layer_idx, (h_pos, h_neg) in enumerate(zip(outputs_pos.hidden_states, outputs_neg.hidden_states)):
                if layer_idx not in processed_data['hidden_states']:
                    processed_data['hidden_states'][layer_idx] = {'pos': [], 'neg': []}

                # Get last token embedding for each layer
                processed_data['hidden_states'][layer_idx]['pos'].append(h_pos[0, -1].cpu())
                processed_data['hidden_states'][layer_idx]['neg'].append(h_neg[0, -1].cpu())

            # Store logits for final prediction.
            # NOTE(review): these are the logits at the last position of a
            # prompt that already contains the answer, i.e. presumably the
            # distribution over the token *after* "Yes." / "No." — confirm
            # this is what the downstream baseline expects.
            processed_data['llava_logits'].append(torch.stack([
                outputs_pos.logits[0, -1],
                outputs_neg.logits[0, -1]
            ]).cpu())

        # Store metadata (the question is stored lower-cased, without '?')
        processed_data['metadata']['questions'].append(question)
        processed_data['metadata']['true_answers'].append(sample['answer'])
        processed_data['metadata']['contrast_pairs'].append({
            'positive': texts[0],
            'negative': texts[1]
        })
        processed_data['metadata']['ids'].append(idx)

    # Convert lists to tensors
    for layer_idx in processed_data['hidden_states']:
        processed_data['hidden_states'][layer_idx]['pos'] = torch.stack(processed_data['hidden_states'][layer_idx]['pos'])
        processed_data['hidden_states'][layer_idx]['neg'] = torch.stack(processed_data['hidden_states'][layer_idx]['neg'])

    processed_data['llava_logits'] = torch.stack(processed_data['llava_logits'])

    print("\nSaving preprocessed data...")
    torch.save(processed_data, output_path)

    print("\nPreprocessed Data Summary:")
    print(f"Number of samples: {len(processed_data['metadata']['questions'])}")
    print(f"Number of layers: {len(processed_data['hidden_states'])}")
    print(f"Hidden state dimension: {processed_data['hidden_states'][0]['pos'].shape[-1]}")

    return processed_data

def normalize_features(features_pos, features_neg):
    """Standardise the positive and negative feature sets independently.

    Inputs are cast to float32 first. The hidden states produced in this cell
    come from a float16 model, and in float16 the ``1e-8`` stabiliser rounds
    to zero, so a constant feature column would yield 0/0 = NaN. Casting also
    keeps this definition in line with the ``normalize_features`` used by the
    layer-wise analysis cell.

    Each tensor is centred with its own mean over dim 0 and divided by its
    own standard deviation over dim 0 (plus 1e-8).

    Args:
        features_pos: Tensor of features for the "Yes." prompts.
        features_neg: Tensor of features for the "No." prompts.

    Returns:
        Tuple ``(features_pos, features_neg)`` of standardised float32 tensors.
    """
    features_pos = features_pos.float()
    features_neg = features_neg.float()

    mu_pos = features_pos.mean(dim=0, keepdim=True)
    mu_neg = features_neg.mean(dim=0, keepdim=True)
    features_pos = features_pos - mu_pos
    features_neg = features_neg - mu_neg
    std_pos = features_pos.std(dim=0, keepdim=True)
    std_neg = features_neg.std(dim=0, keepdim=True)
    features_pos = features_pos / (std_pos + 1e-8)
    features_neg = features_neg / (std_neg + 1e-8)
    return features_pos, features_neg

if __name__ == "__main__":
    # Skip the extraction pass when the cache file already exists.
    if not os.path.exists('preprocessed_llava.pt'):
        create_preprocessed_data()

Loading LLaVA model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/950 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/70.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/505 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.45k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.62M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/41.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

Some kwargs in processor config are unused and will not have any effect: num_additional_image_tokens. 


Loading POPE dataset...


README.md:   0%|          | 0.00/2.16k [00:00<?, ?B/s]

test-00000-of-00003.parquet:   0%|          | 0.00/85.0M [00:00<?, ?B/s]

test-00001-of-00003.parquet:   0%|          | 0.00/85.0M [00:00<?, ?B/s]

test-00002-of-00003.parquet:   0%|          | 0.00/85.0M [00:00<?, ?B/s]

Generating test split:   0%|          | 0/9000 [00:00<?, ? examples/s]

 70%|███████   | 6317/9000 [16:23<06:57,  6.42it/s]


KeyboardInterrupt: 

Using the preprocessed .pt data

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import os

class CCS(nn.Module):
    """Single-layer linear probe with a sigmoid output.

    Inputs are cast to float32 before the linear layer, so half-precision
    features can be passed in directly.
    """

    def __init__(self, input_dim):
        """Create the linear layer (``input_dim`` -> 1) and the sigmoid."""
        super().__init__()
        self.linear = nn.Linear(input_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, return_logits=False):
        """Score ``x`` with the probe.

        Args:
            x: Input tensor; converted with ``.float()`` before use.
            return_logits: When true, return the linear output without
                applying the sigmoid.

        Returns:
            The linear output if ``return_logits`` is true, otherwise its
            sigmoid.
        """
        scores = self.linear(x.float())
        return scores if return_logits else self.sigmoid(scores)

def normalize_features(features_pos, features_neg):
    """Standardise the positive and negative feature sets independently.

    Each tensor is cast to float32, centred with its own mean over dim 0 and
    divided by its own standard deviation over dim 0 (plus 1e-8 to avoid
    dividing by zero).

    Args:
        features_pos: Tensor of features for the "Yes." prompts.
        features_neg: Tensor of features for the "No." prompts.

    Returns:
        Tuple ``(features_pos, features_neg)`` of standardised float32 tensors.
    """
    def _standardize(feats):
        as_fp32 = feats.float()  # Convert to float32
        centered = as_fp32 - as_fp32.mean(dim=0, keepdim=True)
        spread = centered.std(dim=0, keepdim=True)
        return centered / (spread + 1e-8)

    return _standardize(features_pos), _standardize(features_neg)

def train_ccs_layer(model, features_pos, features_neg, device, num_epochs=2000, lr=0.001):
    """Fit a probe on one layer's features with the unsupervised CCS loss.

    Every epoch is one full-batch AdamW step (weight decay 0.01). The loss is
    the mean of ``(p_pos - (1 - p_neg))**2`` plus the mean of
    ``min(p_pos, p_neg)**2``. The parameters that produced the lowest loss
    seen are restored into ``model`` before returning.

    Args:
        model: Probe whose forward pass returns probabilities.
        features_pos: Features of the "Yes." prompts.
        features_neg: Features of the "No." prompts.
        device: Accepted for call compatibility; not used in the body.
        num_epochs: Number of optimisation steps.
        lr: AdamW learning rate.

    Returns:
        The lowest loss value observed. ``inf`` is returned, and ``model`` is
        left as it is, when no epoch produced a loss below ``inf`` (for
        example ``num_epochs=0`` or a NaN loss).
    """
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=0.01)
    best_loss = float('inf')
    best_state = None

    for epoch in range(num_epochs):
        optimizer.zero_grad()
        p_pos = model(features_pos)
        p_neg = model(features_neg)

        # The two answers should sum to one, and one of them should be near zero.
        consistency_loss = (p_pos - (1 - p_neg)) ** 2
        confidence_loss = torch.min(p_pos, p_neg) ** 2
        loss = consistency_loss.mean() + confidence_loss.mean()

        # The loss belongs to the parameters *before* this step, so the
        # snapshot is taken ahead of backward()/step().
        loss_value = loss.item()
        if loss_value < best_loss:
            best_loss = loss_value
            best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}

        loss.backward()
        optimizer.step()

    # No snapshot exists if the loop never ran or the loss was never finite;
    # load_state_dict(None) would raise, so only restore when there is one.
    if best_state is not None:
        model.load_state_dict(best_state)
    return best_loss

def run_layerwise_analysis(preprocessed_path='preprocessed_llava.pt', output_dir='llava_layerwise', seed=None):
    """Train one CCS probe per cached LLaVA layer and report test accuracy.

    Loads the cached hidden states, makes a single random 80/20 train/test
    split shared by all layers, and for every layer trains ten randomly
    initialised probes on the training rows, keeping the one with the lowest
    training loss. Per-layer test accuracy is stored, compared with a
    logit-based baseline, saved to ``llava_results.pt`` and plotted to
    ``accuracy_comparison.png`` inside ``output_dir``.

    Args:
        preprocessed_path: Cache file holding ``hidden_states``,
            ``llava_logits`` and ``metadata``.
        output_dir: Directory for the result file and the plot (created if
            missing).
        seed: Optional integer. When given, ``torch.manual_seed(seed)`` is
            called first so that the split and the probe initialisations are
            repeatable. ``None`` keeps the previous unseeded behaviour.

    Returns:
        dict with ``layer_results``, ``llava_logits``, ``llava_accuracy``,
        ``metadata``, ``train_indices`` and ``test_indices``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    os.makedirs(output_dir, exist_ok=True)

    if seed is not None:
        torch.manual_seed(seed)

    print("Loading preprocessed data...")
    # torch.load unpickles the file: only point this at caches you created.
    data = torch.load(preprocessed_path)
    metadata = data['metadata']
    llava_logits = data['llava_logits'].to(device).float()  # Convert to float32

    true_answers = torch.tensor([1 if ans.lower() == 'yes' else 0 for ans in metadata['true_answers']])

    num_samples = len(metadata['questions'])
    train_size = int(0.8 * num_samples)
    indices = torch.randperm(num_samples)
    train_idx, test_idx = indices[:train_size], indices[train_size:]

    layer_results = {}
    for layer_idx in tqdm(data['hidden_states'], desc="Processing layers"):
        features_pos = data['hidden_states'][layer_idx]['pos'].to(device)
        features_neg = data['hidden_states'][layer_idx]['neg'].to(device)

        # Statistics are taken over all rows, train and test.
        features_pos_norm, features_neg_norm = normalize_features(features_pos, features_neg)

        input_dim = features_pos.shape[1]
        best_loss = float('inf')
        best_model = None

        print(f"\nTraining CCS for layer {layer_idx}")
        for try_idx in range(10):
            ccs_model = CCS(input_dim).to(device)
            loss = train_ccs_layer(
                ccs_model,
                features_pos_norm[train_idx],
                features_neg_norm[train_idx],
                device
            )

            if loss < best_loss:
                best_loss = loss
                best_model = ccs_model.state_dict()

        final_model = CCS(input_dim).to(device)
        final_model.load_state_dict(best_model)
        final_model.eval()

        with torch.no_grad():
            logits_pos = final_model(features_pos_norm, return_logits=True)
            logits_neg = final_model(features_neg_norm, return_logits=True)
            probs_pos = final_model(features_pos_norm)
            probs_neg = final_model(features_neg_norm)

            # Average the "yes" probability with the complement of the "no" one.
            avg_probs = 0.5 * (probs_pos + (1 - probs_neg))
            predictions = (avg_probs > 0.5).float()

        # NOTE(review): the probe is trained without labels, so its sign is
        # arbitrary; consider whether max(acc, 1 - acc) should be reported.
        test_acc = (predictions[test_idx].cpu().flatten() == true_answers[test_idx]).float().mean().item()
        layer_results[layer_idx] = {
            'logits_pos': logits_pos.cpu(),
            'logits_neg': logits_neg.cpu(),
            'predictions': predictions.cpu(),
            'test_accuracy': test_acc
        }

    # Calculate LLaVA base accuracy on test set.
    # NOTE(review): this compares entry 0 of the last axis of the stored
    # logits for the positive vs. negative prompt. If that axis is the
    # vocabulary, entry 0 is a single fixed token id rather than a yes/no
    # score, so this baseline may not measure the model's answer — confirm.
    llava_preds = (llava_logits[:, 0, 0] > llava_logits[:, 1, 0]).float()
    llava_test_acc = (llava_preds[test_idx].cpu() == true_answers[test_idx]).float().mean().item()

    results = {
        'layer_results': layer_results,
        'llava_logits': llava_logits.cpu(),
        'llava_accuracy': llava_test_acc,
        'metadata': metadata,
        'train_indices': train_idx.cpu(),
        'test_indices': test_idx.cpu()
    }

    torch.save(results, os.path.join(output_dir, 'llava_results.pt'))

    # Create accuracy plot
    layers = sorted(layer_results.keys())
    accuracies = [layer_results[l]['test_accuracy'] for l in layers]

    plt.figure(figsize=(10, 6))
    plt.plot(layers, accuracies, marker='o', label='CCS')
    plt.axhline(y=llava_test_acc, color='r', linestyle='--', label='Base LLaVA')
    plt.xlabel('Layer')
    plt.ylabel('Test Accuracy')
    plt.title('Layer-wise CCS vs Base LLaVA Accuracy')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(output_dir, 'accuracy_comparison.png'))
    plt.close()

    print(f"\nTest Set Results:")
    print(f"Base LLaVA Accuracy: {llava_test_acc:.3f}")
    print(f"Best Layer CCS Accuracy: {max(accuracies):.3f}")
    print(f"Best Layer: {layers[np.argmax(accuracies)]}")

    return results

if __name__ == "__main__":
    # Uses the default cache path ('preprocessed_llava.pt') and output directory.
    results = run_layerwise_analysis()

Loading preprocessed data...


  data = torch.load(preprocessed_path)
Processing layers:   0%|          | 0/33 [00:00<?, ?it/s]


Training CCS for layer 0


Processing layers:   3%|▎         | 1/33 [00:33<17:40, 33.14s/it]


Training CCS for layer 1


Processing layers:   6%|▌         | 2/33 [01:07<17:31, 33.92s/it]


Training CCS for layer 2


Processing layers:   9%|▉         | 3/33 [01:41<16:59, 33.97s/it]


Training CCS for layer 3


Processing layers:  12%|█▏        | 4/33 [02:15<16:26, 34.01s/it]


Training CCS for layer 4


Processing layers:  15%|█▌        | 5/33 [02:49<15:53, 34.05s/it]


Training CCS for layer 5


Processing layers:  18%|█▊        | 6/33 [03:23<15:13, 33.82s/it]


Training CCS for layer 6


Processing layers:  21%|██        | 7/33 [03:56<14:34, 33.64s/it]


Training CCS for layer 7


Processing layers:  24%|██▍       | 8/33 [04:30<14:04, 33.80s/it]


Training CCS for layer 8


Processing layers:  27%|██▋       | 9/33 [05:03<13:27, 33.66s/it]


Training CCS for layer 9


Processing layers:  30%|███       | 10/33 [05:37<12:51, 33.54s/it]


Training CCS for layer 10


Processing layers:  33%|███▎      | 11/33 [06:10<12:19, 33.61s/it]


Training CCS for layer 11


Processing layers:  36%|███▋      | 12/33 [06:44<11:43, 33.51s/it]


Training CCS for layer 12


Processing layers:  39%|███▉      | 13/33 [07:17<11:09, 33.48s/it]


Training CCS for layer 13


Processing layers:  42%|████▏     | 14/33 [07:51<10:38, 33.58s/it]


Training CCS for layer 14


Processing layers:  45%|████▌     | 15/33 [08:25<10:04, 33.58s/it]


Training CCS for layer 15


Processing layers:  48%|████▊     | 16/33 [08:58<09:29, 33.47s/it]


Training CCS for layer 16


Processing layers:  52%|█████▏    | 17/33 [09:31<08:52, 33.30s/it]


Training CCS for layer 17


Processing layers:  55%|█████▍    | 18/33 [10:04<08:20, 33.40s/it]


Training CCS for layer 18


Processing layers:  58%|█████▊    | 19/33 [10:38<07:49, 33.52s/it]


Training CCS for layer 19


Processing layers:  61%|██████    | 20/33 [11:11<07:13, 33.36s/it]


Training CCS for layer 20


Processing layers:  64%|██████▎   | 21/33 [11:44<06:39, 33.31s/it]


Training CCS for layer 21


Processing layers:  67%|██████▋   | 22/33 [12:18<06:06, 33.32s/it]


Training CCS for layer 22


Processing layers:  70%|██████▉   | 23/33 [12:51<05:32, 33.25s/it]


Training CCS for layer 23


Processing layers:  73%|███████▎  | 24/33 [13:24<04:58, 33.21s/it]


Training CCS for layer 24


Processing layers:  76%|███████▌  | 25/33 [13:58<04:28, 33.50s/it]


Training CCS for layer 25


Processing layers:  79%|███████▉  | 26/33 [14:31<03:53, 33.34s/it]


Training CCS for layer 26


Processing layers:  82%|████████▏ | 27/33 [15:04<03:20, 33.34s/it]


Training CCS for layer 27


Processing layers:  85%|████████▍ | 28/33 [15:38<02:46, 33.29s/it]


Training CCS for layer 28


Processing layers:  88%|████████▊ | 29/33 [16:11<02:13, 33.35s/it]


Training CCS for layer 29


Processing layers:  91%|█████████ | 30/33 [16:44<01:39, 33.33s/it]


Training CCS for layer 30


Processing layers:  94%|█████████▍| 31/33 [17:18<01:06, 33.39s/it]


Training CCS for layer 31


Processing layers:  97%|█████████▋| 32/33 [17:52<00:33, 33.51s/it]


Training CCS for layer 32


Processing layers: 100%|██████████| 33/33 [18:25<00:00, 33.50s/it]



Test Set Results:
Base LLaVA Accuracy: 0.538
Best Layer CCS Accuracy: 0.867
Best Layer: 25
