In [29]:
import argparse
import random
import torch
from torch import nn
import numpy as np
from tqdm import tqdm
import functools
import os
import cProfile
import copy
import time
import sys
import json
import subprocess
from torch import nn
from sklearn.model_selection import train_test_split
import timm
import pandas
import seaborn
import matplotlib.pyplot
import numpy as np
from tqdm import tqdm
from collections import Counter
import math
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import numpy as np
import csv
import argparse
import sys




import wandb
from sklearn.metrics.pairwise import cosine_similarity

from torch.utils.data import DataLoader

from src.backbone import get_backbone
from src.modules import CosineLinear
from src.moe_seed import MoE_SEED
from src.data import (
    CILDataManager,
    DILDataManager,
    get_dataset,
    DATASET_MAP,
    make_test_transform_from_args,
    make_train_transform_from_args,
    update_transforms,
)
from src.logging import Logger, WandbLogger, ConsoleLogger, TQDMLogger
from torch.utils.data import Subset

from src.support_functions import check_gpu_memory, shrink_dataset, display_profile, log_gpustat, optimize_args



In [30]:

def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also switches cuDNN to deterministic mode and turns its benchmark
    mode off.

    Args:
        seed (int): Value handed to every seeding function.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


def setup_logger(args):
    """Register the logging backends on the shared ``Logger`` instance.

    A ``ConsoleLogger`` is always added. A ``WandbLogger`` is added on top
    only when ``args.wandb_project`` is not ``None``.

    Args:
        args: Namespace-like object exposing ``wandb_project`` and
            ``wandb_entity``; the whole object is also passed to
            ``WandbLogger``.
    """
    Logger.instance().add_backend(ConsoleLogger())

    if args.wandb_project is None:
        return

    Logger.instance().add_backend(
        WandbLogger(args.wandb_project, args.wandb_entity, args)
    )


def update_args(args):
    """Validate ``args.k`` and attach derived settings to ``args`` in place.

    Args:
        args: Namespace-like object exposing ``k`` and ``aug_normalize``.

    Returns:
        The same ``args`` object, now also carrying ``intralayers`` (the
        names ``blocks.11``, ``blocks.10``, ... for ``k`` entries), a boolean
        ``aug_normalize`` and ``target_size`` set to 224.

    Raises:
        AssertionError: If ``args.k`` is outside the range 1..12.
    """
    assert 1 <= args.k <= 12

    last_block = 11
    args.intralayers = [
        f"blocks.{block_idx}"
        for block_idx in range(last_block, last_block - args.k, -1)
    ]
    args.aug_normalize = True if args.aug_normalize else False
    args.target_size = 224
    return args



def calculate_similarity(feature1, feature2, metric='cosine'):
    """
    Scores how similar two feature vectors are.

    Args:
        feature1 (np.ndarray): First feature vector.
        feature2 (np.ndarray): Second feature vector.
        metric (str): 'cosine' for cosine similarity, 'euclidean' for the
            negated Euclidean distance.

    Returns:
        float: The similarity score (higher means more similar for both metrics).

    Raises:
        ValueError: If ``metric`` is neither 'cosine' nor 'euclidean'.
    """
    if metric == 'euclidean':
        # The distance is negated so that larger values mean "more similar".
        difference = feature1 - feature2
        return -np.linalg.norm(difference)

    if metric != 'cosine':
        raise ValueError(f"Unsupported similarity metric: {metric}")

    # cosine_similarity works on 2-D inputs, so each vector becomes a single row.
    row1 = feature1.reshape(1, -1)
    row2 = feature2.reshape(1, -1)
    return cosine_similarity(row1, row2)[0][0]

def _mean_pairwise_similarity(class_features, similarity_metric):
    """Mean similarity over all unordered sample pairs of one class (needs >= 2 samples)."""
    n_samples = len(class_features)
    # Treat every sample as a flat vector.
    flat = class_features.reshape(n_samples, -1)

    if similarity_metric == 'cosine':
        # One matrix call per class; the strict upper triangle holds each
        # unordered pair (i < j) exactly once.
        similarity_matrix = cosine_similarity(flat)
        return np.mean(similarity_matrix[np.triu_indices(n_samples, k=1)])

    if similarity_metric == 'euclidean':
        # Compare each row with all later rows; this avoids building an
        # (n, n, d) difference tensor.
        distances = np.concatenate([
            np.linalg.norm(flat[row + 1:] - flat[row], axis=1)
            for row in range(n_samples - 1)
        ])
        # Negative for consistency (higher value = more similar).
        return np.mean(-distances)

    raise ValueError(f"Unsupported similarity metric: {similarity_metric}")


def calculate_intra_class_similarity(features, labels, similarity_metric='cosine'):
    """
    Calculates the average intra-class similarity for a dataset.

    For every class the similarity of all unordered sample pairs is averaged.
    The pairwise values are computed per class in vectorized form instead of
    one library call per pair.

    Args:
        features (np.ndarray): Array of feature vectors, one sample per row.
        labels (np.ndarray): Array of corresponding labels.
        similarity_metric (str): The similarity metric to use ('cosine' or 'euclidean').

    Returns:
        tuple: (dict, float) - A dictionary of per-class intra-class similarities
               (0.0 for classes with fewer than two samples) and the mean of
               those per-class values (0.0 when there are no classes).

    Raises:
        ValueError: If ``similarity_metric`` is unsupported and at least one
            class has two or more samples.
    """
    features = np.asarray(features)
    labels = np.asarray(labels)

    intra_class_similarities = {}
    unique_labels = np.unique(labels)
    bar = tqdm(unique_labels, desc="Calculating Intra-Class Similarity", total=len(unique_labels))
    for label in bar:
        class_features = features[labels == label]
        if len(class_features) >= 2:
            intra_class_similarities[label] = _mean_pairwise_similarity(class_features, similarity_metric)
        else:
            # A single sample has no pairs to compare.
            intra_class_similarities[label] = 0.0

    overall_intra_class_similarity = np.mean(list(intra_class_similarities.values())) if intra_class_similarities else 0.0
    return intra_class_similarities, overall_intra_class_similarity

def calculate_inter_class_similarity(features, labels, similarity_metric='cosine'):
    """
    Averages the similarity of all sample pairs that belong to different classes.

    Each unordered pair of classes is visited once, and every sample of the
    first class is compared with every sample of the second one through
    ``calculate_similarity``.

    Args:
        features (np.ndarray): Array of feature vectors.
        labels (np.ndarray): Array of corresponding labels.
        similarity_metric (str): The similarity metric to use ('cosine' or 'euclidean').

    Returns:
        float: Mean similarity over all cross-class sample pairs, or 0.0 when
        there is no such pair.
    """
    class_ids = np.unique(labels)
    progress = tqdm(enumerate(class_ids), desc="Calculating Inter-Class Similarity", total=len(class_ids))

    pair_scores = []
    for first_idx, first_label in progress:
        # Only classes after the current one, so each class pair is counted once.
        for second_label in class_ids[first_idx + 1:]:
            first_block = features[labels == first_label]
            second_block = features[labels == second_label]
            pair_scores.extend(
                calculate_similarity(sample_a, sample_b, similarity_metric)
                for sample_a in first_block
                for sample_b in second_block
            )

    if not pair_scores:
        return 0.0
    return np.mean(pair_scores)

def calculate_inter_class_similarity_vectorized(features, labels, similarity_metric='cosine'):
    """
    Calculates the average inter-class similarity for a dataset using vectorization.

    Every unordered pair of classes is visited once and all cross-class sample
    pairs contribute equally to the mean. Only a running sum and a pair count
    are kept, so memory does not grow with the number of sample pairs.

    Args:
        features (np.ndarray): 2-D array of feature vectors, one sample per row.
        labels (np.ndarray): Array of corresponding labels.
        similarity_metric (str): The similarity metric to use ('cosine' or
            'euclidean'; euclidean distances are negated so that higher means
            more similar).

    Returns:
        float: Mean similarity over all cross-class sample pairs, or 0.0 when
        there is no such pair.

    Raises:
        ValueError: If ``similarity_metric`` is unsupported and at least two
            classes are present.
    """
    features = np.asarray(features)
    labels = np.asarray(labels)
    unique_labels = np.unique(labels)
    n_classes = len(unique_labels)

    # Slice every class once instead of rebuilding the boolean mask for each class pair.
    per_class_features = [features[labels == label] for label in unique_labels]

    similarity_sum = 0.0
    n_pairs = 0
    bar = tqdm(range(n_classes), desc="Calculating Inter-Class Similarity", total=n_classes)
    for i in bar:
        features_class1 = per_class_features[i]
        for j in range(i + 1, n_classes):
            features_class2 = per_class_features[j]

            if similarity_metric == 'cosine':
                block_sum = cosine_similarity(features_class1, features_class2).sum(dtype=np.float64)
            elif similarity_metric == 'euclidean':
                # Row-by-row distances avoid an (n1, n2, d) temporary; negated for consistency.
                block_sum = -sum(
                    np.linalg.norm(features_class2 - row, axis=1).sum(dtype=np.float64)
                    for row in features_class1
                )
            else:
                raise ValueError(f"Unsupported similarity metric: {similarity_metric}")

            similarity_sum += float(block_sum)
            n_pairs += len(features_class1) * len(features_class2)

    return similarity_sum / n_pairs if n_pairs else 0.0

# Expert learned a set of classes and we want to calculate the similarity to all other classes
def calculate_selective_inter_class_similarity(features, labels, target_classes, similarity_metric='cosine'):
    """
    Calculates the average inter-class similarity between a target list of classes
    and all other classes not in the target list, using vectorization.

    Target classes that do not occur in ``labels`` are skipped. Only a running
    sum and a pair count are kept, so memory does not grow with the number of
    sample pairs.

    Args:
        features (np.ndarray): 2-D array of feature vectors, one sample per row.
        labels (np.ndarray): Array of corresponding labels.
        target_classes (list): List of class labels for which to compute similarity
                               to all other classes (excluding those in this list).
        similarity_metric (str, optional): Metric to use for similarity calculation.
                                           'cosine' or 'euclidean' are supported.
                                           Defaults to 'cosine'.

    Returns:
        float: The average inter-class similarity between the target classes
               and the other classes. Returns 0.0 if no such pairs exist.

    Raises:
        ValueError: If ``similarity_metric`` is unsupported and at least one
            target/other class pair exists.
    """
    features = np.asarray(features)
    labels = np.asarray(labels)
    unique_labels = np.unique(labels)
    target_classes = set(target_classes)  # Convert to set for faster lookups

    # Targets without any sample would yield empty arrays, so they are left out.
    present_targets = [label for label in unique_labels if label in target_classes]
    # Slice every non-target class once instead of once per target class.
    other_class_features = [
        features[labels == label] for label in unique_labels if label not in target_classes
    ]

    similarity_sum = 0.0
    n_pairs = 0
    bar = tqdm(present_targets, desc="Calculating Selective Inter-Class Similarity", total=len(present_targets))
    for label1 in bar:
        features_class1 = features[labels == label1]
        for features_class2 in other_class_features:
            if similarity_metric == 'cosine':
                block_sum = cosine_similarity(features_class1, features_class2).sum(dtype=np.float64)
            elif similarity_metric == 'euclidean':
                # Row-by-row distances avoid an (n1, n2, d) temporary; negated for consistency.
                block_sum = -sum(
                    np.linalg.norm(features_class2 - row, axis=1).sum(dtype=np.float64)
                    for row in features_class1
                )
            else:
                raise ValueError(f"Unsupported similarity metric: {similarity_metric}")

            similarity_sum += float(block_sum)
            n_pairs += len(features_class1) * len(features_class2)

    return similarity_sum / n_pairs if n_pairs else 0.0


def calculate_entropy(labels):
    """
    Computes the Shannon entropy (in bits) of the empirical label distribution.

    Args:
        labels (list or np.ndarray): A list or array of hashable labels.

    Returns:
        float: The entropy of the labels; 0.0 for an empty input.
    """
    n_total = len(labels)
    shares = (occurrences / n_total for occurrences in Counter(labels).values())

    # Accumulate term by term, starting from 0.0.
    result = 0.0
    for share in shares:
        result -= share * math.log2(share)
    return result

def calculate_feature_variance(features):
    """
    Averages the per-dimension variance of the features.

    The variance is taken along axis 0 and the resulting values are then
    averaged into a single number.

    Args:
        features (np.ndarray): Array of feature vectors.

    Returns:
        float: Mean of the per-dimension variances.
    """
    per_dimension_variance = np.var(features, axis=0)
    return per_dimension_variance.mean()

def visualize_csv_with_adjusted_size(csv_filepath, output_filepath="heatmap_adjusted.png"):
    """Render the numeric columns of a CSV file as a min-max normalized heatmap.

    The first CSV column is used as the index. Every numeric column is scaled
    with ``(x - min) / (max - min)`` and the result is saved as an image. If
    the file is missing or has no numeric columns, a message is printed and
    nothing is written.

    Args:
        csv_filepath (str): Path of the CSV file to read.
        output_filepath (str): Path the heatmap image is saved to.
    """
    try:
        df = pd.read_csv(csv_filepath, index_col=0)
    except FileNotFoundError:
        print(f"Fehler: Datei '{csv_filepath}' nicht gefunden.")
        return

    numeric_cols = df.select_dtypes(include=np.number).columns
    if numeric_cols.empty:
        print("Keine numerischen Spalten zum Visualisieren gefunden.")
        return

    def _min_max(column):
        return (column - column.min()) / (column.max() - column.min())

    df_normalized = df[numeric_cols].apply(_min_max, axis=0)
    cmap = LinearSegmentedColormap.from_list("mycmap", ["white", "lightblue", "darkblue"])

    # Scale the figure with the table so the individual cells get larger.
    fig_width = len(numeric_cols) * 2
    fig_height = len(df) * 1
    plt.figure(figsize=(fig_width, fig_height))

    sns.heatmap(df_normalized, annot=False, cmap=cmap, cbar=True, yticklabels=True)
    # Font sizes are set explicitly for the title, axis labels and tick labels.
    plt.title("Farbliche Visualisierung der Datenspalten", fontsize=12)
    plt.xlabel("Numerische Spalten", fontsize=10)
    plt.ylabel("Datensätze", fontsize=10)
    plt.xticks(rotation=45, ha="right", fontsize=8)
    plt.yticks(fontsize=8)
    plt.tight_layout()
    plt.savefig(output_filepath)
    print(f"Heatmap mit angepasster Größe und Schrift gespeichert als '{output_filepath}'.")
    plt.close()


def get_all_dataset_metrics(train_features, train_labels, dataset_name):
    """Compute and print the dataset statistics and return them as one row.

    Args:
        train_features (np.ndarray): Feature array; it is flattened for the
            feature-entropy computation.
        train_labels (np.ndarray): Labels belonging to ``train_features``.
        dataset_name (str): Name stored in the first position of the row.

    Returns:
        list: ``[dataset_name, label_entropy, feature_entropy,
        feature_variance, overall_inter_similarity, overall_intra_similarity]``.
    """
    label_entropy = calculate_entropy(train_labels)
    print(f"Label Entropy: {label_entropy:.4f}")

    feature_entropy = calculate_entropy(train_features.flatten())
    print(f"Feature Entropy: {feature_entropy:.4f}")

    # Intra-class similarity on the training set (default metric).
    per_class_intra, mean_intra = calculate_intra_class_similarity(train_features, train_labels)
    print(f"Intra-Class Similarities per class: {per_class_intra}")
    print(f"Overall Intra-Class Similarity: {mean_intra:.4f}")

    # Inter-class similarity on the training set (default metric).
    mean_inter = calculate_inter_class_similarity_vectorized(train_features, train_labels)
    print(f"Overall Inter-Class Similarity: {mean_inter:.4f}")

    feature_variance = calculate_feature_variance(train_features)
    print(f"Feature Variance: {feature_variance:.4f}")

    # The row order differs from the computation order above.
    return [
        dataset_name,
        label_entropy,
        feature_entropy,
        feature_variance,
        mean_inter,
        mean_intra,
    ]


def main(args):
    """Extract backbone features and labels for all training data of a dataset.

    The dataset named by ``args.dataset`` is loaded, wrapped in a DIL or CIL
    data manager, and every training dataset the manager yields is pushed
    through a pretrained timm backbone whose ``head`` is replaced by an
    identity. Metric computation is not part of this function; callers
    receive the raw arrays.

    Args:
        args: Namespace providing ``dataset``, ``data_root``, ``T``, ``seed``,
            ``backbone``, ``device`` and ``batch_size``, plus whatever the
            transform factories read.

    Returns:
        tuple: ``(train_features, train_labels)`` as NumPy arrays, each
        concatenated along axis 0 over all batches.
    """
    # get dataset and augmentations
    train_transform = make_train_transform_from_args(args)
    test_transform = make_test_transform_from_args(args)
    train_base_dataset, test_base_dataset = get_dataset(
        args.dataset, path=args.data_root
    )
    update_transforms(test_base_dataset, transform=test_transform)

    # get datamanager based on ds
    if DILDataManager.is_dil(str(train_base_dataset)):
        print("DIL")
        data_manager = DILDataManager(
            train_base_dataset,
            test_base_dataset,
        )
    else:
        print("CIL")
        data_manager = CILDataManager(
            train_base_dataset,
            test_base_dataset,
            T=args.T,
            num_first_task=None if args.dataset != "cars" else 16,
            shuffle=True,
            seed=args.seed,
        )

    feature_extractor = timm.create_model(args.backbone, pretrained=True).to(args.device)
    feature_extractor.head = nn.Identity()
    feature_extractor.eval()

    train_features = []
    train_labels = []
    bar = tqdm(data_manager, desc="Extracting Features", total=len(data_manager))
    # Inference only: without no_grad every forward pass would also record an
    # autograd graph that is discarded right away.
    with torch.no_grad():
        for train_dataset, _ in bar:
            # NOTE(review): features are extracted with the *training* transform;
            # confirm that any random augmentation in it is intended here.
            train_dataset.transform = train_transform
            train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=False, num_workers=2, pin_memory=True)
            for images, labels in train_loader:  # Iterate through all batches in the loader
                images = images.to(args.device)
                features = feature_extractor(images)
                train_features.append(features.cpu().numpy())
                train_labels.append(labels.cpu().numpy())

    del feature_extractor

    train_features = np.concatenate(train_features, axis=0)
    train_labels = np.concatenate(train_labels, axis=0)
    print("Features shape:", train_features.shape)

    return train_features, train_labels

    #values = get_all_dataset_metrics(train_features, train_labels, args.dataset)
    
"""
    # Saving values
    save_path = "local_data/dataset_metrics.csv"
    with open(save_path, 'a', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(values)
"""

'\n    # Saving values\n    save_path = "local_data/dataset_metrics.csv"\n    with open(save_path, \'a\', newline=\'\') as outfile:\n        writer = csv.writer(outfile)\n        writer.writerow(values)\n'

In [31]:
def parse_arguments(argv=None):
    """Build the argument parser and return the parsed arguments.

    Args:
        argv (list[str] | None): Explicit argument list to parse, e.g.
            ``['--dataset', 'vtab']``. When ``None`` the command line is
            parsed if the code runs as a script (``__file__`` is defined);
            otherwise (e.g. in a notebook) an empty list is parsed so that
            all defaults apply.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(description="Your script description here")

    # Define your arguments as before
    parser.add_argument('--batch_size', type=int, default=32, help='Batch size for training')
    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate')
    parser.add_argument('--weight_decay', type=float, default=0.0, help='Weight decay')
    parser.add_argument('--early_stopping', type=int, default=10, help='Patience for early stopping')
    parser.add_argument('--dataset', type=str, default='cifar100',
                        choices=['cifar100', 'imagenetr', 'imageneta', 'vtab', 'cars', 'cub',
                                 'omnibenchmark', 'dil_imagenetr', 'cddb', 'limited_domainnet'],
                        help='Dataset to use')
    parser.add_argument('--T', type=int, default=5, help='Number of timesteps')
    parser.add_argument('--backbone', type=str, default='vit_base_patch16_224',
                        choices=['vit_base_patch16_224', 'vit_base_patch16_224_in21k'],
                        help='Backbone architecture')
    parser.add_argument('--finetune_method', type=str, default='none',
                        choices=['none', 'adapter', 'ssf', 'vpt'],
                        help='Finetuning method')
    parser.add_argument('--finetune_epochs', type=int, default=20, help='Number of finetuning epochs')
    parser.add_argument('--k', type=int, default=5, help='Number of nearest neighbors')
    # Ask PyTorch whether CUDA is usable. The previous check,
    # `'cuda' in sys.modules`, looked for a top-level module named "cuda",
    # which importing torch does not register, so it always chose 'cpu'.
    parser.add_argument('--device', type=str, default='cuda' if torch.cuda.is_available() else 'cpu',
                        help='Device to use (cuda or cpu)')
    parser.add_argument('--seed', type=int, default=42, help='Random seed')
    parser.add_argument('--data_root', type=str, default='./local_data', help='Root directory for datasets')
    parser.add_argument('--moe_max_experts', type=int, default=4, help='Maximum number of experts for MoE')
    parser.add_argument('--reduce_dataset', type=float, default=1.0, help='Fraction of dataset to use')
    parser.add_argument('--gmms', type=int, default=8, help='Number of GMM components')
    parser.add_argument('--use_multivariate', action='store_true', help='Use multivariate Gaussian')
    parser.add_argument('--selection_method', type=str, default='random',
                        choices=['random', 'around', 'eucld_dist', 'inv_eucld_dist', 'kl_div',
                                 'inv_kl_div', 'ws_div', 'inv_ws_div'],
                        help='Selection method')
    parser.add_argument('--kd', action='store_true', help='Use knowledge distillation')
    parser.add_argument('--kd_alpha', type=float, default=0.5, help='Alpha for knowledge distillation loss')
    parser.add_argument('--log_gpustat', action='store_true', help='Log GPU statistics')
    parser.add_argument('--sweep_logging', action='store_true', help='Enable Weights & Biases sweep logging')
    parser.add_argument('--exit_after_T', action='store_true', help='Exit after T timesteps')
    parser.add_argument('--selection_criterion', type=int, default=0, choices=[0, 1, 2],
                        help='Selection criterion')
    parser.add_argument('--tau', type=float, default=0.1, help='Temperature parameter')
    parser.add_argument('--exit_after_acc', type=float, default=0.0, help='Exit after reaching this accuracy')
    parser.add_argument('--trash_var', type=float, default=0.0, help='Trash variable (for testing)')
    parser.add_argument('--use_adamw', action='store_true', help='Use AdamW optimizer')
    parser.add_argument('--use_cosine_annealing', action='store_true', help='Use cosine annealing scheduler')
    parser.add_argument('--aug_resize_crop_min', type=float, default=0.8, help='Min scale for random resize crop')
    parser.add_argument('--aug_resize_crop_max', type=float, default=1.0, help='Max scale for random resize crop')
    parser.add_argument('--aug_random_rotation_degree', type=float, default=0.0, help='Degree for random rotation')
    parser.add_argument('--aug_brightness_jitter', type=float, default=0.0, help='Brightness jitter')
    parser.add_argument('--aug_contrast_jitter', type=float, default=0.0, help='Contrast jitter')
    parser.add_argument('--aug_saturation_jitter', type=float, default=0.0, help='Saturation jitter')
    parser.add_argument('--aug_hue_jitter', type=float, default=0.0, help='Hue jitter')
    parser.add_argument('--aug_normalize', action='store_true', help='Normalize input images')
    parser.add_argument('--wandb_project', type=str, default='your_project_name', help='WandB project name')
    parser.add_argument('--wandb_entity', type=str, default='your_entity_name', help='WandB entity name')

    if argv is not None:
        # An explicit argument list wins over any environment detection.
        return parser.parse_args(args=list(argv))

    if '__file__' in globals():  # Check if running as a script
        args = parser.parse_args()
    else:  # Running in a Jupyter Notebook
        args = parser.parse_args(args=[])  # Pass an empty list to avoid errors
        # You can set default values here or handle arguments differently in the notebook

    return args

In [32]:
# Run configuration for this notebook: the dataset to analyse and the value
# written to the CUDA_VISIBLE_DEVICES environment variable.
DATASET = "vtab"
CUDA_VISIBLE_DEVICES = "4"

# Exported before argument parsing and feature extraction run.
# NOTE(review): this presumably must happen before CUDA is first initialised
# in this kernel to take effect — confirm when re-running in a live session.
os.environ['CUDA_VISIBLE_DEVICES'] = CUDA_VISIBLE_DEVICES
# Start from the parser defaults, then override the dataset with the constant above.
args = parse_arguments()
args.dataset = DATASET
args = update_args(args)
set_seed(args.seed)


# main() returns the extracted features and their labels as NumPy arrays.
features, labels = main(args)

CIL


Iterate over train dataset: 1796it [00:00, 1657419.14it/s]
Iterate over test dataset: 8619it [00:00, 1777146.11it/s]




KeyboardInterrupt: 

In [None]:
# Average cosine similarity between classes 0-9 and all remaining classes.
# Depends on `features` and `labels` produced by the feature-extraction cell.
test = calculate_selective_inter_class_similarity(features, labels, target_classes=[i for i in range(10)], similarity_metric='cosine')
print("Selective Inter-Class Similarity:", test)

({0: 0.404879, 1: 0.71943855, 2: 0.44615406, 3: 0.8199341, 4: 0.33074144, 5: 0.78092754, 6: 0.7489024, 7: 0.60773534, 8: 0.527426, 9: 0.26716688, 10: 0.23423919, 11: 0.46803683, 12: 0.46809265, 13: 0.5118063, 14: 0.5899071, 15: 0.4957721, 16: 0.88559484, 17: 0.5422535, 18: 0.90276074, 19: 0.3853124, 20: 0.45831147, 21: 0.41843164, 22: 0.4866485, 23: 0.55178463, 24: 0.7465788, 25: 0.5648913, 26: 0.714228, 27: 0.6028725, 28: 0.4497971, 29: 0.556562, 30: 0.5575757, 31: 0.22412403, 32: 0.45714498, 33: 0.6376291, 34: 0.60207206, 35: 0.6695693, 36: 0.793339, 37: 0.5768212, 38: 0.6843294, 39: 0.7417639, 40: 0.3188536, 41: 0.5054952, 42: 0.22406928, 43: 0.8706063, 44: 0.55285555, 45: 0.6206727, 46: 0.80375963, 47: 0.13300587, 48: 0.5665677, 49: 0.42743763}, 0.55309755)
