In [1]:
import argparse
import random
import torch
from torch import nn
import numpy as np
from tqdm import tqdm
import functools
import os
import cProfile
import copy
import time
import sys
import json
import subprocess
from torch import nn
from sklearn.model_selection import train_test_split
import timm
import pandas
import seaborn
import matplotlib.pyplot
import numpy as np
from tqdm import tqdm
from collections import Counter
import math
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import numpy as np
import csv
import argparse
import sys
import ast




import wandb
from sklearn.metrics.pairwise import cosine_similarity

from torch.utils.data import DataLoader

from src.backbone import get_backbone
from src.modules import CosineLinear
from src.moe_seed import MoE_SEED
from src.data import (
    CILDataManager,
    DILDataManager,
    get_dataset,
    DATASET_MAP,
    make_test_transform_from_args,
    make_train_transform_from_args,
    update_transforms,
)
from src.logging import Logger, WandbLogger, ConsoleLogger, TQDMLogger
from torch.utils.data import Subset

from src.support_functions import check_gpu_memory, shrink_dataset, display_profile, log_gpustat, optimize_args



# Important! Only one dataset at a time

In [2]:
DATASET = "vtab"
CUDA_VISIBLE_DEVICES = "4"

## Get dataset or class features

In [28]:

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def setup_logger(args):
    Logger.instance().add_backend(ConsoleLogger())
    if args.wandb_project is not None:
        Logger.instance().add_backend(
            WandbLogger(args.wandb_project, args.wandb_entity, args)
        )


def update_args(args):
    assert args.k >= 1 and args.k <= 12
    args.intralayers = [f"blocks.{11 - i}" for i in range(args.k)]

    args.aug_normalize = bool(args.aug_normalize)

    args.target_size = 224
    
    dataset_T_map = {
        "dil_imagenetr": {"T": 15, "moe_max_experts": 2},
        "limited_domainnet": {"T": 6, "moe_max_experts": 3},
        "vtab": {"T": 5, "moe_max_experts": 3},
        "cddb": {"T": 5, "moe_max_experts": 3},
    }

    if args.dataset in dataset_T_map.keys():
        args.T = dataset_T_map[args.dataset]["T"]
        #args.moe_max_experts = dataset_T_map[args.dataset]["moe_max_experts"] #immer 5!
        print(f"Dataset {args.dataset} has T={args.T} and moe_max_experts={args.moe_max_experts}")

    return args



def calculate_similarity(feature1, feature2, metric='cosine'):
    """
    Calculates the similarity between two feature vectors.

    Args:
        feature1 (np.ndarray): First feature vector.
        feature2 (np.ndarray): Second feature vector.
        metric (str): The similarity metric to use ('cosine' or 'euclidean').

    Returns:
        float: The similarity score.
    """
    if metric == 'cosine':
        return cosine_similarity(feature1.reshape(1, -1), feature2.reshape(1, -1))[0][0]
    elif metric == 'euclidean':
        return -np.linalg.norm(feature1 - feature2) # Negative for consistency (higher value = more similar)
    else:
        raise ValueError(f"Unsupported similarity metric: {metric}")

def calculate_intra_class_similarity(features, labels, similarity_metric='cosine'):
    """
    Calculates the average intra-class similarity for a dataset.

    Args:
        features (np.ndarray): Array of feature vectors.
        labels (np.ndarray): Array of corresponding labels.
        similarity_metric (str): The similarity metric to use ('cosine' or 'euclidean').

    Returns:
        tuple: (dict, float) - A dictionary of per-class intra-class similarities
               and the overall average intra-class similarity.
    """
    intra_class_similarities = {}
    unique_labels = np.unique(labels)
    bar = tqdm(enumerate(unique_labels), desc="Calculating Intra-Class Similarity", total=len(unique_labels))
    for i, label in bar:
        class_features = features[labels == label]
        similarities = []
        for i in range(len(class_features)):
            for j in range(i + 1, len(class_features)):
                similarity = calculate_similarity(class_features[i], class_features[j], similarity_metric)
                similarities.append(similarity)
        if similarities:
            intra_class_similarities[label] = np.mean(similarities)
        else:
            intra_class_similarities[label] = 0.0

    overall_intra_class_similarity = np.mean(list(intra_class_similarities.values())) if intra_class_similarities else 0.0
    return intra_class_similarities, overall_intra_class_similarity

def calculate_inter_class_similarity(features, labels, similarity_metric='cosine'):
    """
    Calculates the average inter-class similarity for a dataset.

    Args:
        features (np.ndarray): Array of feature vectors.
        labels (np.ndarray): Array of corresponding labels.
        similarity_metric (str): The similarity metric to use ('cosine' or 'euclidean').

    Returns:
        float: The overall average inter-class similarity.
    """
    inter_class_similarities = []
    unique_labels = np.unique(labels)
    bar = tqdm(enumerate(unique_labels), desc="Calculating Inter-Class Similarity", total=len(unique_labels))
    # Iterate through all pairs of classes
    for i, _ in bar:
        for j in range(i + 1, len(unique_labels)):
            label1 = unique_labels[i]
            label2 = unique_labels[j]
            features_class1 = features[labels == label1]
            features_class2 = features[labels == label2]
            for feat1 in features_class1:
                for feat2 in features_class2:
                    similarity = calculate_similarity(feat1, feat2, similarity_metric)
                    inter_class_similarities.append(similarity)

    overall_inter_class_similarity = np.mean(inter_class_similarities) if inter_class_similarities else 0.0
    return overall_inter_class_similarity

def calculate_inter_class_similarity_vectorized(features, labels, similarity_metric='cosine'):
    """
    Calculates the average inter-class similarity for a dataset using vectorization.
    """
    inter_class_similarities = []
    unique_labels = np.unique(labels)
    n_classes = len(unique_labels)
    bar = tqdm(range(n_classes), desc="Calculating Inter-Class Similarity", total=n_classes)

    for i in bar:
        for j in range(i + 1, n_classes):
            label1 = unique_labels[i]
            label2 = unique_labels[j]
            features_class1 = features[labels == label1]
            features_class2 = features[labels == label2]

            if similarity_metric == 'cosine':
                similarity_matrix = cosine_similarity(features_class1, features_class2)
                inter_class_similarities.extend(similarity_matrix.flatten())
            elif similarity_metric == 'euclidean':
                # Calculate pairwise Euclidean distances and negate for consistency
                distances = np.linalg.norm(features_class1[:, np.newaxis, :] - features_class2[np.newaxis, :, :], axis=2)
                inter_class_similarities.extend((-distances).flatten())
            else:
                raise ValueError(f"Unsupported similarity metric: {similarity_metric}")

    overall_inter_class_similarity = np.mean(inter_class_similarities) if inter_class_similarities else 0.0
    return overall_inter_class_similarity

# Expert learned a set of classes and we want to calculate the similarity to all other classes
def calculate_selective_inter_class_similarity(features, labels, target_classes, similarity_metric='cosine'):
    """
    Calculates the average inter-class similarity between a target list of classes
    and all other classes not in the target list, using vectorization.

    Args:
        features (np.ndarray): Array of feature vectors.
        labels (np.ndarray): Array of corresponding labels.
        target_classes (list): List of class labels for which to compute similarity
                               to all other classes (excluding those in this list).
        similarity_metric (str, optional): Metric to use for similarity calculation.
                                           'cosine' or 'euclidean' are supported.
                                           Defaults to 'cosine'.

    Returns:
        float: The average inter-class similarity between the target classes
               and the other classes. Returns 0.0 if no such pairs exist.
    """
    inter_class_similarities = []
    unique_labels = np.unique(labels)
    target_classes = set(target_classes)  # Convert to set for faster lookups
    other_classes = [label for label in unique_labels if label not in target_classes]

    bar = tqdm(target_classes, desc="Calculating Selective Inter-Class Similarity", total=len(target_classes))

    for label1 in bar:
        features_class1 = features[labels == label1]
        for label2 in other_classes:
            features_class2 = features[labels == label2]

            if similarity_metric == 'cosine':
                similarity_matrix = cosine_similarity(features_class1, features_class2)
                inter_class_similarities.extend(similarity_matrix.flatten())
            elif similarity_metric == 'euclidean':
                # Calculate pairwise Euclidean distances and negate for consistency
                distances = np.linalg.norm(features_class1[:, np.newaxis, :] - features_class2[np.newaxis, :, :], axis=2)
                inter_class_similarities.extend((-distances).flatten())
            else:
                raise ValueError(f"Unsupported similarity metric: {similarity_metric}")

    overall_inter_class_similarity = np.mean(inter_class_similarities) if inter_class_similarities else 0.0
    return overall_inter_class_similarity


def calculate_entropy(labels):
    """
    Calculates the entropy of a list or NumPy array of labels.

    Args:
        labels (list or np.ndarray): A list or array of labels.

    Returns:
        float: The entropy of the labels.
    """

    label_counts = Counter(labels)
    total_samples = len(labels)
    entropy = 0.0

    for count in label_counts.values():
        probability = count / total_samples
        entropy -= probability * math.log2(probability)

    return entropy

def calculate_feature_variance(features):
    """
    Calculates the variance of the features.

    Args:
        features (np.ndarray): Array of feature vectors.

    Returns:
        float: The variance of the features.
    """
    return np.var(features, axis=0).mean()  # Mean variance across all features

def visualize_csv_with_adjusted_size(csv_filepath, output_filepath="heatmap_adjusted.png"):
    try:
        df = pd.read_csv(csv_filepath, index_col=0)
    except FileNotFoundError:
        print(f"Fehler: Datei '{csv_filepath}' nicht gefunden.")
        return

    numeric_cols = df.select_dtypes(include=np.number).columns

    if not numeric_cols.empty:
        df_normalized = df[numeric_cols].apply(lambda x: (x - x.min()) / (x.max() - x.min()), axis=0)
        cmap = LinearSegmentedColormap.from_list("mycmap", ["white", "lightblue", "darkblue"])

        # Erhöhe die Figurengröße, um die Kästchen größer zu machen
        plt.figure(figsize=(len(numeric_cols) * 2, len(df) * 1))

        sns.heatmap(df_normalized, annot=False, cmap=cmap, cbar=True, yticklabels=True)
        plt.title("Farbliche Visualisierung der Datenspalten", fontsize=12) # Kleinere Schriftgröße für den Titel
        plt.xlabel("Numerische Spalten", fontsize=10) # Kleinere Schriftgröße für die X-Achse
        plt.ylabel("Datensätze", fontsize=10) # Kleinere Schriftgröße für die Y-Achse
        plt.xticks(rotation=45, ha="right", fontsize=8) # Kleinere Schriftgröße für die X-Achsenbeschriftungen
        plt.yticks(fontsize=8) # Kleinere Schriftgröße für die Y-Achsenbeschriftungen
        plt.tight_layout()
        plt.savefig(output_filepath)
        print(f"Heatmap mit angepasster Größe und Schrift gespeichert als '{output_filepath}'.")
        plt.close()
    else:
        print("Keine numerischen Spalten zum Visualisieren gefunden.")


def get_all_dataset_metrics(train_features, train_labels, dataset_name):
    values = [None] * 6
    values[0] = dataset_name
    
    label_entropy = calculate_entropy(train_labels)
    print(f"Label Entropy: {label_entropy:.4f}")
    values[1] = label_entropy
    feature_entropy = calculate_entropy(train_features.flatten())
    print(f"Feature Entropy: {feature_entropy:.4f}")
    values[2] = feature_entropy

    # 4. Calculate intra-class similarity on the training set
    intra_class_similarities, overall_intra_similarity = calculate_intra_class_similarity(train_features, train_labels) # Use train_labels here
    print(f"Intra-Class Similarities per class: {intra_class_similarities}")
    print(f"Overall Intra-Class Similarity: {overall_intra_similarity:.4f}")
    values[5] = overall_intra_similarity
    
    # 5. Calculate inter-class similarity on the training set
    overall_inter_similarity = calculate_inter_class_similarity_vectorized(train_features, train_labels) # Use train_labels here
    print(f"Overall Inter-Class Similarity: {overall_inter_similarity:.4f}")
    values[4] = overall_inter_similarity

    # 6. Calculate feature variance
    feature_variance = calculate_feature_variance(train_features)
    print(f"Feature Variance: {feature_variance:.4f}")
    values[3] = feature_variance

    return values


def main(args):
    # get dataset and augmentations
    train_transform = make_train_transform_from_args(args)
    test_transform = make_test_transform_from_args(args)
    train_base_dataset, test_base_dataset = get_dataset(
        args.dataset, path=args.data_root
    )
    update_transforms(test_base_dataset, transform=test_transform)


    # get datamanager based on ds
    data_manager = None
    if DILDataManager.is_dil(str(train_base_dataset)):
        print("DIL")
        data_manager = DILDataManager(
            train_base_dataset,
            test_base_dataset,
        )
    else:
        print("CIL")
        data_manager = CILDataManager(
            train_base_dataset,
            test_base_dataset,
            T=args.T,
            num_first_task=None if args.dataset != "cars" else 16,
            shuffle=True,
            seed=args.seed,
        )


    feature_extractor = timm.create_model(args.backbone, pretrained=True).to(args.device)
    feature_extractor.head = nn.Identity()
    feature_extractor.eval()
    train_features = []
    train_labels = []
    bar = tqdm(enumerate(data_manager), desc="Extracting Features", total=len(data_manager))
    for i, (train_dataset, _) in bar:
        train_dataset.transform = train_transform
        train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=False, num_workers=2, pin_memory=True)
        for images, labels in train_loader:  # Iterate through all batches in the loader
            images = images.to(args.device)
            features = feature_extractor(images)
            train_features.append(features.cpu().detach().numpy())
            train_labels.append(labels.cpu().detach().numpy())

    del feature_extractor

    train_features = np.concatenate(train_features, axis=0)
    train_labels = np.concatenate(train_labels, axis=0)
    print("Features shape:", train_features.shape)

    return train_features, train_labels   

    #values = get_all_dataset_metrics(train_features, train_labels, args.dataset)
    
"""
    # Saving values
    save_path = "local_data/dataset_metrics.csv"
    with open(save_path, 'a', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(values)
"""


def main2_classes_per_task_and_seed(args):
    # get dataset and augmentations
    train_transform = make_train_transform_from_args(args)
    test_transform = make_test_transform_from_args(args)
    train_base_dataset, test_base_dataset = get_dataset(
        args.dataset, path=args.data_root
    )
    update_transforms(test_base_dataset, transform=test_transform)


    # get datamanager based on ds
    data_manager = None
    if DILDataManager.is_dil(str(train_base_dataset)):
        print("DIL")
        data_manager = DILDataManager(
            train_base_dataset,
            test_base_dataset,
        )
    else:
        print("CIL")
        data_manager = CILDataManager(
            train_base_dataset,
            test_base_dataset,
            T=args.T,
            num_first_task=None if args.dataset != "cars" else 16,
            shuffle=True,
            seed=args.seed,
        )


    csv_rows = []
    bar = tqdm(enumerate(data_manager), desc="Iterate over tasks", total=len(data_manager))
    for t, (train_dataset, _) in bar:
        task_labels_set = set()

        train_dataset.transform = train_transform
        train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=False, num_workers=2, pin_memory=True)
        for _, labels in train_loader:  # Iterate through all batches in the loader
            task_labels = labels.cpu().detach().numpy()
            for batch_label in task_labels:
                task_labels_set.add(batch_label.item())
        for i in list(task_labels_set):
            row = {
                "dataset": args.dataset,
                "task": t,
                "class": i,
                "seed": args.seed
            }
            csv_rows.append(row)


    # Save the results to a CSV file
    save_path = f"local_data/classes_per_task/{args.dataset}_{args.seed}.csv"

    # Erstelle den Ordner, falls er nicht existiert
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    with open(save_path, 'w', newline='') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=["dataset", "task", "class", "seed"])
        writer.writeheader()  # Füge eine Kopfzeile zur CSV-Datei hinzu
        writer.writerows(csv_rows)            

In [29]:
def parse_arguments():
    parser = argparse.ArgumentParser(description="Your script description here")

    # Define your arguments as before
    parser.add_argument('--batch_size', type=int, default=32, help='Batch size for training')
    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate')
    parser.add_argument('--weight_decay', type=float, default=0.0, help='Weight decay')
    parser.add_argument('--early_stopping', type=int, default=10, help='Patience for early stopping')
    parser.add_argument('--dataset', type=str, default='cifar100',
                        choices=['cifar100', 'imagenetr', 'imageneta', 'vtab', 'cars', 'cub',
                                 'omnibenchmark', 'dil_imagenetr', 'cddb', 'limited_domainnet'],
                        help='Dataset to use')
    parser.add_argument('--T', type=int, default=10, help='Number of timesteps')
    parser.add_argument('--backbone', type=str, default='vit_base_patch16_224',
                        choices=['vit_base_patch16_224', 'vit_base_patch16_224_in21k'],
                        help='Backbone architecture')
    parser.add_argument('--finetune_method', type=str, default='none',
                        choices=['none', 'adapter', 'ssf', 'vpt'],
                        help='Finetuning method')
    parser.add_argument('--finetune_epochs', type=int, default=20, help='Number of finetuning epochs')
    parser.add_argument('--k', type=int, default=5, help='Number of nearest neighbors')
    parser.add_argument('--device', type=str, default='cuda' if 'cuda' in sys.modules else 'cpu',
                        help='Device to use (cuda or cpu)')
    parser.add_argument('--seed', type=int, default=2001, help='Random seed')
    parser.add_argument('--data_root', type=str, default='./local_data', help='Root directory for datasets')
    parser.add_argument('--moe_max_experts', type=int, default=4, help='Maximum number of experts for MoE')
    parser.add_argument('--reduce_dataset', type=float, default=1.0, help='Fraction of dataset to use')
    parser.add_argument('--gmms', type=int, default=8, help='Number of GMM components')
    parser.add_argument('--use_multivariate', action='store_true', help='Use multivariate Gaussian')
    parser.add_argument('--selection_method', type=str, default='random',
                        choices=['random', 'around', 'eucld_dist', 'inv_eucld_dist', 'kl_div',
                                 'inv_kl_div', 'ws_div', 'inv_ws_div'],
                        help='Selection method')
    parser.add_argument('--kd', action='store_true', help='Use knowledge distillation')
    parser.add_argument('--kd_alpha', type=float, default=0.5, help='Alpha for knowledge distillation loss')
    parser.add_argument('--log_gpustat', action='store_true', help='Log GPU statistics')
    parser.add_argument('--sweep_logging', action='store_true', help='Enable Weights & Biases sweep logging')
    parser.add_argument('--exit_after_T', action='store_true', help='Exit after T timesteps')
    parser.add_argument('--selection_criterion', type=int, default=0, choices=[0, 1, 2],
                        help='Selection criterion')
    parser.add_argument('--tau', type=float, default=0.1, help='Temperature parameter')
    parser.add_argument('--exit_after_acc', type=float, default=0.0, help='Exit after reaching this accuracy')
    parser.add_argument('--trash_var', type=float, default=0.0, help='Trash variable (for testing)')
    parser.add_argument('--use_adamw', action='store_true', help='Use AdamW optimizer')
    parser.add_argument('--use_cosine_annealing', action='store_true', help='Use cosine annealing scheduler')
    parser.add_argument('--aug_resize_crop_min', type=float, default=0.8, help='Min scale for random resize crop')
    parser.add_argument('--aug_resize_crop_max', type=float, default=1.0, help='Max scale for random resize crop')
    parser.add_argument('--aug_random_rotation_degree', type=float, default=0.0, help='Degree for random rotation')
    parser.add_argument('--aug_brightness_jitter', type=float, default=0.0, help='Brightness jitter')
    parser.add_argument('--aug_contrast_jitter', type=float, default=0.0, help='Contrast jitter')
    parser.add_argument('--aug_saturation_jitter', type=float, default=0.0, help='Saturation jitter')
    parser.add_argument('--aug_hue_jitter', type=float, default=0.0, help='Hue jitter')
    parser.add_argument('--aug_normalize', action='store_true', help='Normalize input images')
    parser.add_argument('--wandb_project', type=str, default='your_project_name', help='WandB project name')
    parser.add_argument('--wandb_entity', type=str, default='your_entity_name', help='WandB entity name')

    if '__file__' in globals():  # Check if running as a script
        args = parser.parse_args()
    else:  # Running in a Jupyter Notebook
        args = parser.parse_args(args=[]) # Pass an empty list to avoid errors
        # You can set default values here or handle arguments differently in the notebook

    return args

In [8]:
os.environ['CUDA_VISIBLE_DEVICES'] = CUDA_VISIBLE_DEVICES
args = parse_arguments()
args.dataset = DATASET
args = update_args(args)
set_seed(args.seed)


features, labels = main(args)

CIL


Iterate over train dataset: 1796it [00:00, 1226069.33it/s]
Iterate over test dataset: 8619it [00:00, 1487438.54it/s]
Extracting Features: 100%|██████████| 5/5 [30:13<00:00, 362.69s/it]

Features shape: (1796, 768)





## Saving all features into file 

In [None]:
dataset_name = DATASET
labels_list = labels
features_list = features

# Stelle sicher, dass features_list eine Python-Liste ist
if not isinstance(features_list, list):
    features_list = features_list.tolist()  # Konvertiere zu Liste, falls es ein NumPy Array ist

# Annahme: Jede innere Liste hat die gleiche Länge und entspricht einer Zeile
# und die Elemente der inneren Liste sollen separate Spalten werden.

# Erstelle Spaltennamen für die Features
num_features = len(features_list[0]) if features_list else 0
feature_columns = [f'feature_{i}' for i in range(num_features)]

# Erstelle ein Dictionary für den DataFrame
data = {'dataset': [dataset_name] * len(labels_list),
        'label': [item[0] if isinstance(item, list) else item for item in labels_list]} # Annahme: Label ist das erste Element der inneren Liste
for i, col in enumerate(feature_columns):
    data[col] = [item[i] if isinstance(item, list) and len(item) > i else None for item in features_list]

df = pd.DataFrame(data)

# Speichern als CSV-Datei
csv_filename = f"./local_data/{dataset_name}_class_features.csv"
df.to_csv(csv_filename, index=False)
print(f"Daten erfolgreich als '{csv_filename}' gespeichert.")


Daten erfolgreich als './local_data/vtab_class_features.csv' gespeichert (korrigiert).


## Load data from file

In [7]:
csv_filename = f"./local_data/{DATASET}_class_features.csv"

# Lese die CSV-Datei ein
df_loaded = pd.read_csv(csv_filename)

# Extrahiere die Labels und Features
loaded_labels = df_loaded['label'].tolist()

# Wenn die Features als separate Spalten gespeichert wurden (Szenario 1):
if 'feature_0' in df_loaded.columns:
    loaded_features = []
    i = 0
    while f'feature_{i}' in df_loaded.columns:
        loaded_features.append(df_loaded[f'feature_{i}'].tolist())
        i += 1
    # Transponiere die Liste von Listen, um die ursprüngliche Struktur wiederherzustellen
    loaded_features = list(zip(*loaded_features))
    loaded_features = [list(item) for item in loaded_features]

# Wenn die Features als Listen in einer Zelle gespeichert wurden (Szenario 2):
elif 'feature' in df_loaded.columns:
    loaded_features = [ast.literal_eval(item) for item in df_loaded['feature'].tolist()]

print("Daten erfolgreich aus CSV eingelesen:")
print("Geladene Labels (erste 5):", loaded_labels[:5])
print("Geladene Features (erste 1):", loaded_features[:1])


Daten erfolgreich aus CSV eingelesen:
Geladene Labels (erste 5): [0, 0, 0, 0, 0]
Geladene Features (erste 1): [[-1.751719355583191, 0.8563099503517151, -0.8320057988166809, 0.5462210774421692, 1.6325730085372925, 0.1137448772788047, -0.5725335478782654, -1.0483394861221311, -0.0714033916592598, -1.7993239164352417, 1.2114547491073608, 0.7065345644950867, -0.8003975749015808, -2.7898194789886475, -0.0857531875371933, -3.801489353179932, 0.8702642321586609, 0.0858083814382553, 1.3374313116073608, 0.2062888741493225, 0.2373815923929214, 0.6349830031394958, 2.385110855102539, -0.3392757475376129, -1.8584578037261963, -0.5441467761993408, 0.0477000139653682, -1.7922112941741943, -3.2841243743896484, -0.3403828740119934, 0.6959256529808044, -0.9947118759155272, 1.0811444520950315, -0.7592121958732605, -0.0447749905288219, 0.4561104774475097, -0.6492897868156433, 0.1021648347377777, 1.7643967866897583, -0.9342617988586426, -0.0910230129957199, -0.755124568939209, -0.4380847513675689, -0.75045

## What are the classes per task?
Ich brauche eine übersetzung.  
Zusätzlich: Was ist mit den Seeds? Classenreihenfolge ist seed abhängig! Seed der runs berücksichtigen.

In [30]:
os.environ['CUDA_VISIBLE_DEVICES'] = "1"
args = parse_arguments()


seeds = [2000, 2001, 2002, 2003, 2004]
datasets = ["imagenetr", "cifar100", "cub", "dil_imagenetr", "imageneta", "cars", "omnibenchmark", "limited_domainnet"]

#set_seed(args.seed)


#main2_classes_per_task_and_seed(args)


for dataset in datasets:
    DATASET = dataset
    for seed in seeds:
        print(f"Dataset: {dataset}, seed: {seed}")
        args.seed = seed
        args.dataset = dataset
        args.device = "cpu"
        args = update_args(args)

        set_seed(args.seed)

        main2_classes_per_task_and_seed(args)

Dataset: imagenetr, seed: 2000
CIL


Iterate over train dataset: 24000it [00:00, 1155455.65it/s]
Iterate over test dataset: 6000it [00:00, 1620987.05it/s]
Iterate over tasks: 100%|██████████| 10/10 [02:10<00:00, 13.07s/it]


Dataset: imagenetr, seed: 2001
CIL


Iterate over train dataset: 24000it [00:00, 1149964.54it/s]
Iterate over test dataset: 6000it [00:00, 1147866.45it/s]
Iterate over tasks: 100%|██████████| 10/10 [02:09<00:00, 12.94s/it]


Dataset: imagenetr, seed: 2002
CIL


Iterate over train dataset: 24000it [00:00, 1155177.20it/s]
Iterate over test dataset: 6000it [00:00, 1594691.34it/s]
Iterate over tasks:  30%|███       | 3/10 [00:36<01:25, 12.25s/it]


KeyboardInterrupt: 

## Which expert learned which class

In [None]:
wandb.login(key="8a88a8c49d1c2d31b8677fe0b8eb7d3e3a031f83")
api = wandb.Api()


def get_expert_distribution(run):
    if run.state != "finished":
        return None

    history = run.history()
    
    expert_distributions = dict()
    ft_tasks = [None] * 1000
    ft_buffer = run.config.get("moe_max_experts")
    for line in run.history().columns:
        if line.startswith("Expert") and line.endswith("learned task"):
            
            line_splited = line.split(" ")
            expert = int(line_splited[1])
            tasks = history[line].dropna().tolist()
            tasks = [int(task) for task in tasks]
            
            if expert not in expert_distributions:
                expert_distributions[expert] = list()
            expert_distributions[expert].extend(tasks) 

            
            for i in tasks:
                if i >= ft_buffer:
                    ft_tasks[i - ft_buffer] = expert    

    # cleaning ft_tasks
    ft_tasks = [i for i in ft_tasks if i is not None]

    return ft_tasks, expert_distributions

def get_sweep_data(runs, attributes_config=[], attributes_summary=[], include_run_id=True, class_similarity=False):
    sweep_data = []
    for run in runs:
        config = run.config
        summary = run.summary

        if summary.get("task_mean/acc") is not None and run.state == "finished":
            run_data = dict()
            if include_run_id:
                run_data["run_id"] = run.id
            # Add the config attributes to the run_data dictionary
            for attr in attributes_config:
                run_data[attr] = config.get(attr)

            # Add the summary attributes to the run_data dictionary
            for attr in attributes_summary:
                run_data[attr] = summary.get(attr)
            
            # average class similarity of learned classes per expert
            if class_similarity and run_data["dataset"] == DATASET:
                _, task_distribution = get_expert_distribution(run)

                # Calculate inter-class similarity for each expert
                expert_similaritys = list()

                # Tasks are not classes!!

                for expert_id, learned_classes in task_distribution.items():
                    if len(learned_classes) > 1:
                        label_mask = np.isin(loaded_labels, learned_classes)

                        # Verwende die Maske, um die entsprechenden Features und Labels auszuwählen
                        expert_features = loaded_features[label_mask]
                        expert_labels = loaded_labels[label_mask]
                        print(learned_classes)
                        print(expert_features.shape)
                        similarity = calculate_inter_class_similarity_vectorized(expert_features, expert_labels)
                        print(f"Expert {expert_id} - Inter-Class Similarity: {similarity:.4f}")
                        expert_similaritys.append(similarity)

                average_similarity = np.mean(expert_similaritys) if expert_similaritys else 0.0       
                run_data["average_expert_similarity"] = average_similarity
                print(f"Average Expert Similarity: {average_similarity:.4f}")
                print(task_distribution)


                # inter-class similarity to all classes except the one where their expert only learned this one class
                lonly_learned_classes = []
                for expert_id, learned_classes in task_distribution.items():
                    if len(learned_classes) == 1:
                        lonly_learned_classes.append(learned_classes[0])
                print(f"Classes where expert only learned this class: {lonly_learned_classes}")

                # Calculate inter-class similarity to all other classes
                other_classes = [label for label in loaded_labels if label != lonly_learned_classes]
                label_mask = np.isin(loaded_labels, other_classes)

                expert_features = loaded_features[label_mask]
                expert_labels = loaded_labels[label_mask]
                similarity = calculate_inter_class_similarity_vectorized(expert_features, expert_labels)
                run_data["filtered_dataset_similarity"] = similarity
                print(f"Dataset similarity: {similarity:.4f}")




            sweep_data.append(run_data)

    return sweep_data


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /export/home/0schindl/.netrc


## Average similarity per expert

In [49]:
_42_adapter_performance = [
    "belaschindler-university-hamburg/0schindl-LayUp_sweeps_question1_results/6kim8tiu", # DIL
    "belaschindler-university-hamburg/0schindl-LayUp_sweeps_question1_results/jdpa9z1x", # Cars
    "belaschindler-university-hamburg/0schindl-LayUp_sweeps_question1_results/cjddpel4", # Imagenet-a
    "belaschindler-university-hamburg/0schindl-LayUp_sweeps_question1_results/hxigp6ck", # Imagenet-r
    "belaschindler-university-hamburg/0schindl-LayUp_sweeps_question1_results/p7zmthx9", # CIL
    ]
datsets_in_CIL = ["cifar100", "cub", "vtab", "omnibenchmark"]

loaded_labels = np.array(loaded_labels)
loaded_features = np.array(loaded_features)

table_421 = []
for i, s in enumerate(_42_adapter_performance):
    sweep = api.sweep(s)
    runs = sweep.runs

    attributes_config = ["dataset", "selection_method", "seed"]
    attributes_summary = ["task_mean/acc"]

    data = get_sweep_data(runs, attributes_config, attributes_summary, class_similarity=True)
    for e in data:
        if e["dataset"] == DATASET:
            if i == 4:
                # CIL
                if e["dataset"] in datsets_in_CIL:
                    table_421.append(e)
            else:
                table_421.append(e)
    


df_sweep = pd.DataFrame(table_421)

Average Expert Similarity: 0.0000
{2: [2], 4: [4], 0: [0], 3: [3], 1: [1]}
Average Expert Similarity: 0.0000
{3: [3], 0: [0], 2: [2], 4: [4], 1: [1]}
Average Expert Similarity: 0.0000
{1: [1], 3: [3], 0: [0], 4: [4], 2: [2]}
Average Expert Similarity: 0.0000
{3: [3], 4: [4], 0: [0], 1: [1], 2: [2]}
Average Expert Similarity: 0.0000
{1: [1], 2: [2], 3: [3], 4: [4], 0: [0]}


In [50]:
data = df_sweep
print(data)

     run_id dataset selection_method  seed  task_mean/acc  \
0  u620gh0q    vtab       inv_ws_div  2001       0.783650   
1  roof165o    vtab       inv_ws_div  2004       0.779962   
2  ypg2gd39    vtab       inv_ws_div  2003       0.778159   
3  12gkkkie    vtab       inv_ws_div  2002       0.793482   
4  3dyl6fjx    vtab       inv_ws_div  2000       0.776197   

   average_expert_similarity  
0                        0.0  
1                        0.0  
2                        0.0  
3                        0.0  
4                        0.0  
