In [3]:
pip install avalanche-lib==0.5

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [19]:
from torch.utils.data import Subset
from avalanche.benchmarks.classic.ccifar100 import SplitCIFAR100
import numpy as np

def create_imbalanced_subset(dataset, class_counts, rng=None):
    """Pick a random, per-class-capped set of sample indices from ``dataset``.

    Args:
        dataset: Indexable dataset supporting ``len()``. Labels are read from
            ``dataset.targets`` when that attribute exists, otherwise from
            ``dataset[idx][1]``. Labels must be integer-like (anything
            ``int()`` accepts, e.g. Python/NumPy ints or 0-dim tensors).
        class_counts: Mapping ``label -> number of samples wanted``.
        rng: Optional ``numpy.random.Generator`` / ``RandomState`` used for
            sampling. Defaults to NumPy's global random state, so existing
            two-argument calls behave as before.

    Returns:
        1-D integer ``np.ndarray`` of selected indices, grouped in the
        iteration order of ``class_counts``. Each class contributes
        ``min(requested, available)`` indices drawn without replacement;
        classes with no samples in ``dataset`` contribute nothing.
    """
    sampler = np.random if rng is None else rng

    # Decide once where labels come from instead of re-checking per sample.
    has_targets = hasattr(dataset, 'targets')

    # Group indices by label. Buckets are created on demand so the label space
    # is not limited to a fixed range, and labels are normalised to plain ints
    # so tensor/NumPy scalars hit the same dictionary key as Python ints.
    indices_per_class = {}
    for idx in range(len(dataset)):
        label = dataset.targets[idx] if has_targets else dataset[idx][1]
        indices_per_class.setdefault(int(label), []).append(idx)

    selected_indices = []
    for class_label, count in class_counts.items():
        pool = indices_per_class.get(int(class_label), [])
        selected_count = min(count, len(pool))
        if selected_count == 0:
            # Nothing requested or nothing available: avoid sampling from an
            # empty pool altogether.
            continue
        selected_indices.extend(sampler.choice(pool, selected_count, replace=False))

    # Explicit integer dtype so an empty selection is still a valid index array.
    return np.array(selected_indices, dtype=np.int64)


# --- Experiment configuration ------------------------------------------------
random_seed = 42     # fixes both the class-to-phase split and the sampling
num_classes = 100    # CIFAR-100
num_phases = 10
initial_count = 500
decay_factor = 0.7

# create_imbalanced_subset draws from NumPy's global RNG by default, so seed it
# here to make the selected subsets repeatable across runs.
np.random.seed(random_seed)

# Per-class sample budget for each phase: initial_count * decay_factor**phase.
# Note that int() truncates and float rounding can land just below a whole
# number: 500 * 0.7**2 evaluates to 244.99999999999997, so phase 3 gets 244
# samples per class rather than 245.
phase_class_counts = []
for i in range(num_phases):
    count = int(initial_count * (decay_factor ** i))
    # The same budget is assigned to every label; labels with no samples in a
    # given experience simply contribute nothing when the subset is drawn.
    class_counts = {label: count for label in range(num_classes)}
    phase_class_counts.append(class_counts)

# Without fixed_class_order the class-to-experience assignment is random;
# passing the seed pins it.
scenario = SplitCIFAR100(n_experiences=num_phases, seed=random_seed)

# One imbalanced training subset per experience, in stream order.
imbalanced_scenarios = []
for exp_id in range(num_phases):
    train_dataset = scenario.train_stream[exp_id].dataset
    counts = phase_class_counts[exp_id]

    selected_indices = create_imbalanced_subset(train_dataset, counts)
    imbalanced_train_subset = Subset(train_dataset, selected_indices)

    imbalanced_scenarios.append(imbalanced_train_subset)

    print(f"Phase {exp_id + 1}: Number of samples = {len(imbalanced_train_subset)}")


Files already downloaded and verified
Files already downloaded and verified
Phase 1: Number of samples = 5000
Phase 2: Number of samples = 3500
Phase 3: Number of samples = 2440
Phase 4: Number of samples = 1710
Phase 5: Number of samples = 1200
Phase 6: Number of samples = 840
Phase 7: Number of samples = 580
Phase 8: Number of samples = 410
Phase 9: Number of samples = 280
Phase 10: Number of samples = 200


In [26]:
from torch.utils.data import Subset
from avalanche.benchmarks.classic.ccifar100 import SplitCIFAR100
import numpy as np

def create_imbalanced_subset(dataset, class_counts, rng=None):
    """Pick a random, per-class-capped set of sample indices from ``dataset``.

    Args:
        dataset: Indexable dataset supporting ``len()``. Labels are read from
            ``dataset.targets`` when that attribute exists, otherwise from
            ``dataset[idx][1]``. Labels must be integer-like (anything
            ``int()`` accepts, e.g. Python/NumPy ints or 0-dim tensors).
        class_counts: Mapping ``label -> number of samples wanted``.
        rng: Optional ``numpy.random.Generator`` / ``RandomState`` used for
            sampling. Defaults to NumPy's global random state, so existing
            two-argument calls behave as before.

    Returns:
        1-D integer ``np.ndarray`` of selected indices, grouped in the
        iteration order of ``class_counts``. Each class contributes
        ``min(requested, available)`` indices drawn without replacement;
        classes with no samples in ``dataset`` contribute nothing.
    """
    sampler = np.random if rng is None else rng

    # Decide once where labels come from instead of re-checking per sample.
    has_targets = hasattr(dataset, 'targets')

    # Group indices by label. Buckets are created on demand so the label space
    # is not limited to a fixed range, and labels are normalised to plain ints
    # so tensor/NumPy scalars hit the same dictionary key as Python ints.
    indices_per_class = {}
    for idx in range(len(dataset)):
        label = dataset.targets[idx] if has_targets else dataset[idx][1]
        indices_per_class.setdefault(int(label), []).append(idx)

    selected_indices = []
    for class_label, count in class_counts.items():
        pool = indices_per_class.get(int(class_label), [])
        selected_count = min(count, len(pool))
        if selected_count == 0:
            # Nothing requested or nothing available: avoid sampling from an
            # empty pool altogether.
            continue
        selected_indices.extend(sampler.choice(pool, selected_count, replace=False))

    # Explicit integer dtype so an empty selection is still a valid index array.
    return np.array(selected_indices, dtype=np.int64)

# --- Experiment configuration ------------------------------------------------
random_seed = 42     # makes the random per-class sampling repeatable
num_classes = 100    # CIFAR-100
num_phases = 10
classes_per_phase = num_classes // num_phases
initial_count = 500
decay_factor = 0.7

# create_imbalanced_subset draws from NumPy's global RNG by default, so seed it
# here to make the selected subsets repeatable across runs.
np.random.seed(random_seed)

phase_class_counts = []
phase_classes = []

for i in range(num_phases):
    # Phase i owns the contiguous label block [start_class, end_class).
    start_class = i * classes_per_phase
    end_class = start_class + classes_per_phase
    current_classes = list(range(start_class, end_class))
    phase_classes.append(current_classes)

    # Per-class budget decays geometrically with the phase index. int()
    # truncates, and float rounding can land just below a whole number:
    # 500 * 0.7**2 evaluates to 244.99999999999997, giving 244 rather than 245.
    count = int(initial_count * (decay_factor ** i))
    class_counts = {label: count for label in current_classes}
    phase_class_counts.append(class_counts)

# Natural class order, so that the i-th experience is expected to hold the
# labels listed in phase_classes[i].
scenario = SplitCIFAR100(n_experiences=num_phases, fixed_class_order=list(range(num_classes)))

# One imbalanced training subset per experience, in stream order.
imbalanced_scenarios = []
for exp_id in range(num_phases):
    train_dataset = scenario.train_stream[exp_id].dataset
    counts = phase_class_counts[exp_id]

    selected_indices = create_imbalanced_subset(train_dataset, counts)
    imbalanced_train_subset = Subset(train_dataset, selected_indices)

    imbalanced_scenarios.append(imbalanced_train_subset)

    print(f"Phase {exp_id + 1}: Classes = {phase_classes[exp_id]}, Number of samples = {len(imbalanced_train_subset)}")


Files already downloaded and verified
Files already downloaded and verified
Phase 1: Classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], Number of samples = 5000
Phase 2: Classes = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19], Number of samples = 3500
Phase 3: Classes = [20, 21, 22, 23, 24, 25, 26, 27, 28, 29], Number of samples = 2440
Phase 4: Classes = [30, 31, 32, 33, 34, 35, 36, 37, 38, 39], Number of samples = 1710
Phase 5: Classes = [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], Number of samples = 1200
Phase 6: Classes = [50, 51, 52, 53, 54, 55, 56, 57, 58, 59], Number of samples = 840
Phase 7: Classes = [60, 61, 62, 63, 64, 65, 66, 67, 68, 69], Number of samples = 580
Phase 8: Classes = [70, 71, 72, 73, 74, 75, 76, 77, 78, 79], Number of samples = 410
Phase 9: Classes = [80, 81, 82, 83, 84, 85, 86, 87, 88, 89], Number of samples = 280
Phase 10: Classes = [90, 91, 92, 93, 94, 95, 96, 97, 98, 99], Number of samples = 200
