In [1]:
from transformers import AutoProcessor, FlavaModel, FlavaImageModel, AutoImageProcessor, FlavaFeatureExtractor, FlavaImageCodebook, FlavaConfig, FlavaForPreTraining
import numpy as np
from torchmultimodal.models.flava.model import flava_model_for_classification

import torchvision
import torch
import torch.nn as nn

#model_ex = torchvision.models.vision_transformer(pretrained=True) #Use this in last case resort

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
if torch.cuda.is_available():
    print("CUDA is available on this system.")
    device = torch.device("cuda")
else:
    print("CUDA is not available on this system.")
    device = torch.device("cpu")




CUDA is available on this system.


In [3]:
from tqdm import tqdm
from avalanche.benchmarks.generators import nc_benchmark, ni_benchmark
from avalanche.benchmarks.datasets import MNIST

from torchvision.transforms import Compose, ToTensor, Normalize, RandomCrop
import torchvision.transforms as transforms



resize_transform = transforms.Resize((224, 224))
grayscale_transform = transforms.Grayscale(num_output_channels=3)
train_transform = Compose([
    resize_transform,
    grayscale_transform,
    ToTensor()
    
])

test_transform = Compose([
    resize_transform,
    grayscale_transform,
    ToTensor()

])


mnist_train = MNIST(
    './data/mnist', train=True, download=True , transform=train_transform
)

mnist_test = MNIST(
    './data/mnist', train=False, download=True , transform=test_transform
)


In [4]:
# Get the first image from the dataset
image, label = mnist_train[0]

# Check the size and shape of the image
size = image.size
shape = image.shape

print(f"Image Size: {size}")
print(f"Image Shape: {shape}")

Image Size: <built-in method size of Tensor object at 0x7ffae780d5e0>
Image Shape: torch.Size([3, 224, 224])


In [5]:
# scenario = nc_benchmark(
#     mnist_train, mnist_test, n_experiences=2, shuffle=True, seed=1234,
#     task_labels=False
# )


#Classsic Domain incremental creation from MNIST dataset
scenario_DIL = ni_benchmark(
    mnist_train, mnist_test, n_experiences=3, shuffle=False, seed=42,
    balance_experiences=True 
)

Training on Unfrozen FLAVA model + a simple Classification head with Replay plugin  on SMNIST dataset

In [6]:
class flava_custom_with_sequence_length_unfreeze(torch.nn.Module):
    def __init__(self, num_classes, device='cuda'):
        super().__init__()
        self.image_processor = AutoImageProcessor.from_pretrained("facebook/flava-full")
        self.model_custom = FlavaImageModel.from_pretrained("facebook/flava-full").eval().to("cuda")

        self.mlp = nn.Sequential(
                nn.Linear(768*197, 100),
                nn.ReLU(),
                nn.Linear(100, 50),
                nn.ReLU(),
                nn.Linear(50, 10),
                nn.Sigmoid()
        )
        self.device = device

    def forward(self, x):
        if not isinstance(x, list):
            x = list(x.cuda())
         
        #with torch.no_grad():
        inputs = self.image_processor(images=x, return_tensors="pt").to("cuda")
        outputs = self.model_custom(**inputs)
            
        features = outputs.last_hidden_state.view(outputs.last_hidden_state.size(0), -1)  # Flatten spatial dimensions
        logits = self.mlp(features)    
        
        #print("logits", logits.shape)
        return logits



In [7]:
model_ER = flava_custom_with_sequence_length_unfreeze(10)


Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [8]:
from torch.optim import SGD
from torch.nn import CrossEntropyLoss
from avalanche.benchmarks.classic import SplitMNIST
from avalanche.evaluation.metrics import forgetting_metrics, accuracy_metrics, \
    loss_metrics, timing_metrics, cpu_usage_metrics, confusion_matrix_metrics, disk_usage_metrics
from avalanche.models import SimpleMLP
from avalanche.logging import InteractiveLogger, TextLogger, TensorboardLogger
from avalanche.training.plugins import EvaluationPlugin
from avalanche.training.supervised import Naive, ER_ACE
from transformers import AutoImageProcessor, ViTModel
from avalanche.models import SlimResNet18
from avalanche.training.plugins import ReplayPlugin



# MODEL CREATION
#model = SimpleMLP(num_classes=scenario_DIL.n_classes)
#model = FlavaImageModel.from_pretrained("facebook/flava-full").eval()
#model = torchvision.models.vit_b_16(pretrained=True)
#model = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k")
#model = SlimResNet18(nclasses=scenario_DIL.n_classes)
#model = flava_custom(num_classes=10, hidden_dim=256)


# DEFINE THE EVALUATION PLUGIN and LOGGERS
# The evaluation plugin manages the metrics computation.
# It takes as argument a list of metrics, collectes their results and returns
# them to the strategy it is attached to.

# log to Tensorboard
tb_logger = TensorboardLogger()

# log to text file
text_logger = TextLogger(open('log.txt', 'a'))

# print to stdout
interactive_logger = InteractiveLogger()

eval_plugin = EvaluationPlugin(
    accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True),
    loss_metrics(minibatch=True, epoch=True, experience=True, stream=True),
    timing_metrics(epoch=True, epoch_running=True),
    forgetting_metrics(experience=True, stream=True),
    cpu_usage_metrics(experience=True),
    confusion_matrix_metrics(num_classes=scenario_DIL.n_classes, save_image=False,
                             stream=True),
    disk_usage_metrics(minibatch=True, epoch=True, experience=True, stream=True),
    loggers=[interactive_logger, text_logger, tb_logger]
)

replay_plugin = ReplayPlugin(
        mem_size=50)

# CREATE THE STRATEGY INSTANCE (Replay)
cl_strategy = Naive(
    model_ER, SGD(model_ER.parameters(), lr=0.1, momentum=0.9),
    CrossEntropyLoss(), train_mb_size=50, train_epochs=5, eval_mb_size=25,
    evaluator=eval_plugin, plugins=[replay_plugin], device=device)

# TRAINING LOOP
print('Starting experiment...')
results = []
for experience in scenario_DIL.train_stream:
    print("Start of experience: ", experience.current_experience)
    print("Current Classes: ", experience.classes_in_this_experience)

    # train returns a dictionary which contains all the metric values
    res = cl_strategy.train(experience)
    print('Training completed')

    print('Computing accuracy on the whole test set')
    # test also returns a dictionary which contains all the metric values
    results.append(cl_strategy.eval(scenario_DIL.test_stream))

Starting experiment...
Start of experience:  0
Current Classes:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
-- >> Start of training phase << --
0it [00:00, ?it/s]

100%|██████████| 401/401 [08:37<00:00,  1.29s/it]
Epoch 0 ended.
	DiskUsage_Epoch/train_phase/train_stream/Task000 = 2734235.0430
	DiskUsage_MB/train_phase/train_stream/Task000 = 2734235.0430
	Loss_Epoch/train_phase/train_stream/Task000 = 2.3021
	Loss_MB/train_phase/train_stream/Task000 = 2.3595
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0055
	Time_Epoch/train_phase/train_stream/Task000 = 517.3662
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.1105
	Top1_Acc_MB/train_phase/train_stream/Task000 = 0.0000
100%|██████████| 401/401 [08:34<00:00,  1.28s/it]
Epoch 1 ended.
	DiskUsage_Epoch/train_phase/train_stream/Task000 = 2734366.5586
	DiskUsage_MB/train_phase/train_stream/Task000 = 2734366.5586
	Loss_Epoch/train_phase/train_stream/Task000 = 2.3017
	Loss_MB/train_phase/train_stream/Task000 = 2.2993
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0023
	Time_Epoch/train_phase/train_stream/Task000 = 513.9971
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.1124
	

Training on frozen FLAVA model + a simple Classification head with Replay plugin  on SMNIST dataset

In [6]:
class flava_custom_with_sequence_length_freeze(torch.nn.Module):
    def __init__(self, num_classes, device='cuda'):
        super().__init__()
        self.image_processor = AutoImageProcessor.from_pretrained("facebook/flava-full")
        self.model_custom = FlavaImageModel.from_pretrained("facebook/flava-full").eval().to("cuda")

        self.mlp = nn.Sequential(
                nn.Linear(768*197, 100),
                nn.ReLU(),
                nn.Linear(100, 50),
                nn.ReLU(),
                nn.Linear(50, 10),
                nn.Sigmoid()
        )
        self.device = device

    def forward(self, x):
        if not isinstance(x, list):
            x = list(x.cuda())
         
        with torch.no_grad():
            inputs = self.image_processor(images=x, return_tensors="pt").to("cuda")
            outputs = self.model_custom(**inputs)
            
        features = outputs.last_hidden_state.view(outputs.last_hidden_state.size(0), -1)  # Flatten spatial dimensions
        logits = self.mlp(features)    
        
        #print("logits", logits.shape)
        return logits


In [7]:
model_Replay_frozen = flava_custom_with_sequence_length_freeze(10)

Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [8]:
from torch.optim import SGD
from torch.nn import CrossEntropyLoss
from avalanche.benchmarks.classic import SplitMNIST
from avalanche.evaluation.metrics import forgetting_metrics, accuracy_metrics, \
    loss_metrics, timing_metrics, cpu_usage_metrics, confusion_matrix_metrics, disk_usage_metrics
from avalanche.models import SimpleMLP
from avalanche.logging import InteractiveLogger, TextLogger, TensorboardLogger
from avalanche.training.plugins import EvaluationPlugin
from avalanche.training.supervised import Naive, ER_ACE
from transformers import AutoImageProcessor, ViTModel
from avalanche.models import SlimResNet18
from avalanche.training.plugins import ReplayPlugin



# MODEL CREATION
#model = FlavaImageModel.from_pretrained("facebook/flava-full").eval()
#model = torchvision.models.vit_b_16(pretrained=True)
#model = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k")
#model = SlimResNet18(nclasses=scenario_DIL.n_classes)



# DEFINE THE EVALUATION PLUGIN and LOGGERS
# The evaluation plugin manages the metrics computation.
# It takes as argument a list of metrics, collectes their results and returns
# them to the strategy it is attached to.

# log to Tensorboard
tb_logger = TensorboardLogger()

# log to text file
text_logger = TextLogger(open('log.txt', 'a'))

# print to stdout
interactive_logger = InteractiveLogger()

eval_plugin = EvaluationPlugin(
    accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True),
    loss_metrics(minibatch=True, epoch=True, experience=True, stream=True),
    timing_metrics(epoch=True, epoch_running=True),
    forgetting_metrics(experience=True, stream=True),
    cpu_usage_metrics(experience=True),
    confusion_matrix_metrics(num_classes=scenario_DIL.n_classes, save_image=False,
                             stream=True),
    disk_usage_metrics(minibatch=True, epoch=True, experience=True, stream=True),
    loggers=[interactive_logger, text_logger] #tb_logger]
)

replay_plugin = ReplayPlugin(
        mem_size=50)

# CREATE THE STRATEGY INSTANCE (Replay)
cl_strategy = Naive(
    model_Replay_frozen, SGD(model_Replay_frozen.parameters(), lr=0.09, momentum=0.9),
    CrossEntropyLoss(), train_mb_size=100, train_epochs=5, eval_mb_size=25,
    evaluator=eval_plugin, plugins=[replay_plugin], device=device)

# TRAINING LOOP
print('Starting experiment...')
results = []
for experience in scenario_DIL.train_stream:
    print("Start of experience: ", experience.current_experience)
    print("Current Classes: ", experience.classes_in_this_experience)

    # train returns a dictionary which contains all the metric values
    res = cl_strategy.train(experience)
    print('Training completed')

    print('Computing accuracy on the whole test set')
    # test also returns a dictionary which contains all the metric values
    results.append(cl_strategy.eval(scenario_DIL.test_stream))

Starting experiment...
Start of experience:  0
Current Classes:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
-- >> Start of training phase << --
0it [00:00, ?it/s]

100%|██████████| 201/201 [05:21<00:00,  1.60s/it]
Epoch 0 ended.
	DiskUsage_Epoch/train_phase/train_stream/Task000 = 2736124.3262
	DiskUsage_MB/train_phase/train_stream/Task000 = 2736124.3262
	Loss_Epoch/train_phase/train_stream/Task000 = 1.7731
	Loss_MB/train_phase/train_stream/Task000 = 1.4756
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0122
	Time_Epoch/train_phase/train_stream/Task000 = 321.0442
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.6950
	Top1_Acc_MB/train_phase/train_stream/Task000 = 1.0000
100%|██████████| 201/201 [05:18<00:00,  1.59s/it]
Epoch 1 ended.
	DiskUsage_Epoch/train_phase/train_stream/Task000 = 2736124.3262
	DiskUsage_MB/train_phase/train_stream/Task000 = 2736124.3262
	Loss_Epoch/train_phase/train_stream/Task000 = 1.5290
	Loss_MB/train_phase/train_stream/Task000 = 1.4619
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0063
	Time_Epoch/train_phase/train_stream/Task000 = 318.5984
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9348
	