In [33]:
from pathlib import Path
import numpy as np
import os, shutil
from PIL import Image
import time

from tqdm import tqdm

import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms
import torch.optim as optim
import matplotlib.pyplot as plt

from torchvision.models import resnet50, ResNet50_Weights, resnet34, ResNet34_Weights
import warnings

class resnet_feature_extractor(torch.nn.Module):
    """Frozen pretrained ResNet-50 that returns concatenated intermediate feature maps.

    Forward hooks on the last block of each selected stage capture that
    stage's output. ``forward`` smooths every captured map with a 3x3 average
    pool, resizes all of them to the spatial size of the first captured map
    and concatenates them along the channel dimension.

    Args:
        layer2: Capture the output of ``model.layer2[-1]``.
        layer3: Capture the output of ``model.layer3[-1]``.
        layer4: Capture the output of ``model.layer4[-1]``.

    Note:
        At least one of the three flags must be True before ``forward`` is
        called; with the all-False defaults no hook is registered and there
        is nothing to return.
    """

    def __init__(self, layer2=False, layer3=False, layer4=False):
        super(resnet_feature_extractor, self).__init__()
        self.model = resnet50(weights=ResNet50_Weights.DEFAULT)
        self.model.eval()

        # The backbone is used as a fixed feature extractor: no gradients needed.
        for param in self.model.parameters():
            param.requires_grad = False

        # Filled by the hooks during each forward pass; defined here so the
        # attribute exists even before the first call.
        self.features = []

        def hook(module, input, output):
            self.features.append(output)

        # Store user preferences
        self.layer2_enabled = layer2
        self.layer3_enabled = layer3
        self.layer4_enabled = layer4

        # Register hooks dynamically
        if self.layer2_enabled:
            self.model.layer2[-1].register_forward_hook(hook)
        if self.layer3_enabled:
            self.model.layer3[-1].register_forward_hook(hook)
        if self.layer4_enabled:
            self.model.layer4[-1].register_forward_hook(hook)

        # The smoothing pool is parameter-free and input-independent, so it is
        # built once here instead of on every forward call.
        self.avg = torch.nn.AvgPool2d(3, stride=1)

    def forward(self, input):
        """Run the backbone and return the fused feature tensor.

        Args:
            input: Image batch fed to the ResNet-50 backbone.

        Returns:
            Tensor with the captured maps concatenated along dim 1, all
            resized to the height of the first captured map.

        Raises:
            RuntimeError: If no layer was enabled, so no feature map was captured.
        """
        # Reset the buffer so maps from a previous call are not mixed in.
        self.features = []
        with torch.no_grad():
            _ = self.model(input)

        if not self.features:
            raise RuntimeError(
                "resnet_feature_extractor captured no feature maps: "
                "enable at least one of layer2, layer3 or layer4."
            )

        # The first captured map defines the common (square) output size.
        fmap_size = self.features[0].shape[-2]
        self.resize = torch.nn.AdaptiveAvgPool2d(fmap_size)
        resized_maps = [self.resize(self.avg(fmap)) for fmap in self.features]
        patch = torch.cat(resized_maps, 1)

        return patch

In [34]:
import warnings

# Ignore all warnings
warnings.filterwarnings("ignore")

In [35]:
from torch import nn

class FeatCAE(nn.Module):
    """Autoencoder over feature maps, built only from 1x1 convolutions.

    The encoder maps channels ``in_channels -> wide -> 2 * latent_dim ->
    latent_dim`` and the decoder mirrors that path back to ``in_channels``,
    where ``wide = (in_channels + 2 * latent_dim) // 2``. Every convolution
    uses kernel size 1, stride 1 and no padding, so the spatial size of the
    input is kept.

    Args:
        in_channels: Number of channels of the input feature map.
        latent_dim: Number of channels of the bottleneck.
        is_bn: Insert a BatchNorm2d after each hidden convolution.
    """

    def __init__(self, in_channels=1000, latent_dim=50, is_bn=True):
        super().__init__()

        wide = (in_channels + 2 * latent_dim) // 2
        narrow = 2 * latent_dim

        def conv1x1(c_in, c_out):
            return nn.Conv2d(c_in, c_out, kernel_size=1, stride=1, padding=0)

        def hidden_stage(c_in, c_out):
            # conv -> (optional batch norm) -> ReLU, returned as a flat list so
            # the Sequential indices stay the same as in a hand-written stack.
            stage = [conv1x1(c_in, c_out)]
            if is_bn:
                stage.append(nn.BatchNorm2d(num_features=c_out))
            stage.append(nn.ReLU())
            return stage

        self.encoder = nn.Sequential(
            *hidden_stage(in_channels, wide),
            *hidden_stage(wide, narrow),
            conv1x1(narrow, latent_dim),
        )

        # No activation follows the final convolution, so the reconstruction is
        # a plain linear combination of the last hidden features.
        self.decoder = nn.Sequential(
            *hidden_stage(latent_dim, narrow),
            *hidden_stage(narrow, wide),
            conv1x1(wide, in_channels),
        )

    def forward(self, x):
        """Encode ``x`` to the bottleneck and decode it back."""
        return self.decoder(self.encoder(x))

In [36]:
def warmup_cuda():
    """
    Warm up CUDA so the GPU is ready for use.

    Does nothing when CUDA is not available.
    """
    if not torch.cuda.is_available():
        return

    # Make sure CUDA is initialised
    torch.cuda.synchronize()
    # Run a series of throwaway matrix products to warm the GPU up
    dummy = torch.randn(1000, 1000).cuda()
    for _step in range(1000):
        product = dummy @ dummy
    torch.cuda.synchronize()
    # Free the scratch memory
    del dummy
    torch.cuda.empty_cache()

warmup_cuda()

In [41]:
!nvidia-smi

Tue Jan 14 12:23:29 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4090        Off | 00000000:01:00.0 Off |                  Off |
|  0%   39C    P8              58W / 480W |   1187MiB / 24564MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [42]:
import torch
import torch.nn as nn
import torch.profiler
import time

# MODIFY THE BELOW PARAMS
# Pick the device first so the checkpoint can be mapped straight onto it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Feature extraction model (hook on layer2 only)
fe_model = resnet_feature_extractor(layer2=True)

# in_channels must equal the channel count of the tensor fe_model returns,
# because that tensor is what the autoencoder receives as input.
cae_model = FeatCAE(in_channels=512, latent_dim=100)

# Load the pre-trained CAE weights.
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
torch_state = torch.load(
    '/home/jovyan/work/anomaly_detection_demo/modelsave/model.pth',
    map_location=device,
)
cae_model.load_state_dict(torch_state['model_state_dict'])

# Move models to GPU if available
fe_model.to(device)
cae_model.to(device)

# Inference mode: without this the BatchNorm layers of the autoencoder would
# normalise with the statistics of the current batch and overwrite their
# running estimates on every forward pass.
fe_model.eval()
cae_model.eval()

# Dummy input image for testing
dummy_input = torch.randn(1, 3, 512, 512).to(device)  # Replace with actual input dimensions

# Measure GPU usage before starting the process
def log_gpu_usage():
    """Print the CUDA memory currently allocated and reserved by PyTorch, in MB."""
    bytes_per_mb = 1024**2
    allocated_mb = torch.cuda.memory_allocated() / bytes_per_mb
    print(f"GPU Memory Allocated: {allocated_mb:.2f} MB")
    # "Cached" is the label used in the log; the value is the reserved memory.
    reserved_mb = torch.cuda.memory_reserved() / bytes_per_mb
    print(f"GPU Memory Cached: {reserved_mb:.2f} MB")

# Start profiler to log inference time
def run_inference_with_profiler(model, input_data, model_name, output_dir):
    """Run one profiled forward pass and export the per-operator statistics to CSV.

    Prints the GPU memory usage before and after the pass, prints the ten
    most expensive profiler rows, and writes every aggregated row to
    ``<output_dir>/<model_name>_profiler_results.csv``.

    Args:
        model: Callable invoked as ``model(input_data)`` under ``torch.no_grad()``.
        input_data: Input handed to ``model``.
        model_name: Label used in the log line and in the CSV file name.
        output_dir: Directory that receives the CSV file; created if missing.

    Returns:
        Tuple ``(output, prof)``: the model output and the profiler object.
    """
    import csv  # local import so this cell does not depend on an edit to the import cell

    def _first_attr(row, *names):
        """Return the first attribute of `row` found in `names`, or 0 if none exists."""
        for name in names:
            value = getattr(row, name, None)
            if value is not None:
                return value
        return 0

    # Log GPU usage before inference
    print(f"Running inference on {model_name}...")
    log_gpu_usage()

    # Only ask for CUDA activity when a CUDA device is actually present.
    activities = [torch.profiler.ProfilerActivity.CPU]
    if torch.cuda.is_available():
        activities.append(torch.profiler.ProfilerActivity.CUDA)

    # Profile the inference time
    with torch.profiler.profile(
        activities=activities,
        profile_memory=True,
    ) as prof:
        with torch.profiler.record_function("model_inference"):
            with torch.no_grad():
                output = model(input_data)

    # Log GPU usage after inference. Done outside the profiled region so the
    # printing itself is not counted in the "model_inference" timings.
    log_gpu_usage()

    averages = prof.key_averages()

    # Print the profiling results
    print(averages.table(
        sort_by="cuda_time_total" if torch.cuda.is_available() else "cpu_time_total",
        row_limit=10
    ))

    os.makedirs(output_dir, exist_ok=True)
    csv_path = os.path.join(output_dir, f'{model_name}_profiler_results.csv')
    with open(csv_path, 'w', newline='') as f:
        # csv.writer quotes fields when needed; operator/kernel names may
        # contain commas, which would otherwise shift the columns.
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow([
            "Operation",
            "CPU Time (ms)",
            "CUDA Time (ms)",
            "Called",
            "CPU Memory (MB) ",
            "CUDA Memory (MB)",
        ])

        # Export detailed results (profiler times are divided by 1000 for ms,
        # memory figures by 1024**2 for MB).
        for row in averages:
            # Total device time, to match the total CPU time in the previous
            # column; the attribute name depends on the PyTorch version.
            device_time_total = _first_attr(row, "device_time_total", "cuda_time_total")
            # NOTE(review): this is the *self* device memory while the CPU
            # column is the total CPU memory — confirm which pairing is intended.
            device_memory = _first_attr(row, "self_device_memory_usage", "self_cuda_memory_usage")
            writer.writerow([
                row.key,
                f"{row.cpu_time_total/1000:.2f}",
                f"{device_time_total/1000:.2f}",
                row.count,
                f"{row.cpu_memory_usage/1024/1024:.2f}",
                f"{device_memory/1024/1024:.2f}",
            ])

    return output, prof

# Profile one forward pass of the feature extractor on the dummy image.
# NOTE(review): the label says "l2l3" but fe_model is created with layer2 only — confirm the intended name.
fe_output, fe_profiler = run_inference_with_profiler(
    model=fe_model,
    input_data=dummy_input,
    model_name="FE Model_l2l3",
    output_dir="/home/jovyan/work/anomaly_detection/",
)

Running inference on FE Model_l2l3...
GPU Memory Allocated: 771.90 MB
GPU Memory Cached: 820.00 MB
GPU Memory Allocated: 787.90 MB
GPU Memory Cached: 870.00 MB
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  model_inference        29.60%       1.532ms       100.00%       5.177ms       5.177ms     627.000us        12.10%       5.18

[W114 12:23:41.793038913 kineto_shim.cpp:405] Adding profiling metadata requires using torch.profiler with Kineto support (USE_KINETO=1)


In [43]:
# Profile one forward pass of the autoencoder on the extracted features.
cae_output, cae_profiler = run_inference_with_profiler(
    model=cae_model,
    input_data=fe_output,
    model_name="CAE Model_l2l3",
    output_dir="/home/jovyan/work/anomaly_detection/",
)

Running inference on CAE Model_l2l3...
GPU Memory Allocated: 239.81 MB
GPU Memory Cached: 870.00 MB
GPU Memory Allocated: 247.81 MB
GPU Memory Cached: 870.00 MB
--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                            Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                 model_inference        48.30%     533.868us       100.00%       1.105ms       1.105ms     451.000us        40.74%       1.107ms

[W114 12:23:44.925109263 kineto_shim.cpp:405] Adding profiling metadata requires using torch.profiler with Kineto support (USE_KINETO=1)
