In [1]:
import torch
import os

# Debugging aids for the cuDNN errors hit while running this notebook.
# CUDA_LAUNCH_BLOCKING=1 requests synchronous kernel launches so a CUDA failure
# is reported at the call that triggered it (at a noticeable speed cost).
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
# NOTE(review): PyTorch describes TORCH_USE_CUDA_DSA as a compile-time option;
# exporting it at runtime is probably a no-op -- confirm before relying on it.
os.environ['TORCH_USE_CUDA_DSA'] = '1'

# Disable cuDNN completely -- this is the key part of the workaround.
torch.backends.cudnn.enabled = False
# Keep the remaining cuDNN flags explicit and conservative in case cuDNN is
# switched back on later in the session.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.allow_tf32 = False

# Keep float32 matmuls at full precision. 'medium' would allow reduced-precision
# (bfloat16) internals, which contradicts the TF32 opt-out above and the 32-bit
# precision requested from the Engine; it is unrelated to tensor contiguity.
torch.set_float32_matmul_precision('highest')

# Release cached GPU memory and wait for pending GPU work before continuing.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    torch.cuda.synchronize()

print("✅ cuDNN 비활성화 및 환경 설정 완료")


✅ cuDNN 비활성화 및 환경 설정 완료


In [2]:
from anomalib.models.video import AiVad
from anomalib.data import Avenue
from anomalib.data.datasets.base.video import VideoTargetFrame
from anomalib.engine import Engine



In [3]:

# First-pass setup: an Avenue datamodule (no explicit root, so the library
# default location is used) and a fresh AiVad model.
datamodule = Avenue(
    target_frame=VideoTargetFrame.LAST,
    frames_between_clips=1,
    clip_length_in_frames=2,
)
model = AiVad()

In [4]:
from anomalib.data import UCSDped

In [5]:
# UCSD Pedestrian datamodule rooted at the local dataset directory.
# NOTE(review): prepare_data() presumably fetches/extracts the dataset when it
# is not already present under `root` -- confirm against the anomalib docs.
datamodule = UCSDped(
    root="/data/DJ/datasets/ucsd",
)
datamodule.prepare_data()



In [6]:
# Optional: ShanghaiTech dataset preparation. Intentionally left disabled;
# uncomment the three lines below to prepare it under /data/DJ/datasets/shanghai.
# from anomalib.data import ShanghaiTech
# datamodule = ShanghaiTech(root="/data/DJ/datasets/shanghai")
# datamodule.prepare_data()

In [7]:
# Avenue is already imported near the top of the notebook; the repeated import
# is harmless and keeps this cell runnable on its own.
from anomalib.data import Avenue

# Avenue datamodule rooted at the local dataset directory.
# NOTE(review): prepare_data() presumably fetches/extracts the dataset when it
# is not already present under `root` -- confirm against the anomalib docs.
datamodule = Avenue(
    root="/data/DJ/datasets/avenue",
)
datamodule.prepare_data()


In [8]:
# Rebuild the Avenue datamodule on the local dataset root with two-frame clips
# and a reduced worker count (part of the cuDNN-error workaround).
datamodule = Avenue(
    root="/data/DJ/datasets/avenue",
    clip_length_in_frames=2,
    frames_between_clips=1,
    target_frame=VideoTargetFrame.LAST,
    num_workers=2,  # fewer dataloader workers
)

# Engine configuration for a quick smoke-test run.
engine = Engine(
    devices=1,  # a single GPU is enough in a notebook session
    accelerator='gpu',
    precision='32',  # 32-bit precision (chosen for cuDNN compatibility)
    # One epoch only. The value used to be 10, which contradicted the intent of
    # a single test epoch; the recorded fit log also stops at max_epochs=1.
    max_epochs=1,
    limit_train_batches=5,  # cap the number of batches (lower memory use)
    limit_val_batches=2,
    accumulate_grad_batches=1,  # no gradient accumulation
)

In [9]:
# Start from a freshly constructed AiVad model.
model = AiVad()

# Fit with the `engine` and `datamodule` built in the preceding configuration
# cell (the one that constructs Engine(...)), not an earlier datamodule.
engine.fit(datamodule=datamodule, model=model)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
100%|██████████| 1/1 [00:00<00:00,  9.24it/s]
100%|██████████| 2/2 [00:00<00:00, 17.90it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [4,5]
/home/dongjukim/miniforge3/envs/mt_p310/lib/python3.10/site-packages/lightning/pytorch/core/optimizer.py:183: `LightningModule.configure_optimizers` returned `None`, this fit will run with no optimizer

  | Name           | Type          | Params | Mode 
---------------------------------------------------------
0 | pre_processor  | PreProcessor  | 0      | train
1 | post_processor | PostProcessor | 0      | train
2 | evaluator      | Evaluator     | 0      | train
3 | model          | AiVadModel    | 260 M  | train
---------------------------------------------------------
259 M     Trainable params
447 K     Non-trainable params
260 M     Total params
1,041.500 Total estimated model params size (MB)
670       Modules in train mode
227    

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
