### FSOD를 위한 Fater_Rcnn model test

- base datasets : coco-datasets
- learning algorithm : few-shot learning, meta learning
- base algorithm : cnn
- frame work : pytorch

In [1]:
import faster_RCNN as fsod
import torch
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
fsod.utils()

----- Notice -----
device: cuda
cuda num: 1
cude device name: NVIDIA GeForce GTX 1660 Ti
cudnn version: 90100
random rate: 37

numpy version: 1.24.3
matplotlib version: 3.7.3
opencv version: 4.10.0
PIL version: 10.4.0

torch version: 2.4.1
torchvision version: 0.20.0
torchinfo version: 1.8.0
torchmetrics version: 1.5.2


- 일단 모델을 들고와서 구조부터 보자

In [3]:
faster_rcnn_model = fsod.models.detection.fasterrcnn_resnet50_fpn(weights=fsod.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT)

print(faster_rcnn_model)


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [4]:
fsod.summary(faster_rcnn_model, input_size=(1, 3, 224, 224))

Layer (type:depth-idx)                                  Output Shape              Param #
FasterRCNN                                              [0, 4]                    --
├─GeneralizedRCNNTransform: 1-1                         [1, 3, 800, 800]          --
├─BackboneWithFPN: 1-2                                  [1, 256, 13, 13]          --
│    └─IntermediateLayerGetter: 2-1                     [1, 2048, 25, 25]         --
│    │    └─Conv2d: 3-1                                 [1, 64, 400, 400]         (9,408)
│    │    └─FrozenBatchNorm2d: 3-2                      [1, 64, 400, 400]         --
│    │    └─ReLU: 3-3                                   [1, 64, 400, 400]         --
│    │    └─MaxPool2d: 3-4                              [1, 64, 200, 200]         --
│    │    └─Sequential: 3-5                             [1, 256, 200, 200]        (212,992)
│    │    └─Sequential: 3-6                             [1, 512, 100, 100]        1,212,416
│    │    └─Sequential: 3-7              

In [5]:
num_classes = 91 + 5
custom_model = fsod.CustomFasterRCNNModel(num_classes).to(fsod.DEVICE)

print(custom_model)

CustomFasterRCNNModel(
  (faster_rcnn): FasterRCNN(
    (transform): GeneralizedRCNNTransform(
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        Resize(min_size=(800,), max_size=1333, mode='bilinear')
    )
    (backbone): BackboneWithFPN(
      (body): IntermediateLayerGetter(
        (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (bn1): FrozenBatchNorm2d(64, eps=0.0)
        (relu): ReLU(inplace=True)
        (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        (layer1): Sequential(
          (0): Bottleneck(
            (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn1): FrozenBatchNorm2d(64, eps=0.0)
            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn2): FrozenBatchNorm2d(64, eps=0.0)
            (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False

In [6]:
dummy_query = (1, 3, 224, 224)
dummy_support = (1, 3, 224, 224)

# fsod.summary(custom_model, input_size=[dummy_query, dummy_support])
fsod.summary(custom_model)

Layer (type:depth-idx)                                       Param #
CustomFasterRCNNModel                                        --
├─FasterRCNN: 1-1                                            --
│    └─GeneralizedRCNNTransform: 2-1                         --
│    └─BackboneWithFPN: 2-2                                  --
│    │    └─IntermediateLayerGetter: 3-1                     23,454,912
│    │    └─FeaturePyramidNetwork: 3-2                       3,344,384
│    └─RegionProposalNetwork: 2-3                            --
│    │    └─AnchorGenerator: 3-3                             --
│    │    └─RPNHead: 3-4                                     593,935
│    └─RoIHeads: 2-4                                         --
│    │    └─MultiScaleRoIAlign: 3-5                          --
│    │    └─TwoMLPHead: 3-6                                  13,895,680
│    │    └─FastRCNNPredictor: 3-7                           492,000
Total params: 41,780,911
Trainable params: 41,558,511
Non-trainabl

In [7]:
def show_mat(mat, title):
    sns.heatmap(mat, annot=True, fmt='.2f', cbar=False, cmap='BuPu')
    plt.title(title + 'confuse matrics')
    plt.show()

In [8]:
# 쿼리와 지원 데이터 생성
query_data = torch.randn(16, 3, 224, 224).to(fsod.DEVICE)  # 쿼리 이미지
support_data = torch.randn(16, 3, 224, 224).to(fsod.DEVICE)  # 지원 이미지
y_data = torch.randint(0, 2, (16,)).to(fsod.DEVICE)  # 정답 라벨

# Validation 실행
val_loss, val_acc, val_f1, val_mat = fsod.validation(custom_model, query_data, support_data, y_data, num_classes)
print(f"Validation Loss: {val_loss}, Accuracy: {val_acc}, F1 Score: {val_f1}")

# Testing 실행
test_loss, test_acc, test_f1, test_mat = fsod.testing(custom_model, query_data, support_data, y_data, num_classes)
print(f"Test Loss: {test_loss}, Accuracy: {test_acc}, F1 Score: {test_f1}")

# show_mat(val_mat, 'validation')
show_mat(test_mat, 'test')

query_tensor shape: torch.Size([16, 256, 56, 56])
support_tensor shape: torch.Size([16, 256, 56, 56])

knn_similarities shape: torch.Size([16, 1])
high_similarity_indices shape: torch.Size([16, 1])



ValueError: targets should not be None