# 랜덤 시드 고정

In [None]:
import torch
import random
import torch.backends.cudnn as cudnn
import numpy as np
# One seed value shared by every random number generator below
SEED = 0

# Seed Python's built-in random module
random.seed(SEED)

# Seed NumPy's global random state
np.random.seed(SEED)

# Seed PyTorch (CPU generator, current CUDA device, every CUDA device)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)  # covers machines with more than one GPU

# cuDNN: request deterministic kernels and disable the benchmark autotuner
cudnn.deterministic = True  # slows computation down, so consider enabling it only late in the research
cudnn.benchmark = False

# 데이터셋 생성 클래스 정의

In [None]:
import os
import torch
import torch.utils.data
import torchvision
from PIL import Image
from pycocotools.coco import COCO

class myOwnDataset(torch.utils.data.Dataset):
    """Detection dataset backed by a COCO-format annotation file.

    Each item is an ``(image, target)`` pair. ``target`` is a dict with the
    keys ``boxes`` (float32, ``[xmin, ymin, xmax, ymax]`` per row), ``labels``
    (int64 category ids), ``image_id``, ``area`` (float32) and ``iscrowd``
    (int64, always zero).

    Args:
        root: Directory that the annotation ``file_name`` entries are joined to.
        annotation: Path of the COCO annotation file, passed to ``COCO``.
        transforms: Optional callable applied to the image only (the target
            is returned untouched).
    """

    def __init__(self, root, annotation, transforms=None):
        self.root = root
        self.transforms = transforms
        self.coco = COCO(annotation)
        # Sorted so that an index always maps to the same image id.
        self.ids = list(sorted(self.coco.imgs.keys()))

    @staticmethod
    def _build_target(img_id, coco_annotation):
        """Turn the COCO annotation dicts of one image into a target dict."""
        boxes = []
        labels = []
        areas = []
        for ann in coco_annotation:
            # COCO stores [xmin, ymin, width, height]; convert to corner format.
            xmin, ymin, width, height = ann['bbox'][:4]
            boxes.append([xmin, ymin, xmin + width, ymin + height])
            labels.append(ann['category_id'])
            areas.append(ann['area'])

        return {
            # reshape keeps the (N, 4) layout even when the image has no
            # annotations; without it an empty list becomes a (0,) tensor.
            "boxes": torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            "labels": torch.as_tensor(labels, dtype=torch.int64),
            "image_id": torch.tensor([img_id]),
            "area": torch.as_tensor(areas, dtype=torch.float32),
            "iscrowd": torch.zeros((len(boxes),), dtype=torch.int64),
        }

    def __getitem__(self, index):
        """Return the image at ``index`` together with its target dict."""
        coco = self.coco
        img_id = self.ids[index]
        coco_annotation = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        path = coco.loadImgs(img_id)[0]['file_name']
        # Force three channels so RGBA / palette PNG files produce the same
        # tensor layout as plain RGB images.
        img = Image.open(os.path.join(self.root, path)).convert('RGB')

        my_annotation = self._build_target(img_id, coco_annotation)

        if self.transforms is not None:
            img = self.transforms(img)

        return img, my_annotation

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.ids)

In [None]:
# Image preprocessing: at the moment only the conversion to a tensor
def get_transform():
    """Return a ``Compose`` pipeline containing a single ``ToTensor`` step."""
    to_tensor = torchvision.transforms.ToTensor()
    return torchvision.transforms.Compose([to_tensor])

# 데이터 loader 정의 / 코어 : 12개 (오류나면 줄임) / 배치 : 10개 (성능 보고 변경)

In [None]:
# Locations of the training images and of the COCO annotation file
train_coco = '../../data/labels/modified_train.json'
train_data_dir = '../../data/train_fishOnly/'

# Build the training dataset (arguments: root, annotation, transforms)
my_dataset = myOwnDataset(train_data_dir, train_coco, get_transform())

# Batching hook for the DataLoader: regroup the samples field by field
def collate_fn(batch):
    """Transpose ``batch`` so that each field of its samples becomes one tuple."""
    columns = zip(*batch)
    return tuple(columns)

# Number of samples per batch
train_batch_size = 10

# DataLoader over the training dataset: shuffled, 12 worker processes
loader_options = {
    'batch_size': train_batch_size,
    'shuffle': True,
    'num_workers': 12,
    'collate_fn': collate_fn,
}
data_loader = torch.utils.data.DataLoader(my_dataset, **loader_options)

# img, annotation tensor 생성

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Walk through every batch once, move it to the device and print its targets
for imgs, annotations in data_loader:
    imgs = [img.to(device) for img in imgs]
    annotations = [
        {key: value.to(device) for key, value in target.items()}
        for target in annotations
    ]
    print(annotations)

# 딥러닝 학습 코드

In [None]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

def get_model_instance_segmentation(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN v2) detector with a fresh box head.

    Despite its name, this returns an object *detection* model; the name is
    kept so existing callers continue to work.

    Args:
        num_classes: Number of outputs of the replacement box predictor.

    Returns:
        The torchvision Faster R-CNN model with ``roi_heads.box_predictor``
        replaced by a ``FastRCNNPredictor`` sized for ``num_classes``.
    """
    # `pretrained=` is deprecated in torchvision; `weights=None` is the
    # equivalent spelling and, like before, loads no pre-trained weights.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(weights=None)

    # Size the new head from the input width of the existing classifier layer.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

# Call&implement Tensorboard
import torch
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()

# NOTE(review): torchvision detection models treat label 0 as background, so
# num_classes is normally (number of object categories + 1) — confirm that the
# category ids in the annotation file fit this value.
num_classes = 8
num_epochs = 15
model = get_model_instance_segmentation(num_classes)

# move model to the right device
model.to(device)

# Optimise only the parameters that require gradients
params = [p for p in model.parameters() if p.requires_grad]
# Alternative optimiser: torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
optimizer = torch.optim.Adam(params, lr=0.0015, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.0005, amsgrad=False)
len_dataloader = len(data_loader)

for epoch in range(num_epochs):
    model.train()
    for i, (imgs, annotations) in enumerate(data_loader, start=1):
        imgs = [img.to(device) for img in imgs]
        annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
        # In training mode the model returns a dict of loss terms
        loss_dict = model(imgs, annotations)
        losses = sum(loss for loss in loss_dict.values())
        # Log against a running iteration counter: using `epoch` as the step
        # would put every batch of an epoch on the same x-coordinate.
        global_step = epoch * len_dataloader + i
        writer.add_scalar("Loss/train", losses.item(), global_step)
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        print(f'Iteration: {i}/{len_dataloader}, Loss: {losses}')
writer.flush()
writer.close()

2023-11-12 19:05:02.893599: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-12 19:05:02.893653: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-12 19:05:02.895594: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-12 19:05:03.120371: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
def train_model(iter):
    """Train the module-level ``model`` for ``iter`` more epochs.

    Uses the module-level ``data_loader``, ``device``, ``optimizer`` and
    ``writer``. After each epoch the mean batch loss is written to the
    ``Loss/train`` scalar with the epoch index as the step.

    Args:
        iter: Number of epochs to run. The name shadows the builtin but is
            kept for compatibility with existing callers.
    """
    num_batches = len(data_loader)
    for epoch in range(iter):
        model.train()
        running_loss = 0.0
        for imgs, annotations in data_loader:
            imgs = [img.to(device) for img in imgs]
            annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
            # The loss must come from a fresh forward pass for every batch:
            # calling backward() again on an already used loss tensor fails
            # because its graph has been freed.
            loss_dict = model(imgs, annotations)
            losses = sum(loss for loss in loss_dict.values())
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            running_loss += losses.item()
        # Skip logging for an empty loader to avoid dividing by zero
        if num_batches:
            writer.add_scalar("Loss/train", running_loss / num_batches, epoch)

train_model(10)
writer.flush()

# 모델 저장

In [None]:
# Save only the model's state_dict (its parameters and buffers) to disk
torch.save(model.state_dict(), '../model/first_weights.pth')

In [None]:
# Switch the current model object to evaluation mode
model.eval()

# 모델 불러오기

In [None]:
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source. Elsewhere in this notebook the weights are saved as a
# state_dict to '../model/first_weights.pth'; this different path presumably
# holds a whole pickled model from another run — confirm.
model = torch.load('../model/third_trained.pt')

# 테스트 

In [None]:
import torch
import torchvision.transforms as transforms
from PIL import Image, ImageDraw, ImageFont

# Run on the GPU when one is present, otherwise fall back to the CPU
infer_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(infer_device)
# The model object may have been replaced since eval() was last called; in
# training mode torchvision detection models require targets, so force
# inference mode here.
model.eval()

# Open the image; force 3-channel RGB so RGBA / palette PNGs match the model input
image = Image.open('../../data/train_fishOnly/train_0.png').convert('RGB')

# Convert the image to a PyTorch tensor on the inference device
transform = transforms.Compose([transforms.ToTensor()])
img = transform(image).to(infer_device)

# No batch dimension is added: the model takes a list of image tensors.
# Run inference without tracking gradients
with torch.no_grad():
    predictions = model([img])

# Thresholds
confidence_threshold = 0.5
nms_iou_threshold = 0.5

# The loops below update predictions[0] in place through this alias
result = predictions[0]

# Drop predictions whose confidence does not exceed the threshold
# (`scores` holds the boolean mask)
scores = result['scores'] > confidence_threshold
for key in ('boxes', 'labels', 'scores'):
    result[key] = result[key][scores]

# Apply NMS (non-maximum suppression) across all remaining boxes
keep = torchvision.ops.nms(result['boxes'], result['scores'], nms_iou_threshold)
for key in ('boxes', 'labels', 'scores'):
    result[key] = result[key][keep]

# NOTE(review): torchvision detection models treat label 0 as background, so
# key 0 is presumably never predicted — confirm the id-to-name mapping.
classes = {0:"농어",1:"베스",2:"숭어",3:"강준치",4:"블루길",5:"잉어",6:"붕어",7:"누치"}
# Draw the bounding boxes and label text on the image
draw = ImageDraw.Draw(image)
font = ImageFont.truetype("/usr/share/fonts/truetype/nanum/NanumGothic.ttf", 20)  # font file and size
# tolist() turns the (possibly CUDA) tensors into plain Python numbers for PIL
for score, box, label in zip(result['scores'].tolist(), result['boxes'].tolist(), result['labels'].tolist()):
    xmin, ymin, xmax, ymax = box
    draw.rectangle([(xmin, ymin), (xmax, ymax)], outline='red', width=3)
    draw.text((xmin, ymin), f"Label: {classes[int(label)]} / score : {score:.3f}", fill='red', font = font)

# Show the resulting image
image.show()

# 답안 파일 만드는 부분은 잘 몰라서 생략