In [None]:
import json
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from PIL import Image
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import Dataset

In [None]:
seed = 42


def _seed_everything(value):
  """Seed every random number source this notebook relies on with `value`."""
  # Exported as a string because environment variables only hold text.
  os.environ['PYTHONHASHSEED'] = str(value)
  # Python's `random`, NumPy's legacy global generator, then PyTorch.
  for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(value)


_seed_everything(seed)

In [None]:
def Find_frame_num(image_filename):
  """Extract the frame number from an image file name.

  The frame number is the text between the first and the second underscore
  of `image_filename` (or everything after the first underscore when there
  is no second one), e.g. 'frame_102_timestamp_0.jpg' -> 102.

  Args:
    image_filename: File name such as 'frame_102_timestamp_0.jpg'.

  Returns:
    The frame number as an int.

  Raises:
    ValueError: If that text is not an integer literal, which includes the
      case where the name contains no underscore at all.
  """
  # At most two splits are needed: only the second piece is of interest.
  pieces = image_filename.split('_', 2)
  digits = pieces[1] if len(pieces) > 1 else ''
  return int(digits)


In [None]:
# Target layout for YOLO: (x, y, w, h, c, cat, dog, i, j).
# With the breed prepended for CNN training, which is what this function
# builds: (breed, x, y, w, h, c, cat, dog, i, j).
def Make_target(label_dir, frame_num, s=8):
  """Build the training target for one frame from its label JSON.

  Annotations in the label file are not ordered by frame, so the frame number
  taken from the image file name (e.g. 'frame_102_timestamp_0.jpg' -> 102) is
  used to look up the matching annotation.

  Args:
    label_dir: Path of the JSON label file. It must contain 'metadata'
      ('breed', 'species', 'width', 'height') and 'annotations', each with a
      'frame_number' and a 'bounding_box' ('x', 'y', 'width', 'height').
    frame_num: Frame number whose bounding box is wanted.
    s: Number of grid cells along each image side.

  Returns:
    A list [breed, x, y, w, h, c, cat, dog, i, j] where
      * breed is metadata['breed'] unchanged,
      * x, y are the box coordinates as offsets inside the responsible grid
        cell, measured in cell units,
      * w, h are the box size measured in cell units,
      * c is the confidence score, always 1.0,
      * cat, dog are 1.0/0.0 for species 'CAT' and 0.0/1.0 otherwise,
      * i, j are the column and row index of the responsible grid cell.

  Raises:
    ValueError: If no annotation exists for `frame_num`.
  """
  with open(label_dir) as f:
    label_json = json.load(f)

  metadata = label_json['metadata']

  # Use the first annotation for this frame. Appending every match would
  # make the target length depend on the number of boxes in the frame.
  box = next(
      (annotation['bounding_box']
       for annotation in label_json['annotations']
       if annotation['frame_number'] == frame_num),
      None)
  if box is None:
    raise ValueError(
        'no annotation for frame {} in {}'.format(frame_num, label_dir))

  # Size of one grid cell in pixels.
  cell_width = metadata['width'] / s
  cell_height = metadata['height'] / s

  # Responsible cell = floor(coordinate / cell size), clamped to the grid so
  # that a coordinate of 0 maps to cell 0 (not -1) and a coordinate on the
  # far image edge maps to the last cell (not s).
  i = max(0, min(int(box['x'] // cell_width), s - 1))
  j = max(0, min(int(box['y'] // cell_height), s - 1))

  is_cat = metadata['species'] == 'CAT'

  return [
      # NOTE(review): the breed is passed through as stored in the JSON; it
      # presumably has to be a numeric class index for downstream tensor
      # conversion - confirm against the label files.
      metadata['breed'],
      box['x'] / cell_width - i,
      box['y'] / cell_height - j,
      box['width'] / cell_width,
      box['height'] / cell_height,
      1.0,                       # confidence score
      1.0 if is_cat else 0.0,    # cat flag
      0.0 if is_cat else 1.0,    # dog flag
      i,
      j,
  ]

In [None]:
# The label file and the image files do not line up cleanly, so __getitem__
# loads one image and extracts only the part of the label that belongs to it.
class cnn_dataset(Dataset):
  """Frames from one directory paired with targets from one label JSON.

  Each sample is a dict {'image': ..., 'target': ...}: the RGB image (after
  the optional transform) and the tensor built from Make_target's list for
  the frame number encoded in the image file name.
  """

  def __init__(self, label_dir, image_dir, transform=None):
    """Store the paths and index the image directory.

    Args:
      label_dir: Path of the JSON label file handed to Make_target.
      image_dir: Directory whose entries are the frame images.
      transform: Optional callable applied to the PIL image.
    """
    self.label_dir = label_dir
    self.image_dir = image_dir
    self.transform = transform

    # os.listdir returns entries in arbitrary order; sorting makes a given
    # index refer to the same file on every run and every machine.
    self.list_image = sorted(os.listdir(self.image_dir))

  def __getitem__(self, index):
    """Return the image and target for the file at position `index`."""
    filename = self.list_image[index]
    image = Image.open(os.path.join(self.image_dir, filename)).convert('RGB')

    # Find_frame_num and Make_target are defined in the cells above.
    target = Make_target(self.label_dir, Find_frame_num(filename))

    if self.transform:
      image = self.transform(image)

    target = torch.tensor(target)

    return {'image': image, 'target': target}

  def __len__(self):
    """Return the number of entries found in the image directory."""
    return len(self.list_image)

In [None]:
# TODO: point these at the real label JSON and the directory that holds the
# frames extracted from the matching video; the values below are placeholders.
LABEL_PATH = 'label.json'
IMAGE_DIR = 'images'

cnn_trans = transforms.Compose([transforms.Resize((512,512)),
                                transforms.ToTensor()])

# cnn_dataset requires the label file and the image directory; the transform
# has to be passed explicitly, otherwise samples stay PIL images.
trainset = cnn_dataset(LABEL_PATH, IMAGE_DIR, transform=cnn_trans)

# NOTE(review): 512 images of 512x512 per batch is memory hungry - confirm it
# fits on the training device.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=512, shuffle=True)

In [None]:
size = 32

class BreedNet(nn.Module):
    """Small CNN classifier: three conv/ReLU/max-pool stages and two linear layers.

    Args:
        num_classes: Number of output logits. The default of 128 matches the
            previously hard-coded output width.
        pooled_size: Side length of the feature map fed to the first linear
            layer. The default of 28 matches the previously hard-coded
            128*28*28 input width.
    """

    def __init__(self, num_classes=128, pooled_size=28):
        super().__init__()

        self.conv1 = nn.Conv2d(3, size, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(size, size*2, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(size*2, size*4, kernel_size=3, stride=1, padding=1)

        # NOTE(review): the original referenced an undefined self.MaxPool2d.
        # A 2x2 window is assumed because three halvings turn a 224x224 input
        # into the 28x28 map the first linear layer was sized for - confirm.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Brings any spatial size to pooled_size x pooled_size so the linear
        # layer always receives the width it was built for; when the map
        # already has that size this averages over single elements.
        self.adapt = nn.AdaptiveAvgPool2d((pooled_size, pooled_size))

        self.fc1 = nn.Linear(size*4*pooled_size*pooled_size, 512)
        self.fc2 = nn.Linear(512, num_classes)

        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Map a batch of images (N, 3, H, W) to logits (N, num_classes)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        x = self.adapt(x)
        # Flatten everything except the batch dimension. Unlike view(-1, ...)
        # this can never silently move elements between samples.
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x


In [None]:
epochs = 5

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# One classifier per species; every sample trains the network of its species.
model_cat = BreedNet().to(device)
model_dog = BreedNet().to(device)
optimizer_cat = optim.Adadelta(model_cat.parameters(), lr=1.0)
scheduler_cat = StepLR(optimizer_cat, step_size=1, gamma=0.7)
optimizer_dog = optim.Adadelta(model_dog.parameters(), lr=1.0)
scheduler_dog = StepLR(optimizer_dog, step_size=1, gamma=0.7)
criterion = nn.CrossEntropyLoss().to(device)

model_cat.train()
model_dog.train()

total_step = len(trainloader)
# Running sum of all step losses over the whole run.
train_loss = 0.0


def train_step(model, optimizer, images, labels):
    """Run one optimisation step of `model` on `images`/`labels`; return the loss."""
    optimizer.zero_grad()
    loss = criterion(model(images), labels)
    loss.backward()
    optimizer.step()
    return loss.item()


for epoch in range(epochs):
    for i, batch in enumerate(trainloader):
        # The dataset yields dicts, so the collated batch is a dict as well.
        image = batch['image'].to(device)
        target = batch['target'].to(device)

        # Target columns: (breed, x, y, w, h, c, cat, dog, i, j).
        # NOTE(review): column 0 is assumed to hold the breed as a class index
        # in [0, number of logits) as CrossEntropyLoss requires - confirm.
        breed = target[:, 0].long()
        is_cat = target[:, 6] == 1.0
        is_dog = ~is_cat

        # A batch can mix both species, so each model sees only its own rows.
        batch_loss = 0.0
        if is_cat.any():
            batch_loss += train_step(model_cat, optimizer_cat, image[is_cat], breed[is_cat])
        if is_dog.any():
            batch_loss += train_step(model_dog, optimizer_dog, image[is_dog], breed[is_dog])
        train_loss += batch_loss

        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch + 1, epochs, i + 1, total_step, batch_loss))

    # Decay both learning rates once per epoch.
    scheduler_cat.step()
    scheduler_dog.step()