In [4]:
import json
import shutil
import os 
import yaml
import cv2
from glob import glob
from openpyxl import Workbook
from scipy.sparse import data
from sklearn.model_selection import train_test_split

from tqdm import tqdm
import time

import random

In [5]:
!pwd

/home/mb/wildlife_model


In [None]:
# 압축 해제

import os
import zipfile

TO_PATH = '/media/mb/exFAT256/wildlife_model/data'
FROM_PATH = '/home/mb/wildlife_model/data'

def unzip_all_in_directory(source_directory, destination_directory):
    for root, dirs, files in os.walk(source_directory):
        for file in files:
            if file.endswith('.zip'):
                # 원본 zip 파일의 전체 경로
                zip_path = os.path.join(root, file)
                # 압축 해제할 디렉토리 경로 (원본 경로 구조를 유지)
                relative_path = os.path.relpath(root, source_directory)
                extract_path = os.path.join(destination_directory, relative_path, os.path.splitext(file)[0])

                # 디렉토리 생성
                if not os.path.exists(extract_path):
                    os.makedirs(extract_path)

                # zip 파일 압축 해제
                with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                    zip_ref.extractall(extract_path)

                print(f'Extracted {zip_path} to {extract_path}')


unzip_all_in_directory(FROM_PATH, TO_PATH)


In [10]:
!ls -al

total 14004
drwxrwxr-x  2 mb mb     4096 Nov 27 17:27  실행환경버전
drwxrwxr-x  7 mb mb     4096 Dec  7 18:00  .
drwxr-x--- 61 mb mb     4096 Dec  7 18:35  ..
-rwxr-xr-x  1 mb mb 14213578 Nov 22 14:09 'AI 모델 소스코드.zip'
-rw-rw-r--  1 mb mb      116 Dec  1 12:14  classification.txt
drwxrwxr-x  5 mb mb     4096 Dec  7 18:45  dataset
-rw-rw-r--  1 mb mb        0 Dec  7 15:32  images_directory_list.txt
-rw-rw-r--  1 mb mb     1828 Dec  7 15:32  labels_directory_list.txt
-rw-rw-r--  1 mb mb    68260 Dec  7 18:58  Main.ipynb
drwxr-xr-x  4 mb mb     4096 Dec  2 18:05  origin_data
-rw-rw-r--  1 mb mb      683 Feb 11  2022  ReadMe.txt
-rw-rw-r--  1 mb mb    10386 Dec  1 11:00  test.ipynb
drwxrwxr-x  8 mb mb     4096 Dec  7 17:26  _yolov5
drwxrwxr-x 11 mb mb     4096 Dec  7 17:42  yolov5


In [8]:
TARGET_PATH = "./origin_data"

def get_directory_list(start_path):
    images_directory_list = []
    labels_directory_list = []
    for root, dirs, _ in os.walk(start_path):
        for name in dirs:
            if "images" in os.path.join(root, name):
                images_directory_list.append(os.path.join(root, name))
            if "labels" in os.path.join(root, name):
                labels_directory_list.append(os.path.join(root, name))
    return images_directory_list, labels_directory_list

images_directory_list, labels_directory_list = get_directory_list(TARGET_PATH)
images_directory_list = sorted(images_directory_list)
labels_directory_list = sorted(labels_directory_list)

In [9]:
images_directory_list

[]

In [11]:
labels_directory_list

['./origin_data/training/labels',
 './origin_data/training/labels/TL_01.고라니',
 './origin_data/training/labels/TL_02.멧돼지',
 './origin_data/training/labels/TL_03.너구리',
 './origin_data/training/labels/TL_04.다람쥐',
 './origin_data/training/labels/TL_05.청설모',
 './origin_data/training/labels/TL_06.반달가슴곰',
 './origin_data/training/labels/TL_07.족제비',
 './origin_data/training/labels/TL_08.멧토끼',
 './origin_data/training/labels/TL_09.왜가리',
 './origin_data/training/labels/TL_10.중대백로',
 './origin_data/training/labels/TL_11.노루',
 './origin_data/validation/labels',
 './origin_data/validation/labels/VL_01.고라니',
 './origin_data/validation/labels/VL_02.멧돼지',
 './origin_data/validation/labels/VL_03.너구리',
 './origin_data/validation/labels/VL_04.다람쥐',
 './origin_data/validation/labels/VL_05.청설모',
 './origin_data/validation/labels/VL_06.반달가슴곰',
 './origin_data/validation/labels/VL_07.족제비',
 './origin_data/validation/labels/VL_08.멧토끼',
 './origin_data/validation/labels/VL_09.왜가리',
 './origin_data/validation/l

In [12]:
def save_directory_list(directory_list, file_path):
    with open(file_path, 'w') as file:
        for directory in directory_list:
            file.write(directory + '\n')

# 사용 예
save_directory_list(labels_directory_list, 'labels_directory_list.txt')
save_directory_list(images_directory_list, 'images_directory_list.txt')

In [29]:
# json 형태의 annotation 데이터를 YOLOv5 학습 형태에 맞게 txt 로 변환한다.

classification = []
count = []

TOP = 0
LEFT = 1
BOTTOM = 2
RIGHT = 3

SQURE = 1


def load_directory_list(file_path):
    with open(file_path, 'r') as file:
        directory_list = file.read().splitlines()
    return directory_list

labels_directory_list = load_directory_list('labels_directory_list.txt')
images_directory_list = load_directory_list('images_directory_list.txt')

class Label:

	def __init__(self, fileName, imagePath):
		data = self.parse_json_data(fileName)
		self.set_data(data, fileName, imagePath)

	def parse_json_data(self, fileName):
		with open(fileName, encoding='utf-8-sig') as jsonFile:
			jsonData = json.load(jsonFile)
		return jsonData

	def set_data(self, data, fileName, imagePath):
		self.fileName = '.'.join(os.path.basename(fileName).split('.')[0:-1])
		self.width = int(data['images'][0]['width'])
		self.height = int(data['images'][0]['height'])
		self.annotations = data['annotations']

	def get_bbox_point(self, bbox):
		top = 10000
		left = 10000
		bottom = -1
		right = -1

		for i in bbox:
			if (i[0] < left): left = i[0]
			if (i[0] > right): right = i[0]
			if (i[1] < top): top = i[1]
			if (i[1] > bottom): bottom = i[1]

		return ((top, left, bottom, right))

	def get_segment_point(self, seg):
		top = 10000
		left = 10000
		bottom = -1
		right = -1

		for i in range(len(seg)):
			if (i % 2 == 0):
				if (seg[i] < left): left = seg[i]
				if (seg[i] > right): right = seg[i]
			else:
				if (seg[i] < top): top = seg[i]
				if (seg[i] > bottom): bottom = seg[i]

		return ((top, left, bottom, right))

	def get_shape_point(self, shape):
		if (shape['bbox'] and len(shape['bbox']) != 0):
			return self.get_bbox_point(shape['bbox'])
		elif (shape['segmentation'] and len(shape['segmentation']) != 0):
			return self.get_segment_point(shape['segmentation'][0])
		else:
			return (0, 0, 0, 0)

	def point_to_txt(self, point):
		if (point[0]) not in classification:
			classification.append(point[0])
			count.append(0)
			print(classification.index(point[0]), classification)

		idx = classification.index(point[0])
		count[idx] += 1

		if (SQURE):
			r = (self.width - self.height) / 2
			w_center = ((point[1][LEFT] + point[1][RIGHT]) / 2) / self.width
			h_center = (((point[1][TOP] + point[1][BOTTOM]) / 2) + r) / self.width
			w_len = (point[1][RIGHT] - point[1][LEFT]) / self.width
			h_len = (point[1][BOTTOM] - point[1][TOP]) / self.width
		else:
			w_center = ((point[1][LEFT] + point[1][RIGHT]) / 2) / self.width
			h_center = ((point[1][TOP] + point[1][BOTTOM]) / 2) / self.height
			w_len = (point[1][RIGHT] - point[1][LEFT]) / self.width
			h_len = (point[1][BOTTOM] - point[1][TOP]) / self.height
		return (idx, w_center, h_center, w_len, h_len)

	def convert_data(self):
		self.points = []
		self.txt = []
		for shape in self.annotations:
			self.points.append((shape['species'], self.get_shape_point(shape)))
		for point in self.points:
			self.txt.append(self.point_to_txt(point))

	def write_data(self, path):
		with open(path + self.fileName + ".txt", 'w') as f:
			for line in self.txt:
				f.write(' '.join(map(str, line)) + "\n")





def convert_data(file, targetPath, imagePath):
	target = Label(file, imagePath)
	target.convert_data()
	target.write_data(targetPath)

def get_directory_list(start_path):
    directory_list = []
    for root, dirs, _ in os.walk(start_path):
        for name in dirs:
            directory_list.append(os.path.join(root, name))
            print(os.path.join(root, name))
    return directory_list

json_count = [0]
def convert(labels_directory_list, src):
	for idx, targetPath in enumerate(labels_directory_list):
		data_list = os.listdir(targetPath)
		print(idx+1, "/", len(labels_directory_list), " files : ", len(data_list), " - ", targetPath)
		time.sleep(1)
		
		for line in tqdm(data_list):
			if '.json' in line:
				json_count[0] += 1
				#print(json_count[0], "Data Convert:", line, end='\r')
				shutil.copyfile(targetPath + "/" + line, src + "/json/" + line)
				convert_data(targetPath + "/" + line, src + "/labels/", src + "/images/")
			else:
				#print("Pass: ", line, end='\r')
				pass
			#time.sleep(0.01)
		#if idx > 10: break

image_count = [0]
def copy_image(targetPath, src):
	data_list = os.listdir(targetPath)
	for line in data_list:
		if os.path.isdir(os.path.join(targetPath, line)):
			copy_image(os.path.join(targetPath, line), src)
		elif '.jpg' in line:
			image_count[0] += 1
			print(image_count[0], "Image Copy:", line, end='\r')
			shutil.copyfile(targetPath + "/" + line, src + "/images/" + line)
		else:
			pass

def write_yaml(target):
	data = {}
	data["names"] = classification
	data["nc"] = len(classification)
	data["train"] = "../dataset/train.txt"
	data["val"] = "../dataset/valid.txt"
	data["test"] = "../dataset/test.txt"
	with open(target, 'w', encoding='utf-8-sig') as f:
		yaml.dump(data, f, allow_unicode=True)

def run_convert(images_directory_list, labels_directory_list, src, copy):
	if not os.path.isdir(src + "/images"):
		os.makedirs(src + "/images/")
	if not os.path.isdir(src + "/labels"):
		os.makedirs(src + "/labels")
	if not os.path.isdir(src + "/json"):
		os.makedirs(src + "/json")
	if copy:
		copy_image(images_directory_list, src)
	print("start covert label.")
	convert(labels_directory_list, src)
	write_yaml(os.path.join(src, "data.yaml"))
	for i in range(len(classification)):
		print(classification[i], "is counted:", count[i])

run_convert(images_directory_list, labels_directory_list, "./dataset", False)



start covert label.
1 / 24  files :  11  -  /media/mb/exFAT256/wildlife_model/data/training/labels


100%|██████████| 11/11 [00:00<00:00, 189087.48it/s]


2 / 24  files :  31074  -  /media/mb/exFAT256/wildlife_model/data/training/labels/TL_01.고라니


 13%|█▎        | 4135/31074 [00:00<00:01, 21188.20it/s]

0 ['고라니']
1 ['고라니', '노루']


 63%|██████▎   | 19711/31074 [00:00<00:00, 21821.93it/s]

2 ['고라니', '노루', '너구리']


 84%|████████▍ | 26251/31074 [00:01<00:00, 19833.95it/s]

3 ['고라니', '노루', '너구리', '멧돼지']
4 ['고라니', '노루', '너구리', '멧돼지', '반달가슴곰']


100%|██████████| 31074/31074 [00:01<00:00, 19544.19it/s]


3 / 24  files :  30700  -  /media/mb/exFAT256/wildlife_model/data/training/labels/TL_02.멧돼지


 14%|█▍        | 4429/30700 [00:00<00:01, 21128.29it/s]

5 ['고라니', '노루', '너구리', '멧돼지', '반달가슴곰', '멧토끼']


100%|██████████| 30700/30700 [00:01<00:00, 20710.37it/s]


4 / 24  files :  13743  -  /media/mb/exFAT256/wildlife_model/data/training/labels/TL_03.너구리


100%|██████████| 13743/13743 [00:00<00:00, 21317.10it/s]


5 / 24  files :  34152  -  /media/mb/exFAT256/wildlife_model/data/training/labels/TL_04.다람쥐


 14%|█▎        | 4665/34152 [00:00<00:01, 23348.27it/s]

6 ['고라니', '노루', '너구리', '멧돼지', '반달가슴곰', '멧토끼', '다람쥐']


 27%|██▋       | 9291/34152 [00:00<00:01, 16630.76it/s]

7 ['고라니', '노루', '너구리', '멧돼지', '반달가슴곰', '멧토끼', '다람쥐', '청설모']


 87%|████████▋ | 29724/34152 [00:01<00:00, 22907.37it/s]

8 ['고라니', '노루', '너구리', '멧돼지', '반달가슴곰', '멧토끼', '다람쥐', '청설모', '중대백로']


100%|██████████| 34152/34152 [00:01<00:00, 21345.04it/s]


6 / 24  files :  22258  -  /media/mb/exFAT256/wildlife_model/data/training/labels/TL_05.청설모


100%|██████████| 22258/22258 [00:01<00:00, 19635.63it/s]


7 / 24  files :  14960  -  /media/mb/exFAT256/wildlife_model/data/training/labels/TL_06.반달가슴곰


100%|██████████| 14960/14960 [00:00<00:00, 18569.88it/s]


8 / 24  files :  14857  -  /media/mb/exFAT256/wildlife_model/data/training/labels/TL_07.족제비


 27%|██▋       | 3940/14857 [00:00<00:00, 20065.73it/s]

9 ['고라니', '노루', '너구리', '멧돼지', '반달가슴곰', '멧토끼', '다람쥐', '청설모', '중대백로', '족제비']


100%|██████████| 14857/14857 [00:00<00:00, 20866.21it/s]


9 / 24  files :  24514  -  /media/mb/exFAT256/wildlife_model/data/training/labels/TL_08.멧토끼


100%|██████████| 24514/24514 [00:01<00:00, 21158.07it/s]


10 / 24  files :  25974  -  /media/mb/exFAT256/wildlife_model/data/training/labels/TL_09.왜가리


 18%|█▊        | 4564/25974 [00:00<00:00, 22838.70it/s]

10 ['고라니', '노루', '너구리', '멧돼지', '반달가슴곰', '멧토끼', '다람쥐', '청설모', '중대백로', '족제비', '왜가리']


100%|██████████| 25974/25974 [00:01<00:00, 18681.72it/s]


11 / 24  files :  21587  -  /media/mb/exFAT256/wildlife_model/data/training/labels/TL_10.중대백로


100%|██████████| 21587/21587 [00:01<00:00, 19851.26it/s]


12 / 24  files :  25255  -  /media/mb/exFAT256/wildlife_model/data/training/labels/TL_11.노루


100%|██████████| 25255/25255 [00:01<00:00, 19753.37it/s]


13 / 24  files :  11  -  /media/mb/exFAT256/wildlife_model/data/validation/labels


100%|██████████| 11/11 [00:00<00:00, 265157.15it/s]


14 / 24  files :  3904  -  /media/mb/exFAT256/wildlife_model/data/validation/labels/VL_01.고라니


100%|██████████| 3904/3904 [00:00<00:00, 19795.02it/s]


15 / 24  files :  3805  -  /media/mb/exFAT256/wildlife_model/data/validation/labels/VL_02.멧돼지


100%|██████████| 3805/3805 [00:00<00:00, 20545.94it/s]


16 / 24  files :  1721  -  /media/mb/exFAT256/wildlife_model/data/validation/labels/VL_03.너구리


100%|██████████| 1721/1721 [00:00<00:00, 19544.52it/s]


17 / 24  files :  4275  -  /media/mb/exFAT256/wildlife_model/data/validation/labels/VL_04.다람쥐


100%|██████████| 4275/4275 [00:00<00:00, 21490.54it/s]


18 / 24  files :  2796  -  /media/mb/exFAT256/wildlife_model/data/validation/labels/VL_05.청설모


100%|██████████| 2796/2796 [00:00<00:00, 19286.66it/s]


19 / 24  files :  1866  -  /media/mb/exFAT256/wildlife_model/data/validation/labels/VL_06.반달가슴곰


100%|██████████| 1866/1866 [00:00<00:00, 17591.32it/s]


20 / 24  files :  1858  -  /media/mb/exFAT256/wildlife_model/data/validation/labels/VL_07.족제비


100%|██████████| 1858/1858 [00:00<00:00, 20348.37it/s]


21 / 24  files :  3027  -  /media/mb/exFAT256/wildlife_model/data/validation/labels/VL_08.멧토끼


100%|██████████| 3027/3027 [00:00<00:00, 20378.08it/s]


22 / 24  files :  3255  -  /media/mb/exFAT256/wildlife_model/data/validation/labels/VL_09.왜가리


100%|██████████| 3255/3255 [00:00<00:00, 17979.21it/s]


23 / 24  files :  2679  -  /media/mb/exFAT256/wildlife_model/data/validation/labels/VL_10.중대백로


100%|██████████| 2679/2679 [00:00<00:00, 19391.13it/s]


24 / 24  files :  3142  -  /media/mb/exFAT256/wildlife_model/data/validation/labels/VL_11.노루


100%|██████████| 3142/3142 [00:00<00:00, 18520.60it/s]

고라니 is counted: 36297
노루 is counted: 33464
너구리 is counted: 15606
멧돼지 is counted: 38796
반달가슴곰 is counted: 26776
멧토끼 is counted: 36800
다람쥐 is counted: 39162
청설모 is counted: 26104
중대백로 is counted: 27964
족제비 is counted: 16719
왜가리 is counted: 31177





In [23]:
classification

['고라니', '노루', '너구리', '멧돼지', '반달가슴곰', '멧토끼', '다람쥐', '청설모', '중대백로', '족제비', '왜가리']

In [24]:
save_directory_list(classification, 'classification.txt')

In [31]:
# 변환된 데이터셋에 있는 이미지를 학습에 맞게 정사각형으로 변환 및 416x416 사이즈로 변경한다.

IMG_SIZE = 416

def image_resize(savePath, source):
	img = cv2.imread(source)
	h, w = img.shape[:2]

	color = 0
	if w > h:
		borderSize = int((w - h) / 2)
		border = cv2.copyMakeBorder(
			img,
			top=borderSize,
			bottom=borderSize,
			left=0,
			right=0,
			borderType=cv2.BORDER_CONSTANT,
			value=[color, color, color]
		)
	else:
		borderSize = int((h - w) / 2)
		border = cv2.copyMakeBorder(
			img,
			top=0,
			bottom=0,
			left=borderSize,
			right=borderSize,
			borderType=cv2.BORDER_CONSTANT,
			value=[color, color, color]
		)

	resize = cv2.resize(border, (IMG_SIZE, IMG_SIZE), interpolation = cv2.INTER_CUBIC)
	filename = os.path.basename(source)
	full_path = os.path.join(savePath, filename)
	cv2.imwrite(full_path, resize)


def resizing_img(savePath, images_directory_list):
	count = 0
	for idx, sourcePath in enumerate(images_directory_list):
		imgList = glob(sourcePath + '/*.jpg')
		print(idx+1, "/", len(labels_directory_list), " files : ", len(imgList), " - ", sourcePath)
		time.sleep(1)
		
		for img in tqdm(imgList):
			count+=1
			#print(count, "image resizing :", img)
			image_resize(savePath, img)

resizing_img("./dataset/images", images_directory_list)


1 / 24  files :  0  -  /media/mb/exFAT256/wildlife_model/data/training/images


0it [00:00, ?it/s]


2 / 24  files :  31074  -  /media/mb/exFAT256/wildlife_model/data/training/images/TS_01.고라니
3 / 24  files :  30700  -  /media/mb/exFAT256/wildlife_model/data/training/images/TS_02.멧돼지


100%|██████████| 30700/30700 [15:59<00:00, 31.98it/s]


4 / 24  files :  13743  -  /media/mb/exFAT256/wildlife_model/data/training/images/TS_03.너구리


100%|██████████| 13743/13743 [06:19<00:00, 36.23it/s]


5 / 24  files :  34152  -  /media/mb/exFAT256/wildlife_model/data/training/images/TS_04.다람쥐


100%|██████████| 34152/34152 [15:49<00:00, 35.99it/s]


6 / 24  files :  22258  -  /media/mb/exFAT256/wildlife_model/data/training/images/TS_05.청설모


100%|██████████| 22258/22258 [11:03<00:00, 33.56it/s]


7 / 24  files :  14960  -  /media/mb/exFAT256/wildlife_model/data/training/images/TS_06.반달가슴곰


100%|██████████| 14960/14960 [06:33<00:00, 37.98it/s]


8 / 24  files :  14857  -  /media/mb/exFAT256/wildlife_model/data/training/images/TS_07.족제비


100%|██████████| 14857/14857 [06:09<00:00, 40.23it/s]


9 / 24  files :  24514  -  /media/mb/exFAT256/wildlife_model/data/training/images/TS_08.멧토끼


100%|██████████| 24514/24514 [11:01<00:00, 37.03it/s]


10 / 24  files :  25974  -  /media/mb/exFAT256/wildlife_model/data/training/images/TS_09.왜가리


100%|██████████| 25974/25974 [09:50<00:00, 43.97it/s]


11 / 24  files :  21587  -  /media/mb/exFAT256/wildlife_model/data/training/images/TS_10.중대백로


100%|██████████| 21587/21587 [09:08<00:00, 39.38it/s]


12 / 24  files :  25255  -  /media/mb/exFAT256/wildlife_model/data/training/images/TS_11.노루


100%|██████████| 25255/25255 [10:46<00:00, 39.04it/s]


13 / 24  files :  0  -  /media/mb/exFAT256/wildlife_model/data/validation/images


0it [00:00, ?it/s]


14 / 24  files :  3904  -  /media/mb/exFAT256/wildlife_model/data/validation/images/VS_01.고라니


100%|██████████| 3904/3904 [01:45<00:00, 37.16it/s]


15 / 24  files :  3805  -  /media/mb/exFAT256/wildlife_model/data/validation/images/VS_02.멧돼지


100%|██████████| 3805/3805 [01:42<00:00, 37.20it/s]


16 / 24  files :  1720  -  /media/mb/exFAT256/wildlife_model/data/validation/images/VS_03.너구리


100%|██████████| 1720/1720 [00:40<00:00, 42.10it/s]


17 / 24  files :  4275  -  /media/mb/exFAT256/wildlife_model/data/validation/images/VS_04.다람쥐


100%|██████████| 4275/4275 [01:44<00:00, 40.83it/s]


18 / 24  files :  2796  -  /media/mb/exFAT256/wildlife_model/data/validation/images/VS_05.청설모


100%|██████████| 2796/2796 [01:16<00:00, 36.38it/s]


19 / 24  files :  1866  -  /media/mb/exFAT256/wildlife_model/data/validation/images/VS_06.반달가슴곰


100%|██████████| 1866/1866 [00:49<00:00, 37.91it/s]


20 / 24  files :  1858  -  /media/mb/exFAT256/wildlife_model/data/validation/images/VS_07.족제비


100%|██████████| 1858/1858 [00:46<00:00, 40.25it/s]


21 / 24  files :  3027  -  /media/mb/exFAT256/wildlife_model/data/validation/images/VS_08.멧토끼


100%|██████████| 3027/3027 [01:22<00:00, 36.77it/s]


22 / 24  files :  3255  -  /media/mb/exFAT256/wildlife_model/data/validation/images/VS_09.왜가리


100%|██████████| 3255/3255 [01:14<00:00, 43.53it/s]


23 / 24  files :  2679  -  /media/mb/exFAT256/wildlife_model/data/validation/images/VS_10.중대백로


100%|██████████| 2679/2679 [01:07<00:00, 39.45it/s]


24 / 24  files :  3142  -  /media/mb/exFAT256/wildlife_model/data/validation/images/VS_11.노루


100%|██████████| 3142/3142 [01:19<00:00, 39.31it/s] 


In [13]:
label_list = glob("./dataset/labels/*.txt")
label_list = sorted(label_list)
print("label list count : ",len(label_list))

image_list = glob("./dataset/images/*.jpg")
image_list = sorted(image_list)
print("image list count : ", len(image_list))


label list count :  291401
image list count :  291401


In [38]:
label_list[0:10]

['./dataset/labels/A01_F03_C074_C_200114_3001_20S_000001.061.txt',
 './dataset/labels/A01_F03_C074_C_200114_3001_20S_000001.941.txt',
 './dataset/labels/A01_F03_C074_C_200114_3001_20S_000002.139.txt',
 './dataset/labels/A01_F03_C074_C_200114_3001_20S_000002.204.txt',
 './dataset/labels/A01_F03_C074_C_200114_3001_20S_000005.145.txt',
 './dataset/labels/A01_F03_C074_C_200114_3001_20S_000006.538.txt',
 './dataset/labels/A01_F03_C074_C_200114_3001_20S_000009.141.txt',
 './dataset/labels/A01_F03_C074_C_200114_3001_20S_000014.639.txt',
 './dataset/labels/A01_F03_C074_C_200114_3002_20S_000013.639.txt',
 './dataset/labels/A01_F03_C074_C_200114_3002_20S_000018.642.txt']

In [39]:
image_list[0:10]

['./dataset/images/A01_F03_C074_C_200114_3001_20S_000001.061.jpg',
 './dataset/images/A01_F03_C074_C_200114_3001_20S_000001.941.jpg',
 './dataset/images/A01_F03_C074_C_200114_3001_20S_000002.139.jpg',
 './dataset/images/A01_F03_C074_C_200114_3001_20S_000002.204.jpg',
 './dataset/images/A01_F03_C074_C_200114_3001_20S_000005.145.jpg',
 './dataset/images/A01_F03_C074_C_200114_3001_20S_000006.538.jpg',
 './dataset/images/A01_F03_C074_C_200114_3001_20S_000009.141.jpg',
 './dataset/images/A01_F03_C074_C_200114_3001_20S_000014.639.jpg',
 './dataset/images/A01_F03_C074_C_200114_3002_20S_000013.639.jpg',
 './dataset/images/A01_F03_C074_C_200114_3002_20S_000018.642.jpg']

In [14]:
# image 와 label 누락 파일 검사

for idx, full_path in tqdm(enumerate(image_list)):
    filename_with_extension = os.path.basename(full_path)  # 경로 제거
    image_filename, file_extension = os.path.splitext(filename_with_extension)
    filename_with_extension = os.path.basename(label_list[idx])  # 경로 제거
    label_filename, file_extension = os.path.splitext(filename_with_extension)
    
    if image_filename != label_filename:
        print(idx, full_path)
        print(label_list[idx])
        break;

291401it [00:00, 327025.04it/s]


In [15]:
# Test 데이터셋 작성
# original 데이터셋에서 Validation 데이터 셋으로 구성

def get_directories_in_directory(directory):
    directories = [os.path.join(directory, d) for d in os.listdir(directory) if os.path.isdir(os.path.join(directory, d))]
    return directories

TARGET_PATH = "./origin_data/training/labels"
training_labels_directory_list = sorted(get_directories_in_directory(TARGET_PATH))

TARGET_PATH = "./origin_data/validation/labels"
validation_labels_directory_list = sorted(get_directories_in_directory(TARGET_PATH))

total_labels_dir_list = training_labels_directory_list + validation_labels_directory_list
total_labels_dir_list


['./origin_data/training/labels/TL_01.고라니',
 './origin_data/training/labels/TL_02.멧돼지',
 './origin_data/training/labels/TL_03.너구리',
 './origin_data/training/labels/TL_04.다람쥐',
 './origin_data/training/labels/TL_05.청설모',
 './origin_data/training/labels/TL_06.반달가슴곰',
 './origin_data/training/labels/TL_07.족제비',
 './origin_data/training/labels/TL_08.멧토끼',
 './origin_data/training/labels/TL_09.왜가리',
 './origin_data/training/labels/TL_10.중대백로',
 './origin_data/training/labels/TL_11.노루',
 './origin_data/validation/labels/VL_01.고라니',
 './origin_data/validation/labels/VL_02.멧돼지',
 './origin_data/validation/labels/VL_03.너구리',
 './origin_data/validation/labels/VL_04.다람쥐',
 './origin_data/validation/labels/VL_05.청설모',
 './origin_data/validation/labels/VL_06.반달가슴곰',
 './origin_data/validation/labels/VL_07.족제비',
 './origin_data/validation/labels/VL_08.멧토끼',
 './origin_data/validation/labels/VL_09.왜가리',
 './origin_data/validation/labels/VL_10.중대백로',
 './origin_data/validation/labels/VL_11.노루']

In [51]:
training_labels_directory_list

['./origin_data/training/labels/TL_01.고라니',
 './origin_data/training/labels/TL_02.멧돼지',
 './origin_data/training/labels/TL_03.너구리',
 './origin_data/training/labels/TL_04.다람쥐',
 './origin_data/training/labels/TL_05.청설모',
 './origin_data/training/labels/TL_06.반달가슴곰',
 './origin_data/training/labels/TL_07.족제비',
 './origin_data/training/labels/TL_08.멧토끼',
 './origin_data/training/labels/TL_09.왜가리',
 './origin_data/training/labels/TL_10.중대백로',
 './origin_data/training/labels/TL_11.노루']

In [52]:
validation_labels_directory_list

['./origin_data/validation/labels/VL_01.고라니',
 './origin_data/validation/labels/VL_02.멧돼지',
 './origin_data/validation/labels/VL_03.너구리',
 './origin_data/validation/labels/VL_04.다람쥐',
 './origin_data/validation/labels/VL_05.청설모',
 './origin_data/validation/labels/VL_06.반달가슴곰',
 './origin_data/validation/labels/VL_07.족제비',
 './origin_data/validation/labels/VL_08.멧토끼',
 './origin_data/validation/labels/VL_09.왜가리',
 './origin_data/validation/labels/VL_10.중대백로',
 './origin_data/validation/labels/VL_11.노루']

In [17]:
# 학습에 사용할 데이터셋을 Train:Valid:Test 로 구분하여 각 8:1:1 의 비율로 나눈다.
# 나눠진 데이터셋의 상세 내용은 count.xlsx 엑셀파일에 저장
# 에러가 있는 데이터는 err.txt 에 저장

pr = [0]

animal = ["고라니", "멧돼지", "너구리 ", "다람쥐", "청설모", "반달가슴곰", "족제비", "멧토끼", "왜가리", "중대백로", "노루"]

train_list = []
valid_list = []
test_list = []

obj_count = []
img_count = []
err_count = []

def get_json_data(fileName, trainList, validList, testList):
	global obj_count, img_count, err_count

	with open(fileName, encoding='utf-8-sig') as jsonFile:
		jsonData = json.load(jsonFile)
	#fName = ('.'.join(fileName.split('.')[0:-1]) + ".jpg").replace("/json/", "/images/")
	fName = ("./images/" + fileName.split('/')[-1]).replace(".json",".jpg")
	pr[0] += 1
	#print(pr, fName)

	day = str(jsonData['images'][0]['type'])
	if day != "RGB" and day != "IR":
		err_count.append(fName)
		return

	obj_list = jsonData['annotations']
	
	trainCount = 0
	validCount = 0
	testCount = 0
	for i in obj_count:
		if i[0] == ("train", obj_list[0]['species'], day):	trainCount += i[1]
		elif i[0] == ("valid", obj_list[0]['species'], day):	validCount += i[1]
		elif i[0] == ("test", obj_list[0]['species'], day):	testCount += i[1]

	if int(trainCount / 8) < validCount:
		data_type = "train"
		trainList.append(fName)
	elif validCount <= testCount:
		data_type = "valid"
		validList.append(fName)
	else:
		data_type = "test"
		testList.append(fName)

	for obj in obj_list:
		check = 0
		for i in obj_count:
			if i[0] == (data_type, obj['species'], day):
				i[1] += 1
				check = 1
				break
		if not check:
			obj_count.append([(data_type, obj['species'], day), 1])

	spec = animal[int(os.path.basename(fileName).split('_')[0][1:]) - 1]
	check = 0
	for j in img_count:
		if j[0] == (data_type, spec, day):
			j[1] += 1
			check = 1
			break
	if not check:
		img_count.append([(data_type, spec, day), 1])

def parse_data(targetPathList, trainList, validList, testList):
	for dir in tqdm(targetPathList):
		data_list = sorted(os.listdir(dir))
		for line in data_list:
			if '.json' in line:
				get_json_data(dir + "/" + line, trainList, validList, testList)
				#print(line)
			else:
				pass

def print_data(writePath):
	obj_count.sort()
	img_count.sort()

	count = 0
	print("==========object count==========")
	for i in obj_count:
		print(f"{i[0][0]}: {i[0][1]}: {i[0][2]}: {i[1]} counted")
		count += i[1]
	print("count:",count)

	count = 0
	print("==========image count==========")
	for i in img_count:
		print(f"{i[0][0]}: {i[0][1]}: {i[0][2]}: {i[1]} counted")
		count += i[1]
	print("count:",count)

	print("==========err count==========")
	with open(writePath + "/err.txt", "w", encoding='utf-8-sig') as f:
		f.write("\n".join(err_count) + "\n")
	print("count:",len(err_count))

def print_excel(writePath):
	wb = Workbook()
	ws = wb.active
	ws.title = "count"
	ws.append(["Object"])
	ws.append(["data_type", "object_type", "day", "count"])
	for i in obj_count:
		ws.append(i[0] + (i[1],))

	ws.append(["Image"])
	ws.append(["data_type", "object_type", "day", "count"])
	for i in img_count:
		ws.append(i[0] + (i[1],))
	wb.save(writePath + "/count.xlsx")

def split_dataset(targetPathList, writePath):
	global train_list, valid_list, test_list
 
	parse_data(targetPathList, train_list, valid_list, test_list)
    
	with open(writePath + "/train.txt", "w", encoding='utf-8-sig') as f:
		f.write("\n".join(train_list) + "\n")
	with open(writePath + "/valid.txt", "w", encoding='utf-8-sig') as f:
		f.write("\n".join(valid_list) + "\n")
	with open(writePath + "/test.txt", "w", encoding='utf-8-sig') as f:
		f.write("\n".join(test_list) + "\n")

	print("train data: ", len(train_list))
	print("valid data: ", len(valid_list))
	print("test data: ", len(test_list))
	print_data(writePath)
	print_excel(writePath)

	return 



In [18]:
# 데이터셋 추가
split_dataset(total_labels_dir_list, "./dataset")


  0%|          | 0/22 [00:00<?, ?it/s]

100%|██████████| 22/22 [00:56<00:00,  2.59s/it]

train data:  233388
valid data:  28952
test data:  29020
test: 고라니: IR: 1608 counted
test: 고라니: RGB: 2020 counted
test: 너구리: IR: 750 counted
test: 너구리: RGB: 810 counted
test: 노루: IR: 1405 counted
test: 노루: RGB: 1941 counted
test: 다람쥐: IR: 16 counted
test: 다람쥐: RGB: 3899 counted
test: 멧돼지: IR: 2005 counted
test: 멧돼지: RGB: 1874 counted
test: 멧토끼: IR: 1412 counted
test: 멧토끼: RGB: 2266 counted
test: 반달가슴곰: IR: 843 counted
test: 반달가슴곰: RGB: 1833 counted
test: 왜가리: RGB: 3117 counted
test: 족제비: IR: 747 counted
test: 족제비: RGB: 924 counted
test: 중대백로: RGB: 2796 counted
test: 청설모: IR: 6 counted
test: 청설모: RGB: 2604 counted
train: 고라니: IR: 12865 counted
train: 고라니: RGB: 16168 counted
train: 너구리: IR: 6000 counted
train: 너구리: RGB: 6481 counted
train: 노루: IR: 11233 counted
train: 노루: RGB: 15531 counted
train: 다람쥐: IR: 132 counted
train: 다람쥐: RGB: 31198 counted
train: 멧돼지: IR: 16028 counted
train: 멧돼지: RGB: 14992 counted
train: 멧토끼: IR: 11301 counted
train: 멧토끼: RGB: 18131 counted
train: 반달가슴곰: IR: 6




In [None]:
# yolo 및 yolo 실행에 필요한 모듈 설치
!git clone https://github.com/ultralytics/yolov5.git

%cd yolov5
!pip3 install -r requirements.txt

In [43]:
!cd yolov5; pwd

/home/mb/wildlife_model/yolov5


In [19]:
# test 학습
!cd yolov5; python3 train.py --img 416 --batch 32 --epochs 1 --data ../dataset/data.yaml --cfg ./models/yolov5s.yaml --weights yolov5s.pt

[34m[1mtrain: [0mweights=yolov5s.pt, cfg=./models/yolov5s.yaml, data=../dataset/data.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=1, batch_size=32, imgsz=416, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-247-g3f02fde Python-3.10.12 torch-2.1.1+cu121 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 5938MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, ob

In [20]:
# yolo 테스트 코드

!cd yolov5; python3 val.py --data ../dataset/data.yaml --batch 64 --weights ./runs/train/exp9/weights/best.pt --task test --img 416 --save-txt --save-conf

[34m[1mval: [0mdata=../dataset/data.yaml, weights=['./runs/train/exp9/weights/best.pt'], batch_size=64, imgsz=416, conf_thres=0.001, iou_thres=0.6, max_det=300, task=test, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=True, save_hybrid=False, save_conf=True, save_json=False, project=runs/val, name=exp, exist_ok=False, half=False, dnn=False
YOLOv5 🚀 v7.0-247-g3f02fde Python-3.10.12 torch-2.1.1+cu121 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 5938MiB)

Fusing layers... 
YOLOv5s summary: 157 layers, 7039792 parameters, 0 gradients, 15.8 GFLOPs
[34m[1mtest: [0mScanning /home/mb/wildlife_model/dataset/test... 29019 images, 0 backgroun[0m
[34m[1mtest: [0mNew cache created: /home/mb/wildlife_model/dataset/test.cache
                 Class     Images  Instances          P          R      mAP50   
                   all      29019      32872      0.943      0.935       0.97      0.738
                   고라니      29019       3627      0.956      0.938   

In [None]:
# yolo 학습 코드 (약 120시간 실행) 

!sudo python3 train.py --img 416 --batch 64 --epochs 100 --data ../dataset/data.yaml --cfg ./models/yolov5s.yaml --weights yolov5s.pt

In [None]:
# yolo 테스트 코드

!sudo python3 val.py --data ../dataset/wildlife/data.yaml --batch 128 --weights ./runs/train/exp/weights/best.pt --task test --img 416 --save-txt --save-conf