In [30]:
# @title 기본 라이브러리 import
import os
import sys
import math
import glob
import random
import shutil

import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from PIL import Image, ImageEnhance, ImageFilter

from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torch.utils.data.sampler import WeightedRandomSampler

import torchvision
import torchvision.models as models
import torchvision.transforms.functional as F2
import torchvision.transforms.v2 as v2
from torchvision import transforms
from torchvision.models.detection.ssd import SSD300_VGG16_Weights

import pickle

In [31]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cuda


In [37]:
# 데이터 경로 지정
base_dir = 'C:/Users/hyeon/codeit_project/ai05-level1-project'
train_images_dir = os.path.join(base_dir, 'train_images')
train_ann_dir = os.path.join(base_dir, 'train_annotations')
test_images_dir = os.path.join(base_dir, 'test_images')

# 폴더 안에 있는 어노테이션 개수 확인
json_files = glob.glob(os.path.join(train_ann_dir, "**", "*.json"), recursive=True)

# 각 데이터의 개수 확인
print(f"train 이미지 개수 : {len(os.listdir(train_images_dir))}")
print(f"train 어노테이션 개수 : {len(json_files)}")
print(f"test 이미지 개수 : {len(os.listdir(test_images_dir))}")

train 이미지 개수 : 1489
train 어노테이션 개수 : 4526
test 이미지 개수 : 843


In [48]:
# json 구조 확인
import json

# annotation 구조 확인
with open(json_files[0], 'r') as f:
    annotation_check = json.load(f)
print(annotation_check.keys())
annotation_check

dict_keys(['images', 'type', 'annotations', 'categories'])


{'images': [{'file_name': 'K-001900-010224-016551-031705_0_2_0_2_70_000_200.png',
   'width': 976,
   'height': 1280,
   'imgfile': 'K-001900-010224-016551-031705_0_2_0_2_70_000_200.png',
   'drug_N': 'K-001900',
   'drug_S': '정상알약',
   'back_color': '연회색 배경',
   'drug_dir': '앞면',
   'light_color': '주백색',
   'camera_la': 70,
   'camera_lo': 0,
   'size': 200,
   'dl_idx': '1899',
   'dl_mapping_code': 'K-001900',
   'dl_name': '보령부스파정 5mg',
   'dl_name_en': 'Buspar Tab. 5mg Boryung',
   'img_key': 'http://connectdi.com/design/img/drug/1Mxwka5v0lL.jpg',
   'dl_material': '부스피론염산염',
   'dl_material_en': 'Buspirone Hydrochloride',
   'dl_custom_shape': '정제, 저작정',
   'dl_company': '보령제약(주)',
   'dl_company_en': 'Boryung',
   'di_company_mf': '',
   'di_company_mf_en': '',
   'item_seq': 198700706,
   'di_item_permit_date': '19870323',
   'di_class_no': '[01170]정신신경용제',
   'di_etc_otc_code': '전문의약품',
   'di_edi_code': '641901280,A09302381',
   'chart': '이약은 양면볼록한 장방형의 흰색정제이다',
   'drug_shap

In [None]:
from collections import defaultdict

ann_images = {}
ann_type = {}
ann_annotations = {}
ann_categories = {}

for path in json_files:
    with open(path, 'r', encoding='utf-8') as f:
        json_file = json.load(f)
    for data in data.get('images, []'):
        


✅ COCO 병합 완료: 1489 images, 4526 anns, 73 cats


In [49]:
import os, json
import pandas as pd

records_images = []
records_annotations = []
records_categories = []

for path in json_files:  # json_files = [파일 경로 리스트]
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # images
    for img in data.get('images', []):
        img['source_file'] = os.path.basename(path)
        records_images.append(img)

    # annotations
    for ann in data.get('annotations', []):
        ann['source_file'] = os.path.basename(path)
        records_annotations.append(ann)

    # categories
    for cat in data.get('categories', []):
        cat['source_file'] = os.path.basename(path)
        records_categories.append(cat)

# DataFrame 생성
df_images = pd.DataFrame(records_images)
df_annotations = pd.DataFrame(records_annotations)
df_categories = pd.DataFrame(records_categories)

print(df_images.head())
print(df_annotations.head())
print(df_categories.head())


                                           file_name  width  height  \
0  K-001900-010224-016551-031705_0_2_0_2_70_000_2...    976    1280   
1  K-001900-010224-016551-031705_0_2_0_2_75_000_2...    976    1280   
2  K-001900-010224-016551-031705_0_2_0_2_90_000_2...    976    1280   
3  K-001900-010224-016551-031705_0_2_0_2_70_000_2...    976    1280   
4  K-001900-010224-016551-031705_0_2_0_2_75_000_2...    976    1280   

                                             imgfile    drug_N drug_S  \
0  K-001900-010224-016551-031705_0_2_0_2_70_000_2...  K-001900   정상알약   
1  K-001900-010224-016551-031705_0_2_0_2_75_000_2...  K-001900   정상알약   
2  K-001900-010224-016551-031705_0_2_0_2_90_000_2...  K-001900   정상알약   
3  K-001900-010224-016551-031705_0_2_0_2_70_000_2...  K-016551   정상알약   
4  K-001900-010224-016551-031705_0_2_0_2_75_000_2...  K-016551   정상알약   

  back_color drug_dir light_color  camera_la  ...  form_code_name  \
0     연회색 배경       앞면         주백색         70  ...              나정

In [None]:
df_merged = df_annotations.merge(
    df_images, left_on='image_id', right_on='id',
    suffixes=('_ann', '_img')
)
df_merged.to_csv("merged_annotations.csv", index=False)

In [53]:
df_images.head()

Unnamed: 0,file_name,width,height,imgfile,drug_N,drug_S,back_color,drug_dir,light_color,camera_la,...,form_code_name,mark_code_front_anal,mark_code_back_anal,mark_code_front_img,mark_code_back_img,mark_code_front,mark_code_back,change_date,id,source_file
0,K-001900-010224-016551-031705_0_2_0_2_70_000_2...,976,1280,K-001900-010224-016551-031705_0_2_0_2_70_000_2...,K-001900,정상알약,연회색 배경,앞면,주백색,70,...,나정,,,,,,,20160825,1417,K-001900-010224-016551-031705_0_2_0_2_70_000_2...
1,K-001900-010224-016551-031705_0_2_0_2_75_000_2...,976,1280,K-001900-010224-016551-031705_0_2_0_2_75_000_2...,K-001900,정상알약,연회색 배경,앞면,주백색,75,...,나정,,,,,,,20160825,1418,K-001900-010224-016551-031705_0_2_0_2_75_000_2...
2,K-001900-010224-016551-031705_0_2_0_2_90_000_2...,976,1280,K-001900-010224-016551-031705_0_2_0_2_90_000_2...,K-001900,정상알약,연회색 배경,앞면,주백색,90,...,나정,,,,,,,20160825,1419,K-001900-010224-016551-031705_0_2_0_2_90_000_2...
3,K-001900-010224-016551-031705_0_2_0_2_70_000_2...,976,1280,K-001900-010224-016551-031705_0_2_0_2_70_000_2...,K-016551,정상알약,연회색 배경,앞면,주백색,70,...,필름코팅정,,,,,,,20190124,1417,K-001900-010224-016551-031705_0_2_0_2_70_000_2...
4,K-001900-010224-016551-031705_0_2_0_2_75_000_2...,976,1280,K-001900-010224-016551-031705_0_2_0_2_75_000_2...,K-016551,정상알약,연회색 배경,앞면,주백색,75,...,필름코팅정,,,,,,,20190124,1418,K-001900-010224-016551-031705_0_2_0_2_75_000_2...


In [52]:
df_annotations.head()

Unnamed: 0,area,iscrowd,bbox,category_id,ignore,segmentation,id,image_id,source_file
0,33180,0,"[645, 859, 210, 158]",1899,0,[],5393,1417,K-001900-010224-016551-031705_0_2_0_2_70_000_2...
1,29008,0,"[128, 258, 196, 148]",1899,0,[],5395,1418,K-001900-010224-016551-031705_0_2_0_2_75_000_2...
2,30044,0,"[144, 264, 203, 148]",1899,0,[],5394,1419,K-001900-010224-016551-031705_0_2_0_2_90_000_2...
3,128250,0,"[566, 131, 285, 450]",16550,0,[],5390,1417,K-001900-010224-016551-031705_0_2_0_2_70_000_2...
4,142848,0,"[109, 648, 288, 496]",16550,0,[],5391,1418,K-001900-010224-016551-031705_0_2_0_2_75_000_2...


In [54]:
df_categories.head()

Unnamed: 0,supercategory,id,name,source_file
0,pill,1899,보령부스파정 5mg,K-001900-010224-016551-031705_0_2_0_2_70_000_2...
1,pill,1899,보령부스파정 5mg,K-001900-010224-016551-031705_0_2_0_2_75_000_2...
2,pill,1899,보령부스파정 5mg,K-001900-010224-016551-031705_0_2_0_2_90_000_2...
3,pill,16550,동아가바펜틴정 800mg,K-001900-010224-016551-031705_0_2_0_2_70_000_2...
4,pill,16550,동아가바펜틴정 800mg,K-001900-010224-016551-031705_0_2_0_2_75_000_2...


In [57]:
df_merged.head()

Unnamed: 0,area,iscrowd,bbox,category_id,ignore,segmentation,id_ann,image_id,source_file_ann,file_name,...,form_code_name,mark_code_front_anal,mark_code_back_anal,mark_code_front_img,mark_code_back_img,mark_code_front,mark_code_back,change_date,id_img,source_file_img
0,33180,0,"[645, 859, 210, 158]",1899,0,[],5393,1417,K-001900-010224-016551-031705_0_2_0_2_70_000_2...,K-001900-010224-016551-031705_0_2_0_2_70_000_2...,...,나정,,,,,,,20160825,1417,K-001900-010224-016551-031705_0_2_0_2_70_000_2...
1,33180,0,"[645, 859, 210, 158]",1899,0,[],5393,1417,K-001900-010224-016551-031705_0_2_0_2_70_000_2...,K-001900-010224-016551-031705_0_2_0_2_70_000_2...,...,필름코팅정,,,,,,,20190124,1417,K-001900-010224-016551-031705_0_2_0_2_70_000_2...
2,33180,0,"[645, 859, 210, 158]",1899,0,[],5393,1417,K-001900-010224-016551-031705_0_2_0_2_70_000_2...,K-001900-010224-016551-031705_0_2_0_2_70_000_2...,...,필름코팅정,Hanmi,,http://connectdi.com/design/img/drug/147938669...,,,,20200901,1417,K-001900-010224-016551-031705_0_2_0_2_70_000_2...
3,29008,0,"[128, 258, 196, 148]",1899,0,[],5395,1418,K-001900-010224-016551-031705_0_2_0_2_75_000_2...,K-001900-010224-016551-031705_0_2_0_2_75_000_2...,...,나정,,,,,,,20160825,1418,K-001900-010224-016551-031705_0_2_0_2_75_000_2...
4,29008,0,"[128, 258, 196, 148]",1899,0,[],5395,1418,K-001900-010224-016551-031705_0_2_0_2_75_000_2...,K-001900-010224-016551-031705_0_2_0_2_75_000_2...,...,필름코팅정,,,,,,,20190124,1418,K-001900-010224-016551-031705_0_2_0_2_75_000_2...
