In [None]:
!pip install ultralytics



In [None]:
import os
import json

# Folder that holds the JSON files to convert, and the folder that receives the .txt copies.
input_dir = "/content/drive/MyDrive/mission1/labels/train/TL_KS_BBOX_json"
output_dir = "/content/drive/MyDrive/mission1/labels/train/TL_KS_BBOX_txt"

# Check the input folder first so that a missing folder produces a clear message
# and no output folder is created as a side effect.
if not os.path.isdir(input_dir):
    raise FileNotFoundError(
        f"입력 폴더를 찾을 수 없습니다: {input_dir} (Google Drive 마운트 여부를 확인하세요)"
    )

# Create the output folder if it does not exist yet.
os.makedirs(output_dir, exist_ok=True)

# Process every .json file (sorted so the processing order is reproducible).
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith(".json"):
        continue

    json_path = os.path.join(input_dir, filename)
    # Swap only the trailing ".json"; str.replace would also rewrite a ".json"
    # that happens to appear in the middle of the file name.
    txt_path = os.path.join(output_dir, filename[: -len(".json")] + ".txt")

    # Read the JSON document.
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Save it as .txt (pretty-printed JSON, non-ASCII characters kept as-is).
    with open(txt_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

    print(f"변환 완료: {json_path} → {txt_path}")


print("작업종료")


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/mission1/labels/train/TL_KS_BBOX_json'

In [None]:
# Convert each JSON bounding box (x, y, w, h in pixels) to values in the 0~1 range and save it.
# Every line of the .txt file holds class_id followed by the normalized box (cx, cy, w, h).
# Author's note: this step is what allows training with imgsz 256 instead of 512.
# When a file has several boxes, they are written one per line.


import os
import json

input_dir = "/content/drive/MyDrive/mission1/labels/train/TL_KS_BBOX_json"
output_dir = "/content/drive/MyDrive/mission1/labels/train/1TL_KS_BBOX_txt"

# Check the input folder first so that a missing folder produces a clear message
# and no output folder is created as a side effect.
if not os.path.isdir(input_dir):
    raise FileNotFoundError(
        f"입력 폴더를 찾을 수 없습니다: {input_dir} (Google Drive 마운트 여부를 확인하세요)"
    )
os.makedirs(output_dir, exist_ok=True)

# chi_id -> YOLO class mapping
CLASS_MAP = {"1": 0}  # chimney is class 0

for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith(".json"):
        continue

    json_path = os.path.join(input_dir, filename)
    # Swap only the trailing ".json"; str.replace would also rewrite a ".json"
    # that happens to appear in the middle of the file name.
    txt_path = os.path.join(output_dir, filename[: -len(".json")] + ".txt")

    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    lines = []
    for entry in data.values():
        # int(float(...)) also accepts sizes stored as e.g. "512.0".
        img_w = int(float(entry["file_attributes"]["img_width"]))
        img_h = int(float(entry["file_attributes"]["img_height"]))
        if img_w <= 0 or img_h <= 0:
            # A box cannot be normalized without a positive image size.
            continue

        for region in entry["regions"]:
            shape = region["shape_attributes"]
            attrs = region["region_attributes"]

            chi_id = attrs.get("chi_id")
            if chi_id is None:
                continue
            # Normalize to a stripped string so values such as 1 or "1 " match the map keys.
            chi_id = str(chi_id).strip()

            # Only classes listed in CLASS_MAP (chimney) are exported.
            if chi_id not in CLASS_MAP:
                continue

            x = float(shape["x"])
            y = float(shape["y"])
            w = float(shape["width"])
            h = float(shape["height"])

            # Clip the box to the image so every normalized value stays inside 0~1.
            left = max(0.0, x)
            top = max(0.0, y)
            right = min(float(img_w), x + w)
            bottom = min(float(img_h), y + h)
            box_w = right - left
            box_h = bottom - top
            if box_w <= 0 or box_h <= 0:
                # Empty box, or a box lying completely outside the image.
                continue

            cx = (left + box_w / 2) / img_w
            cy = (top + box_h / 2) / img_h
            nw = box_w / img_w
            nh = box_h / img_h

            class_id = CLASS_MAP[chi_id]
            lines.append(f"{class_id} {cx:.6f} {cy:.6f} {nw:.6f} {nh:.6f}")

    # Save the label file (it is empty when no region matched).
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))

    print(f"YOLO 변환 완료: {json_path} → {txt_path}")

print("작업종료")


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/mission1/labels/train/TL_KS_BBOX_json'

In [None]:
# Check how many annotations each class (chi_id) has

import os
import json
from collections import Counter

# Folder that holds the JSON annotation files
input_dir = "/content/drive/MyDrive/mission1/labels/train/TL_KS_BBOX_json"

# Raw chi_id values exactly as they appear in the files (converted to str only).
chi_ids = []

# Walk through every JSON file
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith(".json"):
        continue

    json_path = os.path.join(input_dir, filename)
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Pull chi_id out of every region of every top-level entry
    for entry in data.values():
        for region in entry.get("regions", []):
            chi_id = region.get("region_attributes", {}).get("chi_id")
            if chi_id is not None:
                chi_ids.append(str(chi_id))

# Count after stripping whitespace so that variants such as "2 " are merged into "2"
# instead of being reported as a separate class.
counter = Counter(raw_id.strip() for raw_id in chi_ids)
print("발견된 chi_id 클래스 목록:")
for k, v in sorted(counter.items()):
    print(f"  chi_id={k}: {v}개")

# Still report the raw values that needed stripping, so dirty labels stay visible.
untrimmed = Counter(raw_id for raw_id in chi_ids if raw_id != raw_id.strip())
if untrimmed:
    print("공백이 포함된 chi_id 원본 값 (위 집계에는 공백 제거 후 합산됨):")
    for raw_id, v in sorted(untrimmed.items()):
        print(f"  {raw_id!r}: {v}개")


발견된 chi_id 클래스 목록:
  chi_id=1: 8078개
  chi_id=2: 1852개
  chi_id=3: 518개
  chi_id=4: 156개
  chi_id=5: 34개
  chi_id=6: 8개
  chi_id=2 : 1개
  chi_id=7: 3개


In [None]:
# Overlay the JSON annotations on the matching image files and display them
# Author's finding from this step: the value shown as class_id (chi_id) turned out to
# indicate the number of chimneys in the image

import os, json, cv2
from glob import glob

IMG_DIR = "C:/dcc/data/mission1/training/TS_KS/"
JSON_DIR = "C:/dcc/data/mission1/training/TL_KS_BBOX/"
IMG_EXTS = [".jpg",".PNG"]

# File stems (names without extension) of every JSON annotation, in sorted order.
files = sorted([os.path.splitext(f)[0] for f in os.listdir(JSON_DIR) if f.endswith(".json")])
if not files:
    # Raise instead of calling exit(): exit() is meant for interactive shells/scripts and is
    # not a reliable way to stop a notebook cell, while an exception always stops the cell.
    raise FileNotFoundError("[오류] JSON이 없습니다.")

def find_image_path(img_dir, stem, preferred_filename=None):
    """Locate the image file that belongs to an annotation.

    Candidates are tried in this order and the first existing path wins:
      1. ``preferred_filename`` exactly as given (when provided),
      2. the base name of ``preferred_filename`` with each extension in IMG_EXTS,
      3. ``stem`` with each extension in IMG_EXTS,
      4. the first result of globbing ``stem + ".*"`` inside ``img_dir``.

    Returns the path as a string, or None when nothing matches.
    """
    def _candidates():
        # Generated lazily so later candidates are only built when earlier ones are missing.
        if preferred_filename:
            yield os.path.join(img_dir, preferred_filename)
            preferred_base = os.path.splitext(preferred_filename)[0]
            for suffix in IMG_EXTS:
                yield os.path.join(img_dir, preferred_base + suffix)
        for suffix in IMG_EXTS:
            yield os.path.join(img_dir, stem + suffix)

    for candidate in _candidates():
        if os.path.exists(candidate):
            return candidate

    # Last resort: any file whose name is the stem followed by some extension.
    wildcard_hits = glob(os.path.join(img_dir, stem + ".*"))
    if wildcard_hits:
        return wildcard_hits[0]
    return None

# Index of the annotation file currently shown, and the single viewer window.
idx = 0
win = "viewer"
cv2.namedWindow(win, cv2.WINDOW_NORMAL)
cv2.resizeWindow(win, 900, 900)

def render(i):
    """Show annotation file number ``i`` (an index into ``files``) in the viewer window.

    For every top-level entry of the JSON file the matching image is located with
    ``find_image_path``, each region's rectangle and ``class_id:<chi_id>`` label are drawn
    in red, and the result is shown with ``cv2.imshow``. When no entry yields a
    displayable image, a white placeholder canvas is shown instead so navigation
    keeps working.
    """
    # Local import: numpy is only needed for the fallback decode and the placeholder canvas.
    import numpy as np

    name = files[i]
    json_path = os.path.join(JSON_DIR, name + ".json")
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    shown = False
    for entry in data.values():
        fname = entry.get("filename", None)
        stem  = os.path.splitext(fname)[0] if fname else name
        img_path = find_image_path(IMG_DIR, stem, preferred_filename=fname)

        print("이미지 경로 확인:", img_path)
        if not img_path:
            print(f"[경고] 이미지 없음 → 패스: {stem}")
            continue

        # Try the normal loader first.
        try:
            img = cv2.imread(img_path)
        except cv2.error:
            img = None
        if img is None:
            # Work-around for paths cv2.imread cannot open (e.g. Korean/special characters):
            # read the bytes with Python and let OpenCV decode them from memory.
            with open(img_path, "rb") as f:
                buf = np.frombuffer(f.read(), np.uint8)
            # An empty buffer cannot be decoded, so skip the decode call in that case.
            img = cv2.imdecode(buf, cv2.IMREAD_COLOR) if buf.size else None
        if img is None:
            # Still not decodable: skip this entry instead of failing inside the drawing calls.
            print(f"[경고] 이미지 디코딩 실패 → 패스: {img_path}")
            continue

        for region in entry.get("regions", []):
            s  = region["shape_attributes"]
            ra = region["region_attributes"]
            chi_id = ra.get("chi_id", "?")
            x, y = int(s["x"]), int(s["y"])
            w, h = int(s["width"]), int(s["height"])
            cv2.rectangle(img, (x,y), (x+w,y+h), (0,0,255), 2)
            # Keep the label inside the image when the box touches the top edge.
            cv2.putText(img, f"class_id:{chi_id}", (x, max(y-5,0)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)

        cv2.setWindowTitle(win, f"{os.path.basename(img_path)} ({i+1}/{len(files)})")
        cv2.imshow(win, img)
        shown = True

    if not shown:
        # Show a white canvas anyway so the user can still move to the next/previous file.
        blank = np.full((900, 900, 3), 255, dtype=np.uint8)
        cv2.putText(blank, "No displayable image", (40, 450),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)
        # Update the title too; otherwise it would keep the name of the previous image.
        cv2.setWindowTitle(win, f"{name} ({i+1}/{len(files)})")
        cv2.imshow(win, blank)

# Initial render
render(idx)
print("[안내] ←/→ 또는 A/D: 이전/다음, ESC: 종료")

# Full key codes that cv2.waitKeyEx is expected to report for the arrow keys on the
# Win32 backend (NOTE(review): backend specific - confirm on the target machine).
WIN_LEFT_ARROW = 0x250000
WIN_RIGHT_ARROW = 0x270000

while True:
    # waitKeyEx keeps the full key code; masking it with 0xFF (as waitKey-based code does)
    # would reduce the Win32 arrow codes above to 0 and make the arrow keys unusable.
    raw_key = cv2.waitKeyEx(0)
    key = raw_key & 0xFF
    # Low byte: 81 (←) and 83 (→) as used by the original loop, a/A: 97/65, d/D: 100/68
    if key == 27:  # ESC
        break
    elif raw_key == WIN_LEFT_ARROW or key in (81, 97, 65):  # left or 'a'/'A'
        idx = (idx - 1) % len(files)
        render(idx)
    elif raw_key == WIN_RIGHT_ARROW or key in (83, 100, 68):  # right or 'd'/'D'
        idx = (idx + 1) % len(files)
        render(idx)

cv2.destroyAllWindows()


In [None]:
# Convert the train annotations
# Convert each JSON bounding box (x, y, w, h in pixels) to values in the 0~1 range and save it.
# Every line of the .txt file holds class_id followed by the normalized box (cx, cy, w, h).
# Author's note: this step is what allows training with imgsz 256 instead of 512.
# When a file has several boxes, they are written one per line.
# Every chi_id from 1 to 7 is treated as chimney (class 0).

import os
import json

input_dir = "/content/drive/MyDrive/mission1/labels/train/TL_KS_BBOX_json"
output_dir = "/content/drive/MyDrive/mission1/labels/train/1TL_KS_BBOX_txt"

# chi_id 1~7 -> class 0 (chimney)
CLASS_WHITELIST = {str(i) for i in range(1, 8)}  # {"1","2","3","4","5","6","7"}
CHIMNEY_CLASS_ID = 0  # single class: a chimney is present


def label_filename(json_filename):
    """Return the .txt label name for a file name that ends with ".json".

    Only the trailing ".json" is swapped; a ".json" inside the name is left alone
    (``str.replace`` would rewrite every occurrence).
    """
    return json_filename[: -len(".json")] + ".txt"


def entry_to_yolo_lines(entry):
    """Build the YOLO label lines for one top-level entry of an annotation file.

    Args:
        entry: dict with optional "file_attributes" (holding "img_width" and
            "img_height") and "regions" (each holding "shape_attributes" with
            x / y / width / height and "region_attributes" with "chi_id").

    Returns:
        A list of "<class> <cx> <cy> <w> <h>" strings with six decimals each.
        The list is empty when the image size is missing or not positive.
        Regions are skipped when chi_id is missing or not in CLASS_WHITELIST,
        when a coordinate is missing or not numeric, or when the box has no area
        left after being clipped to the image.
    """
    fa = entry.get("file_attributes", {})
    try:
        img_w = int(float(fa.get("img_width", 0)))
        img_h = int(float(fa.get("img_height", 0)))
    except (TypeError, ValueError, OverflowError):
        return []
    if img_w <= 0 or img_h <= 0:
        # A box cannot be normalized without a positive image size.
        return []

    lines = []
    for region in entry.get("regions", []):
        shape = region.get("shape_attributes", {})
        attrs = region.get("region_attributes", {})

        chi_id = attrs.get("chi_id")
        if chi_id is None:
            continue
        # Strip so that values such as "2 " are still recognised.
        if str(chi_id).strip() not in CLASS_WHITELIST:
            continue

        try:
            x = float(shape["x"])
            y = float(shape["y"])
            w = float(shape["width"])
            h = float(shape["height"])
        except (KeyError, TypeError, ValueError):
            continue

        # Clip the box to the image so every normalized value stays inside 0~1.
        left = max(0.0, x)
        top = max(0.0, y)
        right = min(float(img_w), x + w)
        bottom = min(float(img_h), y + h)
        box_w = right - left
        box_h = bottom - top
        if box_w <= 0 or box_h <= 0:
            # Empty box, or a box lying completely outside the image.
            continue

        # YOLO normalization (cx, cy, w, h)
        cx = (left + box_w / 2) / img_w
        cy = (top + box_h / 2) / img_h
        nw = box_w / img_w
        nh = box_h / img_h

        lines.append(f"{CHIMNEY_CLASS_ID} {cx:.6f} {cy:.6f} {nw:.6f} {nh:.6f}")
    return lines


def convert_json_dir_to_yolo(input_dir, output_dir, verbose=True):
    """Convert every .json annotation file in ``input_dir`` to a YOLO .txt label file.

    One .txt file is written per .json file, even when it ends up empty.

    Args:
        input_dir: folder containing the .json files.
        output_dir: folder that receives the .txt files (created when missing).
        verbose: print one line per converted file when True.

    Returns:
        The number of files converted.

    Raises:
        FileNotFoundError: when ``input_dir`` is not an existing folder.
    """
    # Check the input first so a missing folder gives a clear message and
    # no output folder is created as a side effect.
    if not os.path.isdir(input_dir):
        raise FileNotFoundError(
            f"입력 폴더를 찾을 수 없습니다: {input_dir} (Google Drive 마운트 여부를 확인하세요)"
        )
    os.makedirs(output_dir, exist_ok=True)

    converted = 0
    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith(".json"):
            continue

        json_path = os.path.join(input_dir, filename)
        txt_path = os.path.join(output_dir, label_filename(filename))

        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Collect the lines of every top-level entry of the file.
        lines = []
        for entry in data.values():
            lines.extend(entry_to_yolo_lines(entry))

        with open(txt_path, "w", encoding="utf-8") as f:
            f.write("\n".join(lines))

        converted += 1
        if verbose:
            print(f"YOLO 변환 완료: {json_path} → {txt_path}")
    return converted


# A notebook cell runs with __name__ == "__main__", so the conversion still runs here,
# while the helpers above can be imported elsewhere without touching the Drive folders.
if __name__ == "__main__":
    convert_json_dir_to_yolo(input_dir, output_dir)
    print("작업종료")


[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
YOLO 변환 완료: /content/drive/MyDrive/mission1/labels/train/TL_KS_BBOX_json/K3_CHN_20161206051028_29.json → /content/drive/MyDrive/mission1/labels/train/1TL_KS_BBOX_txt/K3_CHN_20161206051028_29.txt
YOLO 변환 완료: /content/drive/MyDrive/mission1/labels/train/TL_KS_BBOX_json/K3_CHN_20161204053052_31.json → /content/drive/MyDrive/mission1/labels/train/1TL_KS_BBOX_txt/K3_CHN_20161204053052_31.txt
YOLO 변환 완료: /content/drive/MyDrive/mission1/labels/train/TL_KS_BBOX_json/K3_CHN_20161206051028_21.json → /content/drive/MyDrive/mission1/labels/train/1TL_KS_BBOX_txt/K3_CHN_20161206051028_21.txt
YOLO 변환 완료: /content/drive/MyDrive/mission1/labels/train/TL_KS_BBOX_json/K3_CHN_20170101053324_31.json → /content/drive/MyDrive/mission1/labels/train/1TL_KS_BBOX_txt/K3_CHN_20170101053324_31.txt
YOLO 변환 완료: /content/drive/MyDrive/mission1/labels/train/TL_KS_BBOX_json/K3_CHN_20161229051642_6.json → /content/drive/MyDrive/mission1/labels/train/1TL_KS_BBOX_txt/K3_CH

In [None]:
# Convert the valid annotations
# Convert each JSON bounding box (x, y, w, h in pixels) to values in the 0~1 range and save it.
# Every line of the .txt file holds class_id followed by the normalized box (cx, cy, w, h).
# Author's note: this step is what allows training with imgsz 256 instead of 512.
# When a file has several boxes, they are written one per line.
# Every chi_id from 1 to 7 is treated as chimney (class 0).

import os
import json

input_dir = "/content/drive/MyDrive/mission1/labels/valid/VL_KS_BBOX_json"
output_dir = "/content/drive/MyDrive/mission1/labels/valid/VL_KS_BBOX"

# Check the input folder first so that a missing folder produces a clear message
# and no output folder is created as a side effect.
if not os.path.isdir(input_dir):
    raise FileNotFoundError(
        f"입력 폴더를 찾을 수 없습니다: {input_dir} (Google Drive 마운트 여부를 확인하세요)"
    )
os.makedirs(output_dir, exist_ok=True)

# chi_id 1~7 -> class 0 (chimney)
CLASS_WHITELIST = {str(i) for i in range(1, 8)}  # {"1","2","3","4","5","6","7"}
CHIMNEY_CLASS_ID = 0

for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith(".json"):
        continue

    json_path = os.path.join(input_dir, filename)
    # Swap only the trailing ".json"; str.replace would also rewrite a ".json"
    # that happens to appear in the middle of the file name.
    txt_path = os.path.join(output_dir, filename[: -len(".json")] + ".txt")

    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    lines = []
    # Walk through the entry stored under each top-level key
    for entry in data.values():
        fa = entry.get("file_attributes", {})
        try:
            img_w = int(float(fa.get("img_width", 0)))
            img_h = int(float(fa.get("img_height", 0)))
        except (TypeError, ValueError, OverflowError):
            img_w, img_h = 0, 0
        if img_w <= 0 or img_h <= 0:
            # Skip entries without a usable image size
            continue

        for region in entry.get("regions", []):
            shape = region.get("shape_attributes", {})
            attrs = region.get("region_attributes", {})

            chi_id = attrs.get("chi_id")
            if chi_id is None:
                continue
            chi_id = str(chi_id).strip()

            # Only 1~7 are used as chimney
            if chi_id not in CLASS_WHITELIST:
                continue

            # Read the coordinates; skip regions with missing or non-numeric values
            try:
                x = float(shape["x"])
                y = float(shape["y"])
                w = float(shape["width"])
                h = float(shape["height"])
            except (KeyError, TypeError, ValueError):
                continue

            # Clip the box to the image so every normalized value stays inside 0~1.
            left = max(0.0, x)
            top = max(0.0, y)
            right = min(float(img_w), x + w)
            bottom = min(float(img_h), y + h)
            box_w = right - left
            box_h = bottom - top
            if box_w <= 0 or box_h <= 0:
                # Empty box, or a box lying completely outside the image.
                continue

            # YOLO normalization (cx, cy, w, h)
            cx = (left + box_w / 2) / img_w
            cy = (top + box_h / 2) / img_h
            nw = box_w / img_w
            nh = box_h / img_h

            # Stored as chimney, class 0
            lines.append(f"{CHIMNEY_CLASS_ID} {cx:.6f} {cy:.6f} {nw:.6f} {nh:.6f}")

    # Save the labels (an empty file is written when nothing matched;
    # wrap this in `if lines:` if that is not wanted)
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))

    print(f"YOLO 변환 완료: {json_path} → {txt_path}")

print("작업종료")


YOLO 변환 완료: /content/drive/MyDrive/mission1/labels/valid/VL_KS_BBOX_json/K3A_CHN_20191204050655_55.json → /content/drive/MyDrive/mission1/labels/valid/VL_KS_BBOX/K3A_CHN_20191204050655_55.txt
YOLO 변환 완료: /content/drive/MyDrive/mission1/labels/valid/VL_KS_BBOX_json/K3A_CHN_20191231050844_15.json → /content/drive/MyDrive/mission1/labels/valid/VL_KS_BBOX/K3A_CHN_20191231050844_15.txt
YOLO 변환 완료: /content/drive/MyDrive/mission1/labels/valid/VL_KS_BBOX_json/K3A_CHN_20190513050616_2.json → /content/drive/MyDrive/mission1/labels/valid/VL_KS_BBOX/K3A_CHN_20190513050616_2.txt
YOLO 변환 완료: /content/drive/MyDrive/mission1/labels/valid/VL_KS_BBOX_json/K3A_CHN_20170723050144_20.json → /content/drive/MyDrive/mission1/labels/valid/VL_KS_BBOX/K3A_CHN_20170723050144_20.txt
YOLO 변환 완료: /content/drive/MyDrive/mission1/labels/valid/VL_KS_BBOX_json/K3A_CHN_20210114052054_22.json → /content/drive/MyDrive/mission1/labels/valid/VL_KS_BBOX/K3A_CHN_20210114052054_22.txt
YOLO 변환 완료: /content/drive/MyDrive/mission

In [None]:
!pip install ultralytics==8.*

Collecting ultralytics==8.*
  Downloading ultralytics-8.3.191-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics==8.*)
  Downloading ultralytics_thop-2.0.16-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.191-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.16-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.191 ultralytics-thop-2.0.16


In [None]:
import os

from ultralytics import YOLO

# Paths used by this cell, defined once so train / val / predict cannot drift apart.
DATA_YAML = "/content/drive/MyDrive/mission1/data.yaml"        # dataset yaml path
VAL_IMAGES_DIR = "/content/drive/MyDrive/mission1/images/val"  # source for the prediction step

# Fail fast: a missing path should stop the cell before the long training run,
# not at the prediction step after it.
for required_path in (DATA_YAML, VAL_IMAGES_DIR):
    if not os.path.exists(required_path):
        raise FileNotFoundError(f"{required_path} does not exist")

# 1) Load the model (yolov8m)
model = YOLO("yolov8m.pt")

# 2) Train
results = model.train(
    data=DATA_YAML,
    epochs=100,
    imgsz=512,          # 256/320/640 etc. also work (labels are normalized, so they are reused as-is)
    batch=16,
    workers=4,          # number of data-loader workers (adjust to the environment)
    project="runs",     # root folder for results
    name="yolov8m_chimney",  # experiment name
    exist_ok=False,      # False: do NOT overwrite an existing run with the same name
    verbose=True,
    patience=20,        # stop early when there is no improvement for 20 epochs
    seed=42,
)

# 3) Validation (optional): evaluate on the dataset described by the data yaml
metrics = model.val(data=DATA_YAML)

# 4) Prediction (annotated images are saved)
pred = model.predict(
    source=VAL_IMAGES_DIR,  # an image, a folder or a video is accepted
    conf=0.25,
    imgsz=512,
    save=True,
    project="runs",
    name="yolov8m_chimney_pred",
    exist_ok=True
)


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m.pt to 'yolov8m.pt': 100% ━━━━━━━━━━━━ 49.7/49.7MB 177.3MB/s 0.3s
Ultralytics 8.3.191 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (NVIDIA A100-SXM4-40GB, 40507MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/mission1/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.

FileNotFoundError: /content/drive/MyDrive/mission1/images/val does not exist