In [None]:
# !pip install -U ultralytics

import os, shutil, glob, random
import kagglehub
from pathlib import Path
random.seed(42)  # seed Python's random module, which the random.shuffle calls later in this cell draw from

# Download the ships dataset through kagglehub; ds_path is the location it returns.
ds_path = kagglehub.dataset_download("oleksandershevchenko/ship-classification-dataset")

# The working copy goes to "<current working directory>/ship-classification-dataset".
target_dir = str(Path.cwd() / "ship-classification-dataset")

# Drop any copy left over from an earlier run: copytree (without dirs_exist_ok)
# refuses to write into a destination that already exists.
if Path(target_dir).exists():
    shutil.rmtree(target_dir)
shutil.copytree(src=ds_path, dst=target_dir)

print("Copied to:", target_dir)

dir_train = './ship-classification-dataset/ships_dataset/train/'
dir_valid = './ship-classification-dataset/ships_dataset/valid/'
dir_test = './ship-classification-dataset/ships_dataset/test/'


def collect_labeled_files(root_dir):
    """Recursively list every file under ``root_dir`` together with its label.

    The label is the part of the file name before the first underscore (the
    whole file name when it contains no underscore).

    Directory listings come back from the OS in arbitrary order, so both the
    sub-directories and the file names are sorted here. That gives a stable
    order on every machine, which is what makes a later seeded shuffle
    repeatable.

    Args:
        root_dir: Directory to walk. A missing directory yields two empty lists.

    Returns:
        Tuple ``(paths, labels)`` of two parallel lists.
    """
    paths, labels = [], []
    for dirname, subdirs, filenames in os.walk(root_dir):
        # Sorting in place steers the order in which os.walk descends.
        subdirs.sort()
        for filename in sorted(filenames):
            paths.append(os.path.join(dirname, filename))
            labels.append(filename.split('_')[0])
    return paths, labels


file_train, list_train = collect_labeled_files(dir_train)
file_valid, list_valid = collect_labeled_files(dir_valid)
file_test, list_test = collect_labeled_files(dir_test)

def shuffle_in_unison(paths, labels):
    """Shuffle ``paths`` and ``labels`` with one shared random permutation.

    The two sequences are zipped into pairs, the pairs are shuffled with
    ``random.shuffle`` (so the module-level seed applies), and the pairs are
    split back into two new lists. The inputs are not modified. Empty input
    gives two empty lists instead of failing while unpacking.

    Args:
        paths: Sequence of file paths.
        labels: Sequence of labels, parallel to ``paths``.

    Returns:
        Tuple ``(shuffled_paths, shuffled_labels)`` of two new lists.
    """
    pairs = list(zip(paths, labels))
    random.shuffle(pairs)
    return [path for path, _ in pairs], [label for _, label in pairs]


# Shuffle each split while keeping every file path aligned with its label.
file_train, list_train = shuffle_in_unison(file_train, list_train)
file_valid, list_valid = shuffle_in_unison(file_valid, list_valid)
file_test, list_test = shuffle_in_unison(file_test, list_test)

# Preview: the first ten training file paths, followed by the first ten labels.
for preview_item in (*file_train[:10], *list_train[:10]):
    print(preview_item)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import cv2

# --- 1. Load the data ---
img_size = (128, 128)  # target size handed to load_images below; change if needed

def load_images(file_paths, labels, img_size):
    """Read images from disk, resize them, and pair them with their labels.

    ``cv2.imread`` returns ``None`` instead of raising when a file cannot be
    decoded (for example a stray non-image file). Such files are skipped
    together with their label so the two returned arrays stay aligned, and a
    single warning reports how many were skipped and which ones.

    Args:
        file_paths: Iterable of image file paths.
        labels: Iterable of labels, parallel to ``file_paths``.
        img_size: Target size, passed to ``cv2.resize`` as its ``dsize`` argument.

    Returns:
        Tuple ``(X, y)``. ``X`` is a float32 array stacking the resized images
        (pixel values are not rescaled here). ``y`` is an array holding the
        labels of the images that were actually loaded.
    """
    import warnings

    X, y, unreadable = [], [], []
    for f, lbl in zip(file_paths, labels):
        img = cv2.imread(f)
        if img is None:
            # Not decodable as an image: drop both the file and its label.
            unreadable.append(f)
            continue
        X.append(cv2.resize(img, img_size))
        y.append(lbl)

    if unreadable:
        shown = ", ".join(str(path) for path in unreadable[:5])
        more = "" if len(unreadable) <= 5 else ", ..."
        warnings.warn(
            f"load_images skipped {len(unreadable)} unreadable file(s): {shown}{more}"
        )
    return np.array(X, dtype="float32"), np.array(y)

# Load each split with load_images (defined earlier in this cell).
X_train, y_train = load_images(file_paths=file_train, labels=list_train, img_size=img_size)
X_valid, y_valid = load_images(file_paths=file_valid, labels=list_valid, img_size=img_size)
X_test, y_test = load_images(file_paths=file_test, labels=list_test, img_size=img_size)

# Normalization: divide every split by 255 in place.
for split_pixels in (X_train, X_valid, X_test):
    split_pixels /= 255.0

# --- 2. Label encoding ---
# The encoder is fitted on the training labels only; the validation and test
# labels are mapped with that same fitted encoder.
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_valid_enc, y_test_enc = (
    le.transform(split_labels) for split_labels in (y_valid, y_test)
)

# One-hot encode the integer class ids of every split.
num_classes = len(le.classes_)
y_train_cat, y_valid_cat, y_test_cat = (
    to_categorical(encoded, num_classes)
    for encoded in (y_train_enc, y_valid_enc, y_test_enc)
)

print("클래스:", le.classes_)