In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/umr-ml-2025-hackathon-2/train.csv
/kaggle/input/umr-ml-2025-hackathon-2/test/test/7981.jpg
/kaggle/input/umr-ml-2025-hackathon-2/test/test/6234.jpg
/kaggle/input/umr-ml-2025-hackathon-2/test/test/1269.jpg
/kaggle/input/umr-ml-2025-hackathon-2/test/test/3863.jpg
/kaggle/input/umr-ml-2025-hackathon-2/test/test/6241.jpg
/kaggle/input/umr-ml-2025-hackathon-2/test/test/623.jpg
/kaggle/input/umr-ml-2025-hackathon-2/test/test/2193.jpg
/kaggle/input/umr-ml-2025-hackathon-2/test/test/3750.jpg
/kaggle/input/umr-ml-2025-hackathon-2/test/test/2008.jpg
/kaggle/input/umr-ml-2025-hackathon-2/test/test/5982.jpg
/kaggle/input/umr-ml-2025-hackathon-2/test/test/7737.jpg
/kaggle/input/umr-ml-2025-hackathon-2/test/test/2081.jpg
/kaggle/input/umr-ml-2025-hackathon-2/test/test/6588.jpg
/kaggle/input/umr-ml-2025-hackathon-2/test/test/7966.jpg
/kaggle/input/umr-ml-2025-hackathon-2/test/test/3919.jpg
/kaggle/input/umr-ml-2025-hackathon-2/test/test/6197.jpg
/kaggle/input/umr-ml-2025-hackathon-2/tes

In [2]:
# ==========================
# [1] 데이터 준비
# ==========================
import pandas as pd
import os

# 경로 설정
TRAIN_CSV = "/kaggle/input/umr-ml-2025-hackathon-2/train.csv"
TRAIN_IMG_DIR = "/kaggle/input/umr-ml-2025-hackathon-2/train/train"
TEST_IMG_DIR = "/kaggle/input/umr-ml-2025-hackathon-2/test/test"

# CSV 불러오기
df = pd.read_csv(TRAIN_CSV)

# 클래스 인코딩
labels = sorted(df["class"].unique())
label2idx = {label: idx for idx, label in enumerate(labels)}
idx2label = {idx: label for label, idx in label2idx.items()}

# 정수 인코딩 컬럼 추가
df["label_idx"] = df["class"].map(label2idx)

# 이미지 경로 추가
df["image_path"] = df["fn"].apply(lambda x: os.path.join(TRAIN_IMG_DIR, x))

# 확인
print("총 클래스 수:", len(labels))
print("라벨 인코딩:", label2idx)
print(df.head())

# ==========================
# [2] 이미지 불러오기 및 전처리
# ==========================
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np

def load_images(paths, size=(64, 64)):
    return np.array([img_to_array(load_img(p, target_size=size))/255.0 for p in paths])

X = load_images(df["image_path"])
y = pd.get_dummies(df["label_idx"]).values  # One-hot

# ==========================
# [3] 학습/검증 데이터 분할
# ==========================
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# ==========================
# [4] 모델 정의
# ==========================
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(64,64,3)),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(labels), activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# ==========================
# [5] 모델 학습
# ==========================
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(patience=3, restore_best_weights=True)

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=15,
    batch_size=32,
    callbacks=[early_stop]
)

# ==========================
# [6] 테스트셋 예측
# ==========================
# 테스트 이미지 경로 가져오기
test_fns = sorted(os.listdir(TEST_IMG_DIR))  # 예: ['18001.jpg', ..., '27000.jpg']
test_paths = [os.path.join(TEST_IMG_DIR, fn) for fn in test_fns]

X_test = load_images(test_paths)

# 예측
y_pred_probs = model.predict(X_test)
y_pred_idx = np.argmax(y_pred_probs, axis=1)
y_pred_labels = [idx2label[idx] for idx in y_pred_idx]

# ==========================
# [7] 제출 파일 생성
# ==========================
submission = pd.DataFrame({
    "fn": test_fns,
    "class": y_pred_labels
})
submission.to_csv("submission.csv", index=False)
print(submission.head())


총 클래스 수: 10
라벨 인코딩: {'AnnualCrop': 0, 'Forest': 1, 'HerbaceousVegetation': 2, 'Highway': 3, 'Industrial': 4, 'Pasture': 5, 'PermanentCrop': 6, 'Residential': 7, 'River': 8, 'SeaLake': 9}
      fn                 class  label_idx  \
0  1.jpg                Forest          1   
1  2.jpg                 River          8   
2  3.jpg               Pasture          5   
3  4.jpg                 River          8   
4  5.jpg  HerbaceousVegetation          2   

                                          image_path  
0  /kaggle/input/umr-ml-2025-hackathon-2/train/tr...  
1  /kaggle/input/umr-ml-2025-hackathon-2/train/tr...  
2  /kaggle/input/umr-ml-2025-hackathon-2/train/tr...  
3  /kaggle/input/umr-ml-2025-hackathon-2/train/tr...  
4  /kaggle/input/umr-ml-2025-hackathon-2/train/tr...  


2025-07-18 17:08:00.174103: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752858480.369129      18 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752858480.422534      18 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1752858570.826755      18 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Epoch 1/15


I0000 00:00:1752858575.944672      62 service.cc:148] XLA service 0x7efd100048e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1752858575.945294      62 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1752858576.199168      62 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m 41/450[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 4ms/step - accuracy: 0.1193 - loss: 2.3362

I0000 00:00:1752858578.344571      62 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - accuracy: 0.2918 - loss: 1.8289 - val_accuracy: 0.5744 - val_loss: 1.2188
Epoch 2/15
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.5273 - loss: 1.2835 - val_accuracy: 0.6547 - val_loss: 0.9787
Epoch 3/15
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.6177 - loss: 1.0682 - val_accuracy: 0.6525 - val_loss: 0.9395
Epoch 4/15
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.6635 - loss: 0.9540 - val_accuracy: 0.7417 - val_loss: 0.7499
Epoch 5/15
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.7119 - loss: 0.8240 - val_accuracy: 0.7556 - val_loss: 0.6819
Epoch 6/15
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.7197 - loss: 0.7869 - val_accuracy: 0.7783 - val_loss: 0.6394
Epoch 7/15
[1m450/450[0m [32m━━━━━━━