### 1. 전처리
- feature: 각 이미지에 대해 HSV, LAB, RGB 채널별 평균/표준편차, 저채도 피부 픽셀 비율(low_s_ratio), 대표(dominant) RGB 색상 2개
- 이미지 500 -> 2000으로 늘림

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from tqdm import tqdm

# Base configuration: dataset root, season identifiers, and the output CSV path.
dataset_path = '/content/drive/MyDrive/dataset_for_capstone'
seasons = [f'{name}_cropped' for name in ('spring', 'summer', 'autumn', 'winter')]
output_csv = '/content/drive/MyDrive/personal_color_features.csv'

In [2]:
# HSV-based skin mask
def skin_mask_hsv(img):
    """Return a binary mask of the pixels whose HSV values lie in the skin range.

    ``img`` is converted with ``cv2.COLOR_BGR2HSV``, i.e. it is treated as a
    BGR image. The output of ``cv2.inRange`` is returned as-is: non-zero where
    H is in [0, 50], S in [20, 255] and V in [70, 255], zero elsewhere.
    """
    skin_lo = np.array([0, 20, 70], dtype=np.uint8)
    skin_hi = np.array([50, 255, 255], dtype=np.uint8)
    return cv2.inRange(cv2.cvtColor(img, cv2.COLOR_BGR2HSV), skin_lo, skin_hi)

# Dominant color extraction
def extract_dominant_color(img, mask, k=2, random_state=0):
    """Return the ``k`` dominant colors of the masked pixels as a flat list.

    Args:
        img: image array of shape (H, W, 3); the channel order is kept as-is,
            so the returned colors use the same order as ``img``.
        mask: array of shape (H, W); pixels where ``mask > 0`` are clustered.
        k: number of dominant colors to return.
        random_state: seed forwarded to ``KMeans`` so repeated runs produce
            the same features. Pass ``None`` for non-deterministic seeding.

    Returns:
        A list of ``3 * k`` ints. Cluster centers are ordered from the largest
        cluster to the smallest, so the first triple is always the most
        dominant color. All zeros when the mask selects no pixel; zero-padded
        at the end when fewer than ``k`` pixels are available.
    """
    # Boolean indexing already restricts to masked pixels; no need to blank
    # out the rest of the image first.
    pixels = img[mask > 0]
    if len(pixels) == 0:
        return [0, 0, 0] * k

    # KMeans raises when n_samples < n_clusters, so cap the cluster count.
    n_clusters = min(k, len(pixels))
    kmeans = KMeans(n_clusters=n_clusters, n_init='auto', random_state=random_state)
    labels = kmeans.fit_predict(pixels)

    # KMeans returns clusters in arbitrary order; sort by population so that
    # "dominant color 1/2" means the same thing for every image.
    counts = np.bincount(labels, minlength=n_clusters)
    order = np.argsort(-counts, kind='stable')
    centers = kmeans.cluster_centers_[order].astype(int)

    flat = centers.flatten().tolist()
    flat.extend([0] * (k * centers.shape[1] - len(flat)))
    return flat

# Feature extraction
def extract_features(img_path):
    """Extract skin-color features from a single image file.

    The image is read with ``cv2.imread`` (BGR order), resized to 224x224 and
    masked with ``skin_mask_hsv``. All statistics are computed over the masked
    (skin) pixels only.

    Args:
        img_path: path of the image file to read.

    Returns:
        ``None`` when the file cannot be read or fewer than 50 skin pixels are
        found. Otherwise a list of 25 values, in this order:
        mean/std of H, S, V; mean/std of L, a, b; mean/std of R, G, B;
        the fraction of skin pixels with S < 40; and two dominant colors as
        (R, G, B) triples.
    """
    img = cv2.imread(img_path)
    if img is None:
        return None

    img = cv2.resize(img, (224, 224))
    mask = skin_mask_hsv(img)

    skin_indices = np.where(mask > 0)
    if len(skin_indices[0]) < 50:
        return None

    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    # Convert to real RGB so that channel index 0/1/2 is R/G/B, both for the
    # per-channel statistics and for the dominant colors below.
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    def channel_stats(channel):
        values = channel[skin_indices]
        return np.mean(values), np.std(values)

    # (mean, std) per channel, in the order HSV -> LAB -> RGB.
    stats = []
    for color_space in (hsv, lab, rgb):
        for channel_index in range(3):
            stats.extend(channel_stats(color_space[:, :, channel_index]))

    # Fraction of skin pixels with low saturation.
    S_vals = hsv[:, :, 1][skin_indices]
    low_s_ratio = np.sum(S_vals < 40) / len(S_vals)

    # Cluster on the RGB image: passing the BGR image here would store the
    # blue channel under the dom*_R columns and red under dom*_B.
    dom_colors = extract_dominant_color(rgb, mask, k=2)

    return [*stats, low_s_ratio, *dom_colors]


In [None]:
# Iterate over every season folder and collect one feature row per image.
results = []
folder_suffix = '_cropped'
for season in seasons:
    # Entries of `seasons` may be bare names or may already end in "_cropped".
    # Appending the suffix unconditionally produced "<season>_cropped_cropped",
    # which is not the intended folder name.
    if season.endswith(folder_suffix):
        folder_name = season
        label = season[:-len(folder_suffix)]
    else:
        folder_name = season + folder_suffix
        label = season

    folder = os.path.join(dataset_path, folder_name)
    # Sorted so the row order of the resulting CSV is deterministic.
    for fname in tqdm(sorted(os.listdir(folder)), desc=label):
        if not fname.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue
        features = extract_features(os.path.join(folder, fname))
        # extract_features returns None for unreadable images or images with
        # too few skin pixels; those are skipped.
        if features is not None:
            results.append([fname, label] + features)

In [None]:
# Column layout: identifiers, mean/std for each of the nine color channels,
# the low-saturation ratio, then the two dominant colors.
stat_columns = [f'{stat}_{channel}' for channel in 'HSVLabRGB' for stat in ('mean', 'std')]
dominant_columns = [f'dom{rank}_{channel}' for rank in (1, 2) for channel in 'RGB']
columns = ['image_name', 'season', *stat_columns, 'low_s_ratio', *dominant_columns]

# Save to CSV
df = pd.DataFrame(results, columns=columns)
df.to_csv(output_csv, index=False)
print(f"✅ Feature CSV saved to: {output_csv}")

### 2. CNN 모델 개발

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

In [None]:

# Load the per-image HSV values and the one-hot season labels.
csv_path = '/content/drive/MyDrive/personal_color_result.csv'
data = pd.read_csv(csv_path)

X = data[['H', 'S', 'V']].values  # HSV inputs
y = data[['spring', 'summer', 'autumn', 'winter']].values  # one-hot encoded season labels

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the inputs before feeding the network. The scaler is fitted on
# the training split only so no validation statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Add a trailing channel axis: Conv1D expects (samples, steps, channels).
X_train = X_train.reshape(-1, 3, 1)
X_val = X_val.reshape(-1, 3, 1)

In [None]:

# Build the CNN layer by layer.
model = Sequential()
model.add(Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(3, 1)))
model.add(Dropout(0.3))
model.add(Conv1D(filters=64, kernel_size=2, activation='relu'))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(4, activation='softmax'))  # 4 outputs (one per season)

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Stop when validation loss has not improved for 5 epochs and roll back to
# the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Train the model
fit_options = dict(
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping],
)
history = model.fit(X_train, y_train, **fit_options)

In [None]:
# Plot training vs. validation accuracy per epoch.
plt.figure(figsize=(12, 5))
for metric_key, curve_label in (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
):
    plt.plot(history.history[metric_key], label=curve_label)
plt.title('CNN Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, StratifiedKFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Load data
data = pd.read_csv('/content/drive/MyDrive/personal_color_result.csv')

# Inputs X and labels y: the season column holding the row maximum becomes a
# single integer class, which is then one-hot encoded.
X = data[['H', 'S', 'V']].values
label_map = {'spring':0, 'summer':1, 'autumn':2, 'winter':3}
y_labels = data[['spring', 'summer', 'autumn', 'winter']].idxmax(axis=1)
y = to_categorical(y_labels.map(label_map).values, 4)

# Scaling
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Simple model factory
def create_model():
    """Build and compile a fresh dense classifier.

    The network takes 3 input features and outputs a 4-way softmax; it is
    compiled with the Adam optimizer, categorical cross-entropy loss and an
    accuracy metric.
    """
    net = Sequential()
    net.add(Dense(64, activation='relu', input_shape=(3,)))
    net.add(Dropout(0.3))
    net.add(Dense(32, activation='relu'))
    net.add(Dense(4, activation='softmax'))
    net.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return net

# Stratified 5-fold cross-validation
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
acc_per_fold = []

# Split on the raw inputs: the fold indices depend only on the labels and the
# number of samples, so they are the same as when splitting scaled data.
for train_idx, val_idx in kfold.split(X, y_labels):
    # Fit the scaler on the training fold only. Scaling the whole dataset
    # before splitting leaks validation statistics into training and makes
    # the reported accuracy optimistic.
    fold_scaler = StandardScaler()
    X_train = fold_scaler.fit_transform(X[train_idx])
    X_val = fold_scaler.transform(X[val_idx])
    y_train, y_val = y[train_idx], y[val_idx]

    # A fresh model per fold so no weights carry over between folds.
    model = create_model()
    model.fit(X_train, y_train, epochs=30, batch_size=16, verbose=0)
    scores = model.evaluate(X_val, y_val, verbose=0)
    acc_per_fold.append(scores[1])
    print(f'Validation Accuracy: {scores[1]:.4f}')

print(f'Average Validation Accuracy: {np.mean(acc_per_fold):.4f}')