In [None]:
# CIFAR-100 데이터 불러오기
import numpy as np
import pickle
import os

# 1. Download the CIFAR-100 (python version) archive and extract it into the current working directory
!wget -q https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
!tar -xzf cifar-100-python.tar.gz

# 2. 데이터 로딩 함수
def load_cifar100(data_dir='/content/cifar-100-python'):
    """Read the CIFAR-100 ``train`` and ``test`` pickle files found in ``data_dir``.

    Args:
        data_dir: Directory that contains the two pickle files named
            ``train`` and ``test``.

    Returns:
        A pair ``(train, test)``. Each element is a tuple
        ``(images, fine_labels, coarse_labels)`` where ``images`` is a float32
        array of shape ``(N, 32, 32, 3)`` whose raw values were divided by 255,
        and both label entries are NumPy arrays built from the pickled lists.
    """
    def _read_split(path):
        # NOTE: pickle can execute arbitrary code while loading; only point
        # this at archives obtained from a trusted source.
        with open(path, 'rb') as handle:
            batch = pickle.load(handle, encoding='latin1')

        # Each stored row is channel-major; view it as (N, 3, 32, 32) and then
        # move the channel axis last to obtain (N, 32, 32, 3).
        channel_first = batch['data'].reshape(-1, 3, 32, 32)
        channel_last = np.transpose(channel_first, (0, 2, 3, 1))
        pixels = channel_last.astype('float32') / 255.0  # normalisation

        fine = np.array(batch['fine_labels'])
        coarse = np.array(batch['coarse_labels'])
        return pixels, fine, coarse

    train_split = _read_split(os.path.join(data_dir, 'train'))
    test_split = _read_split(os.path.join(data_dir, 'test'))
    return train_split, test_split

# 3. Load both splits into notebook-level variables
(
    (train_images, train_fine_labels, train_coarse_labels),
    (test_images, test_fine_labels, test_coarse_labels),
) = load_cifar100()

# 4. Sanity check: print the array shapes
print("Train images:", np.shape(train_images))
print("Train fine labels:", np.shape(train_fine_labels))
print("Test images:", np.shape(test_images))
print("Test fine labels:", np.shape(test_fine_labels))

Train images: (50000, 32, 32, 3)
Train fine labels: (50000,)
Test images: (10000, 32, 32, 3)
Test fine labels: (10000,)


In [None]:
import numpy as np
import os

class SimpleFeatureMLP:
    """Fully connected classifier implemented with NumPy only.

    Hidden layers use ReLU, the output layer uses softmax, and training is
    plain mini-batch SGD on the cross-entropy loss.

    Parameters are stored in ``self.params`` under the keys
    ``W1, b1, ..., WL, bL``; ``Wi`` has shape ``(fan_in, fan_out)`` and ``bi``
    has shape ``(1, fan_out)``. Both are created as float32.
    """

    def __init__(self, input_size=768, hidden_sizes=(512, 256), output_size=100):
        """Create the network and randomly initialise its weights.

        Args:
            input_size: Number of input features per sample.
            hidden_sizes: Sequence with the width of every hidden layer. It may
                be empty, in which case the model is a single softmax layer.
            output_size: Number of output classes.
        """
        self.params = {}
        self.init_weights(input_size, hidden_sizes, output_size)

    def init_weights(self, input_size, hidden_sizes, output_size):
        """Fill ``self.params`` with Xavier-uniform weights and zero biases."""
        # list(...) lets callers pass either a list or a tuple of widths.
        layer_sizes = [input_size] + list(hidden_sizes) + [output_size]
        for i, (fan_in, fan_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:]), start=1):
            limit = np.sqrt(6 / (fan_in + fan_out))  # Xavier (Glorot) uniform bound
            self.params[f'W{i}'] = np.random.uniform(-limit, limit, (fan_in, fan_out)).astype(np.float32)
            self.params[f'b{i}'] = np.zeros((1, fan_out), dtype=np.float32)

    def relu(self, x):
        """Element-wise ``max(0, x)``."""
        return np.maximum(0, x)

    def relu_derivative(self, x):
        """Return 1 where ``x > 0`` and 0 elsewhere.

        The mask keeps the floating dtype of ``x`` so that float32 activations
        are not silently promoted to float64 during back-propagation; any
        non-floating input yields a float64 mask.
        """
        x = np.asarray(x)
        mask_dtype = x.dtype if np.issubdtype(x.dtype, np.floating) else np.float64
        return (x > 0).astype(mask_dtype)

    def softmax(self, x):
        """Row-wise softmax of a 2-D array."""
        # Subtracting the row maximum avoids overflow in exp().
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def forward(self, X):
        """Run a forward pass.

        Args:
            X: Array of shape ``(n_samples, input_size)``.

        Returns:
            ``(probabilities, cache)`` where ``probabilities`` has shape
            ``(n_samples, output_size)`` and ``cache`` maps ``Z{i}`` / ``A{i}``
            to the pre-activation / activation of layer ``i``.
        """
        cache = {}
        A = X
        num_layers = len(self.params) // 2

        # Hidden layers: affine transform followed by ReLU.
        for i in range(1, num_layers):
            Z = np.dot(A, self.params[f'W{i}']) + self.params[f'b{i}']
            A = self.relu(Z)
            cache[f'Z{i}'] = Z
            cache[f'A{i}'] = A

        # Output layer: affine transform followed by softmax.
        Z_final = np.dot(A, self.params[f'W{num_layers}']) + self.params[f'b{num_layers}']
        A_final = self.softmax(Z_final)
        cache[f'Z{num_layers}'] = Z_final
        cache[f'A{num_layers}'] = A_final

        return A_final, cache

    def compute_loss(self, y_true, y_pred):
        """Mean cross-entropy of integer labels ``y_true`` under ``y_pred``."""
        n_samples = y_true.shape[0]
        # 1e-8 guards against log(0).
        log_probs = -np.log(y_pred[np.arange(n_samples), y_true] + 1e-8)
        return np.sum(log_probs) / n_samples

    def backward(self, X, y_true, cache):
        """Back-propagate the mean cross-entropy loss.

        Args:
            X: The batch that was passed to ``forward``.
            y_true: Integer class labels of shape ``(n_samples,)``.
            cache: The cache returned by ``forward`` for ``X``. It is not
                modified.

        Returns:
            Dict with gradients ``W{i}`` / ``b{i}`` matching ``self.params``.
        """
        grads = {}
        num_layers = len(self.params) // 2
        n_samples = X.shape[0]

        # Work on a copy: the cached softmax output is the very array that
        # forward() handed back to the caller, so it must not be overwritten.
        delta = cache[f'A{num_layers}'].copy()
        delta[np.arange(n_samples), y_true] -= 1
        delta /= n_samples

        for i in range(num_layers, 0, -1):
            # The input of layer 1 is X itself (also covers the case of a
            # model without hidden layers, where no 'A0' exists in the cache).
            A_prev = X if i == 1 else cache[f'A{i-1}']
            grads[f'W{i}'] = np.dot(A_prev.T, delta)
            grads[f'b{i}'] = np.sum(delta, axis=0, keepdims=True)
            if i > 1:
                # Propagate the error through W{i} and the ReLU of layer i-1.
                delta = np.dot(delta, self.params[f'W{i}'].T) * self.relu_derivative(cache[f'Z{i-1}'])

        return grads

    def update_params(self, grads, learning_rate):
        """Apply one in-place SGD step: ``param -= learning_rate * grad``."""
        num_layers = len(self.params) // 2
        for i in range(1, num_layers + 1):
            self.params[f'W{i}'] -= learning_rate * grads[f'W{i}']
            self.params[f'b{i}'] -= learning_rate * grads[f'b{i}']

    def save(self, filename):
        """Write all parameters to ``filename`` with ``np.savez``."""
        np.savez(filename, **self.params)

    def load(self, filename):
        """Replace every parameter with the array stored under the same key.

        Raises:
            KeyError: If the archive lacks one of the model's parameter keys.
        """
        # The context manager closes the archive's file handle once all
        # arrays have been read into memory.
        with np.load(filename) as data:
            for key in self.params.keys():
                self.params[key] = data[key]

    def train(self, X_train_full, y_train_full, epochs, learning_rate, batch_size=128, save_path=None, validation_split=0.1):
        """Train with mini-batch SGD and print metrics after every epoch.

        The trailing ``validation_split`` fraction of the data is held out for
        validation; the remaining samples are reshuffled every epoch. Only full
        batches are used, except that a training set smaller than
        ``batch_size`` is processed as one single batch.

        Args:
            X_train_full: Array of shape ``(n_samples, input_size)``.
            y_train_full: Integer labels of shape ``(n_samples,)``.
            epochs: Number of passes over the training part.
            learning_rate: SGD step size.
            batch_size: Number of samples per mini-batch.
            save_path: If not ``None``, parameters are saved there after every
                epoch.
            validation_split: Fraction in ``[0, 1)`` held out for validation.
                With no held-out samples the validation metrics print as nan.

        Raises:
            ValueError: If ``batch_size`` is not positive, ``validation_split``
                is outside ``[0, 1)``, or no training samples remain.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        if not 0 <= validation_split < 1:
            raise ValueError("validation_split must be in the range [0, 1)")

        n_samples = X_train_full.shape[0]
        n_train = int(n_samples * (1 - validation_split))
        if n_train <= 0:
            raise ValueError("no training samples left after the validation split")

        X_train = X_train_full[:n_train]
        y_train = y_train_full[:n_train]
        X_val = X_train_full[n_train:]
        y_val = y_train_full[n_train:]
        has_validation = X_val.shape[0] > 0

        # At least one batch, so that a small training set neither skips the
        # update step nor divides by zero when averaging the loss.
        num_batches = max(1, n_train // batch_size)

        for epoch in range(1, epochs + 1):
            indices = np.arange(n_train)
            np.random.shuffle(indices)
            X_train = X_train[indices]
            y_train = y_train[indices]

            epoch_loss = 0

            for batch_idx in range(num_batches):
                start = batch_idx * batch_size
                end = start + batch_size
                X_batch = X_train[start:end]
                y_batch = y_train[start:end]

                y_pred, cache = self.forward(X_batch)
                loss = self.compute_loss(y_batch, y_pred)
                grads = self.backward(X_batch, y_batch, cache)
                self.update_params(grads, learning_rate)

                epoch_loss += loss

            avg_train_loss = epoch_loss / num_batches
            train_acc = self.evaluate(X_train, y_train)

            if has_validation:
                # One forward pass yields both validation loss and accuracy.
                y_val_pred, _ = self.forward(X_val)
                val_loss = self.compute_loss(y_val, y_val_pred)
                val_acc = np.mean(np.argmax(y_val_pred, axis=1) == y_val)
            else:
                val_loss = val_acc = float('nan')

            if save_path is not None:
                self.save(save_path)

            print(f"Epoch {epoch}: Train Loss={avg_train_loss:.4f}, Val Loss={val_loss:.4f}, Train Acc={train_acc:.4f}, Val Acc={val_acc:.4f}")

    def evaluate(self, X, y_true):
        """Return the fraction of rows of ``X`` whose arg-max class equals ``y_true``."""
        y_pred, _ = self.forward(X)
        predictions = np.argmax(y_pred, axis=1)
        accuracy = np.mean(predictions == y_true)
        return accuracy

In [None]:
import os

# ✅ 데이터 준비
# (32x32x3 이미지를 16x16x3으로 다운샘플링 + Flatten)
def downsample_images(images, factor=2):
    """Spatially subsample a batch of images by keeping every ``factor``-th pixel.

    Args:
        images: Array indexed as ``(batch, height, width, channels)``.
        factor: Positive integer stride applied to both spatial axes. The
            default of 2 turns 32x32 images into 16x16 images.

    Returns:
        The strided slice ``images[:, ::factor, ::factor, :]``. For a NumPy
        array this is a view of the input, not a copy.

    Raises:
        ValueError: If ``factor`` is smaller than 1.
    """
    if factor < 1:
        raise ValueError("factor must be a positive integer")
    # Plain strided slicing: no averaging or interpolation is performed.
    return images[:, ::factor, ::factor, :]

# Subsample every training image and flatten it to a 16*16*3 = 768 feature vector
X_train = downsample_images(train_images).reshape(-1, 16*16*3).astype(np.float32)
y_train = train_fine_labels

# Build the model
model = SimpleFeatureMLP(input_size=16*16*3, hidden_sizes=[512, 256], output_size=100)

# Checkpoint location
model_save_path = '/content/optimized_simple_feature_mlp_checkpoint.npz'

# Resume from the checkpoint when one exists, otherwise start from scratch
if not os.path.exists(model_save_path):
    print("새 모델로 학습을 시작합니다...")
else:
    print("저장된 모델을 불러옵니다...")
    model.load(model_save_path)

# Training hyperparameters
epochs, learning_rate, batch_size = 50, 0.01, 128

# Run training; the checkpoint is rewritten after every epoch
model.train(
    X_train,
    y_train,
    epochs=epochs,
    learning_rate=learning_rate,
    batch_size=batch_size,
    save_path=model_save_path,
)

# Report the accuracy over all of X_train (this includes the samples that
# train() holds out for validation)
final_accuracy = model.evaluate(X_train, y_train)
print(f"\n최종 학습 정확도: {final_accuracy:.4f}")

새 모델로 학습을 시작합니다...
Epoch 1: Train Loss=4.5724, Val Loss=4.5195, Train Acc=0.0406, Val Acc=0.0416
Epoch 2: Train Loss=4.4512, Val Loss=4.3931, Train Acc=0.0588, Val Acc=0.0562
Epoch 3: Train Loss=4.3062, Val Loss=4.2525, Train Acc=0.0746, Val Acc=0.0694
Epoch 4: Train Loss=4.1692, Val Loss=4.1402, Train Acc=0.0838, Val Acc=0.0802
Epoch 5: Train Loss=4.0724, Val Loss=4.0715, Train Acc=0.0980, Val Acc=0.0904
Epoch 6: Train Loss=4.0056, Val Loss=4.0218, Train Acc=0.1083, Val Acc=0.0958
Epoch 7: Train Loss=3.9534, Val Loss=3.9706, Train Acc=0.1166, Val Acc=0.1072
Epoch 8: Train Loss=3.9074, Val Loss=3.9344, Train Acc=0.1208, Val Acc=0.1132
Epoch 9: Train Loss=3.8664, Val Loss=3.8934, Train Acc=0.1309, Val Acc=0.1180
Epoch 10: Train Loss=3.8284, Val Loss=3.8637, Train Acc=0.1359, Val Acc=0.1194
Epoch 11: Train Loss=3.7934, Val Loss=3.8363, Train Acc=0.1392, Val Acc=0.1268
Epoch 12: Train Loss=3.7614, Val Loss=3.8072, Train Acc=0.1428, Val Acc=0.1328
Epoch 13: Train Loss=3.7325, Val Loss=3.78

In [None]:
# Build a fresh model with the same architecture as the checkpoint
model = SimpleFeatureMLP(input_size=16*16*3, hidden_sizes=[512, 256], output_size=100)

# Restore the saved parameters when the checkpoint file is present
model_save_path = '/content/optimized_simple_feature_mlp_checkpoint.npz'

if not os.path.exists(model_save_path):
    print("모델을 찾을 수 없습니다. 새로 학습을 시작합니다.")
else:
    print("저장된 모델을 불러옵니다...")
    model.load(model_save_path)

# Settings for the additional training run (50 more epochs)
epochs, learning_rate, batch_size = 50, 0.01, 128

# Continue training; the checkpoint is rewritten after every epoch
model.train(
    X_train,
    y_train,
    epochs=epochs,
    learning_rate=learning_rate,
    batch_size=batch_size,
    save_path=model_save_path,
)

# Report the accuracy over all of X_train after the additional epochs
final_accuracy = model.evaluate(X_train, y_train)
print(f"\n추가 학습 후 최종 학습 정확도: {final_accuracy:.4f}")

저장된 모델을 불러옵니다...
Epoch 1: Train Loss=3.2258, Val Loss=3.4038, Train Acc=0.2417, Val Acc=0.2030
Epoch 2: Train Loss=3.2176, Val Loss=3.3965, Train Acc=0.2402, Val Acc=0.1992
Epoch 3: Train Loss=3.2090, Val Loss=3.3888, Train Acc=0.2473, Val Acc=0.2018
Epoch 4: Train Loss=3.1998, Val Loss=3.3791, Train Acc=0.2457, Val Acc=0.2016
Epoch 5: Train Loss=3.1913, Val Loss=3.3744, Train Acc=0.2461, Val Acc=0.2002
Epoch 6: Train Loss=3.1827, Val Loss=3.3803, Train Acc=0.2483, Val Acc=0.2022
Epoch 7: Train Loss=3.1748, Val Loss=3.3738, Train Acc=0.2489, Val Acc=0.2014
Epoch 8: Train Loss=3.1665, Val Loss=3.3587, Train Acc=0.2492, Val Acc=0.2094
Epoch 9: Train Loss=3.1574, Val Loss=3.3601, Train Acc=0.2521, Val Acc=0.2046
Epoch 10: Train Loss=3.1496, Val Loss=3.3581, Train Acc=0.2529, Val Acc=0.2064
Epoch 11: Train Loss=3.1422, Val Loss=3.3469, Train Acc=0.2572, Val Acc=0.2086
Epoch 12: Train Loss=3.1348, Val Loss=3.3452, Train Acc=0.2599, Val Acc=0.2076
Epoch 13: Train Loss=3.1263, Val Loss=3.3411