In [4]:
import pickle
import numpy as np
import os

In [26]:
import matplotlib.pyplot as plt
#from dataset.mnist import load_mnist
from common.multi_layer_net import MultiLayerNet
from common.multi_layer_net_extend import MultiLayerNetExtend
from common.util import shuffle_dataset
from common.optimizer import SGD
from common.functions import softmax

In [64]:
#데이터셋 불러오기
#나중에 .py 파일로 뺄것.
def load_cifar100(data_dir='./cifar-100-python', validation_rate=0.2):
    def unpickle(file):
        with open(file, 'rb') as fo:
            return pickle.load(fo, encoding='bytes')

    # 데이터 로딩
    train = unpickle(os.path.join(data_dir, 'train'))
    test = unpickle(os.path.join(data_dir, 'test'))
    meta = unpickle(os.path.join(data_dir, 'meta'))

    # 이미지 정규화 및 reshape
    x_train = train[b'data'].reshape(-1, 3, 32, 32).astype(np.float32) / 255.0
    x_test = test[b'data'].reshape(-1, 3, 32, 32).astype(np.float32) / 255.0

    # 라벨 분리 (fine, coarse)
    y_train_fine = np.array(train[b'fine_labels'])
    y_test_fine = np.array(test[b'fine_labels'])

    y_train_coarse = np.array(train[b'coarse_labels'])
    y_test_coarse = np.array(test[b'coarse_labels'])

    # --- 셔플 후 validation 분할 ---
    from common.util import shuffle_dataset
    x_train, y_train_fine = shuffle_dataset(x_train, y_train_fine)
    _, y_train_coarse = shuffle_dataset(x_train, y_train_coarse)

    validation_num = int(x_train.shape[0] * validation_rate)

    x_val = x_train[:validation_num]
    y_val = y_train_fine[:validation_num]

    x_train = x_train[validation_num:]
    y_train = y_train_fine[validation_num:]
    y_train_coarse = y_train_coarse[validation_num:]

    return (x_train, y_train), (x_val, y_val), (x_test, y_test_fine), (y_train_coarse, y_test_coarse)


In [65]:
#데이터셋 사용
data_dir = './cifar-100-python'  # 이 위치에 압축 푼 폴더가 있어야 해

# 데이터 로딩 및 분할
(x_train, y_train), (x_valid, y_valid), (x_test, y_test), (y_train_coarse, y_test_coarse) = load_cifar100()

#예시 출력
print("Fine label 예시 (y_train):", y_train[:10])
print("Coarse label 예시 (y_train_coarse):", y_train_coarse[:10])

Fine label 예시 (y_train): [31 79 74 40 12 71 58  2 10 88]
Coarse label 예시 (y_train_coarse): [ 7  2 13 17  8  5  9  0  9  3]


In [78]:
#데이터셋 크기 설정
print("전체 training data 수:", x_train.shape[0])
desired_num=40000
x_train = x_train[:desired_num]
y_train = y_train[:desired_num]
y_train_coarse = y_train_coarse[:desired_num]

전체 training data 수: 4000


In [79]:
##데이터셋 셔플
# 총 학습 데이터 개수 계산
num_train = x_train.shape[0]
num_valid = x_valid.shape[0]
num_test = x_test.shape[0]

#1. 섞을 순서 만들기.
shuffle_idx = np.random.permutation(num_train)

#2. 이미지와 fine label, coars label을 같은 순서로 섞기.
x_train = x_train[shuffle_idx]
y_train = y_train[shuffle_idx]
y_train_coarse = y_train_coarse[shuffle_idx]

# 검증 비율 설정
validation_rate = 0.2
validation_num = int(num_train * validation_rate)

# 검증 데이터 분리
x_val = x_train[:validation_num]
y_val = y_train[:validation_num]
y_val_coarse = y_train_coarse[:validation_num]

# 학습 데이터 재설정
x_train = x_train[validation_num:]
y_train = y_train[validation_num:]
y_train_coarse = y_train_coarse[validation_num:]


In [80]:
# 학습용 데이터 추출
num_train = x_train.shape[0]
idx_train = np.arange(num_train)

# 검증용 데이터 추출
num_valid = x_val.shape[0]
idx_valid = np.arange(num_valid)

# 평가용 데이터 추출
num_test = x_test.shape[0]
idx_test = np.arange(num_test)

In [81]:
#신경망 하이퍼파라미터 설정
input_size = 3072                   # 입력 차원 (32X32X3) #RGB
output_size = 100                   # 출력 차원 (100 클래스) #fine label
hideen_size_list = [100, 50]        # 은닉층 2개

max_epochs = 1000                     # Epoch 횟수
batch_size = 100                    # mini-batch 개수
learning_rate = 0.01                # 학습률
weight_decay_lambda = 0.01           # 가중치 감소 계수 (정규화 항)
activation = 'relu'
weight_init_std = 'he'              # 가중치 초기화 방식 ('xavier', 'he', 1.0)
use_batchnorm = True                # 배치 정규화 사용 여부

In [82]:
# --- 다층 신경망 인스턴스 생성 ---
# --- MLP---
network = MultiLayerNetExtend(
    input_size=input_size,
    hidden_size_list=hideen_size_list,
    output_size=output_size,
    activation=activation,
    weight_init_std=weight_init_std,
    weight_decay_lambda=weight_decay_lambda,
    use_batchnorm=use_batchnorm
)

In [83]:
# --- 옵티마이저 설정 ---
optimizer = SGD(lr=learning_rate)
# optimizer = Momentum(lr=learning_rate)
# optimizer = AdaGrad(lr=learning_rate)
# optimizer = Adam(lr=learning_rate)

In [84]:
# --- 학습 로그 저장용 리스트 초기화 ---
train_loss_list = []
train_acc_list = []
train_loss_per_epoch_list = []
train_acc_per_epoch_list = []

valid_loss_list = []
valid_acc_list = []
valid_loss_per_epoch_list = []
valid_acc_per_epoch_list = []

test_loss_list = []
test_acc_list = []
test_loss_per_epoch_list = []
test_acc_per_epoch_list = []


In [85]:
# --- 1에폭당 반복 수 계산 ---
train_per_epoch = max(num_train / batch_size, 1)
valid_per_epoch = max(num_valid / batch_size, 1)
test_per_epoch = max(num_test / batch_size, 1)

In [86]:
# --- 로그 데이터 저장 함수 ---
# --- 나중에 .py 파일로 뺄것.
import os
import numpy as np

def save_logs_npy(path_prefix='logs/', logs_dict={}):
    os.makedirs(path_prefix, exist_ok=True)
    for name, values in logs_dict.items():
        np.save(os.path.join(path_prefix, f'{name}.npy'), np.array(values))


In [87]:
# 예: 실험 이름마다 별도 폴더로 로그 저장
experiment_name = "exp1_MLP_mapping"
log_path = os.path.join("logs", experiment_name)

save_logs_npy(log_path, {
    'train_loss': train_loss_per_epoch_list,
    'train_acc': train_acc_per_epoch_list,
    'valid_loss': valid_loss_per_epoch_list,
    'valid_acc': valid_acc_per_epoch_list
})

#나중에 저장된 파일 불러올 때
#train_loss = np.load('logs/exp1_MLP_mapping/train_loss.npy')


In [88]:
#timestamp 추가된 로그 데이터 저장 함수
'''
import os
import numpy as np
from datetime import datetime

def save_logs_npy_with_timestamp(logs_dict, base_dir='logs', exp_name='exp'):
    # 현재 시간 기반 타임스탬프 생성
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    full_path = os.path.join(base_dir, f"{exp_name}_{timestamp}")

    os.makedirs(full_path, exist_ok=True)

    for name, values in logs_dict.items():
        np.save(os.path.join(full_path, f'{name}.npy'), np.array(values))
    
    print(f"로그 저장 완료: {full_path}")
    return full_path  # 경로 반환해줘도 좋음

'''


'\nimport os\nimport numpy as np\nfrom datetime import datetime\n\ndef save_logs_npy_with_timestamp(logs_dict, base_dir=\'logs\', exp_name=\'exp\'):\n    # 현재 시간 기반 타임스탬프 생성\n    timestamp = datetime.now().strftime(\'%Y%m%d_%H%M%S\')\n    full_path = os.path.join(base_dir, f"{exp_name}_{timestamp}")\n\n    os.makedirs(full_path, exist_ok=True)\n\n    for name, values in logs_dict.items():\n        np.save(os.path.join(full_path, f\'{name}.npy\'), np.array(values))\n    \n    print(f"로그 저장 완료: {full_path}")\n    return full_path  # 경로 반환해줘도 좋음\n\n'

In [121]:
# --- 학습 루프 시작 ---
for iepoch in range(max_epochs):
    # 학습/검증 인덱스 셔플
    #교수님은 여기서 train, valid에 대한 셔플을 진행하는데. 일단 나는 뺌..
    #매 epoch마다 셔플을 하는게 좋대. 
    np.random.shuffle(idx_train)
    np.random.shuffle(idx_valid)

    train_loss = 0
    train_acc = 0
    valid_loss = 0
    valid_acc = 0

    # --- 학습 단계 ---
    for ibatch in range(0, num_train, batch_size):
        batch_mask = idx_train[ibatch:ibatch + batch_size]
        x_batch = x_train[batch_mask]
        y_batch = y_train[batch_mask]
        #y_batch_coarse = y_train_coarse[batch_mask]

        # 손실 및 정확도 계산
        _train_loss = network.loss(x_batch, y_batch)
        _train_acc = network.accuracy(x_batch, y_batch)
        train_loss += _train_loss
        train_acc += _train_acc

        # 기울기 계산 및 파라미터 업데이트
        grads = network.gradient(x_batch, y_batch)
        optimizer.update(network.params, grads)

        # 배치 단위 기록
        train_loss_list.append(_train_loss)
        train_acc_list.append(_train_acc)

    # 에폭 단위 기록
    train_loss_per_epoch_list.append(train_loss / train_per_epoch)
    train_acc_per_epoch_list.append(train_acc / train_per_epoch)

    # --- 검증 단계 ---
    for ibatch in range(0, num_valid, batch_size):
        batch_mask = idx_valid[ibatch:ibatch + batch_size]
        x_batch = x_valid[batch_mask]
        y_batch = y_valid[batch_mask]

        # 손실 및 정확도 계산 (업데이트 없음)
        _valid_loss = network.loss(x_batch, y_batch)
        _valid_acc = network.accuracy(x_batch, y_batch)
        valid_loss += _valid_loss
        valid_acc += _valid_acc

        valid_loss_list.append(_valid_loss)
        valid_acc_list.append(_valid_acc)

    # 에폭 단위 기록
    valid_loss_per_epoch_list.append(valid_loss / valid_per_epoch)
    valid_acc_per_epoch_list.append(valid_acc / valid_per_epoch)
    #print("train_loss_per_epoch_list:", train_loss_per_epoch_list)

    # --- 로그 출력 ---
    print(f"[epoch] {iepoch} / {max_epochs} | [Train] loss:{train_loss_per_epoch_list[-1]:.4f}, acc:{train_acc_per_epoch_list[-1]:.4f} | [Valid] loss:{valid_loss_per_epoch_list[-1]:.4f}, acc:{valid_acc_per_epoch_list[-1]:.4f}")



[epoch] 0 / 1000 | [Train] loss:618.2089, acc:0.9984 | [Valid] loss:623.2672, acc:0.0638
[epoch] 1 / 1000 | [Train] loss:618.2712, acc:0.9978 | [Valid] loss:623.2317, acc:0.0725
[epoch] 2 / 1000 | [Train] loss:618.1394, acc:0.9981 | [Valid] loss:623.1446, acc:0.0688
[epoch] 3 / 1000 | [Train] loss:618.2080, acc:0.9978 | [Valid] loss:623.2760, acc:0.0675
[epoch] 4 / 1000 | [Train] loss:618.2640, acc:0.9978 | [Valid] loss:623.3024, acc:0.0725
[epoch] 5 / 1000 | [Train] loss:618.2083, acc:0.9981 | [Valid] loss:623.3266, acc:0.0688
[epoch] 6 / 1000 | [Train] loss:618.2916, acc:0.9981 | [Valid] loss:623.3460, acc:0.0700
[epoch] 7 / 1000 | [Train] loss:618.3909, acc:0.9981 | [Valid] loss:623.3869, acc:0.0737
[epoch] 8 / 1000 | [Train] loss:618.3558, acc:0.9981 | [Valid] loss:623.3323, acc:0.0675
[epoch] 9 / 1000 | [Train] loss:618.3224, acc:0.9981 | [Valid] loss:623.4115, acc:0.0713
[epoch] 10 / 1000 | [Train] loss:618.3748, acc:0.9981 | [Valid] loss:623.4962, acc:0.0738
[epoch] 11 / 1000 | 

In [122]:
#교수님이 주신.. 100개 클래스에 대한 테스트 결과
# --- 테스트 손실 및 정확도 초기화 ---
test_loss = 0  # 전체 테스트 손실 누적 변수
test_acc = 0   # 전체 테스트 정확도 누적 변수

# --- 테스트 평가 단계 시작 ---
for ibatch in range(0, num_test, batch_size):
    _ibatch = ibatch + batch_size  # 미니배치 끝 인덱스
    if _ibatch >= num_test: 
        _ibatch = num_test  # 마지막 배치 처리 (데이터 초과 방지)

    batch_mask = idx_test[ibatch:_ibatch]  # 미니배치 인덱스 추출
    x_batch = x_test[batch_mask]           # 입력 데이터 추출
    t_batch = y_test[batch_mask]           # 정답 레이블 추출 ###이거 나중에 확인하기

    # --- 손실 및 정확도 계산 (모델 파라미터 업데이트 없음) ---
    _test_loss = network.loss(x_batch, t_batch)       # 손실 계산
    _test_acc = network.accuracy(x_batch, t_batch)     # 정확도 계산

    test_loss += _test_loss   # 손실 누적
    test_acc += _test_acc     # 정확도 누적

    test_loss_list.append(_test_loss)  # 배치 단위 손실 저장
    test_acc_list.append(_test_acc)    # 배치 단위 정확도 저장

# --- 전체 테스트 세트에 대한 평균 손실 및 정확도 계산 ---
test_loss_per_epoch_list.append(test_loss / test_per_epoch)
test_acc_per_epoch_list.append(test_acc / test_per_epoch)

# --- 테스트 결과 출력 ---
print(f"[Test] loss:{test_loss_per_epoch_list[-1]:.4f}, acc:{test_acc_per_epoch_list[-1]:.4f}")


[Test] loss:624.2639, acc:0.0812


In [116]:
save_logs_npy(log_path, {
    'train_loss': train_loss_per_epoch_list,
    'train_acc': train_acc_per_epoch_list,
    'valid_loss': valid_loss_per_epoch_list,
    'valid_acc': valid_acc_per_epoch_list
})

In [123]:
##학습된 모델 파라미터 저장. 
## 
import numpy as np
import os

save_dir = "logs/exp1_MLP_mapping"   # 원하는 폴더
os.makedirs(save_dir, exist_ok=True)

# network.params는 {'W1': ..., 'b1': ..., 'W2': ..., ...} 형태
np.savez(os.path.join(save_dir, "model_params2.npz"), **network.params)

print("✅ 모델 파라미터 저장 완료")

✅ 모델 파라미터 저장 완료


In [120]:
## 저장된 parameter 값 복원하는 법
loaded = np.load("logs/exp1_MLP_mapping/model_params2.npz")
print("저장된 weight 값:",loaded['W1'])

#모델 복원하는 경우 이 코드 사용
for key in loaded.files:
    network.params[key] = loaded[key]
    
#network.params['W1'] = loaded['W1']
#network.params['b1'] = loaded['b1']

## 저장된 loss 값 불러오는거 되는지
data = np.load('logs/exp1_MLP_mapping/train_acc.npy')
print("저장된 acc값 :",data)
print(data.shape)

y_pred=network.predict(x_val) #valid set을 이용해서 진행.
print("y_pred의 형태 :", y_pred.shape)
#print(y_pred) #출력값 확인하면 음수도 있음. 아직 logit 상태. softmax 안 거침
#x_val의 이미지들을 모델에 넣어서 예측값 뽑아내기.
#실제 예측을 하는거


#예측값 형태 확인(logit)과 softmax 형태로 변환하기
print("x_val shaep:",x_val.shape)

softmax_output = softmax(y_pred)# softmax 적용 후 확률값
#print(softmax_output[0]) #임의로 출력
print(np.argmax(softmax_output[0])) #몇번째 클래스가 가장 높은지

저장된 weight 값: [[ 0.07415928  1.64570128 -0.41703197 ... -1.35573448 -0.33937163
  -0.9568702 ]
 [ 0.06309862  1.68176402 -0.43582437 ... -1.34858703 -0.3531856
  -0.9735085 ]
 [ 0.06881468  1.71895104 -0.4150234  ... -1.38008237 -0.332482
  -0.96638949]
 ...
 [-0.01493041  1.39815839  0.09578059 ... -1.24903055 -0.6537008
  -1.02060704]
 [-0.00768432  1.42674188  0.07950961 ... -1.25036035 -0.62366894
  -1.04892803]
 [-0.00378592  1.44704933  0.08120071 ... -1.24920559 -0.62421141
  -1.03495232]]
저장된 acc값 : [0.0159375 0.0225    0.0359375 ... 0.998125  0.9984375 0.998125 ]
(3000,)
y_pred의 형태 : (800, 100)
x_val shaep: (800, 3, 32, 32)
34


## 매핑 실험!!!!!  

### 단순 mapping.

In [126]:
#지금 y_pred는 로짓형태. 추가적으로  softmax를 추가하거나 해야함
#매핑 테이블 만들기. 딕셔너리 형태
fine_to_coarse = {fine: coarse for fine, coarse in zip(y_train, y_train_coarse)}

def argmax_fine_to_coarse(softmax_output, fine_to_coarse):
    """
    softmax_output: shape (N, 100)
    returns: coarse_pred, shape (N,)
    """
    fine_preds = np.argmax(softmax_output, axis=1)
    coarse_preds = np.array([fine_to_coarse[f] for f in fine_preds])
    return coarse_preds


In [138]:
coarse_pred = argmax_fine_to_coarse(softmax_output, fine_to_coarse)

# 예시 출력
for a in range(0,30,1):
    print("predict fine label:", np.argmax(softmax_output[a]))
    print("true fine label:", y_val[a])  
    print("predict coarse label:", coarse_pred[a])
    print("true coarse label:", y_val_coarse[a],"\n")


predict fine label: 34
true fine label: 51
predict coarse label: 3
true coarse label: 12 

predict fine label: 85
true fine label: 52
predict coarse label: 8
true coarse label: 7 

predict fine label: 25
true fine label: 31
predict coarse label: 2
true coarse label: 9 

predict fine label: 60
true fine label: 71
predict coarse label: 18
true coarse label: 2 

predict fine label: 68
true fine label: 73
predict coarse label: 15
true coarse label: 9 

predict fine label: 45
true fine label: 4
predict coarse label: 14
true coarse label: 9 

predict fine label: 56
true fine label: 59
predict coarse label: 1
true coarse label: 2 

predict fine label: 20
true fine label: 35
predict coarse label: 15
true coarse label: 6 

predict fine label: 82
true fine label: 82
predict coarse label: 10
true coarse label: 7 

predict fine label: 19
true fine label: 62
predict coarse label: 0
true coarse label: 15 

predict fine label: 65
true fine label: 82
predict coarse label: 4
true coarse label: 13 

pre

## 매핑의 성능 평가 

In [113]:
# --- coarse 평가용 손실 및 정확도 초기화 ---
test_loss_coarse = 0
test_acc_coarse = 0

# --- 테스트 평가 루프 시작 ---
for ibatch in range(0, num_test, batch_size):
    _ibatch = ibatch + batch_size
    if _ibatch >= num_test:
        _ibatch = num_test  # 마지막 배치 초과 방지

    batch_mask = idx_test[ibatch:_ibatch]
    x_batch = x_test[batch_mask]
    t_batch_coarse = y_test_coarse[batch_mask]

    # 1. 예측 로짓 → softmax
    logits = network.predict(x_batch)
    softmax_output = softmax(logits)

    # 2. softmax → coarse 확률 맵핑
    coarse_output = map_softmax_to_coarse(softmax_output, fine_to_coarse)

    # 3. coarse 예측
    pred_coarse = np.argmax(coarse_output, axis=1)

    # 4. accuracy 계산
    batch_acc = np.sum(pred_coarse == t_batch_coarse) / len(t_batch_coarse)
    test_acc_coarse += batch_acc

    # 5. cross-entropy loss 계산 (직접 구현)
    probs = np.clip(coarse_output[np.arange(len(t_batch_coarse)), t_batch_coarse], 1e-7, 1.0)
    batch_loss = -np.mean(np.log(probs))
    test_loss_coarse += batch_loss

# --- 평균 계산 ---
test_loss_coarse /= test_per_epoch
test_acc_coarse /= test_per_epoch

# --- coarse 평가 결과 출력 ---
print(f"[Test coarse] loss: {test_loss_coarse:.4f} | acc: {test_acc_coarse:.4f}")


NameError: name 'map_softmax_to_coarse' is not defined