# Traindata 전처리

# JSON -> DataFrame

In [1]:
import os
import json
import pandas as pd

# 라벨링 JSON 파일이 있는 최상위 폴더 경로
label_root = "/media/usou/PortableSSD/mldl/015.감성 및 발화 스타일별 음성합성 데이터/01.데이터/1.Training/라벨링데이터/"

# 실제 WAV 파일이 존재하는 원천 데이터의 최상위 경로
wav_root = "/media/usou/PortableSSD/mldl/015.감성 및 발화 스타일별 음성합성 데이터/01.데이터/1.Training/원천데이터/"

# 정상적으로 처리된 데이터 정보를 담을 리스트
data = []

# 오류 발생 시 해당 JSON 파일 또는 존재하지 않는 WAV 경로를 저장할 리스트
broken_files = []

# 라벨링 폴더 내부의 모든 JSON 파일을 재귀적으로 탐색
for folder_path, _, files in os.walk(label_root):
    for file_name in files:
        if file_name.endswith(".json"):
            # 현재 JSON 파일의 전체 경로 구성
            json_path = os.path.join(folder_path, file_name)
            try:
                # JSON 파일 열기 및 파싱
                with open(json_path, 'r', encoding='utf-8') as f:
                    content = json.load(f)

                # JSON 내부 정보 추출
                emotion = content["화자정보"]["Emotion"]
                style = content["화자정보"].get("SpeechStyle", "N/A")
                sensitivity = content["화자정보"].get("Sensitivity", "N/A")
                wav_file = content["파일정보"]["FileName"]

                # 현재 JSON 경로를 라벨 기준 상대경로로 변환
                relative_path = os.path.relpath(folder_path, start=label_root)

                # 상대 경로에서 모든 TL을 TS로 변경
                relative_path = relative_path.replace("TL", "TS")

                # WAV 경로를 원천 데이터 기준으로 재구성
                wav_path = os.path.join(wav_root, relative_path, wav_file)

                # WAV 파일 존재 여부 확인
                if os.path.exists(wav_path):
                    # 정상 데이터 추가
                    data.append({
                        "wav_path": wav_path,
                        "emotion": emotion,
                        "style": style,
                        "sensitivity": sensitivity
                    })
                else:
                    # WAV 파일이 존재하지 않는 경우 로그에 기록
                    print(f"WAV 파일 없음: {wav_path}")
                    broken_files.append(wav_path)

            except Exception as e:
                # JSON 파싱 중 오류 발생 시 기록
                print(f"JSON 읽기 오류: {json_path}: {e}")
                broken_files.append(json_path)

# 정상적으로 수집된 데이터를 DataFrame으로 변환
df = pd.DataFrame(data)

# 결과 CSV 파일로 저장
os.makedirs("./data/usou", exist_ok=True)
df.to_csv("./data/usou/metadata_cleaned.csv", index=False)

# 오류가 발생한 경로들을 텍스트 파일로 저장
with open("./data/usou/broken_files.txt", "w") as f:
    for path in broken_files:
        f.write(path + "\n")

# 최종 처리 결과 출력
print(f"정상 처리된 JSON 수: {len(df)}")
print(f"에러 발생 수: {len(broken_files)}")


정상 처리된 JSON 수: 815491
에러 발생 수: 0


# MFCC 추출
-  MFCC 추출이란
    - 음성에서 특징을 뽑아낸 백터
-  데이터 형태
    - 2차원 배열(시간 프레임수, 13)
- 배치
    - 배치 : 전체 데이터를 나누어 처리하는 단위
- 나누는 이유
    - 메모리 부족으로 컴퓨터 프리징 발생





In [1]:
import os
import librosa
import pandas as pd
import numpy as np
from tqdm import tqdm

# ============================
# 1. 메타데이터 로드
# ============================
# 사전에 정제된 메타데이터 CSV 파일 경로
csv_path = "/media/usou/PortableSSD/mldl_project/data/metadata_cleaned.csv"
df = pd.read_csv(csv_path)

# ============================
# 2. 설정값 정의
# ============================
sample_rate = 16000            # 음성 파일 샘플링 레이트 (Hz)
max_duration = 5.0             # WAV 파일 최대 로딩 시간 (초) → 너무 긴 파일 방지
save_interval = 10000          # 몇 개마다 배치로 저장할지 설정

# 저장용 리스트 초기화
mfcc_features = []             # MFCC 벡터 리스트
labels = []                    # 감정 레이블 리스트
error_files = []               # 처리 중 실패한 파일 목록
save_counter = 0               # 배치 저장 인덱스

# 저장 디렉토리 설정
save_dir = "/media/usou/PortableSSD/mldl_project/data/mfcc_batches"
os.makedirs(save_dir, exist_ok=True)

# ============================
# 3. MFCC 추출 루프
# ============================
for idx, row in tqdm(df.iterrows(), total=len(df)):
    wav_path = row["wav_path"]  # 메타데이터에 포함된 wav 파일 전체 경로
    try:
        # WAV 파일 로딩 (최대 max_duration 초까지만 로드)
        y, sr = librosa.load(wav_path, sr=sample_rate, duration=max_duration)

        # MFCC 13차원 추출
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

        # 시간 축 기준으로 전치 (time_step, n_mfcc)
        mfcc_features.append(mfcc.T)
        labels.append(row["emotion"])

    except Exception as e:
        # 에러 발생 시 파일 경로 저장
        print(f"Error processing {wav_path}: {e}")
        error_files.append(wav_path)

    # 일정 수 이상 쌓이면 배치 저장 후 메모리 초기화
    if len(mfcc_features) >= save_interval:
        np.save(os.path.join(save_dir, f"mfcc_batch_{save_counter}.npy"), np.array(mfcc_features, dtype=object))
        np.save(os.path.join(save_dir, f"label_batch_{save_counter}.npy"), np.array(labels))
        save_counter += 1
        mfcc_features = []
        labels = []

# 남은 데이터가 있다면 마지막 배치 저장
if mfcc_features:
    np.save(os.path.join(save_dir, f"mfcc_batch_{save_counter}.npy"), np.array(mfcc_features, dtype=object))
    np.save(os.path.join(save_dir, f"label_batch_{save_counter}.npy"), np.array(labels))

# ============================
# 4. 에러 파일 저장
# ============================
error_log_path = "/media/usou/PortableSSD/mldl_project/data/broken_audio_files.txt"
with open(error_log_path, "w") as f:
    for path in error_files:
        f.write(path + "\n")

# ============================
# 5. 처리 결과 출력
# ============================
print(f"성공적으로 저장된 배치 수: {save_counter + 1}")
print(f"실패한 파일 수: {len(error_files)}")


  df = pd.read_csv(csv_path)
100%|██████████| 815491/815491 [2:58:19<00:00, 76.22it/s]   


성공적으로 저장된 배치 수: 82
실패한 파일 수: 0


# 레이블 인코딩

In [None]:
import os
import numpy as np
import glob
import pickle
from sklearn.preprocessing import LabelEncoder

# ============================
# 1. 설정
# ============================
# 레이블 배치가 저장된 경로
label_dir = "/media/usou/PortableSSD/mldl_project/data/mfcc_batches"

# 인코딩된 레이블 저장 경로
encoded_label_dir = os.path.join(label_dir, "encoded_labels")
os.makedirs(encoded_label_dir, exist_ok=True)

# ============================
# 2. 모든 배치 레이블 수집
# ============================
# label_batch_*.npy 파일 경로 리스트
label_files = sorted(glob.glob(os.path.join(label_dir, "label_batch_*.npy")))

# 전체 레이블 리스트 생성
all_labels = []
batch_label_data = []  # 배치별 데이터도 임시 저장
for label_file in label_files:
    labels = np.load(label_file, allow_pickle=True)
    batch_label_data.append(labels)
    all_labels.extend(labels)

# ============================
# 3. 레이블 인코딩
# ============================
label_encoder = LabelEncoder()
label_encoder.fit(all_labels)

# 인코더 저장 (추후 예측 결과 복원용)
with open(os.path.join(label_dir, "label_encoder.pkl"), "wb") as f:
    pickle.dump(label_encoder, f)

# ============================
# 4. 인코딩된 레이블 배치별로 저장
# ============================
for i, labels in enumerate(batch_label_data):
    encoded = label_encoder.transform(labels)
    save_path = os.path.join(encoded_label_dir, f"label_batch_{i}.npy")
    np.save(save_path, encoded)

print(f"총 레이블 개수: {len(all_labels)}")
print(f"인코딩된 클래스 목록: {label_encoder.classes_}")
print(f"배치 수: {len(label_files)}")
print("레이블 인코딩 및 저장 완료")


총 레이블 개수: 815491
인코딩된 클래스 목록: ['Angry' 'Anxious' 'Embarrassed' 'Happy' 'Hurt' 'Neutrality' 'Sad' 'nan']
배치 수: 82
✅ 레이블 인코딩 및 저장 완료


# CNN 모델 정의

In [4]:
import tensorflow as tf
from tensorflow.keras import layers, models

def create_cnn_model(input_shape, num_classes):
    """
    CNN 기반 음성 감정 분류 모델 정의

    Parameters:
        input_shape (tuple): 입력 데이터 형태 (예: (시간축 길이, MFCC 차원 수, 채널 수))
        num_classes (int): 분류할 감정 클래스 수

    Returns:
        tensorflow.keras.Model: 컴파일 완료된 CNN 모델
    """
    model = models.Sequential()

    # 첫 번째 컨볼루션 레이어: 필터 수 32, 커널 사이즈 3x3, 활성화 함수 ReLU
    model.add(layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
    # 배치 정규화: 학습 안정성과 속도 개선
    model.add(layers.BatchNormalization())
    # 최대 풀링: 출력 크기 절반으로 줄임 (특징 추출과 과적합 방지)
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))

    # 두 번째 컨볼루션 레이어: 필터 수 64
    model.add(layers.Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))

    # 세 번째 컨볼루션 레이어: 필터 수 128
    model.add(layers.Conv2D(128, kernel_size=(3, 3), activation='relu'))
    model.add(layers.BatchNormalization())
    # 전역 평균 풀링: 전체 피처 맵의 평균을 계산하여 1D 벡터로 변환
    model.add(layers.GlobalAveragePooling2D())

    # 완전 연결층(Dense Layer) 추가
    model.add(layers.Dense(128, activation='relu'))
    # 과적합 방지를 위한 드롭아웃 (30%)
    model.add(layers.Dropout(0.3))
    # 출력층: softmax로 감정 클래스 확률 예측
    model.add(layers.Dense(num_classes, activation='softmax'))

    # 모델 컴파일: Adam 옵티마이저, sparse_categorical_crossentropy 손실 함수, 정확도 지표
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model


2025-03-27 14:35:02.667534: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743053702.684248   21141 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743053702.688507   21141 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1743053702.700707   21141 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743053702.700724   21141 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743053702.700725   21141 computation_placer.cc:177] computation placer alr

# Validation 전처리

# 메타데이터 csv로 저장

In [5]:
import os
import json
import pandas as pd

# ========================================
# 1. 경로 설정
# ========================================

# 라벨링 JSON 파일이 저장된 루트 폴더
label_root = "/media/usou/PortableSSD/mldl/015.감성 및 발화 스타일별 음성합성 데이터/01.데이터/2.Validation/라벨링데이터/VL1"

# 실제 음성 WAV 파일이 있는 루트 폴더
wav_root = "/media/usou/PortableSSD/mldl/015.감성 및 발화 스타일별 음성합성 데이터/01.데이터/2.Validation/원천데이터/VS1"

# ========================================
# 2. 결과 저장 리스트 초기화
# ========================================
data = []             # 메타데이터 저장용 리스트
broken_files = []     # 에러 발생한 파일 로그용 리스트

# ========================================
# 3. JSON 파일 순회 및 정보 추출
# ========================================
for folder_path, _, files in os.walk(label_root):
    for file_name in files:
        if file_name.endswith(".json"):
            json_path = os.path.join(folder_path, file_name)
            try:
                # JSON 파일 열기
                with open(json_path, 'r', encoding='utf-8') as f:
                    content = json.load(f)

                # 화자 정보에서 감정, 스타일, 세부 감정 추출
                emotion = content["화자정보"]["Emotion"]
                style = content["화자정보"].get("SpeechStyle", "N/A")
                sensitivity = content["화자정보"].get("Sensitivity", "N/A")

                # WAV 파일 이름 추출
                wav_file = content["파일정보"]["FileName"]

                # 현재 JSON 경로에서 라벨 루트를 기준으로 상대 경로 추출
                relative_path = os.path.relpath(folder_path, start=label_root)

                # 실제 WAV 파일 경로 생성
                wav_path = os.path.join(wav_root, relative_path, wav_file)

                # WAV 파일이 존재하면 메타데이터에 추가
                if os.path.exists(wav_path):
                    data.append({
                        "wav_path": wav_path,
                        "emotion": emotion,
                        "style": style,
                        "sensitivity": sensitivity
                    })
                else:
                    # WAV 파일이 없는 경우 기록
                    print(f"WAV 파일 없음: {wav_path}")
                    broken_files.append(wav_path)

            except Exception as e:
                # JSON 파싱 실패 시 기록
                print(f"JSON 읽기 오류: {json_path}, 에러: {e}")
                broken_files.append(json_path)

# ========================================
# 4. 결과 저장
# ========================================

# DataFrame 생성
df = pd.DataFrame(data)

# 저장 경로 생성
os.makedirs("/media/usou/PortableSSD/mldl_project/data/validation", exist_ok=True)

# 메타데이터 CSV 저장
df.to_csv("/media/usou/PortableSSD/mldl_project/data/validation/metadata_cleaned_val.csv", index=False)

# 에러 파일 로그 저장
with open("/media/usou/PortableSSD/mldl_project/data/validation/broken_val_files.txt", "w") as f:
    for path in broken_files:
        f.write(path + "\n")

# 요약 출력
print(f"정상 처리된 JSON 수: {len(df)}")
print(f"에러 발생 수: {len(broken_files)}")


정상 처리된 JSON 수: 112157
에러 발생 수: 0


# MFCC 추출 Validation 용

In [6]:
import os
import librosa
import pandas as pd
import numpy as np
from tqdm import tqdm

# ========================================
# 1. 메타데이터 로드
# ========================================

# validation용 정제된 메타데이터 CSV 경로
csv_path = "/media/usou/PortableSSD/mldl_project/data/validation/metadata_cleaned_val.csv"
df = pd.read_csv(csv_path)

# ========================================
# 2. 설정값 정의
# ========================================

sample_rate = 16000             # 음성 샘플링 레이트 (16kHz)
max_duration = 5.0              # WAV 최대 로딩 시간 (초)
save_interval = 10000           # 배치 저장 기준 개수
save_dir = "/media/usou/PortableSSD/mldl_project/data/validation/mfcc_batches"
os.makedirs(save_dir, exist_ok=True)

# 저장용 리스트 초기화
mfcc_features = []              # 추출된 MFCC 벡터 리스트
labels = []                     # 감정 레이블 리스트
error_files = []                # 실패한 파일 목록
save_counter = 0                # 배치 파일 번호

# ========================================
# 3. MFCC 추출 루프
# ========================================

for idx, row in tqdm(df.iterrows(), total=len(df)):
    wav_path = row["wav_path"]

    try:
        # WAV 파일 로딩 (최대 max_duration 초까지만 로드)
        y, sr = librosa.load(wav_path, sr=sample_rate, duration=max_duration)

        # MFCC 추출 (13차원)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

        # 시간 축 기준으로 전치 (time_step, 13)
        mfcc_features.append(mfcc.T)
        labels.append(row["emotion"])

    except Exception as e:
        # 로딩 실패 시 에러 출력 및 로그 저장
        print(f"Error processing {wav_path}: {e}")
        error_files.append(wav_path)

    # 일정 개수 이상이면 배치 저장
    if len(mfcc_features) >= save_interval:
        np.save(os.path.join(save_dir, f"mfcc_batch_{save_counter}.npy"), np.array(mfcc_features, dtype=object))
        np.save(os.path.join(save_dir, f"label_batch_{save_counter}.npy"), np.array(labels))
        save_counter += 1
        mfcc_features = []
        labels = []

# 루프 종료 후 남은 데이터 저장
if mfcc_features:
    np.save(os.path.join(save_dir, f"mfcc_batch_{save_counter}.npy"), np.array(mfcc_features, dtype=object))
    np.save(os.path.join(save_dir, f"label_batch_{save_counter}.npy"), np.array(labels))

# ========================================
# 4. 에러 파일 저장
# ========================================

error_log_path = "/media/usou/PortableSSD/mldl_project/data/validation/broken_audio_files_val.txt"
with open(error_log_path, "w") as f:
    for path in error_files:
        f.write(path + "\n")

# ========================================
# 5. 처리 결과 출력
# ========================================

print(f"성공적으로 저장된 배치 수: {save_counter + 1}")
print(f"실패한 파일 수: {len(error_files)}")


100%|██████████| 112157/112157 [27:27<00:00, 68.06it/s]

성공적으로 저장된 배치 수: 12
실패한 파일 수: 0





# Validation 데이터용 레이블 인코딩

In [7]:
import os
import numpy as np
import glob
import pickle
from sklearn.preprocessing import LabelEncoder

# ============================
# 1. 설정
# ============================
# 레이블 배치가 저장된 경로
label_dir = "/media/usou/PortableSSD/mldl_project/data/mfcc_batches"

# 인코딩된 레이블 저장 경로
encoded_label_dir = os.path.join(label_dir, "encoded_labels")
os.makedirs(encoded_label_dir, exist_ok=True)

# ============================
# 2. 모든 배치 레이블 수집
# ============================
# label_batch_*.npy 파일 경로 리스트
label_files = sorted(glob.glob(os.path.join(label_dir, "label_batch_*.npy")))

# 전체 레이블 리스트 생성
all_labels = []
batch_label_data = []  # 배치별 데이터도 임시 저장
for label_file in label_files:
    labels = np.load(label_file, allow_pickle=True)
    batch_label_data.append(labels)
    all_labels.extend(labels)

# ============================
# 3. 레이블 인코딩
# ============================
label_encoder = LabelEncoder()
label_encoder.fit(all_labels)

# 인코더 저장 (추후 예측 결과 복원용)
with open(os.path.join(label_dir, "label_encoder.pkl"), "wb") as f:
    pickle.dump(label_encoder, f)

# ============================
# 4. 인코딩된 레이블 배치별로 저장
# ============================
for i, labels in enumerate(batch_label_data):
    encoded = label_encoder.transform(labels)
    save_path = os.path.join(encoded_label_dir, f"label_batch_{i}.npy")
    np.save(save_path, encoded)

print(f"총 레이블 개수: {len(all_labels)}")
print(f"인코딩된 클래스 목록: {label_encoder.classes_}")
print(f"배치 수: {len(label_files)}")
print("레이블 인코딩 및 저장 완료")


총 레이블 개수: 815491
인코딩된 클래스 목록: ['Angry' 'Anxious' 'Embarrassed' 'Happy' 'Hurt' 'Neutrality' 'Sad' 'nan']
배치 수: 82
레이블 인코딩 및 저장 완료


# MFCC DataGenerator 클래스 생성

In [None]:
import os
import numpy as np
import tensorflow as tf

class MFCCDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, batch_dir, prefix, batch_size=1, shuffle=True):
        self.batch_dir = batch_dir
        self.prefix = prefix
        self.batch_size = batch_size
        self.shuffle = shuffle

        # 배치 파일 목록 생성
        self.mfcc_files = sorted([
            f for f in os.listdir(batch_dir) if f.startswith(f"{prefix}_batch_")
        ])
        self.indices = list(range(len(self.mfcc_files)))
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __len__(self):
        return len(self.indices) // self.batch_size

    def __getitem__(self, index):
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_mfccs = []
        batch_labels = []

        for idx in batch_indices:
            mfcc_path = os.path.join(self.batch_dir, f"{self.prefix}_batch_{idx}.npy")
            label_path = os.path.join(self.batch_dir, "encoded_labels", f"label_batch_{idx}.npy")

            mfcc_data = np.load(mfcc_path, allow_pickle=True)
            label_data = np.load(label_path)

            # 가장 긴 시퀀스 기준으로 padding
            max_len = max([x.shape[0] for x in mfcc_data])
            padded = tf.keras.preprocessing.sequence.pad_sequences(mfcc_data, maxlen=max_len, dtype='float32', padding='post')
            padded = np.expand_dims(padded, -1)  # (batch, time, n_mfcc, 1)

            batch_mfccs.append(padded)
            batch_labels.append(label_data)

        X = np.concatenate(batch_mfccs, axis=0)
        y = np.concatenate(batch_labels, axis=0)
        return X, y

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)


# GPU 활성화 및 안정 설정 코드

In [3]:
import tensorflow as tf

# 1. GPU 장치 목록 출력
gpus = tf.config.list_physical_devices('GPU')
print("사용 가능한 GPU:", gpus)

# 2. 메모리 자동 증가 설정 (안정성을 위해 권장)
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("✔ GPU 메모리 자동 증가 설정 완료")
    except RuntimeError as e:
        print("⚠ 메모리 설정 중 오류 발생:", e)
else:
    print("❌ GPU를 찾을 수 없습니다. CPU로 진행됩니다.")


사용 가능한 GPU: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
✔ GPU 메모리 자동 증가 설정 완료


# label_encoder.pkl을 로드해 동일하게 인코딩하는 코드

In [7]:
import os
import numpy as np
import glob
import pickle

# ============================
# 1. 설정
# ============================
val_label_dir = "/media/usou/PortableSSD/mldl_project/data/validation/mfcc_batches"
encoded_label_dir = os.path.join(val_label_dir, "encoded_labels")
os.makedirs(encoded_label_dir, exist_ok=True)

# 학습 데이터에서 저장한 LabelEncoder 로드
with open("/media/usou/PortableSSD/mldl_project/data/mfcc_batches/label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

# ============================
# 2. 모든 validation 레이블 수집
# ============================
label_files = sorted(glob.glob(os.path.join(val_label_dir, "label_batch_*.npy")))

for i, label_file in enumerate(label_files):
    labels = np.load(label_file, allow_pickle=True)
    encoded = label_encoder.transform(labels)
    save_path = os.path.join(encoded_label_dir, f"label_batch_{i}.npy")
    np.save(save_path, encoded)

print(f"✅ Validation 레이블 인코딩 및 저장 완료 (배치 수: {len(label_files)})")


✅ Validation 레이블 인코딩 및 저장 완료 (배치 수: 12)


# CNN 모델 정의

In [4]:
import tensorflow as tf
from tensorflow.keras import layers, models

def create_cnn_model(input_shape, num_classes):
    """
    CNN 기반 음성 감정 분류 모델 정의

    Parameters:
        input_shape (tuple): 입력 데이터 형태 (예: (시간축 길이, MFCC 차원 수, 채널 수))
        num_classes (int): 분류할 감정 클래스 수

    Returns:
        tensorflow.keras.Model: 컴파일 완료된 CNN 모델
    """
    model = models.Sequential()

    # [1] 첫 번째 컨볼루션 블록
    model.add(layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.BatchNormalization())               # 학습 안정성 향상
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))     # 공간 크기 감소

    # [2] 두 번째 컨볼루션 블록
    model.add(layers.Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))

    # [3] 세 번째 컨볼루션 블록
    model.add(layers.Conv2D(128, kernel_size=(3, 3), activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.GlobalAveragePooling2D())           # 피처맵 전체 평균값

    # [4] 완전 연결층 + 드롭아웃
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.3))                       # 과적합 방지

    # [5] 출력층 - 클래스 수만큼 softmax 출력
    model.add(layers.Dense(num_classes, activation='softmax'))

    # 모델 컴파일
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model


# 학습

In [8]:
import tensorflow as tf

# ============================
# 1. 데이터 제너레이터 생성
# ============================
train_generator = MFCCDataGenerator(
    batch_dir="/media/usou/PortableSSD/mldl_project/data/mfcc_batches",
    prefix="mfcc",
    batch_size=1  # 메모리 안정 위해 소량으로 시작
)

val_generator = MFCCDataGenerator(
    batch_dir="/media/usou/PortableSSD/mldl_project/data/validation/mfcc_batches",
    prefix="mfcc",
    batch_size=1
)

# ============================
# 2. 클래스 수 설정
# ============================
import pickle

# 학습 데이터의 레이블 인코더를 불러와 클래스 수 확인
with open("/media/usou/PortableSSD/mldl_project/data/mfcc_batches/label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

num_classes = len(label_encoder.classes_)

# ============================
# 3. 입력 형태 설정
# ============================
# 예시 입력 크기 지정 (임의 값, 실제 학습 데이터 확인 후 조정 가능)
# 이 부분은 train_generator[0][0].shape 로 확인 가능
sample_input = train_generator[0][0]  # shape: (batch, time, n_mfcc, 1)
input_shape = sample_input.shape[1:]  # (time, n_mfcc, 1)

# ============================
# 4. 모델 생성 및 요약
# ============================
model = create_cnn_model(input_shape=input_shape, num_classes=num_classes)
model.summary()

# ============================
# 5. 콜백 정의 (모델 저장 및 EarlyStopping)
# ============================
checkpoint_path = "/media/usou/PortableSSD/mldl_project/models/best_model.h5"
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    ),
    tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=5,
        restore_best_weights=True
    )
]

# ============================
# 6. 학습 실행
# ============================
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=30,
    callbacks=callbacks
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1743057731.960520   22337 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4550 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2060, pci bus id: 0000:01:00.0, compute capability: 7.5


ValueError: Computed output size would be negative. Received `inputs shape=(None, 37, 1, 64)`, `kernel shape=(3, 3, 64, 128)`, `dilation_rate=[1 1]`.

- 학습 실패한 이유 
- Conv2D 커널이 너무 커서 작은 입력에 비해 작동을 못함 - kernel size 축소 or padding ="same" 적용 -> 모델 재정의

# 안정적인 CNN 모델 정의

In [9]:
from tensorflow.keras import layers, models

def create_cnn_model(input_shape, num_classes):
    """
    음성 감정 분류를 위한 CNN 모델 정의
    
    Parameters:
        input_shape (tuple): 입력 데이터의 형태 (시간축, MFCC 차원, 채널 수)
        num_classes (int): 분류할 감정 클래스 수
        
    Returns:
        keras.models.Sequential: 컴파일된 모델 객체
    """

    model = models.Sequential()

    # ===============================
    # [1] 첫 번째 컨볼루션 블록
    # ===============================
    # Conv2D: 32개의 필터, 3x3 커널, relu 활성화 함수 사용
    # padding='same'으로 출력 크기 감소 방지
    model.add(layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape))
    model.add(layers.BatchNormalization())  # 정규화로 학습 안정화
    model.add(layers.MaxPooling2D((2, 2)))  # 출력 크기 절반으로 축소

    # ===============================
    # [2] 두 번째 컨볼루션 블록
    # ===============================
    model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))

    # ===============================
    # [3] 세 번째 컨볼루션 블록
    # ===============================
    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(layers.BatchNormalization())
    
    # GlobalAveragePooling2D: 각 채널의 평균을 취해 1D 벡터로 변환
    model.add(layers.GlobalAveragePooling2D())

    # ===============================
    # [4] 완전 연결층 + 출력층
    # ===============================
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.3))  # 과적합 방지
    model.add(layers.Dense(num_classes, activation='softmax'))  # 감정 클래스 확률 출력

    # ===============================
    # [5] 모델 컴파일
    # ===============================
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model


# 학습
- 학습 도중 시스템이 멈추지 않도록 작은 배치 크기와 적절한 콜백 설정 포함

In [10]:
import os
import pickle
import tensorflow as tf

# ============================
# 1. 데이터 제너레이터 생성
# ============================
train_generator = MFCCDataGenerator(
    batch_dir="/media/usou/PortableSSD/mldl_project/data/mfcc_batches",
    prefix="mfcc",
    batch_size=1  # 메모리 절약을 위한 작은 배치
)

val_generator = MFCCDataGenerator(
    batch_dir="/media/usou/PortableSSD/mldl_project/data/validation/mfcc_batches",
    prefix="mfcc",
    batch_size=1
)

# ============================
# 2. 레이블 인코더 로드 및 클래스 수 설정
# ============================
# 학습 데이터용 레이블 인코더를 통해 클래스 수 파악
with open("/media/usou/PortableSSD/mldl_project/data/mfcc_batches/label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

num_classes = len(label_encoder.classes_)

# ============================
# 3. 입력 형태 설정
# ============================
# 첫 배치에서 입력 형태 파악
sample_input = train_generator[0][0]  # shape: (batch, time, n_mfcc, 1)
input_shape = sample_input.shape[1:]  # (time, n_mfcc, 1)

# ============================
# 4. 모델 생성
# ============================
model = create_cnn_model(input_shape=input_shape, num_classes=num_classes)
model.summary()

# ============================
# 5. 콜백 설정
# ============================
checkpoint_path = "/media/usou/PortableSSD/mldl_project/models/best_model.h5"
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    ),
    tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=5,
        restore_best_weights=True
    )
]

# ============================
# 6. 학습 실행
# ============================
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=30,
    callbacks=callbacks
)


  self._warn_if_super_not_called()


Epoch 1/30


I0000 00:00:1743058856.274355   22463 service.cc:152] XLA service 0x7e45fc006740 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1743058856.274372   22463 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 2060, Compute Capability 7.5
2025-03-27 16:00:56.357242: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
E0000 00:00:1743058856.801990   22463 cuda_dnn.cc:522] Loaded runtime CuDNN library: 9.1.0 but source was compiled with: 9.3.0.  CuDNN library needs to have matching major version and equal or higher minor version. If using a binary install, upgrade your CuDNN library.  If building from sources, make sure the library loaded at runtime is compatible with the version specified during compile configuration.
E0000 00:00:1743058856.922577   22463 cuda_dnn.cc:522] Loaded runtime CuDNN library: 9.1.0 but source was c

FailedPreconditionError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/usr/lib/python3.12/asyncio/base_events.py", line 641, in run_forever

  File "/usr/lib/python3.12/asyncio/base_events.py", line 1987, in _run_once

  File "/usr/lib/python3.12/asyncio/events.py", line 88, in _run

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3047, in run_cell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3102, in _run_cell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3306, in run_cell_async

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3489, in run_ast_nodes

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3549, in run_code

  File "/tmp/ipykernel_22337/4153134984.py", line 65, in <module>

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

DNN library initialization failed. Look at the errors above for more details.
	 [[{{node StatefulPartitionedCall}}]] [Op:__inference_multi_step_on_iterator_3523]

- GPU 메모리 부족

# 모델 경량화 프루닝 라이브러리 및 모델 정의

In [4]:
import tensorflow_model_optimization as tfmot
from tensorflow.keras import layers, models

def create_pruned_cnn_model(input_shape, num_classes):
    # 프루닝 설정: 가중치의 50%를 0으로 만듦 (비율은 조절 가능)
    pruning_params = {
        'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
            initial_sparsity=0.0,
            final_sparsity=0.5,
            begin_step=0,
            end_step=1000  # 조절 가능
        )
    }

    model = models.Sequential()

    # 첫 번째 블록
    model.add(tfmot.sparsity.keras.prune_low_magnitude(
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        **pruning_params
    ))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))

    # 두 번째 블록
    model.add(tfmot.sparsity.keras.prune_low_magnitude(
        layers.Conv2D(64, (3, 3), activation='relu'),
        **pruning_params
    ))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))

    # 세 번째 블록
    model.add(tfmot.sparsity.keras.prune_low_magnitude(
        layers.Conv2D(128, (3, 3), activation='relu'),
        **pruning_params
    ))
    model.add(layers.BatchNormalization())
    model.add(layers.GlobalAveragePooling2D())

    # 밀집층
    model.add(tfmot.sparsity.keras.prune_low_magnitude(
        layers.Dense(128, activation='relu'),
        **pruning_params
    ))
    model.add(layers.Dropout(0.3))
    model.add(tfmot.sparsity.keras.prune_low_magnitude(
        layers.Dense(num_classes, activation='softmax'),
        **pruning_params
    ))

    # 컴파일
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model


RecursionError: maximum recursion depth exceeded

In [6]:
import tensorflow_model_optimization as tfmot
from tensorflow.keras import layers, models

def create_pruned_cnn_model(input_shape, num_classes):
    # 프루닝 설정: 가중치의 50%를 0으로 만듦 (비율은 조절 가능)
    pruning_params = {
        'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
            initial_sparsity=0.0,
            final_sparsity=0.5,
            begin_step=0,
            end_step=1000  # 조절 가능
        )
    }

    model = models.Sequential()

    # 첫 번째 블록
    model.add(tfmot.sparsity.keras.prune_low_magnitude(
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        **pruning_params
    ))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))

    # 두 번째 블록
    model.add(tfmot.sparsity.keras.prune_low_magnitude(
        layers.Conv2D(64, (3, 3), activation='relu'),
        **pruning_params
    ))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))

    # 세 번째 블록
    model.add(tfmot.sparsity.keras.prune_low_magnitude(
        layers.Conv2D(128, (3, 3), activation='relu'),
        **pruning_params
    ))
    model.add(layers.BatchNormalization())
    model.add(layers.GlobalAveragePooling2D())

    # 밀집층
    model.add(tfmot.sparsity.keras.prune_low_magnitude(
        layers.Dense(128, activation='relu'),
        **pruning_params
    ))
    model.add(layers.Dropout(0.3))
    model.add(tfmot.sparsity.keras.prune_low_magnitude(
        layers.Dense(num_classes, activation='softmax'),
        **pruning_params
    ))

    # 컴파일
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model


RecursionError: maximum recursion depth exceeded

- 프루닝 실패
1. Sequential 모델 안에 잘못된 레이어 구조를 넣었거나

2. 프루닝 대상에 이미 프루닝된 레이어를 다시 적용하려고 하거나

3. 모델 구조에서 무한 루프가 생겼거나

4. 너무 많은 프루닝 wrapper가 중첩된 경우



# 여기서 부터 

# MFCCDataGenerator 클래스
- tf.keras.utils.Sequence를 상속받아, 저장된 MFCC 및 레이블 배치 데이터를 Keras 모델 학습에 적합하게 동적으로 불러오고 전처리해주는 제너레이터

In [None]:
import os
import numpy as np
import tensorflow as tf

class MFCCDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, batch_dir, prefix, batch_size=1, shuffle=True):
        self.batch_dir = batch_dir
        self.prefix = prefix
        self.batch_size = batch_size
        self.shuffle = shuffle

        # 배치 리스트 구성
        self.mfcc_files = sorted([
            f for f in os.listdir(batch_dir) if f.startswith(f"{prefix}_batch_")
        ])
        self.indices = list(range(len(self.mfcc_files)))
        if shuffle:
            np.random.shuffle(self.indices)

    def __len__(self):
        return len(self.indices) // self.batch_size

    def __getitem__(self, index):
        # 현재 배치 인덱스 범위 계산
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]

        batch_mfccs = []
        batch_labels = []

        for idx in batch_indices:
            mfcc_path = os.path.join(self.batch_dir, f"{self.prefix}_batch_{idx}.npy")
            label_path = os.path.join(self.batch_dir, "encoded_labels", f"label_batch_{idx}.npy")

            mfcc_data = np.load(mfcc_path, allow_pickle=True)
            label_data = np.load(label_path)

            # 시퀀스 길이 맞추기 (Zero-padding)
            max_len = max([x.shape[0] for x in mfcc_data])
            padded = tf.keras.preprocessing.sequence.pad_sequences(mfcc_data, maxlen=max_len, dtype='float32', padding='post')
            padded = np.expand_dims(padded, -1)  # CNN 입력 형식 맞추기

            batch_mfccs.append(padded)
            batch_labels.append(label_data)

        X = np.concatenate(batch_mfccs, axis=0)
        y = np.concatenate(batch_labels, axis=0)
        return X, y

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)


2025-03-28 10:05:11.283826: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743123911.301970    4800 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743123911.306197    4800 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1743123911.319183    4800 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743123911.319198    4800 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743123911.319199    4800 computation_placer.cc:177] computation placer alr

# 음성 감정 분류를 위한 CNN 모델을 정의한 함수

In [2]:
from tensorflow.keras import layers, models

def create_cnn_model(input_shape, num_classes):
    """
    음성 감정 인식을 위한 CNN 모델 정의
    - 입력: MFCC 시퀀스 (batch, time, n_mfcc, 1)
    - 출력: 감정 클래스 확률 (softmax)
    """

    model = models.Sequential()

    # [1] 첫 번째 컨볼루션 블록
    model.add(layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))

    # [2] 두 번째 컨볼루션 블록
    model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))

    # [3] 세 번째 컨볼루션 블록
    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.GlobalAveragePooling2D())  # 피처맵 전체 평균값

    # [4] 완전 연결층
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.3))
    model.add(layers.Dense(num_classes, activation='softmax'))  # 감정 클래스 개수만큼 출력

    # 모델 컴파일
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model


# 학습

In [3]:
import os
import pickle
import tensorflow as tf

# ===============================
# [0] GPU 메모리 설정 (OOM 방지)
# ===============================
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("✅ GPU memory growth enabled")
    except RuntimeError as e:
        print("❌ RuntimeError:", e)

# ===============================
# [1] 데이터 제너레이터 생성
# ===============================
train_generator = MFCCDataGenerator(
    batch_dir="/media/usou/PortableSSD/mldl_project/data/mfcc_batches",
    prefix="mfcc",
    batch_size=1  # 메모리 안전 위해 최소 배치
)

val_generator = MFCCDataGenerator(
    batch_dir="/media/usou/PortableSSD/mldl_project/data/validation/mfcc_batches",
    prefix="mfcc",
    batch_size=1
)

# ===============================
# [2] 레이블 인코더 로드 및 클래스 수
# ===============================
with open("/media/usou/PortableSSD/mldl_project/data/mfcc_batches/label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

num_classes = len(label_encoder.classes_)

# ===============================
# [3] 입력 형태 확인
# ===============================
sample_input = train_generator[0][0]  # shape: (batch, time, n_mfcc, 1)
input_shape = sample_input.shape[1:]

# ===============================
# [4] 모델 생성
# ===============================
model = create_cnn_model(input_shape=input_shape, num_classes=num_classes)
model.summary()

# ===============================
# [5] 콜백 설정 (모델 저장 + 조기 종료)
# ===============================
checkpoint_path = "/media/usou/PortableSSD/mldl_project/models/best_model.h5"
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    ),
    tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=5,
        restore_best_weights=True
    )
]

# ===============================
# [6] 모델 학습
# ===============================
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=30,
    callbacks=callbacks
)


✅ GPU memory growth enabled


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1743069273.900138   11267 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4738 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2060, pci bus id: 0000:01:00.0, compute capability: 7.5


  self._warn_if_super_not_called()


Epoch 1/30


I0000 00:00:1743069277.256819   11330 service.cc:152] XLA service 0x7572800028e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1743069277.256835   11330 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 2060, Compute Capability 7.5
2025-03-27 18:54:37.327430: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
E0000 00:00:1743069277.696580   11330 cuda_dnn.cc:522] Loaded runtime CuDNN library: 9.1.0 but source was compiled with: 9.3.0.  CuDNN library needs to have matching major version and equal or higher minor version. If using a binary install, upgrade your CuDNN library.  If building from sources, make sure the library loaded at runtime is compatible with the version specified during compile configuration.
E0000 00:00:1743069277.792812   11330 cuda_dnn.cc:522] Loaded runtime CuDNN library: 9.1.0 but source was c

FailedPreconditionError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/usr/lib/python3.12/asyncio/base_events.py", line 641, in run_forever

  File "/usr/lib/python3.12/asyncio/base_events.py", line 1987, in _run_once

  File "/usr/lib/python3.12/asyncio/events.py", line 88, in _run

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3047, in run_cell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3102, in _run_cell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3306, in run_cell_async

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3489, in run_ast_nodes

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3549, in run_code

  File "/tmp/ipykernel_11267/3582064655.py", line 75, in <module>

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

DNN library initialization failed. Look at the errors above for more details.
	 [[{{node StatefulPartitionedCall}}]] [Op:__inference_multi_step_on_iterator_3439]

# 모델 변경  PyTorch 기반 간단한 CNN 모델 예시

In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class AudioEmotionCNN(nn.Module):
    def __init__(self, num_classes):
        super(AudioEmotionCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(2)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = self.global_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


 # 데이터 로더 정의 (PyTorch용)

In [15]:
from torch.utils.data import Dataset
import torch
import numpy as np
import os

class MFCCDataset(Dataset):
    def __init__(self, batch_dir, prefix):
        self.batch_dir = batch_dir
        self.prefix = prefix

        self.mfcc_files = sorted([
            f for f in os.listdir(batch_dir) if f.startswith(f"{prefix}_batch_")
        ])

    def __len__(self):
        return len(self.mfcc_files)

    def __getitem__(self, idx):
        mfcc_path = os.path.join(self.batch_dir, f"{self.prefix}_batch_{idx}.npy")
        label_path = os.path.join(self.batch_dir, "encoded_labels", f"label_batch_{idx}.npy")

        # 여기서도 배치 데이터임
        mfcc_batch = np.load(mfcc_path, allow_pickle=True)
        label_batch = np.load(label_path)

        # 리스트로 묶어서 반환 (collate_fn에서 처리)
        return list(zip(mfcc_batch, label_batch))


# collate_fn 추가 (패딩과 텐서 변환)

In [16]:
def collate_fn(batch):
    batch = sum(batch, [])  # [(mfcc1, label1), ..., (mfccN, labelN)]로 평탄화
    seq_lens = [x[0].shape[0] for x in batch]
    max_len = max(seq_lens)
    n_mfcc = batch[0][0].shape[1]

    padded_mfccs = []
    labels = []

    for mfcc, label in batch:
        padded = np.zeros((max_len, n_mfcc), dtype=np.float32)
        padded[:mfcc.shape[0], :] = mfcc
        padded_mfccs.append(padded)
        labels.append(label)

    X = torch.tensor(padded_mfccs).unsqueeze(1)  # (batch, 1, time, n_mfcc)
    y = torch.tensor(labels, dtype=torch.long)
    return X, y


# DataLoader 생성

In [17]:
train_dataset = MFCCDataset("/media/usou/PortableSSD/mldl_project/data/mfcc_batches", "mfcc")
val_dataset = MFCCDataset("/media/usou/PortableSSD/mldl_project/data/validation/mfcc_batches", "mfcc")

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)


# 모델 학습

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import os

# ============================
# 0. 기본 설정
# ============================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"✅ Using device: {device}")

num_classes = len(label_encoder.classes_)
model = AudioEmotionCNN(num_classes=num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 30

# ============================
# 1. 학습 루프
# ============================
best_val_acc = 0.0
save_path = "/media/usou/PortableSSD/mldl_project/models/best_model_pt.pth"
os.makedirs(os.path.dirname(save_path), exist_ok=True)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, targets in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]"):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == targets).sum().item()
        total += targets.size(0)

    train_acc = correct / total
    print(f"🟢 Epoch {epoch+1}: Train Loss: {running_loss:.4f} | Train Acc: {train_acc:.4f}")

    # ============================
    # 2. 검증 루프
    # ============================
    model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for inputs, targets in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Validation]"):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            val_correct += (predicted == targets).sum().item()
            val_total += targets.size(0)

    val_acc = val_correct / val_total
    print(f"🔵 Epoch {epoch+1}: Val Acc: {val_acc:.4f}")

    # ============================
    # 3. 모델 저장
    # ============================
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), save_path)
        print(f"💾 Best model saved with Val Acc: {val_acc:.4f}")

print("✅ 학습 완료")


✅ Using device: cuda


Epoch 1/30 [Train]:   0%|          | 0/21 [00:01<?, ?it/s]


RuntimeError: stack expects each tensor to be equal size, but got [5491, 1, 157, 13] at entry 0 and [10000, 1, 157, 13] at entry 1

- 에러 :  배치 안의 샘플들이 시퀀스 길이(time step)가 서로 달라서 torch.stack() 실패.
PyTorch DataLoader는 collate_fn이 내부에서 torch.stack()을 사용하기 때문에, 입력 데이터들 크기가 다르면 에러가 납니다.
TensorFlow에서는 padding으로 해결됐던 부분

- 해결 방안 : 배치 크기를 맞추는 collate_fn 함수 구현
    - collate_fn을 사용하여 배치 내 데이터 크기를 맞추는 방법을 추가

In [18]:
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
import pickle

# ============================
# 0. 기본 설정
# ============================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"✅ Using device: {device}")

# ============================
# 1. 레이블 인코더 로드 및 클래스 수 확인
# ============================
with open("/media/usou/PortableSSD/mldl_project/data/mfcc_batches/label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

num_classes = len(label_encoder.classes_)

# ============================
# 2. 데이터셋 클래스 정의
# ============================
class MFCCDataset(Dataset):
    def __init__(self, batch_dir, prefix):
        self.batch_dir = batch_dir
        self.prefix = prefix
        self.mfcc_files = sorted([f for f in os.listdir(batch_dir) if f.startswith(f"{prefix}_batch_")])

    def __len__(self):
        return len(self.mfcc_files)

    def __getitem__(self, idx):
        mfcc_path = os.path.join(self.batch_dir, f"{self.prefix}_batch_{idx}.npy")
        label_path = os.path.join(self.batch_dir, "encoded_labels", f"label_batch_{idx}.npy")

        mfcc_data = np.load(mfcc_path, allow_pickle=True)
        label_data = np.load(label_path)

        # 리스트 형태일 경우 numpy 배열로 변환
        if isinstance(mfcc_data, list):
            mfcc_data = np.array(mfcc_data)

        # 2D 배열인지 확인 (time, n_mfcc)
        if mfcc_data.ndim != 2:
            raise ValueError(f"[{mfcc_path}] shape 오류: (time, n_mfcc) 형식이 아님 → 실제 shape: {mfcc_data.shape}")

        return mfcc_data, label_data


# 패딩을 위한 collate_fn 정의
def collate_fn(batch):
    # 각 샘플은 (sequence_len, n_mfcc)
    seq_lens = [sample[0].shape[0] for sample in batch]
    max_len = max(seq_lens)

    padded_batch = []
    labels = []

    for mfcc_data, label_data in batch:
        # mfcc_data shape: (time, n_mfcc)
        time_len = mfcc_data.shape[0]
        n_mfcc = mfcc_data.shape[1]

        # (time, n_mfcc) → (max_len, n_mfcc)
        padded = np.zeros((max_len, n_mfcc), dtype=np.float32)
        padded[:time_len, :] = mfcc_data

        padded_batch.append(padded)
        labels.append(label_data)

    # (batch, 1, time, n_mfcc)
    X = torch.tensor(padded_batch).unsqueeze(1)
    y = torch.tensor(labels, dtype=torch.long)
    return X, y


# 데이터셋 로딩
train_dataset = MFCCDataset(batch_dir="/media/usou/PortableSSD/mldl_project/data/mfcc_batches", prefix="mfcc")
val_dataset = MFCCDataset(batch_dir="/media/usou/PortableSSD/mldl_project/data/validation/mfcc_batches", prefix="mfcc")

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

# ============================
# 3. 모델 정의
# ============================
class AudioEmotionCNN(nn.Module):
    def __init__(self, num_classes):
        super(AudioEmotionCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(2)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = self.global_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

# 모델과 손실 함수, 옵티마이저 설정
model = AudioEmotionCNN(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 30

# ============================
# 4. 학습 루프
# ============================
best_val_acc = 0.0
save_path = "/media/usou/PortableSSD/mldl_project/models/best_model_pt.pth"
os.makedirs(os.path.dirname(save_path), exist_ok=True)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, targets in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]"):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == targets).sum().item()
        total += targets.size(0)

    train_acc = correct / total
    print(f"🟢 Epoch {epoch+1}: Train Loss: {running_loss:.4f} | Train Acc: {train_acc:.4f}")

    # ============================
    # 5. 검증 루프
    # ============================
    model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for inputs, targets in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Validation]"):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            val_correct += (predicted == targets).sum().item()
            val_total += targets.size(0)

    val_acc = val_correct / val_total
    print(f"🔵 Epoch {epoch+1}: Val Acc: {val_acc:.4f}")

    # ============================
    # 6. 모델 저장
    # ============================
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), save_path)
        print(f"💾 Best model saved with Val Acc: {val_acc:.4f}")

print("✅ 학습 완료")


✅ Using device: cuda


Epoch 1/30 [Train]:   0%|          | 0/21 [00:00<?, ?it/s]

Epoch 1/30 [Train]:   0%|          | 0/21 [00:01<?, ?it/s]


ValueError: [/media/usou/PortableSSD/mldl_project/data/mfcc_batches/mfcc_batch_1.npy] shape 오류: (time, n_mfcc) 형식이 아님 → 실제 shape: (10000,)

- 에러
allow_pickle=True 옵션으로 불러온 데이터를 np.load() 하면, 원래는 List[np.ndarray] 혹은 (time, n_mfcc) 구조여야 합니다.

만약 이전에 이 mfcc_batch_1.npy를 배치 단위 리스트 형태로 저장했었다면:

지금처럼 __getitem__에서 개별 .npy를 꺼낼 경우 배치 전체가 한 개의 1D 배열로 저장되어 있을 수 있습니다.

- 해결 방법: 각 샘플을 개별 .npy 파일로 저장

In [53]:
import numpy as np
import os

# 원본 다중샘플 npy 경로
input_path = "/media/usou/PortableSSD/mldl_project/data/mfcc_batches/mfcc_batch_1.npy"

# 저장할 경로
output_dir = "/media/usou/PortableSSD/mldl_project/data/mfcc_batches/split_samples"
os.makedirs(output_dir, exist_ok=True)

# 데이터 로드
data = np.load(input_path, allow_pickle=True)

# 각 샘플 저장
for i, sample in enumerate(data):
    save_path = os.path.join(output_dir, f"sample_{i:04d}.npy")
    np.save(save_path, sample)

print(f"✅ 총 {len(data)}개 샘플 저장 완료: {output_dir}")


✅ 총 10000개 샘플 저장 완료: /media/usou/PortableSSD/mldl_project/data/mfcc_batches/split_samples


# 레이블 분할 저장 코드

In [54]:
import numpy as np
import os

# 레이블 경로
label_path = "/media/usou/PortableSSD/mldl_project/data/mfcc_batches/encoded_labels/label_batch_1.npy"
labels = np.load(label_path)

# 저장할 디렉토리
label_output_dir = "/media/usou/PortableSSD/mldl_project/data/mfcc_batches/split_labels"
os.makedirs(label_output_dir, exist_ok=True)

# 분할 저장
for i, label in enumerate(labels):
    save_path = os.path.join(label_output_dir, f"label_{i:04d}.npy")
    np.save(save_path, label)

print(f"✅ 총 {len(labels)}개 레이블 저장 완료: {label_output_dir}")


✅ 총 10000개 레이블 저장 완료: /media/usou/PortableSSD/mldl_project/data/mfcc_batches/split_labels


# MFCCSampleDataset 정의 (샘플 단위)

In [55]:
from torch.utils.data import Dataset
import numpy as np
import os
import torch

class MFCCSampleDataset(Dataset):
    def __init__(self, sample_dir, label_dir):
        self.sample_paths = sorted([
            os.path.join(sample_dir, f) for f in os.listdir(sample_dir) if f.endswith(".npy")
        ])
        self.label_paths = sorted([
            os.path.join(label_dir, f) for f in os.listdir(label_dir) if f.endswith(".npy")
        ])

    def __len__(self):
        return len(self.sample_paths)

    def __getitem__(self, idx):
        mfcc = np.load(self.sample_paths[idx])          # shape: (time, n_mfcc)
        label = np.load(self.label_paths[idx])          # shape: ()

        # 텐서로 변환 (채널 추가)
        mfcc_tensor = torch.tensor(mfcc, dtype=torch.float32).unsqueeze(0)  # (1, time, n_mfcc)
        label_tensor = torch.tensor(label, dtype=torch.long)

        return mfcc_tensor, label_tensor


# collate_fn 정의

In [56]:
def collate_fn(batch):
    seq_lens = [x[0].shape[0] for x in batch]
    max_len = max(seq_lens)

    # n_mfcc 추정 시 더 안전하게
    n_mfcc = None
    for mfcc_data, _ in batch:
        if isinstance(mfcc_data, np.ndarray) and mfcc_data.ndim == 2:
            n_mfcc = mfcc_data.shape[1]
            break

    if n_mfcc is None:
        raise ValueError("모든 샘플에서 유효한 2D MFCC 데이터를 찾을 수 없습니다.")

    padded_batch = []
    labels = []

    for mfcc_data, label_data in batch:
        time_len = mfcc_data.shape[0]
        padded = np.zeros((max_len, n_mfcc), dtype=np.float32)
        padded[:time_len, :] = mfcc_data
        padded_batch.append(padded)
        labels.append(label_data)

    X = torch.tensor(padded_batch).unsqueeze(1)  # (batch, 1, time, n_mfcc)
    y = torch.tensor(labels, dtype=torch.long)
    return X, y


# DataLoader 생성

In [57]:
from torch.utils.data import DataLoader

train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=collate_fn
)

val_loader = DataLoader(
    val_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=collate_fn
)


#  기본 설정 및 모델 불러오기

In [58]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm

# 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"✅ Using device: {device}")

# 모델 정의 (이전에 정의한 AudioEmotionCNN 사용)
model = AudioEmotionCNN(num_classes=8).to(device)  # 클래스 수에 맞게 수정
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 30


✅ Using device: cuda


# 학습 루프

In [59]:
best_val_acc = 0.0
save_path = "/media/usou/PortableSSD/mldl_project/models/best_model_pt.pth"
os.makedirs(os.path.dirname(save_path), exist_ok=True)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, targets in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]"):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == targets).sum().item()
        total += targets.size(0)

    train_acc = correct / total
    print(f"🟢 Epoch {epoch+1}: Train Loss: {running_loss:.4f} | Train Acc: {train_acc:.4f}")


Epoch 1/30 [Train]:   0%|          | 0/2500 [00:00<?, ?it/s]


ValueError: 모든 샘플에서 유효한 2D MFCC 데이터를 찾을 수 없습니다.

# Tensorflow 로 변경

# 1. 샘플 단위 .npy 파일을 위한 DataGenerator 정의

In [60]:
import os
import numpy as np
import tensorflow as tf

class SampleMFCCDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, mfcc_dir, label_dir, batch_size=32, shuffle=True):
        self.mfcc_paths = sorted([
            os.path.join(mfcc_dir, f) for f in os.listdir(mfcc_dir) if f.endswith(".npy")
        ])
        self.label_paths = sorted([
            os.path.join(label_dir, f) for f in os.listdir(label_dir) if f.endswith(".npy")
        ])
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indices = np.arange(len(self.mfcc_paths))
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.mfcc_paths) / self.batch_size))

    def __getitem__(self, index):
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        X, y = [], []

        for i in batch_indices:
            mfcc = np.load(self.mfcc_paths[i])  # (time, n_mfcc)
            label = np.load(self.label_paths[i])  # 정수 인코딩 레이블

            X.append(mfcc)
            y.append(label)

        # Zero-padding
        max_len = max(x.shape[0] for x in X)
        X_pad = tf.keras.preprocessing.sequence.pad_sequences(
            X, maxlen=max_len, padding='post', dtype='float32'
        )
        X_pad = np.expand_dims(X_pad, -1)  # (batch, time, n_mfcc, 1)
        y = np.array(y)

        return X_pad, y

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)


# CNN 모델 정의

In [61]:
from tensorflow.keras import layers, models

def create_cnn_model(input_shape, num_classes):
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.GlobalAveragePooling2D())

    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.3))
    model.add(layers.Dense(num_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


# 2.1  validation 데이터도 split 

In [65]:
import numpy as np
import os

val_input_path = "/media/usou/PortableSSD/mldl_project/data/validation/mfcc_batches/mfcc_batch_0.npy"
val_label_path = "/media/usou/PortableSSD/mldl_project/data/validation/mfcc_batches/label_batch_0.npy"

# 저장할 폴더
val_sample_dir = "/media/usou/PortableSSD/mldl_project/data/validation/mfcc_batches/split_samples"
val_label_dir = "/media/usou/PortableSSD/mldl_project/data/validation/mfcc_batches/split_labels"
os.makedirs(val_sample_dir, exist_ok=True)
os.makedirs(val_label_dir, exist_ok=True)

# 데이터 로드
mfcc_data = np.load(val_input_path, allow_pickle=True)
label_data = np.load(val_label_path)

# 저장
for i, (sample, label) in enumerate(zip(mfcc_data, label_data)):
    np.save(os.path.join(val_sample_dir, f"sample_{i:04d}.npy"), sample)
    np.save(os.path.join(val_label_dir, f"label_{i:04d}.npy"), label)

print(f"✅ validation용 {len(mfcc_data)}개 샘플 및 레이블 저장 완료")


✅ validation용 10000개 샘플 및 레이블 저장 완료


# 3. 콜백 설정 및 모델 학습

In [None]:
import os
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# ===============================
# [1] 학습 및 검증용 DataGenerator 정의
# ===============================
train_generator = SampleMFCCDataGenerator(
    mfcc_dir="/media/usou/PortableSSD/mldl_project/data/mfcc_batches/split_samples",
    label_dir="/media/usou/PortableSSD/mldl_project/data/mfcc_batches/split_labels",
    batch_size=8
)

val_generator = SampleMFCCDataGenerator(
    mfcc_dir="/media/usou/PortableSSD/mldl_project/data/validation/mfcc_batches/split_samples",
    label_dir="/media/usou/PortableSSD/mldl_project/data/validation/mfcc_batches/split_labels",
    batch_size=8
)

# ===============================
# [2] 입력 형태 확인 및 모델 생성
# ===============================
sample_input, _ = train_generator[0]
input_shape = sample_input.shape[1:]  # (time, n_mfcc, 1)

# 클래스 수 확인
import glob
label_paths = sorted(glob.glob("/media/usou/PortableSSD/mldl_project/data/mfcc_batches/split_labels/*.npy"))
all_labels = [int(np.load(p)) for p in label_paths]
num_classes = len(set(all_labels))

model = create_cnn_model(input_shape=input_shape, num_classes=num_classes)
model.summary()

# ===============================
# [3] 콜백 설정
# ===============================
checkpoint_path = "/media/usou/PortableSSD/mldl_project/models/best_model_tf.h5"
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

callbacks = [
    ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    ),
    EarlyStopping(
        monitor='val_accuracy',
        patience=5,
        restore_best_weights=True
    )
]

# GPU 초기화를 위한 예열용 더미 실행 (CPU에서 실행)
with tf.device("/CPU:0"):
    model(tf.random.normal((1,) + input_shape))

# ===============================
# [4] 모델 학습 실행
# ===============================
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=30,
    callbacks=callbacks
)

print("✅ 모델 학습 완료 및 저장 완료")


총 클래스 수: 1


Epoch 1/30


2025-03-28 11:16:45.087514: W tensorflow/core/framework/op_kernel.cc:1857] OP_REQUIRES failed at sparse_xent_op.cc:103 : INVALID_ARGUMENT: Received a label value of 3 which is outside the valid range of [0, 1).  Label values: 3 3 3 3 3 3 3 3
2025-03-28 11:16:45.087548: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: INVALID_ARGUMENT: Received a label value of 3 which is outside the valid range of [0, 1).  Label values: 3 3 3 3 3 3 3 3
	 [[{{function_node __inference_one_step_on_data_6795}}{{node compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]]


InvalidArgumentError: Graph execution error:

Detected at node compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/usr/lib/python3.12/asyncio/base_events.py", line 641, in run_forever

  File "/usr/lib/python3.12/asyncio/base_events.py", line 1987, in _run_once

  File "/usr/lib/python3.12/asyncio/events.py", line 88, in _run

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3047, in run_cell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3102, in _run_cell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3306, in run_cell_async

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3489, in run_ast_nodes

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3549, in run_code

  File "/tmp/ipykernel_11170/2579500336.py", line 62, in <module>

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 113, in one_step_on_data

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 60, in train_step

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/trainers/trainer.py", line 383, in _compute_loss

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/trainers/trainer.py", line 351, in compute_loss

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/trainers/compile_utils.py", line 690, in __call__

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/trainers/compile_utils.py", line 699, in call

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/losses/loss.py", line 67, in __call__

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/losses/losses.py", line 33, in call

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/losses/losses.py", line 2246, in sparse_categorical_crossentropy

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/ops/nn.py", line 1964, in sparse_categorical_crossentropy

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/nn.py", line 744, in sparse_categorical_crossentropy

Received a label value of 3 which is outside the valid range of [0, 1).  Label values: 3 3 3 3 3 3 3 3
	 [[{{node compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_multi_step_on_iterator_6922]

# gpu 사용 확인

In [69]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


Num GPUs Available:  1


# cpu로 학습 시도

In [1]:
# 0. GPU 완전 비활성화 (가장 먼저 실행)
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# 1. 필수 라이브러리 임포트
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras import layers, models
import glob

# 2. 사용자 정의 DataGenerator
class SampleMFCCDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, mfcc_dir, label_dir, batch_size=32, shuffle=True):
        self.mfcc_paths = sorted([
            os.path.join(mfcc_dir, f) for f in os.listdir(mfcc_dir) if f.endswith(".npy")
        ])
        self.label_paths = sorted([
            os.path.join(label_dir, f) for f in os.listdir(label_dir) if f.endswith(".npy")
        ])
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.mfcc_paths))
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.mfcc_paths) / self.batch_size))

    def __getitem__(self, index):
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        batch_mfcc = [np.load(self.mfcc_paths[i]) for i in batch_indexes]
        batch_label = [np.load(self.label_paths[i]).item() for i in batch_indexes]  # .item() 추가

        batch_mfcc = [np.expand_dims(x, axis=-1) for x in batch_mfcc]  # (time, n_mfcc, 1)

        max_len = max(x.shape[0] for x in batch_mfcc)
        padded_mfcc = np.array([
            np.pad(x, ((0, max_len - x.shape[0]), (0, 0), (0, 0)), mode='constant')
            for x in batch_mfcc
        ])

        labels = np.array(batch_label, dtype=np.int32)
        return padded_mfcc, labels

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indexes)

# 3. CNN 모델 생성 함수
def create_cnn_model(input_shape, num_classes):
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.GlobalAveragePooling2D())

    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.3))
    model.add(layers.Dense(num_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# 4. DataGenerator 설정
train_generator = SampleMFCCDataGenerator(
    mfcc_dir="/media/usou/PortableSSD/mldl_project/data/mfcc_batches/split_samples",
    label_dir="/media/usou/PortableSSD/mldl_project/data/mfcc_batches/split_labels",
    batch_size=8
)

val_generator = SampleMFCCDataGenerator(
    mfcc_dir="/media/usou/PortableSSD/mldl_project/data/validation/mfcc_batches/split_samples",
    label_dir="/media/usou/PortableSSD/mldl_project/data/validation/mfcc_batches/split_labels",
    batch_size=8
)

# 5. 입력 형상 및 클래스 수 확인
sample_input, _ = train_generator[0]
input_shape = sample_input.shape[1:]

label_paths = sorted(glob.glob("/media/usou/PortableSSD/mldl_project/data/mfcc_batches/split_labels/*.npy"))
all_labels = [np.load(p).item() for p in label_paths]  # .item()으로 스칼라 추출
num_classes = len(set(all_labels))
print(f"클래스 수: {num_classes}")

# 6. 모델 생성 및 콜백 설정
model = create_cnn_model(input_shape=input_shape, num_classes=num_classes)

checkpoint_path = "/media/usou/PortableSSD/mldl_project/models/best_model_tf.h5"
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

callbacks = [
    ModelCheckpoint(filepath=checkpoint_path, monitor='val_accuracy', save_best_only=True, verbose=1),
    EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
]

# 7. 모델 학습
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=30,
    callbacks=callbacks
)

print("모델 학습 완료 및 저장 완료")


2025-03-28 11:18:00.012173: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743128280.028215   11313 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743128280.033166   11313 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1743128280.045344   11313 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743128280.045397   11313 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743128280.045399   11313 computation_placer.cc:177] computation placer alr

클래스 수: 1
Epoch 1/30


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-03-28 11:18:03.401681: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2025-03-28 11:18:03.401701: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:167] env: CUDA_VISIBLE_DEVICES="-1"
2025-03-28 11:18:03.401706: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:170] CUDA_VISIBLE_DEVICES is set to -1 - this hides all GPUs from CUDA
2025-03-28 11:18:03.401708: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:178] verbose logging is disabled. Rerun with verbose logging (usually --v=1 or --vmodule=cuda_diagnostics=1) to get more diagnostic output from this module
2025-03-28 11:18:03.401711: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:183] retrieving CUDA diagnostic information for host: usou-GP75-Leopa

InvalidArgumentError: Graph execution error:

Detected at node compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/usr/lib/python3.12/asyncio/base_events.py", line 641, in run_forever

  File "/usr/lib/python3.12/asyncio/base_events.py", line 1987, in _run_once

  File "/usr/lib/python3.12/asyncio/events.py", line 88, in _run

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3047, in run_cell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3102, in _run_cell

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3306, in run_cell_async

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3489, in run_ast_nodes

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3549, in run_code

  File "/tmp/ipykernel_11313/2616878548.py", line 105, in <module>

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 113, in one_step_on_data

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 60, in train_step

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/trainers/trainer.py", line 383, in _compute_loss

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/trainers/trainer.py", line 351, in compute_loss

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/trainers/compile_utils.py", line 690, in __call__

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/trainers/compile_utils.py", line 699, in call

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/losses/loss.py", line 67, in __call__

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/losses/losses.py", line 33, in call

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/losses/losses.py", line 2246, in sparse_categorical_crossentropy

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/ops/nn.py", line 1964, in sparse_categorical_crossentropy

  File "/home/usou/venv/superbad/lib/python3.12/site-packages/keras/src/backend/tensorflow/nn.py", line 744, in sparse_categorical_crossentropy

Received a label value of 3 which is outside the valid range of [0, 1).  Label values: 3 3 3 3 3 3 3 3
	 [[{{node compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_multi_step_on_iterator_3421]

# 다시 처음부터 정리