In [1]:
import os
import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.image import resize
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import load_model
from sklearn.model_selection import train_test_split

In [2]:
# Colab-only step: mount Google Drive at /content/drive so the dataset and the
# saved model files referenced by later cells (paths under /content/drive/MyDrive)
# are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Root folder of the dataset on the mounted Drive; the loader joins it with each
# class name, so it is expected to contain one sub-folder per entry in `classes`.
data_dir = '/content/drive/MyDrive/GDSC/donateacry_corpus'
# Class names. The position of a name in this list is used as its integer label,
# so the order must stay fixed between training and prediction.
classes = ['belly_pain','burping','discomfort','hungry','tired']

In [4]:
def load_and_preprocess_data(data_dir, classes, target_shape=(128, 128)):
    """Load every WAV clip under ``data_dir/<class>/`` as a fixed-size mel-spectrogram.

    Args:
        data_dir: Root folder containing one sub-folder per class name.
        classes: Ordered class names; the position of a name is its integer label.
        target_shape: ``(height, width)`` each spectrogram is resized to.

    Returns:
        A ``(data, labels)`` tuple. ``data`` is a float32 array holding one resized
        spectrogram per clip with a trailing channel axis of size 1; ``labels`` is
        the matching array of integer class indices. When no clip is found,
        ``data`` has shape ``(0, *target_shape, 1)`` and ``labels`` has shape ``(0,)``.

    Raises:
        OSError: Propagated from ``os.listdir`` when a class folder cannot be read.
    """
    data = []
    labels = []

    for class_index, class_name in enumerate(classes):
        class_dir = os.path.join(data_dir, class_name)
        # os.listdir returns entries in arbitrary order; sorting keeps the sample
        # order (and therefore any seeded train/test split) reproducible.
        for filename in sorted(os.listdir(class_dir)):
            # Match the extension case-insensitively so '.WAV' files are not skipped.
            if not filename.lower().endswith('.wav'):
                continue
            file_path = os.path.join(class_dir, filename)
            # sr=None keeps the file's own sampling rate instead of resampling.
            audio_data, sample_rate = librosa.load(file_path, sr=None)
            mel_spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
            # Add a channel axis so the spectrogram can be resized like an image.
            mel_spectrogram = resize(np.expand_dims(mel_spectrogram, axis=-1), target_shape)
            # Store plain float32 arrays rather than framework tensors.
            data.append(np.asarray(mel_spectrogram, dtype=np.float32))
            labels.append(class_index)

    if not data:
        # Keep the rank consistent with the non-empty case so callers can still
        # inspect data.shape[1:].
        return np.empty((0, *target_shape, 1), dtype=np.float32), np.array(labels, dtype=int)

    return np.stack(data), np.array(labels, dtype=int)


In [5]:
# Load every clip as a fixed-size mel-spectrogram together with its integer class id.
data, class_ids = load_and_preprocess_data(data_dir, classes)
labels = to_categorical(class_ids, num_classes=len(classes))  # Convert labels to one-hot encoding
# Stratify on the integer ids so each class keeps the same share in both splits;
# a plain random split can leave a rare class out of the 20% held-out set entirely.
# Note: stratification requires at least two samples per class that is present.
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=42, stratify=class_ids
)

In [None]:
# Per-sample shape of the spectrogram tensors, taken from the training array.
input_shape = X_train.shape[1:]
input_layer = Input(shape=input_shape)

# Two convolution + max-pooling stages with 32 and then 64 filters.
x = input_layer
for n_filters in (32, 64):
    x = Conv2D(n_filters, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)

# Dropout added after the convolutional stack.
x = Dropout(0.25)(x)

# Classifier head: flatten, one hidden dense layer, softmax over the classes.
x = Flatten()(x)
x = Dense(64, activation='relu')(x)
output_layer = Dense(len(classes), activation='softmax')(x)

model = Model(inputs=input_layer, outputs=output_layer)

In [None]:
# Categorical cross-entropy pairs with the one-hot labels and the softmax output.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Callback setup: write the model to best_model.h5, keeping only the best epoch
# as judged by the monitored val_accuracy.
model_checkpoint = ModelCheckpoint(
    filepath='best_model.h5',
    monitor='val_accuracy',
    save_best_only=True,
)

# Train the model; X_test / y_test are passed as the validation data here.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=35,
    batch_size=32,
    callbacks=[model_checkpoint],
)

Epoch 1/35
Epoch 2/35


  saving_api.save_model(


Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35


<keras.src.callbacks.History at 0x7ddb2c706860>

In [None]:
# Reload the checkpoint file written during training (best_model.h5).
best_model = load_model('best_model.h5')

# Evaluate the reloaded model on the validation data.
# NOTE(review): X_test / y_test were also the validation_data used to pick this
# checkpoint, so this accuracy is likely optimistic — confirm whether a separate
# held-out set is wanted.
loss, accuracy = best_model.evaluate(X_test, y_test)

# Print the accuracy measured on the validation data.
print(f"검증 데이터 정확도: {accuracy}")

In [None]:
# Evaluate the in-memory `model` (not the reloaded `best_model`) on the
# validation data. Presumably this is the model as left by the last training
# epoch — verify the kernel state before comparing with the checkpoint score.
loss, accuracy = model.evaluate(X_test, y_test)

# Print the accuracy measured on the validation data.
print(f"검증 데이터 정확도: {accuracy}")

검증 데이터 정확도: 0.79347825050354


In [None]:
# Persist the in-memory `model` to Drive as an HDF5 file.
# NOTE(review): this saves `model`, not the `best_model` checkpoint — confirm
# that the final-epoch weights are the ones intended for later use.
model.save('/content/drive/MyDrive/GDSC/complete_model.h5')

  saving_api.save_model(


# 모델 사용법

In [23]:
# Load one audio file and convert it into a single-sample model input.
def predict_data(path, target_shape=(128, 128)):
    """Turn one audio file into a batch of one resized mel-spectrogram.

    Args:
        path: Location of the audio file to load.
        target_shape: ``(height, width)`` the spectrogram is resized to.

    Returns:
        A NumPy array whose leading axis has length 1 and whose single entry is
        the resized spectrogram with a trailing channel axis.
    """
    # sr=None keeps the file's own sampling rate instead of resampling.
    waveform, native_sr = librosa.load(path, sr=None)
    mel = librosa.feature.melspectrogram(y=waveform, sr=native_sr)
    # Append a channel axis so the spectrogram can be resized like an image.
    mel_image = resize(mel[..., np.newaxis], target_shape)
    # Wrap in a list to add the batch axis.
    return np.array([mel_image])

In [21]:
# Reload the saved model from Drive; this rebinds the global `model` so the
# prediction cells below can run without retraining.
model = load_model('/content/drive/MyDrive/GDSC/complete_model.h5')

In [None]:
# Class names used to translate a predicted index back into a label. The order
# must match the list used when the labels were built for training.
classes = ['belly_pain','burping','discomfort','hungry','tired']

In [25]:
# Clip to classify; this path points into the belly_pain folder of the corpus.
path = "/content/drive/MyDrive/GDSC/donateacry_corpus/belly_pain/549a46d8-9c84-430e-ade8-97eae2bef787-1430130772174-1.7-m-48-bp.wav"

# Run the prediction on the new data.
X_new = predict_data(path)
predictions = model.predict(X_new)

# Print the predicted class of each sample (index of the highest score per row).
for i, predicted_class in enumerate(np.argmax(predictions, axis=-1)):
    print(f"샘플 {i}: 예측된 클래스 = {classes[predicted_class]}")

샘플 0: 예측된 클래스 = hungry
