In [None]:
import os
import random
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras import layers
from keras import models
from keras import optimizers
from keras.utils import to_categorical
from IPython import display
import matplotlib.pyplot as plt

data_dir = 'data_picture'
class_names = os.listdir(data_dir)
class_names.sort()
class_to_index = dict(zip(class_names, range(len(class_names))))

images = []
labels = []
for class_name in class_names:
    class_dir = os.path.join(data_dir, class_name)
    for dirpath, _, filenames in os.walk(class_dir):
        for filename in filenames:
            if filename.endswith('.png'):
                filepath = os.path.join(dirpath, filename)
                image = plt.imread(filepath)
                images.append(image)
                labels.append(class_to_index[class_name])

# Chia dữ liệu thành các bộ train, validation và test
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.1, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

# Tiền xử lý dữ liệu hình ảnh để chuẩn hóa kích thước và định dạng hình ảnh
IMG_SIZE = 128

def preprocess_image(image):
    # Kiểm tra xem ảnh có 4 kênh màu hay không
    if image.shape[2] == 4:
        # Nếu có, chuyển đổi sang RGB bằng cách bỏ qua kênh alpha
        image = image[:,:,:3]
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    image = tf.image.convert_image_dtype(image, tf.float32)
    return image

X_train = np.array([preprocess_image(image) for image in X_train])
X_val = np.array([preprocess_image(image) for image in X_val])
X_test = np.array([preprocess_image(image) for image in X_test])

# Chuyển đổi nhãn sang định dạng one-hot
y_train = to_categorical(y_train, len(class_names))
y_val = to_categorical(y_val, len(class_names))
y_test = to_categorical(y_test, len(class_names))

# Xây dựng mô hình CNN
model = models.Sequential([
    layers.Conv2D(16, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(32, (3, 3),activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(len(class_names), activation='softmax')
])

model.summary()

# Compile model with categorical crossentropy loss and Adam optimizer
model.compile(loss='categorical_crossentropy', optimizer=optimizers.Adam(learning_rate=1e-4), metrics=['accuracy'])

# Train model
history = model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_val, y_val))

# Evaluate model on test data
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test loss: {test_loss:.3f}')
print(f'Test accuracy: {test_acc:.3f}')

# Visualize accuracy and loss over epochs
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1, len(acc) + 1)

plt.figure(figsize=(15, 5))
plt.subplot(1, 2, 1)
plt.plot(epochs, acc, 'bo', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()



KeyboardInterrupt: 

In [32]:
model.save('CNNs_for_ARS.h5')

In [83]:
import pyaudio
import wave
import librosa
import numpy as np
from keras.models import load_model
import cv2

# Tải file trọng số đã huấn luyện
model = load_model('CNNs_for_ARS.h5')

# Thiết lập tham số ghi âm
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024
RECORD_SECONDS = 1.5

# Khởi tạo đối tượng PyAudio
p = pyaudio.PyAudio()

# Ghi âm đoạn âm thanh
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)

print("Đang ghi âm...")
frames = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)

print("Ghi âm xong!")

# Dừng stream và terminate PyAudio
stream.stop_stream()
stream.close()
p.terminate()

# Lưu đoạn ghi âm vào file WAV
wf = wave.open("output.wav", "wb")
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b"".join(frames))
wf.close()

# Sử dụng librosa để xử lý tín hiệu âm thanh và rút trích đặc trưng
y, sr = librosa.load('output.wav', sr=16000)

# Trích xuất đặc trưng Mel spectrogram
spec = librosa.feature.melspectrogram(y=y, sr=sr)

# Chuyển đổi sang độ phân giải đồ họa cao hơn
S_dB = librosa.power_to_db(spec, ref=np.max)

plt.figure(figsize=(10,4))
librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', cmap='jet')
plt.axis('off')
plt.tight_layout()
plt.savefig('test4.png', bbox_inches='tight', pad_inches=0)
plt.close()

image = cv2.imread("gtdq.png")
prediction = model.predict(np.expand_dims(preprocess_image(image),0))

predicted_label = np.argmax(prediction)

print(predicted_label)

Đang ghi âm...
Ghi âm xong!
0


In [59]:
print(class_names)

['batden', 'batdieuhoa', 'batquat', 'dongcua', 'giamtocdoquat', 'mocua', 'tangtocdoquat', 'tatden', 'tatdieuhoa', 'tatquat']
