In [1]:
import tensorflow as tf
import os

In [23]:
# Root folder of the audio corpus; class labels are inferred from its sub-folders.
dataset_path = "dataset"

# Options shared by the two splits. Both calls must use the same seed and
# validation_split so that they are drawn from one and the same partition.
split_options = dict(
    labels="inferred",
    label_mode="categorical",
    batch_size=4,
    output_sequence_length=48000,
    shuffle=True,
    seed=59,
    validation_split=0.2,
)

# 80% of the files: used to fit the model.
train_data = tf.keras.utils.audio_dataset_from_directory(
    dataset_path, subset="training", **split_options
)

# Remaining 20% of the files: used for validation.
validation_data = tf.keras.utils.audio_dataset_from_directory(
    dataset_path, subset="validation", **split_options
)

Found 1615 files belonging to 18 classes.
Using 1292 files for training.
Found 1615 files belonging to 18 classes.
Using 323 files for validation.


In [30]:
# 1-D convolutional classifier over raw waveforms: input is 48000 samples with
# one channel, output is a softmax over 18 classes.
keras_layers = tf.keras.layers

# (filters, kernel_size) of the convolutions that follow the strided front-end;
# each one is followed by a max-pool of size 4.
conv_specs = [(128, 3), (64, 10), (64, 10), (32, 3)]

model = tf.keras.models.Sequential()

# Front-end convolution: kernel of 40 samples moved with a stride of 16.
model.add(
    keras_layers.Conv1D(
        32,
        kernel_size=40,
        strides=16,
        activation="relu",
        input_shape=(48000, 1),
    )
)
model.add(keras_layers.MaxPool1D(4))

for n_filters, kernel_width in conv_specs:
    model.add(keras_layers.Conv1D(n_filters, kernel_size=kernel_width, activation="relu"))
    model.add(keras_layers.MaxPool1D(4))

# Classification head.
model.add(keras_layers.Flatten())
model.add(keras_layers.Dense(32, activation="relu"))
model.add(keras_layers.Dense(18, activation="softmax"))

In [31]:
# Both callbacks watch validation accuracy: one stops training after 50 epochs
# without improvement, the other saves the model only when the metric improves.
stop_early = tf.keras.callbacks.EarlyStopping(patience=50, monitor="val_accuracy")
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath="best_model_voice",
    monitor="val_accuracy",
    save_best_only=True,
)

# Categorical cross-entropy matches the one-hot labels and softmax output.
model.compile(
    loss="categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["accuracy"],
)

In [None]:
# Fit for up to 150 epochs with the early-stopping and checkpoint callbacks attached.
training_callbacks = [stop_early, checkpoint]
model.fit(
    x=train_data,
    epochs=150,
    validation_data=validation_data,
    callbacks=training_callbacks,
)

In [34]:
# Reload the model written by the checkpoint callback and score it on the
# validation split; the displayed list holds the loss followed by the metrics.
checkpoint_path = "best_model_voice"
loaded_model = tf.keras.models.load_model(checkpoint_path)
loaded_model.evaluate(x=validation_data)



[0.8015758395195007, 0.9102166891098022]

In [98]:

# Build a dataset from the recordings under "javad" and run the reloaded model
# on it; the displayed array has one row of class probabilities per file.
data = tf.keras.utils.audio_dataset_from_directory(
    "javad",
    # Pad/truncate every clip to the 48000-sample length used when building the
    # training and validation datasets, which is also the model's input length.
    output_sequence_length=48000,
    # Keep the files in alphanumeric order so each prediction row can be matched
    # back to its file; the default shuffle=True would randomize the order.
    shuffle=False,
)
loaded_model.predict(data)

Found 1 files belonging to 1 classes.


array([[1.1512031e-16, 3.4618918e-27, 1.7685301e-22, 1.0000000e+00,
        7.3078217e-37, 3.3823621e-08, 1.3705251e-21, 9.1422744e-16,
        0.0000000e+00, 4.0546998e-28, 0.0000000e+00, 1.3558781e-26,
        0.0000000e+00, 7.2986915e-20, 0.0000000e+00, 0.0000000e+00,
        2.1297467e-25, 2.5654458e-35]], dtype=float32)

## Inference

In [7]:
import pydub
import tensorflow as tf
import os
import numpy as np

# Identify the speaker of one voice message: normalize the recording, strip
# long silences, write it out as WAV, and classify it with the saved model.
path = "javad/jav/javad_voice_test2.ogg"
audio = pydub.AudioSegment.from_file(path)
# Output folder of the optional per-chunk export that is commented out below.
os.makedirs("sample/voice_wav",exist_ok=True)
# Normalize to 2-byte samples, a 48000 Hz frame rate, and a single channel.
audio = audio.set_sample_width(2)
audio = audio.set_frame_rate(48000)
audio = audio.set_channels(1)
# Cut the recording wherever it stays below -45 dBFS for at least 2000 ms.
chunks = pydub.silence.split_on_silence(audio, min_silence_len=2000, silence_thresh=-45)
# sum() of an empty list is the integer 0, which has no export(); when no
# non-silent chunk is found, fall back to the unsplit recording instead.
result = sum(chunks) if chunks else audio
# chunks = pydub.utils.make_chunks(result, 1000)
# for i,chunk in enumerate(chunks):
#     if len(chunk) >= 1000:
#         chunk.export(f"sample/voice_wav/sample_{i}.wav", format="wav")

loaded_model = tf.keras.models.load_model('best_model_voice')

# Class index -> speaker name. Keras assigns inferred class indices in
# alphanumeric order while os.listdir() returns entries in arbitrary order,
# so sort the names to get a stable mapping.
# NOTE(review): assumes "data" holds one entry per speaker, named like the
# class folders the model was trained on — confirm.
persons = sorted(f.split('.')[0] for f in os.listdir("data"))

# Round-trip through a WAV file so TensorFlow's decoder can read the audio.
path = "new_file.wav"
result.export(path, format="wav")
x = tf.io.read_file(path)
# decode_wav pads/crops to 48000 samples and returns (samples, channels).
x, sample_rate = tf.audio.decode_wav(x, desired_channels=1, desired_samples=48000)
# Add only the batch axis: (48000, 1) -> (1, 48000, 1). The channel axis is
# kept because the model was declared with input_shape=(48000, 1).
x = x[tf.newaxis, ...]

pred = loaded_model(x)
predicted_index = int(np.argmax(pred))
print(predicted_index)
print("someone send a voice: ",persons[predicted_index])

Found 11 files belonging to 1 classes.
['abdollah', 'azra', 'davood', 'javad', 'kiana', 'matin', 'mohamad', 'mohamadd', 'mona', 'nima', 'omid', 'parisa', 'parsa', 'saeedi', 'sajedeh', 'shima', 'tara', 'valipour']
median:  3.0
[3, 3, 7, 5, 0, 2, 3, 2, 3, 2, 14]


'javad'

In [6]:
import os

# Names of the entries under singer_voices/dataset.
# os.path.join builds the path portably; the previous literal
# "singer_voices\dataset" contained the invalid escape sequence "\d" and only
# resolved as a path on Windows. os.listdir() returns entries in arbitrary
# order, so the result is sorted to make the listing deterministic.
persons = sorted(os.listdir(os.path.join("singer_voices", "dataset")))
persons

['chavoshi', 'ebi', 'rezasadeghi', 'shadmehr', 'yegane']