# Training


In [None]:
import tqdm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
import pickle

def load_data(vector_length=128):
    """Load the cached gender-recognition features and labels.

    Reads ``GenderRecognition/features.npy`` and ``GenderRecognition/labels.npy``
    (paths relative to the current working directory) and returns their contents.

    Args:
        vector_length: Unused by this function; kept so existing callers that
            pass it keep working.

    Returns:
        A ``(X, y)`` tuple holding the arrays stored in the two ``.npy`` files.

    Raises:
        FileNotFoundError: If either cache file is missing. Previously the
            function fell through and returned ``None``, which surfaced at the
            call site as an opaque "cannot unpack NoneType" error.
    """
    features_path = "GenderRecognition/features.npy"
    labels_path = "GenderRecognition/labels.npy"

    # Report every missing file at once so the user can fix them in one go.
    missing = [path for path in (features_path, labels_path) if not os.path.isfile(path)]
    if missing:
        raise FileNotFoundError(
            f"Cached dataset file(s) not found: {', '.join(missing)}"
        )

    X = np.load(features_path)
    y = np.load(labels_path)
    return X, y
    
# NOTE(review): the same load_data() call is repeated further down in this cell,
# right before split_data() is used, so this first load looks redundant — confirm
# before removing.
X, y = load_data()

def split_data(X, y, test_size=0.1, valid_size=0.1):
    """Partition features and labels into train, validation and test subsets.

    The test subset is carved out first; the validation subset is then taken
    from what remains, so ``valid_size`` is a fraction of the post-test
    training portion rather than of the full dataset. Both splits use the same
    fixed seed (7) so repeated calls on the same data give the same partition.

    Returns:
        A dict with the keys ``X_train``, ``X_valid``, ``X_test``, ``y_train``,
        ``y_valid`` and ``y_test``.
    """
    seed = 7
    # First cut: hold out the test portion.
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, y, test_size=test_size, random_state=seed
    )
    # Second cut: hold out the validation portion from the remainder.
    X_train, X_valid, y_train, y_valid = train_test_split(
        X_rest, y_rest, test_size=valid_size, random_state=seed
    )
    splits = dict(
        X_train=X_train,
        X_valid=X_valid,
        X_test=X_test,
        y_train=y_train,
        y_valid=y_valid,
        y_test=y_test,
    )
    return splits

# Read the cached features/labels, then carve out the train, validation and
# test subsets (10% held out for testing, 10% of the remainder for validation).
X, y = load_data()
data = split_data(
    X,
    y,
    valid_size=0.1,
    test_size=0.1,
)

def create_model(vector_length=128):
    """Build and compile the feed-forward gender classifier.

    Architecture: 5 hidden dense layers from 256 units down to 64, each
    followed by 30% dropout, then a single sigmoid output unit (0 means
    female, 1 means male). Not the best model.

    Args:
        vector_length: Length of the input feature vector.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss, Adam
        optimizer, accuracy metric).
    """
    dropout_rate = 0.3
    model = Sequential()
    # NOTE(review): the first hidden layer has no activation argument, unlike
    # the ones below — confirm that is intended. Kept unchanged here so that
    # the architecture stays exactly as before.
    model.add(Dense(256, input_shape=(vector_length,)))
    model.add(Dropout(dropout_rate))
    for units in (256, 128, 128, 64):
        model.add(Dense(units, activation="relu"))
        model.add(Dropout(dropout_rate))
    # one output neuron with sigmoid activation function, 0 means female, 1 means male
    model.add(Dense(1, activation="sigmoid"))
    # using binary crossentropy as it's male/female classification (binary)
    model.compile(loss="binary_crossentropy", metrics=["accuracy"], optimizer="adam")
    # summary() prints the table itself and returns None; wrapping it in
    # print() emitted a stray "None" line after the table.
    model.summary()
    return model

# construct the model
model = create_model()

# use tensorboard to view metrics
tensorboard = TensorBoard(log_dir="logs")
# define early stopping to stop training after 5 epochs of not improving,
# rolling back to the best weights seen
early_stopping = EarlyStopping(mode="min", patience=5, restore_best_weights=True)

batch_size = 64
epochs = 100
# train the model using the training set and validating using validation set
model.fit(
    data["X_train"],
    data["y_train"],
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(data["X_valid"], data["y_valid"]),
    callbacks=[tensorboard, early_stopping],
)

# save the model to a file
# NOTE(review): pickling a Keras model is fragile across library versions;
# model.save(...) is the documented route. Pickle is kept because the
# inference code (test_sample) loads this exact file with pickle.load.
# The context manager guarantees the file is flushed and closed.
with open("GenderRecognition/TrainedModels/gender.pickle", "wb") as model_file:
    pickle.dump(model, model_file)

# evaluating the model using the testing set
# (the numbers in the trailing comments are presumably from an earlier run)
print(f"Evaluating the model using {len(data['X_test'])} samples...")  # 6694
loss, accuracy = model.evaluate(data["X_test"], data["y_test"], verbose=0)
print(f"Loss: {loss:.4f}")
# 0.2143
print(f"Accuracy: {accuracy*100:.2f}%")
# 92.02%

def conf_matrix():
    """Draw the confusion matrix of the test-set predictions as a heatmap.

    Compares the first 10000 entries of ``data["y_test"]`` against the
    module-level ``svm_predictions`` and shows the counts in an annotated
    8x8-inch figure.

    NOTE(review): ``svm_predictions`` is not defined in this part of the
    file; presumably it is produced elsewhere and has matching length —
    confirm against its producer.
    """
    true_labels = data["y_test"][:10000]
    matrix = confusion_matrix(true_labels, svm_predictions)

    plt.figure(figsize=(8, 8))
    plt.title('Confusion matrix on test data')
    # Integer cell annotations on a blue colour map, without a colour bar.
    heatmap_options = {
        "annot": True,
        "fmt": 'd',
        "cmap": plt.cm.Blues,
        "cbar": False,
        "annot_kws": {'size': 14},
    }
    sns.heatmap(matrix, **heatmap_options)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.show()

In [None]:
import argparse
import librosa
import pickle
import os
"""
    Extract feature from audio file `file_name`
        Features supported:
            - MFCC (mfcc)
            - Chroma (chroma)
            - MEL Spectrogram Frequency (mel)
            - Contrast (contrast)
            - Tonnetz (tonnetz)
        e.g:
        `features = extract_feature(path, mel=True, mfcc=True)`
    """
def extract_feature(file_name, **kwargs):
    """Extract a flat feature vector from the audio file ``file_name``.

    Each feature is switched on by passing its keyword with a truthy value:
    ``mfcc``, ``chroma``, ``mel``, ``contrast``, ``tonnetz``. Every enabled
    feature is averaged over time and the per-feature vectors are
    concatenated in that fixed order.

    Example:
        ``features = extract_feature(path, mel=True, mfcc=True)``

    Returns:
        A 1-D array with the concatenated features; empty when no feature
        keyword is enabled.
    """
    want_mfcc = kwargs.get("mfcc")
    want_chroma = kwargs.get("chroma")
    want_mel = kwargs.get("mel")
    want_contrast = kwargs.get("contrast")
    want_tonnetz = kwargs.get("tonnetz")

    signal, sample_rate = librosa.core.load(file_name)

    # The magnitude spectrogram is shared by the chroma and contrast features,
    # so it is only computed when one of them is requested.
    if want_chroma or want_contrast:
        stft = np.abs(librosa.stft(signal))

    # Seed with an empty array so the stacked result keeps the same dtype
    # as stacking onto np.array([]) one feature at a time.
    pieces = [np.array([])]
    if want_mfcc:
        pieces.append(
            np.mean(librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40).T, axis=0)
        )
    if want_chroma:
        pieces.append(
            np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        )
    if want_mel:
        pieces.append(
            np.mean(librosa.feature.melspectrogram(y=signal, sr=sample_rate).T, axis=0)
        )
    if want_contrast:
        pieces.append(
            np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
        )
    if want_tonnetz:
        harmonic_part = librosa.effects.harmonic(signal)
        pieces.append(
            np.mean(librosa.feature.tonnetz(y=harmonic_part, sr=sample_rate).T, axis=0)
        )
    return np.hstack(pieces)

# Command-line interface: one optional -f/--file argument naming the audio sample.
parser = argparse.ArgumentParser(description="""Gender recognition script, this will load the model you trained, 
                                    and perform inference on a sample you provide (either using your voice or a file)""")
parser.add_argument("-f", "--file", help="The path to the file, preferred to be in WAV format")
# parse_known_args() rather than parse_args(): when this code runs inside a host
# process (notebook kernel, test runner), sys.argv carries arguments that belong
# to the host, and parse_args() would terminate the process with SystemExit on
# the first one it does not recognise. Unknown arguments are simply ignored.
args = parser.parse_known_args()[0]


In [None]:
def test_sample(file):
    """Predict the speaker's gender for the audio file at ``file``.

    Loads the pickled model from ``GenderRecognition/TrainedModels/gender.pickle``,
    extracts the mel-spectrogram features of the file and runs one prediction.

    Args:
        file: Path to the audio file. ``None`` or an empty string is reported
            as a message instead of failing inside the audio loader.

    Returns:
        A human-readable string with the predicted gender and both
        probabilities (as percentages), or a short message when no file was
        given.
    """
    if not file:
        return "No audio file provided"

    path_to_model = "GenderRecognition/TrainedModels/"
    # NOTE(security): unpickling executes arbitrary code; only load model
    # files you produced yourself. The context manager closes the handle,
    # which the previous bare open() never did.
    with open(path_to_model + "gender.pickle", "rb") as model_file:
        loaded_model = pickle.load(model_file)

    features = extract_feature(file, mel=True).reshape(1, -1)
    # predict the gender! (one forward pass, reused for the debug print and the result)
    prediction = loaded_model.predict(features)
    print(prediction)
    male_prob = prediction[0][0]
    female_prob = 1 - male_prob
    gender = "male" if male_prob > female_prob else "female"
    # show the result!
    result = (
        f"Result: {gender}, Probabilities::: Male: {male_prob*100:.2f}"
        f"   Female: {female_prob*100:.2f}"
    )
    print(result)
    return result

In [None]:
import gradio as gr
def gender_recoginition(test_example):
    """Gradio callback: run ``test_sample`` on the given audio path and return its result text."""
    prediction_text = test_sample(test_example)
    return prediction_text

# Web UI: one uploaded audio clip in (handed to the callback as a file path),
# plain text out. live=True is passed so the interface does not wait for an
# explicit submit (see the Gradio Interface docs for the exact semantics).
gender_prediction_with_file = gr.Interface(
    live=True,
    outputs="text",
    inputs=[
        gr.Audio(
            type="filepath",
            format="wav",
            sources=["upload"],
            show_download_button=True,
        ),
    ],
    fn=gender_recoginition,
)
gender_prediction_with_file.launch()