In [None]:
! pip install datasets

In [None]:
import glob
import os
import pandas as pd
import numpy as np
import shutil
import librosa
from tqdm import tqdm
from datasets import load_dataset
from huggingface_hub import login
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

In [None]:
# Read the Hugging Face access token from the environment instead of relying on a
# `token` variable that is never defined in this notebook (NameError on a fresh kernel).
# When HF_TOKEN is unset, token=None makes login() ask for the token interactively.
login(token=os.environ.get("HF_TOKEN"))

## Dataset

In [None]:
# Uzbek ("uz") subset of Common Voice 13.0, with the train and validation splits merged
dataset = load_dataset(
    "mozilla-foundation/common_voice_13_0",
    "uz",
    split="train+validation",
)

## Data analysis

In [None]:
df = dataset.to_pandas()

In [None]:
df.head()

In [None]:
new_df = df[['path', 'gender']]

In [None]:
new_df.head()

In [None]:
# Keep only the rows whose gender label is exactly 'male' or 'female'
new_df = new_df[new_df['gender'].isin(['male', 'female'])]

In [None]:
new_df.head()

In [None]:
male = new_df[new_df['gender']=='male']
male

In [None]:
female = new_df[new_df['gender']=='female']
female

In [None]:
# Keep only the first 12,000 male rows
# NOTE(review): presumably to balance the two classes — confirm the intended count
male = male.head(12000)

In [None]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way to stack frames.
# After this cell `female` holds the female rows followed by the capped male rows
# (the name is kept because later cells read the combined frame through it).
# Re-running this cell alone stacks `male` again — re-run from the `female = ...` cell instead.
female = pd.concat([female, male])
female.shape

In [None]:
# Spot-check the `path` value at positional row 20002 of the combined frame
# (raises IndexError if the frame has fewer than 20003 rows)
female.iloc[20002].path

In [None]:
def extract_feature(file_name):
  """Return a 1-D feature vector: the mel spectrogram of an audio file averaged per band.

  Args:
    file_name: Path of the audio file, passed straight to ``librosa.core.load``.

  Returns:
    A 1-D float64 NumPy array with one mean value per mel band
    (librosa's default number of bands is used, since none is specified here).
  """
  waveform, sr = librosa.core.load(file_name)
  mel_spec = librosa.feature.melspectrogram(y=waveform, sr=sr)
  # After the transpose axis 0 is averaged away, leaving one value per row of `mel_spec`
  mel_mean = np.mean(mel_spec.T, axis=0)
  # Joining onto an empty float64 array keeps the values and yields a float64 result
  return np.concatenate((np.empty(0), mel_mean))

In [None]:
dirname = 'data'

In [None]:
# Create the feature output directory unless it is already there
os.makedirs(dirname, exist_ok=True)

In [None]:
female.to_csv("common_voice_uz.csv", index=False)


In [None]:
# Directory holding the extracted audio of one training shard.
# NOTE(review): this is a hard-coded, machine-specific Hugging Face cache path (the hash
# segment will differ on another machine or download) — adjust before re-running elsewhere.
folder_name = "/root/.cache/huggingface/datasets/downloads/extracted/fbd7acc8601b294a139eddae148d842e1e608fedb3f908e9ffa20843ace27515/uz_train_0/"
# Every entry directly inside that directory
audio_files = glob.glob(folder_name + "*")
# Unique `path` values of the selected (female + male) rows
all_audio_filename = set(female["path"].tolist())

In [None]:
len(all_audio_filename)

In [None]:
# Basenames of the selected clips. The `path` column may hold bare file names or full
# cache paths, so both sides of the membership test are reduced to the basename.
selected_names = {os.path.basename(p) for p in all_audio_filename}

# Create the output directory once, instead of re-checking it for every file
os.makedirs(dirname, exist_ok=True)

for audio_file in tqdm(audio_files, f"Extracting features of {folder_name}"):
    audio_filename = os.path.basename(audio_file)
    # Only process clips that belong to the gender-labelled selection; previously
    # `all_audio_filename` was computed but never consulted, so every file was processed.
    if audio_filename not in selected_names:
        continue
    features = extract_feature(audio_file)
    # splitext strips only the final extension, so names containing extra dots are not
    # truncated (and cannot collide) the way `split(".")[0]` would make them.
    target_filename = os.path.join(dirname, os.path.splitext(audio_filename)[0])
    # np.save appends the ".npy" suffix itself
    np.save(target_filename, features)

In [None]:
!unzip results.zip

In [None]:
def load_data(vector_length=128):
  """Load the bundled feature matrix and label vector from the ``results`` directory.

  Args:
    vector_length: Not used by this loader; kept so existing calls keep working.

  Returns:
    ``(X, y)`` as read by ``np.load`` from ``results/features.npy`` and
    ``results/labels.npy``. ``(None, None)`` is returned, after printing a hint,
    when the directory or either file is missing.
  """
  if not os.path.isdir("results"):
    print("Results directory not found, please run the preprocessing script first.")
    return None, None

  features_path = "results/features.npy"
  labels_path = "results/labels.npy"

  # Always hand back a 2-tuple: falling through here used to return a bare None,
  # which made `X, y = load_data()` fail with a TypeError.
  if not (os.path.isfile(features_path) and os.path.isfile(labels_path)):
    print("Results directory is missing features.npy and/or labels.npy, please run the preprocessing script first.")
    return None, None

  return np.load(features_path), np.load(labels_path)

In [None]:
X, y = load_data()

In [None]:
print(X)
print('\n',y)

In [None]:
print(len(X), len(y))

In [None]:
def split_data(X, y, test_size=0.1, valid_size=0.1):
  """Split features and labels into train, validation and test parts.

  The test part is carved off first; the validation part is then taken from what
  remains, so ``valid_size`` is a fraction of the remainder, not of the full data.
  Both splits use the fixed ``random_state=7``.

  Args:
    X: Feature samples.
    y: Labels aligned with ``X``.
    test_size: Passed as ``test_size`` to the first ``train_test_split`` call.
    valid_size: Passed as ``test_size`` to the second ``train_test_split`` call.

  Returns:
    A dict with the keys ``X_train``, ``X_valid``, ``X_test``, ``y_train``,
    ``y_valid`` and ``y_test``.
  """
  seed = 7
  # First cut: hold out the test set
  X_rest, X_test, y_rest, y_test = train_test_split(X, y, test_size=test_size, random_state=seed)
  # Second cut: take the validation set out of the remaining samples
  X_train, X_valid, y_train, y_valid = train_test_split(X_rest, y_rest, test_size=valid_size, random_state=seed)
  return dict(
      X_train=X_train,
      X_valid=X_valid,
      X_test=X_test,
      y_train=y_train,
      y_valid=y_valid,
      y_test=y_test,
  )

In [None]:
data = split_data(X, y, test_size=0.1, valid_size=0.1)
data

## Training

In [None]:
# Fully connected classifier over 128-value feature vectors; every hidden Dense layer
# is followed by 30% dropout.
model = Sequential([
    # NOTE(review): 226 units and no activation on the first layer — kept exactly as-is;
    # confirm both are intentional (the next layer uses 256 units with ReLU).
    Dense(226, input_shape=(128,)),
    Dropout(0.3),
    Dense(256, activation="relu"),
    Dropout(0.3),
    Dense(128, activation="relu"),
    Dropout(0.3),
    Dense(128, activation="relu"),
    Dropout(0.3),
    Dense(64, activation="relu"),
    Dropout(0.3),
    # Single sigmoid output unit. Per the original note 0 means female and 1 means male;
    # the label encoding itself is not visible in this notebook — verify against the labels.
    Dense(1, activation="sigmoid"),
])



In [None]:
# Binary cross-entropy fits the two-class (male/female) problem with one sigmoid output
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
# Show the layer-by-layer overview of the model
model.summary()

In [None]:
batch_size = 64
epochs = 100

In [None]:
# EarlyStopping stops training once the accuracy of the model being trained no longer improves beyond a certain point
#early_stopping = EarlyStopping(mode="min", patience=5, restore_best_weights=True)

In [None]:
model.fit(data["X_train"], data["y_train"], epochs=epochs, batch_size=batch_size, validation_data=(data["X_valid"], data["y_valid"]))

In [None]:
model.save("results/model.h5")