In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
from concurrent.futures import ThreadPoolExecutor
import time
from sklearn.preprocessing import StandardScaler

In [3]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [4]:
from matplotlib import pyplot as plt
import seaborn as sns
import librosa
import IPython.display as ipd
import librosa.display
from IPython.display import Audio

In [5]:
import warnings
warnings.filterwarnings('ignore')

In [6]:
#audio path
dataset_path = '/kaggle/input/common-language/CommonLanguage/common_voice_kpd'

## Fungsi visualized_audio, ektraksi fitur, trim_audio, load_data

In [7]:
# Fungsi untuk memvisualisasikan data audio dalam subplot
def visualize_audio(audio_data, sample_rate):
    try:
        fig, axs = plt.subplots(5, 1, figsize=(12, 15))
        
        # Plot waveplot
        librosa.display.waveshow(audio_data, sr=sample_rate, ax=axs[0])
        axs[0].set(title='Waveplot')
        
        # Plot mel-spectrogram
        mel_spect = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate, n_mels=128)
        mel_spect_db = librosa.power_to_db(mel_spect, ref=np.max)
        img = librosa.display.specshow(mel_spect_db, sr=sample_rate, x_axis='time', y_axis='mel', ax=axs[1])
        fig.colorbar(img, ax=axs[1], format='%+2.0f dB')
        axs[1].set(title='Mel-Spectrogram')
        
        # Plot chroma feature
        stft = np.abs(librosa.stft(audio_data))
        chroma = librosa.feature.chroma_stft(S=stft, sr=sample_rate)
        img = librosa.display.specshow(chroma, sr=sample_rate, x_axis='time', y_axis='chroma', ax=axs[2])
        fig.colorbar(img, ax=axs[2])
        axs[2].set(title='Chroma Feature')
        
        # Plot spectral contrast
        spectral_contrast = librosa.feature.spectral_contrast(S=stft, sr=sample_rate)
        img = librosa.display.specshow(spectral_contrast, sr=sample_rate, x_axis='time', ax=axs[3])
        fig.colorbar(img, ax=axs[3])
        axs[3].set(title='Spectral Contrast')
        
        # Plot tonnetz feature
        harmonic = librosa.effects.harmonic(audio_data)
        tonnetz = librosa.feature.tonnetz(y=harmonic, sr=sample_rate)
        img = librosa.display.specshow(tonnetz, sr=sample_rate, x_axis='time', ax=axs[4])
        fig.colorbar(img, ax=axs[4])
        axs[4].set(title='Tonnetz')
        
        plt.tight_layout()
        plt.show()
    
    except Exception as e:
        print(f"Error encountered while parsing audio data. Error message: {str(e)}")

In [8]:
# Fungsi untuk ekstraksi fitur MFCC dan SDC dengan normalisasi
def extract_mfcc_and_sdc_normalized(audio_data, sample_rate, n_mfcc=13, n_mels=26, n_fft=512, hop_length=256):
    try:
        # Ekstraksi MFCC
        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
        
        # Ekstraksi SDC (Spectral Delta Coefficients)
        mfcc_delta = librosa.feature.delta(mfccs)
        mfcc_delta2 = librosa.feature.delta(mfccs, order=2)
        
        # Gabungkan hasilnya
        features = np.vstack([mfccs, mfcc_delta, mfcc_delta2])
        
        # Normalisasi menggunakan StandardScaler
        scaler = StandardScaler()
        features_normalized = scaler.fit_transform(features.T).T
        
        # Return normalized features
        return np.mean(features_normalized, axis=1)
    
    except Exception as e:
        print(f"Error encountered while extracting features. Error message: {str(e)}")
        return None

# Fungsi untuk ekstraksi fitur dengan multithreading
def extract_features_multithreaded(trimmed_audio_list):
    features = []
    
    def extract_features_single(audio_data, sample_rate):
        return extract_mfcc_and_sdc_normalized(audio_data, sample_rate)
    
    with ThreadPoolExecutor(max_workers=4) as executor:  # Sesuaikan jumlah max_workers sesuai kebutuhan
        futures = []
        start_time = time.time()
        for audio_data, sample_rate in trimmed_audio_list:
            futures.append(executor.submit(extract_features_single, audio_data, sample_rate))
        
        for future in futures:
            feature_vector = future.result()
            if feature_vector is not None:
                features.append(feature_vector)
        
        end_time = time.time()
        print(f"Total time taken for feature extraction: {end_time - start_time:.2f} seconds")
    
    return np.array(features)

In [9]:
# Fungsi untuk melakukan trimming pada data audio
def trim_silence(audio_data, sample_rate):
    trimmed_audio, index = librosa.effects.trim(audio_data,top_db=15, hop_length=32)
    return trimmed_audio

In [10]:
# Fungsi untuk load data audio dengan multithreading dan trim audio
def load_and_trim_audio_multithreaded(dataset_path):
    trimmed_audio_list = []
    labels = []
    
    def load_and_trim_audio(file_path):
        try:
            audio_data, sample_rate = librosa.load(file_path, sr=None)
            trimmed_audio = trim_silence(audio_data, sample_rate)
            return trimmed_audio, sample_rate
        except Exception as e:
            print(f"Error processing file {file_path}: {e}")
            return None, None
    
    def process_folder(folder_path, label):
        if os.path.isdir(folder_path):
            for subfolder in os.listdir(folder_path):
                subfolder_path = os.path.join(folder_path, subfolder)
                if os.path.isdir(subfolder_path):
                    for file in os.listdir(subfolder_path):
                        file_path = os.path.join(subfolder_path, file)
                        if file_path.endswith(".wav"):
                            futures.append(executor.submit(load_and_trim_audio, file_path))
                            labels.append(label)
    
    with ThreadPoolExecutor(max_workers=4) as executor:  # Sesuaikan jumlah max_workers sesuai kebutuhan
        futures = []
        start_time = time.time()
        for language_folder in os.listdir(dataset_path):
            language_folder_path = os.path.join(dataset_path, language_folder)
            if os.path.isdir(language_folder_path):
                train_path = os.path.join(language_folder_path, 'train')
                test_path = os.path.join(language_folder_path, 'test')
                
                # Process files in train and test folders
                process_folder(train_path, language_folder)
                process_folder(test_path, language_folder)
        
        # Retrieve results from futures
        for future in futures:
            trimmed_audio, sample_rate = future.result()
            if trimmed_audio is not None:
                trimmed_audio_list.append((trimmed_audio, sample_rate))
        
        end_time = time.time()
        print(f"Total time taken to load and trim audio data: {end_time - start_time:.2f} seconds")
    
    return trimmed_audio_list, labels

In [11]:
trimmed_audio_list, labels = load_and_trim_audio_multithreaded(dataset_path)

Total time taken to load and trim audio data: 1132.18 seconds


In [12]:
print(f"Loaded {len(trimmed_audio_list)} audio samples with {len(set(labels))} unique labels.")

Loaded 28159 audio samples with 45 unique labels.


In [13]:
# Menghitung jumlah unique labels
unique_labels = np.unique(labels)
num_unique_labels = len(unique_labels)

print(f"Unique labels: {unique_labels}")
print(f"Number of unique labels: {num_unique_labels}")

Unique labels: ['Arabic' 'Basque' 'Breton' 'Catalan' 'Chinese_China' 'Chinese_Hongkong'
 'Chinese_Taiwan' 'Chuvash' 'Czech' 'Dhivehi' 'Dutch' 'English'
 'Esperanto' 'Estonian' 'French' 'Frisian' 'Georgian' 'German' 'Greek'
 'Hakha_Chin' 'Indonesian' 'Interlingua' 'Italian' 'Japanese' 'Kabyle'
 'Kinyarwanda' 'Kyrgyz' 'Latvian' 'Maltese' 'Mangolian' 'Persian' 'Polish'
 'Portuguese' 'Romanian' 'Romansh_Sursilvan' 'Russian' 'Sakha' 'Slovenian'
 'Spanish' 'Swedish' 'Tamil' 'Tatar' 'Turkish' 'Ukranian' 'Welsh']
Number of unique labels: 45


In [14]:
# Ekstrak fitur
features = extract_features_multithreaded(trimmed_audio_list)

Total time taken for feature extraction: 432.72 seconds


In [15]:
# Membuat DataFrame pandas dari fitur dan label
df_features = pd.DataFrame(features)
df_labels = pd.DataFrame(labels, columns=['label'])
df = pd.concat([df_features, df_labels], axis=1)

In [16]:
df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,29,30,31,32,33,34,35,36,37,38
count,28159.0,28159.0,28159.0,28159.0,28159.0,28159.0,28159.0,28159.0,28159.0,28159.0,...,28159.0,28159.0,28159.0,28159.0,28159.0,28159.0,28159.0,28159.0,28159.0,28159.0
mean,6.910079e-11,1.408553e-10,-7.043115e-12,-1.162183e-10,1.327661e-10,-2.198298e-11,-4.717356e-11,6.172987e-12,-1.097931e-10,-5.282985e-11,...,4.090202e-11,-1.389069e-11,2.214312e-11,4.102461e-11,1.67883e-11,9.512254e-11,2.156271e-11,1.330559e-11,-6.03409e-11,-1.052752e-11
std,1.373152e-08,1.125222e-08,1.192937e-08,1.192134e-08,1.170986e-08,1.151631e-08,1.169257e-08,1.137766e-08,1.133786e-08,1.156238e-08,...,1.093332e-08,1.119896e-08,1.137219e-08,1.143677e-08,1.13626e-08,1.153731e-08,1.143402e-08,1.157721e-08,1.159128e-08,1.150562e-08
min,-8.923269e-08,-6.119835e-08,-7.243096e-08,-6.253602e-08,-7.629394e-08,-6.692451e-08,-7.629394e-08,-7.629394e-08,-6.287963e-08,-8.60458e-08,...,-5.20186e-08,-6.061489e-08,-6.502325e-08,-5.683233e-08,-5.10897e-08,-5.960464e-08,-6.087283e-08,-5.629327e-08,-6.953875e-08,-6.737916e-08
25%,-8.914034e-09,-6.961126e-09,-7.509247e-09,-7.629395e-09,-7.392824e-09,-7.450581e-09,-7.450581e-09,-7.279957e-09,-7.36428e-09,-7.450581e-09,...,-7.193664e-09,-7.609104e-09,-7.641622e-09,-7.670303e-09,-7.774519e-09,-7.817002e-09,-7.757957e-09,-7.690922e-09,-7.947286e-09,-7.774519e-09
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,9.070272e-09,7.210239e-09,7.579383e-09,7.279957e-09,7.649794e-09,7.36428e-09,7.392824e-09,7.279957e-09,7.224806e-09,7.335956e-09,...,7.361439e-09,7.529008e-09,7.70461e-09,7.791457e-09,7.774519e-09,7.991685e-09,7.836885e-09,7.762631e-09,7.715812e-09,7.629395e-09
max,9.266471e-08,6.031142e-08,9.137e-08,8.38395e-08,6.287963e-08,6.653542e-08,5.7508e-08,8.15978e-08,6.85275e-08,6.894031e-08,...,6.19269e-08,7.586046e-08,5.74372e-08,5.960464e-08,5.796415e-08,5.82801e-08,5.960464e-08,5.739707e-08,5.383645e-08,7.256217e-08


In [17]:
from sklearn.preprocessing import LabelEncoder

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Fit LabelEncoder with your actual labels
label_encoder.fit(df['label'])

# Transform actual labels to numeric labels
df['numeric_labels'] = label_encoder.transform(df['label'])

In [18]:
X = df.drop(columns=['label','numeric_labels'])
y = df['numeric_labels']

# Bagi data menjadi data latih dan data uji
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [25]:
from sklearn.ensemble import RandomForestClassifier

# Inisialisasi model Random Forest
rf_model = RandomForestClassifier(random_state=42)

# Latih model pada data latih
rf_model.fit(X_train, y_train)

In [26]:
from sklearn.metrics import accuracy_score, classification_report

# Prediksi dengan data uji
y_pred = rf_model.predict(X_test)

# Evaluasi akurasi
accuracy = accuracy_score(y_test, y_pred)
print(f"Akurasi model Random Forest: {accuracy:.2f}")

# Laporan klasifikasi
print(classification_report(y_test, y_pred))

Akurasi model Random Forest: 0.03
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       226
           1       0.00      0.00      0.00       143
           2       0.03      1.00      0.07       292
           3       0.00      0.00      0.00       153
           4       0.00      0.00      0.00       149
           5       0.00      0.00      0.00       176
           6       0.00      0.00      0.00       234
           7       0.00      0.00      0.00       176
           8       0.00      0.00      0.00       206
           9       0.00      0.00      0.00       147
          10       0.00      0.00      0.00       207
          11       0.00      0.00      0.00       160
          12       0.00      0.00      0.00       175
          13       0.00      0.00      0.00       143
          14       0.00      0.00      0.00       139
          15       0.00      0.00      0.00       180
          16       0.00      0.00      0.00    

In [21]:
import xgboost as xgb

#Inisialisasi model XGBoost
xgbmodel = xgb.XGBClassifier(
    objective='multi:softmax',  # untuk klasifikasi multi-kelas
    num_class=len(np.unique(y_train)),  # jumlah kelas
    max_depth=6,  # kedalaman maksimum dari setiap pohon
    n_estimators=100  # jumlah pohon
)

In [22]:
# Melatih model
xgbmodel.fit(X_train, y_train)

In [27]:
from sklearn.metrics import accuracy_score, classification_report

# Prediksi dengan data uji
y_pred = xgbmodel.predict(X_test)

# Evaluasi akurasi
accuracy = accuracy_score(y_test, y_pred)
print(f"Akurasi model Random Forest: {accuracy:.2f}")

# Laporan klasifikasi
print(classification_report(y_test, y_pred))

Akurasi model Random Forest: 0.03
              precision    recall  f1-score   support

           0       0.04      0.04      0.04       226
           1       0.02      0.02      0.02       143
           2       0.06      0.13      0.09       292
           3       0.03      0.03      0.03       153
           4       0.03      0.03      0.03       149
           5       0.02      0.01      0.01       176
           6       0.03      0.04      0.04       234
           7       0.02      0.04      0.03       176
           8       0.03      0.03      0.03       206
           9       0.02      0.03      0.02       147
          10       0.01      0.01      0.01       207
          11       0.04      0.03      0.03       160
          12       0.04      0.02      0.03       175
          13       0.01      0.01      0.01       143
          14       0.03      0.02      0.02       139
          15       0.02      0.01      0.01       180
          16       0.03      0.02      0.02    