In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive; the joblib artifacts loaded further
# down are read from /content/drive/MyDrive/ML beneath this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [11]:
from IPython.display import Javascript, display
from google.colab import output
from base64 import b64decode
import librosa
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import joblib

# JavaScript source injected into the notebook output by record().
# It defines a global `record(time)` that captures microphone audio for `time`
# milliseconds and resolves to the recording encoded as a base64 data URL.
# Failures (e.g. the user denying microphone access) reject the promise
# instead of leaving it pending, and the microphone is released after each take.
RECORD = '''
const sleep  = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise((resolve, reject) => {
  const reader = new FileReader()
  reader.onerror = () => reject(reader.error)
  reader.onloadend = () => resolve(reader.result)
  reader.readAsDataURL(blob)
})
var record = async time => {
  // A rejected getUserMedia propagates to the caller rather than hanging.
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
  try {
    const recorder = new MediaRecorder(stream)
    const chunks = []
    recorder.ondataavailable = e => chunks.push(e.data)
    const stopped = new Promise((resolve, reject) => {
      recorder.onstop = resolve
      recorder.onerror = e => reject(e.error || e)
    })
    recorder.start()
    await sleep(time)
    recorder.stop()
    await stopped
    return await b2text(new Blob(chunks))
  } finally {
    // Release the microphone so the browser stops capturing once we are done.
    stream.getTracks().forEach(track => track.stop())
  }
}
'''

def record(sec=5, filename='audio.wav'):
    """Record audio from the browser microphone and save it to disk.

    Injects the RECORD JavaScript into the cell output, asks it to record for
    ``sec`` seconds, decodes the base64 payload of the returned data URL and
    writes the raw bytes to ``filename``.

    Note: the bytes are written exactly as produced by the browser's
    MediaRecorder; no conversion is performed here, so the ``.wav`` extension
    of the default name does not by itself guarantee WAV-encoded content.

    Args:
        sec: Recording length in seconds (converted to whole milliseconds).
        filename: Path of the file to write. Defaults to ``'audio.wav'``.

    Returns:
        The path the recording was written to (``filename``).

    Raises:
        ValueError: If the browser did not return a data URL with a payload.
    """
    display(Javascript(RECORD))
    data_url = output.eval_js('record(%d)' % (sec * 1000))

    # A data URL has the form "data:<mime>;base64,<payload>"; keep the payload.
    _header, separator, payload = (data_url or '').partition(',')
    if not separator or not payload:
        raise ValueError('Recording returned no audio data.')

    with open(filename, 'wb') as f:
        f.write(b64decode(payload))
    return filename

In [16]:
def extract_features(file_path):
    """Summarise an audio file as a flat dict of averaged librosa features.

    At most the first 5 seconds of the file are decoded. Each feature returned
    by librosa is collapsed to a single number with ``np.mean``.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        dict keyed, in this order, by ``chroma_stft``, ``rms``,
        ``spectral_centroid``, ``spectral_bandwidth``, ``spectral_rolloff``,
        ``zero_crossing_rate`` and then ``mfcc_1`` .. ``mfcc_<k>``, one entry
        per row returned by ``librosa.feature.mfcc`` (20 coefficients are
        requested).
    """
    signal, sample_rate = librosa.load(file_path, duration=5)

    # The chroma feature is computed from the magnitude spectrogram.
    magnitude = np.abs(librosa.stft(signal))

    feat = librosa.feature
    summary = {
        'chroma_stft': np.mean(feat.chroma_stft(S=magnitude, sr=sample_rate)),
        'rms': np.mean(feat.rms(y=signal)),
        'spectral_centroid': np.mean(feat.spectral_centroid(y=signal, sr=sample_rate)),
        'spectral_bandwidth': np.mean(feat.spectral_bandwidth(y=signal, sr=sample_rate)),
        'spectral_rolloff': np.mean(feat.spectral_rolloff(y=signal, sr=sample_rate)),
        'zero_crossing_rate': np.mean(feat.zero_crossing_rate(y=signal)),
    }

    # One averaged value per MFCC row, numbered from 1.
    coefficients = feat.mfcc(y=signal, sr=sample_rate, n_mfcc=20)
    summary.update(
        (f'mfcc_{index}', np.mean(row))
        for index, row in enumerate(coefficients, start=1)
    )

    return summary

In [6]:
# Load your trained models
# All artifacts live in one Drive folder; change MODEL_DIR to relocate them.
# NOTE: joblib.load unpickles arbitrary Python objects, so only point this at
# files you created or otherwise trust.
MODEL_DIR = '/content/drive/MyDrive/ML'

# Base classifiers whose class probabilities feed the meta-model.
rf = joblib.load(f'{MODEL_DIR}/random_forest_model.joblib')
knn = joblib.load(f'{MODEL_DIR}/knn_model.joblib')
dt = joblib.load(f'{MODEL_DIR}/decision_tree_model.joblib')
# Meta-model that combines the base classifiers' probabilities.
final_estimator = joblib.load(f'{MODEL_DIR}/final_estimator.joblib')
# Scaler applied to the extracted features before the base classifiers.
scaler = joblib.load(f'{MODEL_DIR}/scaler.joblib')


In [7]:
# Encoder used by predict_language() to turn the meta-model's predicted class
# values back into language labels (via inverse_transform).
label_encoder = joblib.load('/content/drive/MyDrive/ML/label_encoder.joblib')

In [22]:
def predict_language(audio_file):
    """Predict the spoken language of an audio file with the stacked models.

    Pipeline: extract features, scale them with the module-level ``scaler``,
    collect class probabilities from the base models (``rf``, ``knn``, ``dt``),
    concatenate them column-wise and pass the result to ``final_estimator``.
    The per-language probabilities are printed as a side effect.

    Args:
        audio_file: Path of the audio file to classify.

    Returns:
        The predicted language label, decoded with ``label_encoder``.
    """
    # Single-row frame so the scaler sees the features as one sample.
    features_df = pd.DataFrame([extract_features(audio_file)])
    features_normalized = scaler.transform(features_df)

    # Meta-features: base-model probabilities in the order rf, knn, dt.
    base_probabilities = [
        model.predict_proba(features_normalized) for model in (rf, knn, dt)
    ]
    X_meta = np.hstack(base_probabilities)

    probabilities = final_estimator.predict_proba(X_meta)[0]

    # predict_proba columns follow final_estimator.classes_, which may differ
    # from the full list of labels the encoder knows. Decode the estimator's
    # own classes so every probability is printed next to the right language;
    # fall back to the encoder's order only if the estimator exposes none.
    estimator_classes = getattr(final_estimator, 'classes_', None)
    if estimator_classes is None:
        class_names = label_encoder.classes_
    else:
        class_names = label_encoder.inverse_transform(estimator_classes)

    print("Probabilitas untuk setiap bahasa:")
    for lang, prob in zip(class_names, probabilities):
        print(f"{lang}: {prob:.4f}")

    # Final decision comes from the estimator itself, then is decoded to a label.
    final_pred = final_estimator.predict(X_meta)
    return label_encoder.inverse_transform(final_pred)[0]

# Main function to record and predict
def record_and_predict(sec=5):
    """Record from the microphone, then print the predicted language.

    Args:
        sec: Recording length in seconds, forwarded to ``record``. Defaults to
            5, the value that was previously hard-coded. Note that
            ``extract_features`` decodes at most the first 5 seconds of the
            clip, so longer recordings do not add information to the
            prediction.

    Returns:
        None. Progress and the result are reported with ``print``.
    """
    print("Recording ...")
    audio_file = record(sec)
    print("Recording finished. Predicting language...")
    language = predict_language(audio_file)
    print(f"The predicted language is: {language}")

In [25]:
import ipywidgets as widgets
from IPython.display import display, Audio
import time

def record_with_button(sec=5):
    """Show a "Start Recording" button that records, plays back and classifies.

    Each click records ``sec`` seconds from the microphone, embeds an audio
    player for the clip and prints the predicted language. Everything is
    rendered inside an output widget displayed below the button.

    Args:
        sec: Recording length in seconds, forwarded to ``record``. Note that
            ``extract_features`` decodes at most the first 5 seconds of the
            clip.

    Returns:
        None. The button and its output area are displayed as a side effect.
    """
    record_button = widgets.Button(description="Start Recording")
    # Not named `output`, to avoid shadowing the google.colab `output` module
    # imported at the top of the notebook.
    log_area = widgets.Output()

    def on_button_clicked(b):
        """Click handler: record, play back, then predict, logging to log_area."""
        with log_area:
            # Report the duration actually requested, not a fixed "5".
            print(f"Recording for {sec} seconds...")
            audio_file = record(sec)
            print("Recording finished.")

            # Play the recorded audio
            display(Audio(filename=audio_file))

            # Predict language
            print("Predicting language...")
            language = predict_language(audio_file)
            print(f"The predicted language is: {language}")

    record_button.on_click(on_button_clicked)
    display(record_button, log_area)

# Render the "Start Recording" button. Each click records from the microphone
# (5 seconds by default), plays the clip back and prints the predicted
# language in the output area shown below the button.
record_with_button()

Button(description='Start Recording', style=ButtonStyle())

Output()