[Onsei: Japanese pitch accent training tool](https://github.com/itsupera/onsei)
================================================================================

<details>
<summary>Click here for instructions !</summary>

- Select a sample to mimic and listen to it
  
- Record yourself mimicking using the record button below "Your recording:"

- Click on the "Compare" button

- Check out the Pitch comparison graph below to see where your mistakes are

- Try again to match the teacher's pitch !

- Any feedback or suggestion ? Please tell me in [this Gitter chat](https://gitter.im/itsupera-onsei/community)
</details>

In [None]:
import os

import ipywidgets as widgets;
from ipywebrtc import AudioRecorder, CameraStream, AudioStream;

from onsei.utils import SpeechRecord, segment_speech;
from onsei.figures import ViewRecordFigure, CompareFigure;
from onsei.sentence import Sentence;

# Globals


def get_jsut_samples(basepath="data/jsut_basic5000_sample"):
    """Load the JSUT sample collection described by its transcript file.

    The transcript (``transcript_utf8.txt`` inside *basepath*) is read line by
    line; each non-empty line is split into ``<basename>:<sentence>`` and the
    matching audio file is ``<basename>.wav`` in the same directory.

    Args:
        basepath: Directory holding the transcript and the WAV files.

    Returns:
        A dict mapping each sentence text to a dict with the keys
        ``"filename"`` (path of the WAV file) and ``"sentence"`` (a
        ``Sentence`` built from the text).

    Raises:
        ValueError: If a non-empty transcript line contains no ``:`` separator.
    """
    samples = {}
    transcript_path = os.path.join(basepath, "transcript_utf8.txt")
    # Decode explicitly as UTF-8 instead of relying on the platform default,
    # which would garble the Japanese text on non-UTF-8 locales.
    with open(transcript_path, encoding="utf-8") as f:
        for line in f:
            line = line.rstrip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            # Split on the first colon only, so a sentence that itself
            # contains ':' is kept intact.
            basename, sentence = line.split(":", 1)
            samples[sentence] = {
                "filename": os.path.join(basepath, f"{basename}.wav"),
                "sentence": Sentence(sentence),
            }
    return samples

# Registry of every available sample: collection name -> sentence text ->
# {"filename": <wav path>, "sentence": <Sentence>}.
samples = {}
samples["JSUT"] = get_jsut_samples()
samples["Test"] = {
    "僕の知人の経営者に": {
        "filename": "data/ps/ps1_boku_no_chijin-teacher2.wav",
        "sentence": Sentence("僕の知人の経営者に"),
    },
}

# Initial selection: first sentence of the first collection (insertion order).
default_sample_collection = tuple(samples)[0]
default_sample_key = tuple(samples[default_sample_collection])[0]
default_sample = samples[default_sample_collection][default_sample_key]

# Current recordings; assigned by update_sample() and run_compare().
teacher_rec = None
student_rec = None

# Initial state of the "Autoplay" option.
default_autoplay = True


# Create widgets


# --- Sample selection -------------------------------------------------------

# Dropdown listing the sample collections (top-level keys of `samples`).
w_select_collection = widgets.Dropdown(
    options=samples.keys(),
    value=default_sample_collection,
    description="Sample collection:",
    disabled=False,
    layout=widgets.Layout(width="100%"),
)

# Dropdown listing the sentences of the currently selected collection.
w_select_sentence = widgets.Dropdown(
    options=samples[default_sample_collection].keys(),
    value=default_sample_key,
    description="Sentence:",
    disabled=False,
    layout=widgets.Layout(width="100%"),
)

# --- Options ----------------------------------------------------------------

# Checkbox controlling autoplay, tucked into an initially collapsed accordion.
w_autoplay_tick = widgets.Checkbox(
    value=default_autoplay,
    description="Autoplay",
    disabled=False,
    indent=False,
)
w_options_accordion = widgets.Accordion(
    children=[w_autoplay_tick],
    selected_index=None,
)
w_options_accordion.set_title(0, "Options")

# --- Teacher audio and sentence display -------------------------------------

# Player for the teacher's recording; starts empty and is filled by update_sample().
w_audio = widgets.Audio(
    value=b"",
    format="wav",
    autoplay=default_autoplay,
    loop=False,
)

# HTML rendering of the selected sentence.
w_sentence = widgets.HTML(value="")

# --- Student recording ------------------------------------------------------

# Audio-only media stream (video disabled) feeding the recorder widget.
camera = CameraStream(constraints={"audio": True, "video": False})
w_recorder = AudioRecorder(stream=camera)

# --- Comparison controls and figures ----------------------------------------

w_compare_btn = widgets.Button(description="Compare")

# Text line reporting the outcome of the last comparison.
w_cmp_result = widgets.Label(value="")

fig_teacher = ViewRecordFigure(title="Teacher's recording")
fig_student = ViewRecordFigure(title="Your recording")

fig_cmp = CompareFigure()


# Callbacks


def update_autoplay(change):
    """Mirror the "Autoplay" checkbox onto the teacher audio player.

    Args:
        change: Change notification passed by ``observe``; its ``'new'`` entry
            is assigned to ``w_audio.autoplay``.
    """
    autoplay_enabled = change['new']
    w_audio.autoplay = autoplay_enabled


# Keep the audio widget in step with the checkbox value.
w_autoplay_tick.observe(update_autoplay, names='value')


def get_sample_audio_data(sample):
    """Return the raw bytes of a sample's audio file.

    Args:
        sample: Mapping with a ``'filename'`` entry giving the path to read.

    Returns:
        The complete file content as ``bytes``.
    """
    # Use a context manager so the file handle is closed right away instead of
    # being left for the garbage collector.
    with open(sample['filename'], 'rb') as audio_file:
        return audio_file.read()


def update_sample(sample):
    """Make `sample` the current teacher sample and reset the comparison state.

    Renders the sentence as HTML, rebuilds the global ``teacher_rec`` from the
    sample's audio file, loads the audio bytes into the player, refreshes the
    teacher figure, clears the student and comparison figures, and empties the
    result label.

    Args:
        sample: Dict with a ``'filename'`` (audio path) and a ``'sentence'``
            (object providing ``to_html()``).
    """
    global teacher_rec

    sentence = sample['sentence']
    wav_path = sample['filename']

    with w_sentence.hold_sync():
        sentence_html = f'<p style="font-size: xx-large">{sentence.to_html()}</p>'
        w_sentence.value = sentence_html

    teacher_rec = SpeechRecord(wav_path, sentence=sentence, name="Teacher")

    w_audio.value = get_sample_audio_data(sample)

    fig_teacher.update_data(teacher_rec)
    # Anything shown for a previous sample no longer applies.
    for stale_figure in (fig_student, fig_cmp):
        stale_figure.clear()

    w_cmp_result.value = ""


# Show the default sample as soon as the cell runs.
update_sample(default_sample)


def load_selected_collection(change):
    """Switch to a newly selected collection and show its first sentence.

    Args:
        change: Change notification from ``w_select_collection``; its
            ``"new"`` entry is the name of the selected collection.
    """
    collection_name = change["new"]
    sentence_keys = [*samples[collection_name]]

    # Replace the sentence choices and select the first one as a single batch.
    with w_select_sentence.hold_sync():
        w_select_sentence.options = sentence_keys
        w_select_sentence.value = sentence_keys[0]

    first_sample = samples[collection_name][sentence_keys[0]]
    update_sample(first_sample)

def load_selected_sentence(change):
    """Show the sample for a newly selected sentence.

    The sentence is looked up in whichever collection is currently selected in
    ``w_select_collection``.

    Args:
        change: Change notification from ``w_select_sentence``; its ``"new"``
            entry is the selected sentence key.
    """
    selected_key = change["new"]
    active_collection = samples[w_select_collection.value]
    update_sample(active_collection[selected_key])


# React to changes of either dropdown.
w_select_collection.observe(load_selected_collection, names='value')
w_select_sentence.observe(load_selected_sentence, names='value')


def get_student_wav_filename():
    """Save the student's recording and convert it to a 16 kHz mono WAV file.

    The recorder content is written to ``test.webm`` and converted with
    ``ffmpeg`` into ``test.wav``; both files are overwritten on every call.

    Returns:
        The name of the converted file, ``'test.wav'``.

    Raises:
        ValueError: Propagated from ``w_recorder.save``. When its message
            starts with "No data", a hint is first shown in ``w_cmp_result``.
        RuntimeError: If ``ffmpeg`` exits with a non-zero status.
    """
    # Local import so this function does not depend on the cell's import block.
    import subprocess

    try:
        w_recorder.save('test.webm')
    except ValueError as exc:
        if str(exc).startswith('No data'):
            w_cmp_result.value = "Record something first !"
        # Bare raise keeps the original traceback.
        raise

    # Run ffmpeg with an argument list (no shell involved) and check its exit
    # status: a failed conversion must not silently leave a stale test.wav
    # from an earlier attempt to be compared instead.
    command = [
        "ffmpeg", "-hide_banner", "-loglevel", "error", "-y",
        "-i", "test.webm",
        "-ar", "16000",
        "-ac", "1",
        "test.wav",
    ]
    try:
        subprocess.run(
            command,
            check=True,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
    except subprocess.CalledProcessError as exc:
        raise RuntimeError(
            f"ffmpeg failed to convert the recording: {exc.stderr.strip()}"
        ) from exc

    return 'test.wav'


def run_compare(_):
    """Compare the student's recording against the current teacher sample.

    Click handler for the "Compare" button: converts the student's recording
    to WAV, builds the global ``student_rec`` from it, plots it, aligns it with
    ``teacher_rec``, and reports the mean pitch distance in ``w_cmp_result``.
    On success the comparison figure is refreshed as well.

    Args:
        _: The clicked button (unused).

    Raises:
        Exception: Any error from recording retrieval, alignment or pitch
            comparison is re-raised; alignment/comparison failures first set
            ``w_cmp_result`` to "FAILED !".
    """
    # teacher_rec is only read here, so only student_rec needs `global`.
    global student_rec

    # `samples` is nested as samples[collection][sentence], so both dropdowns
    # are needed to find the selected sample.
    sample = samples[w_select_collection.value][w_select_sentence.value]

    student_wav_filename = get_student_wav_filename()
    # Debugging tip: a pre-recorded file such as "data/mizo_wo_student.wav"
    # can be substituted here instead of a live recording.

    student_rec = SpeechRecord(student_wav_filename, sample['sentence'], name="Student")
    fig_student.update_data(student_rec)

    try:
        student_rec.align_with(teacher_rec)
        mean_distance = student_rec.compare_pitch()
        w_cmp_result.value = f"Success !\nMean distance = {mean_distance:.2f}"
    except Exception:
        w_cmp_result.value = "FAILED !"
        # Bare raise keeps the original traceback.
        raise

    fig_cmp.update_data(teacher_rec, student_rec)


w_compare_btn.on_click(run_compare)


# Layout

# Top row: the two dropdowns stacked vertically, next to the options accordion.
selection_row = widgets.Box([
    widgets.VBox([w_select_collection, w_select_sentence]),
    w_options_accordion,
])

# Middle row: three columns, each given a third of the width.
teacher_column = widgets.VBox(
    [widgets.Label(value="Teacher's recording:"), w_audio],
    layout=widgets.Layout(width='33%'),
)
student_column = widgets.VBox(
    [widgets.Label(value="Your recording:"), w_recorder],
    layout=widgets.Layout(width='33%'),
)
compare_column = widgets.VBox(
    [w_compare_btn, w_cmp_result],
    layout=widgets.Layout(width='33%'),
)
recording_row = widgets.Box([teacher_column, student_column, compare_column])

# Whole page: a single flex column, with the figures stacked at the bottom.
box = widgets.Box(
    [
        selection_row,
        w_sentence,
        recording_row,
        fig_cmp,
        fig_teacher,
        fig_student,
    ],
    layout=widgets.Layout(
        display="flex",
        flex_flow="column",
        align_items="stretch",
        align_content="center",
    ),
)

display(box)