# **Default Setting**

In [1]:
import tensorflow as tf
tf.__version__

'2.2.0'

In [2]:
from google.colab import drive
# Mount Google Drive at /content/drive; Colab prompts interactively for an
# authorization code before the mount completes.
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


# **Unzip & Format Change**

In [3]:
%%time
import os

# Working directories, relative to the notebook's current directory.
# The *_pcm_* folders receive the extracted archives; the *_wav_* folders
# receive the converted WAV files.
kor_pcm_path = "./kor_pcm_dataset"
kor_wav_path = "./kor_wav_dataset"

eng_pcm_path = "./eng_pcm_dataset"
eng_wav_path = "./eng_wav_dataset"

# mkdir -- exist_ok=True keeps the cell safe to re-run and avoids the
# check-then-create race of `if not os.path.exists(p): os.mkdir(p)`.
for dataset_dir in (kor_pcm_path, kor_wav_path, eng_pcm_path, eng_wav_path):
    os.makedirs(dataset_dir, exist_ok=True)

# unzip
!unzip -q "/content/drive/My Drive/ETRI/9_voice_download_4_Korean_English_by_Korean_part2.zip" -d "./kor_pcm_dataset"
!unzip -q "/content/drive/My Drive/ETRI/8_voice_download_4_Korean_English_by_Korean_part1.zip" -d "./eng_pcm_dataset"

In [4]:
# Sanity check of the extraction: show the first five entries of each PCM
# dataset folder followed by the total number of entries.
for dataset_dir in (kor_pcm_path, eng_pcm_path):
    entries = os.listdir(dataset_dir)
    print(entries[:5], len(entries))

['GSF03031LJH0', 'I4F03543KSY0', 'GSF09431SHY0', 'GSF05432KSY0', 'GSM03224CHD0'] 50
['GSF03031LJH0', 'I4F03543KSY0', 'GSF09431SHY0', 'GSF05432KSY0', 'GSM03224CHD0'] 50


In [8]:
%%time
import wave

# Wrap every raw PCM recording in a WAV container.
# Each sub-folder of a dataset directory is treated as one label: folders are
# visited in sorted order and numbered from 0, and the sorted *.pcm files of a
# folder are numbered from 0 as well.
for pcm_path, wav_path in zip([kor_pcm_path, eng_pcm_path], [kor_wav_path, eng_wav_path]):

    # Only directories are labels. A stray regular file in the dataset root
    # would otherwise make the os.listdir() call below raise NotADirectoryError.
    label_folders = sorted(
        entry for entry in os.listdir(pcm_path)
        if os.path.isdir(os.path.join(pcm_path, entry)))

    for i, folder_path in enumerate(label_folders):

        file_list = os.listdir(os.path.join(pcm_path, folder_path)) # label's path
        file_list_pcm = [files for files in file_list if files.endswith(".pcm")] # *.pcm files

        for j, file_path in enumerate(sorted(file_list_pcm)):
            with open(os.path.join(pcm_path, folder_path, file_path), "rb") as pcm_file:
                pcm_data = pcm_file.read()
            # save as {label}_{features}.wav format
            with wave.open(os.path.join(wav_path, "{}_{}.wav".format(i, j)), 'wb') as wav_file:
                # (nchannels, sampwidth, framerate, nframes, comptype, compname):
                # mono, 2 bytes per sample, 16 kHz. nframes is given as 0 here;
                # the wave module corrects it when the frames are written.
                # NOTE(review): assumes the raw files really are 16-bit mono
                # PCM at 16 kHz -- confirm against the dataset documentation.
                wav_file.setparams((1, 2, 16000, 0, 'NONE', 'NONE'))
                wav_file.writeframes(pcm_data)

CPU times: user 940 ms, sys: 3.47 s, total: 4.41 s
Wall time: 20.7 s


# **Generate Dataset**

In [None]:
# Dataset dimensions: number of labels and number of files ("features") per label.
num_labels = 50
num_features = 100

# Three candidate lengths, expressed as multiples of 3 ** 8.
data_length_1 = 3 ** 8
data_length_2 = 2 * data_length_1
data_length_3 = 3 * data_length_1

# Share of each label's files that is flagged as training data.
train_rate = 0.20
num_train_data = int(train_rate * num_features) # 20

# Boolean masks over the per-label file index: the leading num_train_data
# positions are marked as train, and the test mask is its exact complement.
select_train_data = [position < num_train_data for position in range(num_features)]
select_test_data = [not is_train for is_train in select_train_data]

In [None]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import gridplot

output_notebook()

def draw_figure_1(x, y, title, show_figure = True):
    """Draw ``y`` against ``x`` as a thin navy line on a 1200x320 Bokeh figure.

    Args:
        x: horizontal coordinates handed to ``line``.
        y: vertical coordinates handed to ``line``.
        title: figure title, rendered at 14pt.
        show_figure: when true, the figure is displayed immediately with ``show``.

    Returns:
        The Bokeh figure, so that callers can place it in a layout.
    """
    waveform_plot = figure(title = title, width = 1200, height = 320)
    waveform_plot.line(x, y, color = "navy", line_width = .5)
    waveform_plot.title.text_font_size = "14pt"
    waveform_plot.yaxis.visible = False  # hide the y axis

    if not show_figure:
        return waveform_plot
    show(waveform_plot)
    return waveform_plot

In [None]:
from bokeh.io import export_svgs

import numpy as np
from scipy.io import wavfile

# Full-length waveform of the first utterance of the first three Korean
# classes, stacked vertically in one column.
#
# This cell deliberately does not assign `data_length_1`: that name is a
# dataset constant, and rebinding it here would make its value depend on
# which cells have been executed. The whole signal is plotted, so no window
# bounds are needed.
num_preview_classes = 3
preview_utterance = 0

gp = []
for i in range(num_preview_classes):
    file_name = "{}_{}.wav".format(i, preview_utterance)
    sample_rate, samples = wavfile.read(os.path.join(kor_wav_path, file_name))

    title = "Full Length Waveforms (class {} / Korean  utterances #{})".format(i, preview_utterance)
    # One figure per grid row.
    gp.append([draw_figure_1(np.arange(len(samples)), samples, title, show_figure = False)])

grid = gridplot(gp)
show(grid)

In [21]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import gridplot
from bokeh.models.annotations import BoxAnnotation
from bokeh.models import Range1d

output_notebook()

def draw_figure_2(samples, title, show_figure = True):
    """Plot a waveform zoomed on its centre, with three nested window overlays.

    The whole signal is drawn as a line. The x range is restricted to the
    central 3 ** 9 samples plus a 1000-sample margin on either side, and the
    y range is 1.4 times the min/max of that central window. Three centred
    boxes mark windows of 3 ** 8, 2 * 3 ** 8 and 3 ** 9 samples.

    Args:
        samples: sequence of audio samples; expected to be a 1-D array such as
            the data returned by ``wavfile.read`` for a mono file.
        title: figure title, rendered at 14pt.
        show_figure: when true, the figure is displayed immediately with ``show``.

    Returns:
        The Bokeh figure, so that callers can place it in a layout.
    """
    window_colors = ["firebrick", "olive", "indigo"]
    window_lengths = [3 ** 8, 2 * (3 ** 8), 3 ** 9]
    widest_window = window_lengths[-1]

    p = figure(width = 600, height = 250, title = title)

    start = (len(samples) - widest_window) // 2
    end = start + widest_window
    # Clamp before slicing: for a clip shorter than the widest window `start`
    # is negative, and a negative slice start would wrap around and pick only
    # the tail of the signal instead of the whole clip.
    y = samples[max(start, 0):end]

    p.line(np.arange(len(samples)), samples, line_width = .5, color = "navy")
    p.yaxis.visible = False
    p.x_range = Range1d(start - 1000, end + 1000)
    p.y_range = Range1d(1.4 * np.min(y), 1.4 * np.max(y))
    p.title.text_font_size = "14pt"

    for i, (color, data_length) in enumerate(zip(window_colors, window_lengths)):
        # Each successive (wider) window gets a slightly taller and fainter box
        # so that the three overlays stay distinguishable.
        top = (1 + 0.1 * (i+1)) * np.max(y)
        bottom = (1 + 0.1 * (i+1)) * np.min(y)
        left = (len(samples) - data_length) // 2
        right = left + data_length
        center = BoxAnnotation(
            top = top, bottom = bottom, left = left, right = right, fill_alpha = 0.08 * (3 - i), fill_color = color)
        p.add_layout(center)

    if show_figure:
        show(p)
    return p

In [None]:
from scipy.io import wavfile

# Overview of the Korean set: one grid column per class (first 5 classes) and
# one grid row per utterance (first 20 utterances of each class).
num_classes_shown, num_utterances_shown = 5, 20
gp = np.zeros((num_utterances_shown, num_classes_shown), dtype = object)

for class_idx in range(num_classes_shown):
    for utt_idx in range(num_utterances_shown):
        wav_name = "{}_{}.wav".format(class_idx, utt_idx)
        sample_rate, samples = wavfile.read(os.path.join(kor_wav_path, wav_name))

        figure_title = "Class {} / Korean  Utterances #{}".format(class_idx, utt_idx)
        gp[utt_idx, class_idx] = draw_figure_2(samples, figure_title, show_figure = False)

# Hand the figures to gridplot row by row.
grid = gridplot([grid_row for grid_row in gp])
show(grid)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Overview of the English set: one grid column per class (first 5 classes) and
# one grid row per utterance (first 20 utterances of each class).
num_classes_shown, num_utterances_shown = 5, 20
gp = np.zeros((num_utterances_shown, num_classes_shown), dtype = object)

for class_idx in range(num_classes_shown):
    for utt_idx in range(num_utterances_shown):
        wav_name = "{}_{}.wav".format(class_idx, utt_idx)
        sample_rate, samples = wavfile.read(os.path.join(eng_wav_path, wav_name))

        figure_title = "Class {} / English Utterances #{}".format(class_idx, utt_idx)
        gp[utt_idx, class_idx] = draw_figure_2(samples, figure_title, show_figure = False)

# Hand the figures to gridplot row by row.
grid = gridplot([grid_row for grid_row in gp])
show(grid)

In [22]:
from scipy.io import wavfile
import numpy as np

# Collects the figures returned by draw_figure_2 in the following cells; a
# later cell lays out aa[0]..aa[3] as a 2x2 grid. Each of those cells appends,
# so re-run this cell first before re-running any of them.
aa = []

In [23]:
# Good-case example from the Korean set. The title is built from the same
# indices as the file name so the caption always describes the waveform that
# is actually plotted.
class_idx, utt_idx = 2, 18
file_name = "{}_{}.wav".format(class_idx, utt_idx)
sample_rate, samples = wavfile.read(os.path.join(kor_wav_path, file_name))

aa.append(draw_figure_2(samples, "Assumed to be Good Case (class {} / Korean  utterances #{})".format(class_idx, utt_idx), show_figure = False))

In [24]:
# Good-case example from the English set. The title is built from the same
# indices as the file name, and names the language of the folder that is
# read, so the caption always describes the waveform that is actually plotted.
class_idx, utt_idx = 2, 0
file_name = "{}_{}.wav".format(class_idx, utt_idx)
sample_rate, samples = wavfile.read(os.path.join(eng_wav_path, file_name))

aa.append(draw_figure_2(samples, "Assumed to be Good Case (class {} / English  utterances #{})".format(class_idx, utt_idx), show_figure = False))

In [25]:
# Bad-case example from the Korean set. The title is built from the same
# indices as the file name so the caption always describes the waveform that
# is actually plotted.
class_idx, utt_idx = 2, 10
file_name = "{}_{}.wav".format(class_idx, utt_idx)
sample_rate, samples = wavfile.read(os.path.join(kor_wav_path, file_name))

aa.append(draw_figure_2(samples, "Assumed to be Bad Case (class {} / Korean  utterances #{})".format(class_idx, utt_idx), show_figure = False))

In [26]:
# Bad-case example from the English set. The title is built from the same
# indices as the file name so the caption always describes the waveform that
# is actually plotted.
class_idx, utt_idx = 2, 9
file_name = "{}_{}.wav".format(class_idx, utt_idx)
sample_rate, samples = wavfile.read(os.path.join(eng_wav_path, file_name))

aa.append(draw_figure_2(samples, "Assumed to be Bad Case (class {} / English  utterances #{})".format(class_idx, utt_idx), show_figure = False))

In [27]:
# Arrange the four collected example figures as a 2x2 grid:
# aa[0] and aa[1] on the first row, aa[2] and aa[3] on the second.
first_row = [aa[0], aa[1]]
second_row = [aa[2], aa[3]]
grid = gridplot([first_row, second_row])
show(grid)

Output hidden; open in https://colab.research.google.com to view.