In [1]:
# ライブラリ読み込み
import numpy as np
import matplotlib.pyplot as plt
import librosa
import glob
import json
from pydub import AudioSegment
from IPython.display import Audio
%matplotlib inline

from typing import List
from modules.network_interface import *

# Sampling rate (Hz) handed to librosa.load as `sr` for every file this notebook reads.
sampling_rate = 50000



In [2]:
# Load a single audio file
def load_audio(file_name: str) -> np.ndarray:
    """Read `file_name` as a mono waveform at the notebook-wide `sampling_rate`.

    Only the sample array is returned; the rate that librosa reports back is
    discarded.
    """
    waveform, _ = librosa.load(file_name, sr=sampling_rate, mono=True)
    return waveform


# Bulk-load every reading recording of one language
def load_audios(lang: str) -> List[np.ndarray]:
    """Load all `./JKspeech/{lang}*.wav` files, ordered by file name.

    Args:
        lang: Language prefix of the recordings, either 'J' or 'E'.

    Returns:
        One waveform per matching file, in sorted file-name order. An empty
        list is returned (after printing a message) for any other `lang`, and
        also when no file matches.
    """
    if lang not in ('J', 'E'):
        print("ファイルが見つかりません")
        return []

    audio_dir = './JKspeech/'
    # glob.glob yields paths in an arbitrary, filesystem-dependent order, while
    # later cells turn the list position into a card number. Sorting makes the
    # position stable across machines.
    # NOTE(review): this assumes zero-padded numbers in the file names
    # (e.g. J01.wav) so that lexicographic order equals numeric order — confirm.
    file_names = sorted(glob.glob(f'{audio_dir}/{lang}*.wav'))
    return [load_audio(file_name) for file_name in file_names]


# Convert a WAV file into an AudioSegment
def to_AudioSegment(file_name):
    """Open `file_name` as a WAV-format AudioSegment.

    A missing file is not propagated: the FileNotFoundError is printed and
    None is returned instead, so callers have to cope with a None result.
    """
    try:
        return AudioSegment.from_file(file_name, format="wav")
    except FileNotFoundError as err:
        print(err)
        return None

In [3]:
def kana_to_number(kana):
    """Return the 1-based position of a single kana in the fixed kana table.

    Args:
        kana: Exactly one character from the table below.

    Returns:
        Position of `kana` in the table, starting at 1 for 'あ'.

    Raises:
        ValueError: If `kana` is not exactly one character, or is not in the
            table.
    """
    kana_ = "あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをん"
    # str.index searches for substrings: without this guard "" would map to 1
    # and a run such as "かき" would map to 6 instead of being rejected.
    if len(kana) != 1:
        raise ValueError(f"expected exactly one kana character, got {kana!r}")
    return kana_.index(kana) + 1

def number_to_kana(number):
    """Return the kana at 1-based position `number` in the fixed kana table.

    Args:
        number: Position in the table, from 1 ('あ') up to the table length.

    Returns:
        The single kana character at that position.

    Raises:
        IndexError: If `number` is outside 1..len(table).
    """
    kana_ = "あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをん"
    # Without this check 0 and negative numbers would silently wrap around via
    # Python's negative indexing (0 -> 'ん', -1 -> 'を').
    if not 1 <= number <= len(kana_):
        raise IndexError(f"kana number out of range 1..{len(kana_)}: {number}")
    return kana_[number - 1]

In [4]:
# Setup: load the Japanese and English recordings once and start an empty
# log of the answers submitted so far.
ja_sounds, en_sounds = load_audios('J'), load_audios('E')
submitted = list()

print("Done")

Done


---
##### ここまで済ませておく
---

In [90]:
# WAV settings: number of split files and the problem folder that holds them
N = 3
ID = "敗者復活戦1"
file_path = f"./problems/{ID}/"
chunks = [f"problem{idx}.wav" for idx in range(1, N + 1)]

# Concatenate the split data. pop() removes the LAST file name, so that chunk
# comes first (sliced from index 100 onward); the remaining chunks follow in
# ascending order, each sliced from index 0.
seg = to_AudioSegment(file_path + chunks.pop())[100:]
for chunk in chunks:  # empty when N == 1, so no separate guard is needed
    seg += to_AudioSegment(file_path + chunk)[0:]

# Write the joined audio to a temporary WAV file and read it back with
# load_audio.
seg.export(f"./problems/{ID}/tmp.wav", format="wav")
origin = load_audio(f"./problems/{ID}/tmp.wav")

In [99]:
# # 畳み込みで適合率を求める
# def get_precision(origin_init: np.ndarray, target_init: np.ndarray) -> np.ndarray:
#     # サイズ調整
#     start = int(np.abs(len(origin_init) - len(target_init)) / 500)
#     start = 500
#     end = np.min([len(origin_init), len(target_init)]) - 500
#     target = target_init[start:end]
#     origin = origin_init[start:end]
    
#     # フーリエ変換
#     fft_O = np.fft.fft(origin)
#     fft_T = np.fft.fft(target)

#     # 畳み込み処理
#     fft_Oc = np.conj(fft_O)        # 複素共役を取る
#     sigma_C = fft_Oc * fft_T       # 要素ごとに乗算
#     sigma_T = np.fft.ifft(sigma_C) # 逆フーリエ変換
#     sigma_T = np.abs(sigma_T)      # 絶対値を取りスカラー値に変換
#     return np.max(sigma_T)         # スカラー値の最大値 

# Score how well a template matches the problem audio via FFT cross-correlation
def get_precision(origin_init: np.ndarray, target_init: np.ndarray,
                  trim_divisor: int = 180, max_trim: int = 640) -> float:
    """Return the peak magnitude of the circular cross-correlation of two signals.

    Both signals are cut to the common window ``[start:end]`` where
    ``start = min(|len(origin) - len(target)| // trim_divisor, max_trim)`` and
    ``end`` is the shorter length. The window is then correlated through the
    FFT (conjugate product followed by an inverse transform).

    Args:
        origin_init: Waveform of the problem audio.
        target_init: Waveform of one candidate recording.
        trim_divisor: Divisor applied to the length difference to obtain the
            number of leading samples to skip. Defaults to the previously
            hard-coded 180.
        max_trim: Upper bound on the number of skipped leading samples.
            Defaults to the previously hard-coded 640.

    Returns:
        The largest absolute value of the cross-correlation, as a float.
        The value is not normalised by signal energy or length.

    Raises:
        ValueError: If the common window is empty (one input is empty, or the
            trim reaches the end of the shorter signal).
    """
    # Size adjustment: skip a few leading samples and cut both to equal length
    length_gap = abs(len(origin_init) - len(target_init))
    start = min(length_gap // trim_divisor, max_trim)
    end = min(len(origin_init), len(target_init))
    if end <= start:
        # numpy would otherwise fail with an opaque "Invalid number of FFT
        # data points" error on the empty slice.
        raise ValueError(
            f"no overlapping samples to compare (start={start}, end={end})"
        )
    target = target_init[start:end]
    origin = origin_init[start:end]

    # Fourier transform
    fft_O = np.fft.fft(origin)
    fft_T = np.fft.fft(target)

    # Cross-correlation in the frequency domain
    fft_Oc = np.conj(fft_O)         # complex conjugate
    sigma_C = fft_Oc * fft_T        # element-wise product
    sigma_T = np.fft.ifft(sigma_C)  # back to the lag domain
    sigma_T = np.abs(sigma_T)       # magnitudes of the complex values

    return float(np.max(sigma_T))   # peak over all lags

In [101]:
# Score every recording (Japanese first, then English) against the problem audio
precision = [
    get_precision(origin, template)
    for template in (*ja_sounds, *en_sounds)
]

In [102]:
# Ranking: choose the N best-scoring distinct cards
N = 6
answers = []
# `precision` is indexed over ja_sounds + en_sounds. Folding the index with
# % 44 maps a Japanese and an English recording at the same list position to
# one card number (this relies on 44 recordings per language in card order).
for i in np.argsort(precision)[::-1]:
    if len(answers) >= N:
        break
    card = int(i % 44) + 1
    if card in answers:
        # The other language's recording of this card already ranked higher;
        # skipping it keeps `answers` free of duplicates and leaves the slot
        # for the next-best card.
        continue
    print(card, precision[i])
    answers.append(card)

answers = sorted(answers)
# Reference answer typed in by hand, used only for the offline comparison below
correct_numbers = [kana_to_number(c) for c in "えさとねらりろ"]
matches = [n for n in answers if n in correct_numbers]
shortages = [n for n in correct_numbers if n not in answers]

# print("正答:", correct_numbers)
# print("回答:", answers)
# print("一致:", sorted(matches))
# print("不足:", sorted(shortages))
# print(f"正答率: {(len(matches) / N) * 100:.02f}%")

1 253.08586879086056
20 192.89180526846766
30 151.1527751449203
16 141.64202959926232
24 114.40768105912495
28 110.3082422502296


In [None]:
# 提出
formatted_answer = list(map(lambda n: f"{n:02d}", answers))
res = await answer(problem["id"], formatted_answer)

submitted.append(formatted_answer)
print(formatted_answer)
print(res)
print(submitted)

---

In [None]:
# # wav設定
# chunks = chunks_init.copy()
# chunks.reverse()
    
# # 分割データ連結
# seg = to_AudioSegment(file_path + chunks.pop())
# if N > 1:
#     for chunk in chunks:
#         seg += to_AudioSegment(file_path + chunk)
    
# seg.export("./problems/tmp.wav", format="wav")
# origin = load_audio("./problems/tmp.wav")

# # 畳み込み
# precision = [get_precision(origin, sound) 
#              for sound in ja_sounds + en_sounds]
    
# # ソート
# answers = []
# for i in np.argsort(precision)[::-1][:problem["data"]]:
#     print((i%44)+1, precision[i])
#     answers.append((i%44)+1)

# answers = sorted(list(set(answers)))
# print(answers)