In [1]:
from pathlib import Path
import json

from rich import print as rprint

import midii

import preprocess_svs as ps
from preprocess_svs import gv

# GV File Correction

In [2]:
# Full-dataset locations (absolute paths on the local D: drive).
gv_path = "D:/dataset/177.다음색 가이드보컬 데이터"
gv_json_time_adjusted = "D:/dataset/다음색 가이드보컬 데이터 time_adjusted"
gv_json_preprocessed = "D:/dataset/다음색 가이드보컬 데이터 json preprocessed"

# Sample directories (relative paths).
gv_json_sample = "sample/gv/json"
gv_mid_sample = "sample/gv/midi"
gv_sample_preprocessed = "sample/gv/json_preprocessed"

# Per-stage files of the single sample song used in the cells below.
sample_stem = "SINGER_16_10TO29_CLEAR_FEMALE_BALLAD_C0632"
midi_filepath = f"{gv_mid_sample}/{sample_stem}.mid"
time_adjusted_json_filepath = f"sample/gv/json_time_adjusted/{sample_stem}.json"
filled_time_gaps_json_filepath = f"sample/gv/json_filled_time_gaps/{sample_stem}.json"

In [3]:
# Count the entries ps.get_files yields for the "mid" extension under gv_path.
print(sum(1 for _ in ps.get_files(gv_path, "mid")))

3605


In [4]:
# Load the sample MIDI file and inspect how its tempo values are distributed.
mid = midii.MidiFile(midi_filepath, convert_1_to_0=True)
tempo_rank = mid.tempo_rank()
# NOTE(review): the recorded output is a list of 2-tuples; presumably
# (tempo, occurrence count) — confirm against midii's documentation.
print(tempo_rank)
# Share of the top-ranked tempo; the recorded output for this sample is 100.0.
print(ps.calculate_top_tempo_percentage(tempo_rank))

[(857142, 790)]
100.0


## Tempo Deviation Analysis

- json 을 처리하려면 quantize 를 위한 tempo 가 필요한데 json 에는 tempo 정보가 없음 
- -> tempo rank 검사 
- -> tempo 가 변하지 않는다는 충분한 보장
- -> dominant tempo 를 채택하여 quantize 해도 된다

In [5]:
# ps.tempo_statistics(gv_path, parallel=True, verbose=True)

- -> 이전 end_time 이 현재 start_time 보다 큰 경우가 있음 
- -> 이전 end_time 에 현재 start_time 을 맞추면, 뒤따라오는 메시지들의 sync 가 다 틀어짐 
- -> 이전 end_time 을 현재 start_time 에 맞춰주는 게 더 나음

## Verify notes are sorted by time

In [6]:
# gv.verify_json_notes_sorted_by_time(gv_path, parallel=True)

In [7]:
def adjust_note_times_sample(
    json_dir: str = "sample/gv/json",
    out_dirname: str = "json_time_adjusted",
) -> None:
    """Apply ``gv.adjust_note_times`` to every JSON file found under ``json_dir``.

    For each input file the ``"notes"`` entry is read, passed through
    ``gv.adjust_note_times`` and the result is written to a sibling folder of
    the file's own directory: ``<file dir>/../<out_dirname>/<file name>``.
    Only the processed notes are written; other keys of the source JSON are
    not carried over.

    Args:
        json_dir: Directory handed to ``ps.get_files`` to locate ``json``
            files. Defaults to the sample directory the notebook used before
            this was a parameter.
        out_dirname: Name of the output folder created next to each input
            file's directory.
    """
    # The input directory is a parameter (not a local named ``gv_path``) so the
    # notebook-level ``gv_path`` dataset root is no longer shadowed.
    for json_path in ps.get_files(json_dir, "json"):
        src = Path(json_path)
        out_path = src.parent.parent / out_dirname / src.name
        out_path.parent.mkdir(exist_ok=True, parents=True)
        print(f"adjust time of \n{json_path}")
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        # NOTE(review): a file without a "notes" key yields None here and the
        # None is forwarded as-is — confirm gv.adjust_note_times tolerates it.
        notes = data.get("notes")
        processed_notes = gv.adjust_note_times(notes)
        with open(out_path, "w", encoding="utf-8") as f:
            # ensure_ascii=False keeps non-ASCII text readable in the output.
            json.dump(processed_notes, f, ensure_ascii=False, indent=4)
        print(f"saved to \n{out_path}")

In [8]:
# Run the time adjustment over the sample JSON files; each input and output
# path is printed as the file is processed.
adjust_note_times_sample()

adjust time of 
sample\gv\json\SINGER_16_10TO29_CLEAR_FEMALE_BALLAD_C0632.json
saved to 
sample\gv\json_time_adjusted\SINGER_16_10TO29_CLEAR_FEMALE_BALLAD_C0632.json
adjust time of 
sample\gv\json\SINGER_66_30TO49_HUSKY_MALE_DANCE_C2835.json
saved to 
sample\gv\json_time_adjusted\SINGER_66_30TO49_HUSKY_MALE_DANCE_C2835.json


## Fill silence notes between notes

In [9]:
# Show the input and output paths of the gap-filling step, one per line.
print(time_adjusted_json_filepath, filled_time_gaps_json_filepath, sep="\n")

sample/gv/json_time_adjusted/SINGER_16_10TO29_CLEAR_FEMALE_BALLAD_C0632.json
sample/gv/json_filled_time_gaps/SINGER_16_10TO29_CLEAR_FEMALE_BALLAD_C0632.json


In [10]:
# Fill the gaps of the time-adjusted sample JSON and save the result.
# NOTE(review): argument order (input path, output path) is inferred from the
# variable names and the "save:" line in the recorded output — confirm.
gv.fill_time_gaps_save(time_adjusted_json_filepath, filled_time_gaps_json_filepath)

save:
sample\gv\json_filled_time_gaps\SINGER_16_10TO29_CLEAR_FEMALE_BALLAD_C0632.json


## Verify correspondence between json, wav, and mid files

In [11]:
# Materialize each listing once. ps.get_files may hand back a one-shot
# iterator (an earlier cell wraps it in list() before calling len()), and every
# listing is consumed by two of the three pairwise checks below; without
# list() the later checks could silently compare exhausted iterators.
jsons = list(ps.get_files(gv_path, "json", sort=True))
mids = list(ps.get_files(gv_path, "mid", sort=True))
wavs = list(ps.get_files(gv_path, "wav", sort=True))

# Pairwise coherence checks between the three file kinds.
rprint(gv.verify_files_coherent(jsons, mids))
rprint(gv.verify_files_coherent(wavs, mids))
rprint(gv.verify_files_coherent(jsons, wavs))

## Remove abnormal files

In [12]:
# NOTE(review): the helper's name suggests it deletes files under the full
# dataset directory — confirm what it removes before re-running this cell.
# The recorded output for this run is a pair of empty lists.
gv.remove_abnormal_file(gv_path)

([], [])

# GV Preprocessing

In [13]:
# Sample song files consumed/produced by the steps below.
sample_json_name = "SINGER_16_10TO29_CLEAR_FEMALE_BALLAD_C0632.json"
json_filepath = "sample/gv/json_preprocessed/" + sample_json_name
split_json_filepath = "sample/gv/split_json/" + sample_json_name

# Output root and its per-artifact subfolders (duration, pitch, wav).
preprocessed_gv_path = "preprocessed_gv/"
preprocessed_gv_duration_path = preprocessed_gv_path + "duration"
preprocessed_gv_pitch_path = preprocessed_gv_path + "pitch"
preprocessed_gv_wav_path = preprocessed_gv_path + "wav"

## Step 1 - preprocess gv json

- gv json -> adjust note times + fill time gaps + quantization + frames
- embed a coherent json format (shared with mssv)

In [14]:
# Show the three sample directories used by Step 1 on a single line.
print(" ".join((gv_json_sample, gv_mid_sample, gv_sample_preprocessed)))

sample/gv/json sample/gv/midi sample/gv/json_preprocessed


In [15]:
# Run Step 1 on the sample set.
# NOTE(review): argument roles (json dir, midi dir, output dir) are inferred
# from the variable names — confirm against gv.preprocess_json.
gv.preprocess_json(
    gv_json_sample, gv_mid_sample, gv_sample_preprocessed, parallel=True
)

In [16]:
# Full-dataset variant of Step 1 (kept disabled).
# The first two arguments previously named the `gv` module itself; the dataset
# root `gv_path` is what the json/mid listings elsewhere in this notebook use.
# gv.preprocess_json(
#     gv_path,
#     gv_path,
#     gv_json_preprocessed,
#     parallel=True,
# )

## Step 2 - split notes by silence

In [17]:
# Split the preprocessed sample JSON at silences (min_length=6) and save it.
split_json = ps.split_json_by_silence(json_filepath, min_length=6)

# From here on split_json_filepath is a Path, not a str.
split_json_filepath = Path(split_json_filepath)
split_json_filepath.parent.mkdir(parents=True, exist_ok=True)
with split_json_filepath.open("w", encoding="utf-8") as f:
    f.write(json.dumps(split_json, indent=4, ensure_ascii=False))

## Step 3 or Step 4

- Korean regularization (metadata.txt 의 가사의 글자 개수가 split 된 duration/pitch/wav 의 개수와 일치해야 하는지? 만약 일치하지 않아도 된다면, step 4 에서 해도 되고, json 이 아니라 kor seq/pitch seq/GT 만 받아서 해도 된다)

In [18]:
# Display the split JSON path (a Path object after the Step 2 cell has run).
split_json_filepath

WindowsPath('sample/gv/split_json/SINGER_16_10TO29_CLEAR_FEMALE_BALLAD_C0632.json')

## Step 4 - save duration, pitch as npy file, split audio, save metadata

In [19]:
# Audio file of the same sample song, passed to gv.preprocess_one in Step 4.
wav_filepath = "sample/gv/wav/SINGER_16_10TO29_CLEAR_FEMALE_BALLAD_C0632.wav"

In [20]:
# Preprocess the single sample song and keep what gv.preprocess_one returns
# as the only metadata entry.
metadata_list = [
    gv.preprocess_one(
        wav_filepath,
        split_json_filepath,
        preprocessed_gv_pitch_path,
        preprocessed_gv_duration_path,
        preprocessed_gv_wav_path,
    )
]

# Make sure the output root exists, then write the entries back-to-back
# (no separator is inserted between them).
preprocessed_gv_path = Path(preprocessed_gv_path)
preprocessed_gv_path.mkdir(parents=True, exist_ok=True)
with open(preprocessed_gv_path / "metadata.txt", "w", encoding="utf-8") as f:
    f.write("".join(metadata_list))