In [1]:
import json
from pathlib import Path
import time
from datetime import datetime

from rich import print as rprint
import numpy as np
import whisper
from rich import print as rprint
import midii

import preprocess_svs as ps 
from preprocess_svs import mssv 
from preprocess_svs import LyricNormalizer, SVS_Preprocessor

# MSSV File Correction

In [2]:
import os

# Root directory of the MSSV dataset. The original hard-coded Windows path stays
# the default; set the MSSV_PATH environment variable to point the notebook at a
# different location without editing this cell. An empty value falls back to the
# default as well.
mssv_path = os.environ.get("MSSV_PATH") or "D:/dataset/004.다화자 가창 데이터"

In [3]:
# Inspect the tempo information of one sample MIDI file.
sample_mssv_midi = "sample/mssv/midi/ba_05688_-4_a_s02_m_02.mid"
# NOTE(review): convert_1_to_0 presumably converts a type-1 MIDI file to type 0 — confirm in midii.
mid = midii.MidiFile(sample_mssv_midi, convert_1_to_0=True)
tempo_rank = mid.tempo_rank()
rprint(tempo_rank)
# Print the summary that ps.calculate_top_tempo_percentage derives from the ranking above.
rprint(ps.calculate_top_tempo_percentage(tempo_rank))

## Analyze Tempo Deviation

In [4]:
# Dataset-wide tempo statistics; left disabled, so it does not execute on Run All.
# ps.tempo_statistics(mssv_path, parallel=True)

- → MSSV 데이터셋의 tempo 편차가 커서, ticks 단위에서 음표 길이 정규화를 해야 함.

## Verify notes are sorted by time

- mssv 의 json 은 MIDI 에서 직접 변환했기 때문에 time 정렬이 본질적으로 내재되어 있어 이 단계는 skip

## Fill silence notes between notes

- mssv 의 json 은 MIDI 에서 변환하는 과정에서 notes 사이에 공백을 채웠음

## Verify correspondence between wav and mid files

In [5]:
# Print the result of comparing "*.mid" against "*.wav" files under mssv_path.
# NOTE(review): judging by the helper's name this presumably lists files that exist
# as only one of the two types — confirm the exact return format.
rprint(mssv.find_exclusive_two_type_files("*.mid", "*.wav", mssv_path))

## Check abnormal files

In [6]:
# Print whatever mssv.check_abnormal_file reports for the dataset root.
# NOTE(review): what counts as "abnormal" is defined in preprocess_svs.mssv, not here.
rprint(mssv.check_abnormal_file(mssv_path))

## Rename abnormal files

In [7]:
# NOTE(review): judging by its name this presumably renames files on disk under
# mssv_path, so re-running the notebook may not be idempotent — confirm before Run All.
rprint(mssv.rename_abnormal_file(mssv_path))

## Remove abnormal files

In [8]:
# NOTE(review): judging by its name this presumably deletes files under mssv_path
# (destructive, not undoable from the notebook) — confirm before Run All.
rprint(mssv.remove_abnormal_file(mssv_path))

## Verify midi pattern(on-lyrics-off)

In [9]:
# Dataset-wide check of the on-lyrics-off MIDI pattern; left disabled, so it does
# not execute on Run All.
# mssv.verify_midi_files_pattern_on_lyrics_off(mssv_path, parallel=True)

## Verify lyrics have no time

In [10]:
# Dataset-wide "lyrics has no time" check; left disabled, so it does not execute
# on Run All.
# mssv.verify_midi_files_lyrics_has_no_time(mssv_path, parallel=True)

# MSSV Preprocessing

In [11]:
# Sample recording used throughout the preprocessing walkthrough; every input
# path below is derived from the same root directory and file stem.
mssv_sample_root = "sample/mssv"
mssv_sample_stem = "ba_05688_-4_a_s02_m_02"

midi_filepath = f"{mssv_sample_root}/midi/{mssv_sample_stem}.mid"
wav_filepath = f"{mssv_sample_root}/wav/{mssv_sample_stem}.wav"
json_filepath = f"{mssv_sample_root}/json/{mssv_sample_stem}.json"
split_json_filepath = f"{mssv_sample_root}/split_json/{mssv_sample_stem}.json"

# Output locations. The root keeps its trailing slash, so the sub-directories
# are appended without an extra separator.
preprocessed_mssv_path = "preprocessed_mssv/"
preprocessed_mssv_duration_path = f"{preprocessed_mssv_path}duration"
preprocessed_mssv_pitch_path = f"{preprocessed_mssv_path}pitch"
preprocessed_mssv_wav_path = f"{preprocessed_mssv_path}wav"

## Step 1 - midi to json 

- note duration quantization
- duration conversion [ticks -> seconds -> frames]

In [12]:
# Single-file variants of Step 1, left disabled; the notebook runs the
# directory-level mssv.midis_to_jsons call instead.
# notes = mssv.midi_to_note_list(midi_filepath)
# mssv.preprocess_notes(notes, json_filepath)
# mssv.midi_to_json(midi_filepath, json_filepath)

In [16]:
# Input and output directories for the directory-level MIDI -> JSON conversion.
midi_dirpath = "sample/mssv/midi"
json_dirpath = "sample/mssv/json"

# Last expression of the cell, so its return value is shown as the cell output.
ps.get_files(midi_dirpath, "mid", sort=True)

[PosixPath('sample/mssv/midi/ba_05688_-4_a_s02_m_02.mid'),
 PosixPath('sample/mssv/midi/ba_09303_+0_a_s02_m_02.mid')]

In [None]:
# Convert the MIDI files in midi_dirpath and target json_dirpath for the results.
# NOTE(review): the recorded output of this cell is `[]`, and the recorded run of
# the following split step failed with FileNotFoundError for the sample JSON —
# confirm this call actually writes the JSON files.
mssv.midis_to_jsons(midi_dirpath, json_dirpath)

[]


## Step 2 - split notes by silence

In [14]:
# Split the Step 1 note JSON by silence and save the result as a new JSON file.

# Fail fast with an actionable message: the recorded run of this cell died with a
# bare FileNotFoundError because the Step 1 JSON had not been written.
if not Path(json_filepath).is_file():
    raise FileNotFoundError(
        f"Step 1 output not found: {json_filepath!r}. Run the midi-to-json step first."
    )

# min_length=6 is passed through unchanged; its meaning and unit are defined by
# ps.split_json_by_silence.
split_json = ps.split_json_by_silence(json_filepath, min_length=6)

# Rebinding the name from str to Path is kept on purpose: later cells display and
# pass this Path object.
split_json_filepath = Path(split_json_filepath)
split_json_filepath.parent.mkdir(exist_ok=True, parents=True)

# Serialize completely before opening the file so a serialization error cannot
# leave a truncated JSON file on disk.
split_json_text = json.dumps(split_json, indent=4, ensure_ascii=False)
with open(split_json_filepath, "w", encoding="utf-8") as f:
    f.write(split_json_text)

FileNotFoundError: [Errno 2] No such file or directory: 'sample/mssv/json/ba_05688_-4_a_s02_m_02.json'

## Step 3 or Step 4 

- Korean lyric normalization (metadata.txt 의 가사 글자 개수가 split 된 duration/pitch/wav 의 개수와 일치해야 하는지? 만약 일치하지 않아도 된다면 step 4 에서 해도 되고, json 이 아니라 kor seq/pitch seq/GT 만 받아서 해도 된다)

In [None]:
# Display the split-JSON path; it is a Path object once the Step 2 cell has rebound it.
split_json_filepath

PosixPath('sample/mssv/split_json/ba_05688_-4_a_s02_m_02.json')

## Step 4 - save duration, pitch as npy file, split audio, save metadata

In [None]:
# Preprocess the single sample (wav + split JSON) and collect the metadata text
# that mssv.preprocess_one returns for it.
metadata_list = [
    mssv.preprocess_one(
        wav_filepath,
        split_json_filepath,
        preprocessed_mssv_pitch_path,
        preprocessed_mssv_duration_path,
        preprocessed_mssv_wav_path,
    )
]

# Build the metadata path with pathlib: preprocessed_mssv_path ends with "/", so
# plain string formatting would yield "preprocessed_mssv//metadata.txt".
metadata_filepath = Path(preprocessed_mssv_path) / "metadata.txt"
# Create the output directory explicitly instead of relying on an earlier step.
metadata_filepath.parent.mkdir(parents=True, exist_ok=True)
with open(metadata_filepath, "w", encoding="utf-8") as f:
    f.write("".join(metadata_list))

## Normalizer 사용 설명

### 1. lyric_normalizer.py의 LyricNormalizer 클래스 import
### 2. LyricNormalizer 객체 생성
### 3. LyricNormalizer.normalize_lyrics() 함수 사용
#### &emsp; Input: GT(whisper result), 원본 가사, pitch sequence, duration sequence
#### &emsp; Output: 정규화 가사, pitch sequence, duration sequence, 정규화 정보를 담은 dictionary


In [None]:
# Build the preprocessor over the output directory produced by the earlier steps.
# NOTE(review): model_name/language are presumably forwarded to Whisper
# ("large-v3", Korean) — confirm in SVS_Preprocessor.
preprocessor = SVS_Preprocessor(
    base_path="preprocessed_mssv",
    model_name="large-v3",
    device="cpu",
    language="ko",
)

In [None]:
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt, i.e. it
# was stopped manually; presumably long-running with device="cpu" — confirm.
preprocessor.process_all_files()

KeyboardInterrupt: 

In [None]:
# Run the preprocessor's consistency verification; the recorded run printed
# "No errors found!".
preprocessor.verify_dataset_consistency()


=== Starting Dataset Consistency Verification ===

=== Verification Results ===

No errors found!





# Apply G2pk 

In [None]:
# Apply ps.g2p_metadata to the metadata file written by the preprocessing step.
# The call stays the last expression so any return value is still displayed.
file_path = "preprocessed_mssv/metadata.txt"
ps.g2p_metadata(file_path)

mecab installed
mecab installed
mecab installed
mecab installed
mecab installed
mecab installed
mecab installed
mecab installed
mecab installed
mecab installedmecab installed

mecab installed
