Skip to content

Commit

Permalink
Merge branch 'master' into pt1.13.1
Browse files: browse the repository at this point in the history
  • Loading branch information
kamo-naoyuki committed Feb 3, 2023
2 parents: 3575725 + a3a9121; commit 66db9b3
Show file tree
Hide file tree
Showing 108 changed files with 20 additions and 140 deletions.
1 change: 0 additions & 1 deletion egs/an4/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
) as wav_scp_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs/csj/align1/local/gather_transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import sys

if __name__ == "__main__":

texts = {}

with open(sys.argv[1], "r", encoding="utf-8") as f:
Expand Down
1 change: 0 additions & 1 deletion egs/iwslt19/st1/local/filter_offlimit.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@


def main():

offlimit_spk_list = []
with codecs.open(args.offlimit_list, "r", encoding="utf-8") as f:
for line in f:
Expand Down
1 change: 0 additions & 1 deletion egs/iwslt21/asr1/local/filter_parentheses.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@


def main():

p_kanji = regex.compile(r".*\p{Script=Han}+.*")
p_hiragana = regex.compile(r".*\p{Block=Hiragana}+.*")
p_katakana = regex.compile(r".*\p{Block=Katakana}+.*")
Expand Down
1 change: 0 additions & 1 deletion egs/iwslt21/asr1/local/merge_short_segments.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@


def merge(segments, segments_dict):

while True:
num_merge = 0
new_segments = deque([])
Expand Down
2 changes: 1 addition & 1 deletion egs/lrs/avsr1/local/se_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def Batch(fdir, snr_l=[]):
wav_np = np.zeros(
[len(wav_l), maxlen], np.int16
) # numpy array for waveform matrix.
for (i, wav) in zip(range(len(wav_l)), wav_l):
for i, wav in zip(range(len(wav_l)), wav_l):
wav_np[i, : len(wav)] = wav # add waveform to numpy array.
len_l.append(len(wav)) # append length of waveform to list.
return wav_np, np.array(len_l, np.int32), np.array(snr_test_l, np.int32), fname_l
2 changes: 0 additions & 2 deletions egs/mgb2/asr1/local/add_to_datadir.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,10 @@
text_file = open(outdir + "/text", "a")

for line in sys.stdin:

m = re.match(r"\w+speaker(\d+)\w+\s+(.*)", line)
# print line

if m:

spk = int(m.group(1))

t = m.group(2).split()
Expand Down
1 change: 0 additions & 1 deletion egs/mini_an4/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
) as wav_scp_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs/mini_an4/asr_mix1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
) as wav_scp_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs/puebla_nahuatl/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def ExtractAudioID(audioname, wav_spk_info=None):


def XMLRefine(input_xml, output_xml, readable=False):

"""refine trs file into
:param input_xml: original transcriber xml
Expand Down
1 change: 0 additions & 1 deletion egs/ru_open_stt/asr1/local/ru_open_stt_prepare_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ def get_uttid(wav):
with open(odir + "/text", "w", encoding="utf-8") as text, open(
odir + "/wav.scp", "w"
) as wavscp, open(odir + "/utt2spk", "w") as utt2spk:

for utt in subsets[subset]["all"]:
[uttid, words, wav] = utt
text.write("{} {}\n".format(uttid, words))
Expand Down
1 change: 0 additions & 1 deletion egs/wsj_mix/asr1/local/merge_scp2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,6 @@ def shape(x):
("output", output_lines, output_infos),
("other", lines, infos),
]:

lis = []
for idx, (line_list, info_list) in enumerate(zip(_lines, _infos), 1):
if inout == "input":
Expand Down
1 change: 0 additions & 1 deletion egs/yoloxochitl_mixtec/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def ExtractAudioID(audioname, wav_spk_info=None):


def XMLRefine(input_xml, output_xml, readable=False):

if readable:
append = "\n"
else:
Expand Down
1 change: 0 additions & 1 deletion egs2/TEMPLATE/asr1/pyscripts/feats/mean_pool_scp.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ def main():
with open(save_path + ".lengths", "w") as lengths_out, open(
scp_file, "w"
) as sf:

for length, utt_id in tqdm.tqdm(zip(lengths, utt_ids)):
utt_id = utt_id.rstrip()
length = int(length)
Expand Down
1 change: 0 additions & 1 deletion egs2/TEMPLATE/asr1/pyscripts/utils/extract_xvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@ def main(argv):
device = "cpu"

if args.toolkit in ("speechbrain", "rawnet"):

# Prepare spk2utt for mean x-vector
spk2utt = dict()
with open(os.path.join(args.in_folder, "spk2utt"), "r") as reader:
Expand Down
2 changes: 1 addition & 1 deletion egs2/TEMPLATE/asr1/pyscripts/utils/score_summarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

nlg = NLGEval() # loads the models
print("Key", "\t", "METEOR", "\t", "ROUGE-L")
for (key, ref, hyp) in zip(keys, labels, decoded_preds):
for key, ref, hyp in zip(keys, labels, decoded_preds):
metrics_dict = nlg.compute_individual_metrics([ref], hyp)
print(key, "\t", metrics_dict["METEOR"], "\t", metrics_dict["ROUGE_L"])
refs = [[x] for x in labels]
Expand Down
1 change: 0 additions & 1 deletion egs2/TEMPLATE/ssl1/pyscripts/sklearn_km.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ def get_mfcc_feature(feats_dir, fs, nj, portion):


def get_hubert_feature(feats_dir, fs, portion, url, dir, layer):

reader = HubertFeatureReader(fs, url, dir, layer)
generator, num = get_path_iterator(f"{feats_dir}/wav.scp", portion)
iterator = generator()
Expand Down
1 change: 0 additions & 1 deletion egs2/clarity21/enh1/local/prep_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@


def prepare_data(clarity_root, samplerate):

output_folder = "./data"
ids = {"train": set(), "dev": set()}

Expand Down
2 changes: 0 additions & 2 deletions egs2/cmu_arctic/tts1/local/get_utt_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@
fdev = open(utt_dev_list, "w")
feval = open(utt_eval_list, "w")
for spk in ["slt", "clb", "bdl", "rms", "jmk", "awb", "ksp"]:

for w in glob.glob("downloads/cmu_us_all_arctic/wav/{}_*.wav".format(spk, spk)):

# check if in train list
file_suffix = os.path.basename(w).split("_")[2]

Expand Down
1 change: 0 additions & 1 deletion egs2/dcase22_task1/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
with open(os.path.join("data", x, "text"), "w") as text_f, open(
os.path.join("data", x, "wav.scp"), "w"
) as wav_scp_f, open(os.path.join("data", x, "utt2spk"), "w") as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
3 changes: 0 additions & 3 deletions egs2/dns_icassp22/enh1/local/noisyspeech_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@


def add_pyreverb(clean_speech, rir):

reverb_speech = signal.fftconvolve(clean_speech, rir, mode="full")

# make reverb_speech same length as clean_speech
Expand Down Expand Up @@ -86,7 +85,6 @@ def build_audio(is_clean, params, index, audio_samples_length=-1):
# iterate through multiple clips until we have a long enough signal
tries_left = MAXTRIES
while remaining_length > 0 and tries_left > 0:

# read next audio file and resample if necessary

idx = (idx + 1) % np.size(source_files)
Expand Down Expand Up @@ -583,5 +581,4 @@ def main_body():


if __name__ == "__main__":

main_body()
1 change: 0 additions & 1 deletion egs2/dsing/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ def __init__(self, name, workspace, db_path):
self.db_path = db_path

def add_utterance(self, utt, recording):

text = utt["text"]
arrangement, performance, country, gender, user = recording[:-4].split("-")

Expand Down
1 change: 0 additions & 1 deletion egs2/fsc/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
) as transcript_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs2/fsc_challenge/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
) as transcript_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs2/fsc_challenge/slu1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
) as transcript_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs2/fsc_challenge/slu1/local/data_prep_gigaspeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
) as transcript_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs2/fsc_challenge/slu1/local/data_prep_gt.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
) as transcript_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs2/fsc_unseen/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
) as transcript_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs2/harpervalley/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def process_data(target_dir, source_dir, audio_dir, filename, min_length):
) as segments, open(
os.path.join(target_dir, "text"), "a", encoding="utf-8"
) as text:

metadata_f = load_json(os.path.join(source_dir, "metadata", filename + ".json"))
transcript_f = load_json(
os.path.join(source_dir, "transcript", filename + ".json")
Expand Down
3 changes: 0 additions & 3 deletions egs2/iam/ocr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ def prepare_text(lines_file_path, output_dir, split_ids):
skipped_ids = []
with open(lines_file_path) as lines_file:
for line in lines_file.readlines():

# skip comment lines
if line[0] == "#":
continue
Expand All @@ -59,7 +58,6 @@ def prepare_text(lines_file_path, output_dir, split_ids):
line_id = line_split[0]

if line_id in split_ids:

# extract and format transcription into Kaldi style
transcription = " ".join(line_split[8:])
transcription = transcription.replace("|", " ")
Expand Down Expand Up @@ -175,7 +173,6 @@ def prepare_feats(


if __name__ == "__main__":

downloads_dir = "downloads/"
data_dir = "data/"

Expand Down
1 change: 0 additions & 1 deletion egs2/l3das22/enh1/local/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def wer(clean_speech, denoised_speech):
"""

def _transcription(clean_speech, denoised_speech):

# transcribe clean audio
input_values = wer_tokenizer(clean_speech, return_tensors="pt").input_values
logits = wer_model(input_values).logits
Expand Down
9 changes: 6 additions & 3 deletions egs2/librispeech/ssl1/local/hubert_feature_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,12 @@ def get_feats(self, data, ref_len=None):
x = torch.from_numpy(x).float().to(self.device)
x = x.view(1, -1)

feat = self.model.wav2vec2.extract_features(x, num_layers=self.layer,)[0][
-1
][
feat = self.model.wav2vec2.extract_features(
x,
num_layers=self.layer,
)[
0
][-1][
0
] # (time, feat_dim)
return feat.cpu()
1 change: 0 additions & 1 deletion egs2/librispeech/ssl1/local/learn_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ def get_km_model(


def load_feature_shard(rspecifier, in_filetype, percent):

feats = []
for utt, feat in file_reader_helper(rspecifier, in_filetype):
feats.append(feat)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ def _make_layer(self, block, planes, blocks, stride=1):
return nn.Sequential(*layers)

def forward(self, x):

x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
Expand Down
2 changes: 0 additions & 2 deletions egs2/lrs3/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,9 @@ def generate_espnet_data(
wav = []

for speaker_folder in speaker_folders:

spk_id = os.path.basename(speaker_folder)

for wav_file in os.listdir(speaker_folder):

if not wav_file.endswith(".wav"):
continue
text_file = wav_file.replace("wav", "txt")
Expand Down
2 changes: 1 addition & 1 deletion egs2/microsoft_speech/asr1/local/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def prepare_files(dest_folder, audio_folder, transcriptions, fnames, utt_ids):
f.write(line)
f.write("\n")
with open(os.path.join(dest_folder, "wav.scp"), "w", encoding="utf-8") as f:
for (idx, fname) in zip(utt_ids, fnames):
for idx, fname in zip(utt_ids, fnames):
fpath = os.path.join(audio_folder, fname + ".wav")
line = idx + " " + fpath
f.write(line)
Expand Down
1 change: 0 additions & 1 deletion egs2/ofuton_p_utagoe_db/svs1/local/prep_segments.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def make_segment(file_id, labels, threshold=30, sil=["pau", "br", "sil"]):
segments = []
segment = SegInfo()
for label in labels:

if label.label_id in sil:
if len(segment.segs) > 0:
segments.extend(segment.split(threshold=threshold))
Expand Down
1 change: 0 additions & 1 deletion egs2/reazonspeech/asr1/local/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ def save_kaldi_format(outdir, ds):
) as fp_wav, open(os.path.join(outdir, "utt2spk"), "w") as fp_utt2spk, open(
os.path.join(outdir, "spk2utt"), "w"
) as fp_spk2utt:

for item in ds.sort("name"):
path = item["audio"]["path"]

Expand Down
1 change: 0 additions & 1 deletion egs2/sinhala/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ def read_sinhala_data(audio_csv, sentences_csv, export_csv):
) as transcript_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs2/slue-voxceleb/asr1/local/data_prep_slue.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
) as transcript_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
with open(os.path.join("data", x, "text"), "w") as text_f, open(
os.path.join("data", x, "wav.scp"), "w"
) as wav_scp_f, open(os.path.join("data", x, "utt2spk"), "w") as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
) as transcript_f, open(os.path.join("data", x, "wav.scp"), "w") as wav_scp_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs2/slurp/asr1/local/prepare_slurp_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
) as text, open(os.path.join(odir, "wav.scp"), "w") as wavscp, open(
os.path.join(odir, "utt2spk"), "w"
) as utt2spk:

for line in meta:
prompt = json.loads(line.strip())
transcript = prompt["sentence"]
Expand Down

0 comments on commit 66db9b3

Please sign in to comment.