Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

standardized inference and number of iterations for mSuperb single lang track #4905

Merged
merged 7 commits into from
Feb 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 0 additions & 1 deletion egs/an4/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
) as wav_scp_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs/csj/align1/local/gather_transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import sys

if __name__ == "__main__":

texts = {}

with open(sys.argv[1], "r", encoding="utf-8") as f:
Expand Down
1 change: 0 additions & 1 deletion egs/iwslt19/st1/local/filter_offlimit.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@


def main():

offlimit_spk_list = []
with codecs.open(args.offlimit_list, "r", encoding="utf-8") as f:
for line in f:
Expand Down
1 change: 0 additions & 1 deletion egs/iwslt21/asr1/local/filter_parentheses.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@


def main():

p_kanji = regex.compile(r".*\p{Script=Han}+.*")
p_hiragana = regex.compile(r".*\p{Block=Hiragana}+.*")
p_katakana = regex.compile(r".*\p{Block=Katakana}+.*")
Expand Down
1 change: 0 additions & 1 deletion egs/iwslt21/asr1/local/merge_short_segments.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@


def merge(segments, segments_dict):

while True:
num_merge = 0
new_segments = deque([])
Expand Down
2 changes: 1 addition & 1 deletion egs/lrs/avsr1/local/se_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def Batch(fdir, snr_l=[]):
wav_np = np.zeros(
[len(wav_l), maxlen], np.int16
) # numpy array for waveform matrix.
for (i, wav) in zip(range(len(wav_l)), wav_l):
for i, wav in zip(range(len(wav_l)), wav_l):
wav_np[i, : len(wav)] = wav # add waveform to numpy array.
len_l.append(len(wav)) # append length of waveform to list.
return wav_np, np.array(len_l, np.int32), np.array(snr_test_l, np.int32), fname_l
2 changes: 0 additions & 2 deletions egs/mgb2/asr1/local/add_to_datadir.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,10 @@
text_file = open(outdir + "/text", "a")

for line in sys.stdin:

m = re.match(r"\w+speaker(\d+)\w+\s+(.*)", line)
# print line

if m:

spk = int(m.group(1))

t = m.group(2).split()
Expand Down
1 change: 0 additions & 1 deletion egs/mini_an4/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
) as wav_scp_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs/mini_an4/asr_mix1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
) as wav_scp_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs/puebla_nahuatl/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def ExtractAudioID(audioname, wav_spk_info=None):


def XMLRefine(input_xml, output_xml, readable=False):

"""refine trs file into

:param input_xml: original transcriber xml
Expand Down
1 change: 0 additions & 1 deletion egs/ru_open_stt/asr1/local/ru_open_stt_prepare_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ def get_uttid(wav):
with open(odir + "/text", "w", encoding="utf-8") as text, open(
odir + "/wav.scp", "w"
) as wavscp, open(odir + "/utt2spk", "w") as utt2spk:

for utt in subsets[subset]["all"]:
[uttid, words, wav] = utt
text.write("{} {}\n".format(uttid, words))
Expand Down
1 change: 0 additions & 1 deletion egs/wsj_mix/asr1/local/merge_scp2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,6 @@ def shape(x):
("output", output_lines, output_infos),
("other", lines, infos),
]:

lis = []
for idx, (line_list, info_list) in enumerate(zip(_lines, _infos), 1):
if inout == "input":
Expand Down
1 change: 0 additions & 1 deletion egs/yoloxochitl_mixtec/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def ExtractAudioID(audioname, wav_spk_info=None):


def XMLRefine(input_xml, output_xml, readable=False):

if readable:
append = "\n"
else:
Expand Down
1 change: 0 additions & 1 deletion egs2/TEMPLATE/asr1/pyscripts/feats/mean_pool_scp.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ def main():
with open(save_path + ".lengths", "w") as lengths_out, open(
scp_file, "w"
) as sf:

for length, utt_id in tqdm.tqdm(zip(lengths, utt_ids)):
utt_id = utt_id.rstrip()
length = int(length)
Expand Down
20 changes: 12 additions & 8 deletions egs2/TEMPLATE/asr1/pyscripts/utils/plot_sinc_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,9 @@ def plot_filter_kernels(filters: torch.Tensor, sample_rate: int, args):
f_mins = np.abs(pre_filters[:, 0])
f_maxs = np.abs(pre_filters[:, 0]) + np.abs(pre_filters[:, 1] - pre_filters[:, 0])
F_mins, F_maxs = f_mins * sample_rate, f_maxs * sample_rate
pre_F_mins, pre_F_maxs = np.round(F_mins).astype(np.int64), np.round(F_maxs).astype(
np.int64
pre_F_mins, pre_F_maxs = (
np.round(F_mins).astype(np.int64),
np.round(F_maxs).astype(np.int64),
)

# learned
Expand All @@ -180,11 +181,13 @@ def plot_filter_kernels(filters: torch.Tensor, sample_rate: int, args):
f_mins = np.abs(filters[:, 0])
f_maxs = np.abs(filters[:, 0]) + np.abs(filters[:, 1] - filters[:, 0])
F_mins, F_maxs = f_mins * sample_rate, f_maxs * sample_rate
F_mins, F_maxs = np.round(F_mins).astype(np.int64), np.round(F_maxs).astype(
np.int64
F_mins, F_maxs = (
np.round(F_mins).astype(np.int64),
np.round(F_maxs).astype(np.int64),
)
F_mins, F_maxs = np.clip(F_mins, 0, sample_rate / 2.0), np.clip(
F_maxs, 0, sample_rate / 2.0
F_mins, F_maxs = (
np.clip(F_mins, 0, sample_rate / 2.0),
np.clip(F_maxs, 0, sample_rate / 2.0),
)

x_f = np.linspace(0.0, np.max(F_maxs), int(np.max(F_maxs)) + 1)
Expand Down Expand Up @@ -304,8 +307,9 @@ def main(argv):
f_mins = np.abs(filters[:, 0])
f_maxs = np.abs(filters[:, 0]) + np.abs(filters[:, 1] - filters[:, 0])
F_mins, F_maxs = f_mins * sample_rate, f_maxs * sample_rate
F_mins, F_maxs = np.round(F_mins).astype(np.int64), np.round(F_maxs).astype(
np.int64
F_mins, F_maxs = (
np.round(F_mins).astype(np.int64),
np.round(F_maxs).astype(np.int64),
)

# Create output folder if it does not yet exist
Expand Down
2 changes: 1 addition & 1 deletion egs2/TEMPLATE/asr1/pyscripts/utils/score_summarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

nlg = NLGEval() # loads the models
print("Key", "\t", "METEOR", "\t", "ROUGE-L")
for (key, ref, hyp) in zip(keys, labels, decoded_preds):
for key, ref, hyp in zip(keys, labels, decoded_preds):
metrics_dict = nlg.compute_individual_metrics([ref], hyp)
print(key, "\t", metrics_dict["METEOR"], "\t", metrics_dict["ROUGE_L"])
refs = [[x] for x in labels]
Expand Down
1 change: 0 additions & 1 deletion egs2/TEMPLATE/ssl1/pyscripts/sklearn_km.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ def get_mfcc_feature(feats_dir, fs, nj, portion):


def get_hubert_feature(feats_dir, fs, portion, url, dir, layer):

reader = HubertFeatureReader(fs, url, dir, layer)
generator, num = get_path_iterator(f"{feats_dir}/wav.scp", portion)
iterator = generator()
Expand Down
1 change: 0 additions & 1 deletion egs2/clarity21/enh1/local/prep_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@


def prepare_data(clarity_root, samplerate):

output_folder = "./data"
ids = {"train": set(), "dev": set()}

Expand Down
2 changes: 0 additions & 2 deletions egs2/cmu_arctic/tts1/local/get_utt_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@
fdev = open(utt_dev_list, "w")
feval = open(utt_eval_list, "w")
for spk in ["slt", "clb", "bdl", "rms", "jmk", "awb", "ksp"]:

for w in glob.glob("downloads/cmu_us_all_arctic/wav/{}_*.wav".format(spk, spk)):

# check if in train list
file_suffix = os.path.basename(w).split("_")[2]

Expand Down
1 change: 0 additions & 1 deletion egs2/dcase22_task1/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
with open(os.path.join("data", x, "text"), "w") as text_f, open(
os.path.join("data", x, "wav.scp"), "w"
) as wav_scp_f, open(os.path.join("data", x, "utt2spk"), "w") as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
11 changes: 7 additions & 4 deletions egs2/dirha_wsj/asr1/local/prepare_dirha_wsj.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,13 @@ def prepare(
if data_dir is not None:
# Create single channel dir
env2info = {}
for (env, real_sim, mic), (
_info,
_spk2utt,
_spk2gender,
for (
(env, real_sim, mic),
(
_info,
_spk2utt,
_spk2gender,
),
) in info.items():
if not mic.startswith("Beam_"):
env2info.setdefault((env, real_sim), []).append(
Expand Down
3 changes: 0 additions & 3 deletions egs2/dns_icassp22/enh1/local/noisyspeech_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@


def add_pyreverb(clean_speech, rir):

reverb_speech = signal.fftconvolve(clean_speech, rir, mode="full")

# make reverb_speech same length as clean_speech
Expand Down Expand Up @@ -86,7 +85,6 @@ def build_audio(is_clean, params, index, audio_samples_length=-1):
# iterate through multiple clips until we have a long enough signal
tries_left = MAXTRIES
while remaining_length > 0 and tries_left > 0:

# read next audio file and resample if necessary

idx = (idx + 1) % np.size(source_files)
Expand Down Expand Up @@ -583,5 +581,4 @@ def main_body():


if __name__ == "__main__":

main_body()
1 change: 0 additions & 1 deletion egs2/dsing/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ def __init__(self, name, workspace, db_path):
self.db_path = db_path

def add_utterance(self, utt, recording):

text = utt["text"]
arrangement, performance, country, gender, user = recording[:-4].split("-")

Expand Down
1 change: 0 additions & 1 deletion egs2/fsc/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
) as transcript_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs2/fsc_challenge/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
) as transcript_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs2/fsc_challenge/slu1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
) as transcript_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs2/fsc_challenge/slu1/local/data_prep_gigaspeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
) as transcript_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs2/fsc_challenge/slu1/local/data_prep_gt.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
) as transcript_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs2/fsc_unseen/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
) as transcript_f, open(
os.path.join("data", x, "utt2spk"), "w"
) as utt2spk_f:

text_f.truncate()
wav_scp_f.truncate()
utt2spk_f.truncate()
Expand Down
1 change: 0 additions & 1 deletion egs2/harpervalley/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def process_data(target_dir, source_dir, audio_dir, filename, min_length):
) as segments, open(
os.path.join(target_dir, "text"), "a", encoding="utf-8"
) as text:

metadata_f = load_json(os.path.join(source_dir, "metadata", filename + ".json"))
transcript_f = load_json(
os.path.join(source_dir, "transcript", filename + ".json")
Expand Down
3 changes: 0 additions & 3 deletions egs2/iam/ocr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ def prepare_text(lines_file_path, output_dir, split_ids):
skipped_ids = []
with open(lines_file_path) as lines_file:
for line in lines_file.readlines():

# skip comment lines
if line[0] == "#":
continue
Expand All @@ -59,7 +58,6 @@ def prepare_text(lines_file_path, output_dir, split_ids):
line_id = line_split[0]

if line_id in split_ids:

# extract and format transcription into Kaldi style
transcription = " ".join(line_split[8:])
transcription = transcription.replace("|", " ")
Expand Down Expand Up @@ -175,7 +173,6 @@ def prepare_feats(


if __name__ == "__main__":

downloads_dir = "downloads/"
data_dir = "data/"

Expand Down
1 change: 0 additions & 1 deletion egs2/l3das22/enh1/local/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def wer(clean_speech, denoised_speech):
"""

def _transcription(clean_speech, denoised_speech):

# transcribe clean audio
input_values = wer_tokenizer(clean_speech, return_tensors="pt").input_values
logits = wer_model(input_values).logits
Expand Down
9 changes: 6 additions & 3 deletions egs2/librispeech/ssl1/local/hubert_feature_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,12 @@ def get_feats(self, data, ref_len=None):
x = torch.from_numpy(x).float().to(self.device)
x = x.view(1, -1)

feat = self.model.wav2vec2.extract_features(x, num_layers=self.layer,)[0][
-1
][
feat = self.model.wav2vec2.extract_features(
x,
num_layers=self.layer,
)[
0
][-1][
0
] # (time, feat_dim)
return feat.cpu()
1 change: 0 additions & 1 deletion egs2/librispeech/ssl1/local/learn_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ def get_km_model(


def load_feature_shard(rspecifier, in_filetype, percent):

feats = []
for utt, feat in file_reader_helper(rspecifier, in_filetype):
feats.append(feat)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ def _make_layer(self, block, planes, blocks, stride=1):
return nn.Sequential(*layers)

def forward(self, x):

x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
Expand Down
2 changes: 0 additions & 2 deletions egs2/lrs3/asr1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,9 @@ def generate_espnet_data(
wav = []

for speaker_folder in speaker_folders:

spk_id = os.path.basename(speaker_folder)

for wav_file in os.listdir(speaker_folder):

if not wav_file.endswith(".wav"):
continue
text_file = wav_file.replace("wav", "txt")
Expand Down
2 changes: 1 addition & 1 deletion egs2/microsoft_speech/asr1/local/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def prepare_files(dest_folder, audio_folder, transcriptions, fnames, utt_ids):
f.write(line)
f.write("\n")
with open(os.path.join(dest_folder, "wav.scp"), "w", encoding="utf-8") as f:
for (idx, fname) in zip(utt_ids, fnames):
for idx, fname in zip(utt_ids, fnames):
fpath = os.path.join(audio_folder, fname + ".wav")
line = idx + " " + fpath
f.write(line)
Expand Down