Fix text NER evaluation (#7)
* run black

* fix gitignore

* fix text ner pipeline

* fix formatting

* fix text ner evaluation

* fix merge conflicts
siddalmia committed Feb 16, 2022
1 parent 78d06f3 commit 065c951
Showing 11 changed files with 41 additions and 26 deletions.
7 changes: 6 additions & 1 deletion .gitignore
@@ -1,2 +1,7 @@

# general
.DS_Store
dataset/
manifest/
save/
slue_toolkit.egg-info/
__pycache__/
3 changes: 2 additions & 1 deletion baselines/ner/nlp_scripts/eval-deberta.sh
@@ -8,4 +8,5 @@ python slue_toolkit/text_ner/ner_deberta.py eval \
--model_type $model_type \
--eval_asr False \
--eval_subset $eval_set \
--eval_label $eval_label
--eval_label $eval_label \
--save_results True
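
The change above adds a continuation backslash to the `--eval_label` line so that the new `--save_results True` flag is part of the same command. The invocation style (a subcommand followed by `--flag value` pairs) matches python-fire, which setup.py lists as a dependency; below is a minimal sketch of how such a boolean flag would reach a Python entry point. The signature and default values are illustrative, not the toolkit's exact ones.

```python
import fire

def eval(model_type="deberta-base", eval_asr=False, eval_subset="devel",
         eval_label="raw", save_results=False):
    """Illustrative stand-in for the eval entry point in ner_deberta.py."""
    # Fire parses the literal True in `--save_results True` into a Python bool.
    print(model_type, eval_asr, eval_subset, eval_label, save_results)

if __name__ == "__main__":
    fire.Fire()  # e.g. `python this_sketch.py eval --eval_subset devel --save_results True`
```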
2 changes: 1 addition & 1 deletion baselines/sentiment/README.md
@@ -31,6 +31,6 @@ To evaluate the fine-tuned nlp model, run following command or run `baselines/se

First, ASR transcriptions need to be prepared in the manifest dir, and then evaluation can be done using the same evaluation script as the nlp topline.
```sh
python slue_toolkit/prepare/prepare_voxceleb_asr_pred.py --data manifest/slue-voxceleb --pred-data datasets/slue-voxceleb/preds/vc1/w2v2-large-lv60k-ft-slue-vc1-12h-lr1e-5-s1-mt800000-8gpu-update280000
python slue_toolkit/prepare/prepare_voxceleb_asr_pred.py --data manifest/slue-voxceleb --pred-data dataset/slue-voxceleb/preds/vc1/w2v2-large-lv60k-ft-slue-vc1-12h-lr1e-5-s1-mt800000-8gpu-update280000
python slue_toolkit/eval/eval_nlp_sentiment.py --save-dir save/sentiment/nlp_topline_bert-base-cased --data manifest/slue-voxceleb --subset test.asr-pred
```
4 changes: 2 additions & 2 deletions baselines/sentiment/pipeline_scripts/eval.sh
@@ -1,11 +1,11 @@
#!/bin/bash

python3 slue_toolkit/prepare/prepare_voxceleb_asr_pred.py --data manifest/slue-voxceleb --pred-data datasets/slue-voxceleb/preds/vc1/w2v2-large-lv60k-ft-slue-vc1-12h-lr1e-5-s1-mt800000-8gpu-update280000
python3 slue_toolkit/prepare/prepare_voxceleb_asr_pred.py --data manifest/slue-voxceleb --pred-data dataset/slue-voxceleb/preds/vc1/w2v2-large-lv60k-ft-slue-vc1-12h-lr1e-5-s1-mt800000-8gpu-update280000

python3 slue_toolkit/eval/eval_nlp_sentiment.py \
--data manifest/slue-voxceleb \
--subset test.asr-pred \
--save-dir save/sentiment/nlp_topline_bert-base-cased \
--use-gpu \
--eval \


8 changes: 4 additions & 4 deletions scripts/download_datasets.sh
@@ -1,12 +1,12 @@
#!/bin/bash

#1. Download
wget https://papers-slue.awsdev.asapp.com/slue-voxceleb_blind.tar.gz -P datasets/
wget https://papers-slue.awsdev.asapp.com/slue-voxpopuli_blind.tar.gz -P datasets/
wget https://papers-slue.awsdev.asapp.com/slue-voxceleb_blind.tar.gz -P dataset/
wget https://papers-slue.awsdev.asapp.com/slue-voxpopuli_blind.tar.gz -P dataset/

#2. Extract
tar -xzvf datasets/slue-voxceleb_blind.tar.gz -C datasets/
tar -xzvf datasets/slue-voxpopuli_blind.tar.gz -C datasets/
tar -xzvf dataset/slue-voxceleb_blind.tar.gz -C dataset/
tar -xzvf dataset/slue-voxpopuli_blind.tar.gz -C dataset/

#3. preprocess

3 changes: 3 additions & 0 deletions setup.py
@@ -23,6 +23,9 @@
"fire",
"editdistance",
"soundfile",
"transformers",
"datasets",
"seqeval",
],
entry_points={},
include_package_data=True,
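
setup.py now pulls in `transformers`, `datasets`, and `seqeval`; seqeval is the usual source of span-level NER metrics in this kind of token-classification evaluation. A minimal, self-contained sketch of seqeval scoring on BIO tag sequences (the tags below are made-up examples, not SLUE data):

```python
from seqeval.metrics import classification_report, f1_score

# Gold and predicted BIO tag sequences for two toy sentences (made up).
y_true = [["B-PER", "I-PER", "O", "B-LOC"], ["O", "B-ORG", "I-ORG"]]
y_pred = [["B-PER", "I-PER", "O", "O"], ["O", "B-ORG", "I-ORG"]]

print(f1_score(y_true, y_pred))               # micro-averaged span-level F1
print(classification_report(y_true, y_pred))  # per-entity precision/recall/F1
```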
2 changes: 1 addition & 1 deletion slue_toolkit/prepare/prepare_voxceleb.py
@@ -106,7 +106,7 @@ def create_split(


def create_manifest(
data_dir="datasets/slue-voxceleb",
data_dir="dataset/slue-voxceleb",
manifest_dir="manifest/slue-voxceleb",
is_blind=True,
):
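
With the new default above, the manifest-preparation entry point reads from `dataset/slue-voxceleb`. A minimal usage sketch (the import path follows the file location; passing the defaults explicitly is just for illustration):

```python
from slue_toolkit.prepare.prepare_voxceleb import create_manifest

# Defaults shown explicitly; after this commit data_dir points at dataset/ (singular).
create_manifest(
    data_dir="dataset/slue-voxceleb",
    manifest_dir="manifest/slue-voxceleb",
    is_blind=True,
)
```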
2 changes: 1 addition & 1 deletion slue_toolkit/prepare/prepare_voxceleb_asr_pred.py
@@ -18,7 +18,7 @@ def main():
"--pred-data",
type=str,
required=True,
default="datasets/slue-voxceleb/preds/vc1/w2v2-large-lv60k-ft-slue-vc1-12h-lr1e-5-s1-mt800000-8gpu-update280000",
default="dataset/slue-voxceleb/preds/vc1/w2v2-large-lv60k-ft-slue-vc1-12h-lr1e-5-s1-mt800000-8gpu-update280000",
help="Root directory containing voxceleb1_slue data files,"
"This dir should contain audio/ voxceleb1_slue_{finetune,dev,test} folders ",
)
2 changes: 1 addition & 1 deletion slue_toolkit/prepare/prepare_voxpopuli.py
@@ -31,7 +31,7 @@ def create_split(


def create_manifest(
data_dir="datasets/slue-voxpopuli",
data_dir="dataset/slue-voxpopuli",
manifest_dir="manifest/slue-voxpopuli",
is_blind=True,
):
11 changes: 7 additions & 4 deletions slue_toolkit/text_ner/ner_deberta.py
@@ -38,8 +38,8 @@ def eval(
label_list = read_lst(os.path.join(data_dir, f"{train_label}_tag_lst_ordered"))
if save_results:
ner_results_dir = os.path.join(log_dir, "error_analysis")
os.makedirs(ner_results_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)
os.makedirs(ner_results_dir, exist_ok=True)

data_obj = NDM.DataSetup(data_dir, model_type)
_ = data_obj.prep_data(
@@ -49,17 +49,20 @@
"fine-tune", "combined", get_map_files=True
) # prepare tag-id mapping files

# TODO: please verify this function
if "combined" in eval_label:
tag_lst = read_lst(os.path.join(data_dir, "combined_tag_lst_ordered"))

val_texts, val_tags, _, _, _, _ = data_obj.prep_data(eval_subset, "raw")
val_texts, val_tags, _, _, _ = data_obj.prep_data(eval_subset)
if eval_asr:
asr_val_texts, _, _, _, val_dataset = data_obj.prep_data(
f"{eval_subset}-{asr_model_type}-asr-{lm}", "raw"
)
else:
asr_val_texts = None
eval_obj = NDM.Eval(model_dir, model_type, label_list, eval_label, eval_asr)
asr_val_texts, asr_val_dataset = None, None

label_list = read_lst(os.path.join(data_dir, f"{eval_label}_tag_lst_ordered"))
eval_obj = NDM.Eval(data_dir, model_dir, model_type, label_list, eval_label, eval_asr)
for score_type in ["standard", "label"]:
if eval_asr:
res_fn = "-".join(
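
The evaluation setup above now derives `label_list` from `{eval_label}_tag_lst_ordered` and passes `data_dir` as the first argument of `NDM.Eval`, matching the `__init__` signature shown further down. A minimal usage sketch with placeholder values (the directory names, model type, and the `read_lst` stand-in are assumptions for illustration):

```python
import os
import slue_toolkit.text_ner.ner_deberta_modules as NDM  # assumed import alias, as used in ner_deberta.py

def read_lst(path):
    # Stand-in for the toolkit's helper: one list item per line.
    with open(path) as f:
        return [line.strip() for line in f]

# Hypothetical paths/values for illustration only.
data_dir, model_dir = "manifest/slue-voxpopuli/nlp_ner", "save/text_ner/deberta-base"
eval_label = "raw"

label_list = read_lst(os.path.join(data_dir, f"{eval_label}_tag_lst_ordered"))
eval_obj = NDM.Eval(data_dir, model_dir, "deberta-base", label_list, eval_label, eval_asr=False)
```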
23 changes: 13 additions & 10 deletions slue_toolkit/text_ner/ner_deberta_modules.py
@@ -30,6 +30,7 @@
from slue_toolkit.eval import eval_utils



class VPDataset(torch.utils.data.Dataset):
def __init__(self, encodings, labels):
self.encodings = encodings
@@ -76,12 +77,14 @@ def read_data(self, file_path):

def align_labels(self, tag2id, tags, encodings, label_all_tokens=False):
"""
Align labels with appropriate padding labels for sub-tokens
Align labels with appropriate padding labels for sub-tokens
label_all_tokens: Whether to put the label for one word on all tokens of generated by that word or just on the
one (in which case the other tokens will have a padding index).
"""
# TODO : Check this line
labels = [[tag2id[tag] if tag in tag2id else tag2id['O'] for tag in doc] for doc in tags]

label_all_tokens: Whether to put the label for one word on all tokens of generated by that word or just on the
one (in which case the other tokens will have a padding index).
"""
labels = [[tag2id[tag] for tag in doc] for doc in tags]
encoded_labels = []
for idx, doc_labels in enumerate(labels):
word_ids = encodings.word_ids(batch_index=idx)
@@ -242,7 +245,7 @@ def compute_metrics(p, return_entity_level_metrics=True):
model=model, # the instantiated 🤗 Transformers model to be trained
args=training_args, # training arguments, defined above
train_dataset=train_dataset, # training dataset
eval_dataset=eval_dataset, # evaluation dataset
eval_dataset=val_dataset, # evaluation dataset
compute_metrics=compute_metrics,
)

@@ -288,7 +291,7 @@ def compute_metrics(p, return_entity_level_metrics=True):
if training_args.do_eval:
logger.info("*** Evaluate ***")
metrics = trainer.evaluate()
metrics["eval_samples"] = len(eval_dataset)
metrics["eval_samples"] = len(val_dataset)
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

@@ -298,11 +301,11 @@ def __init__(
self, data_dir, model_dir, model_type, label_list, eval_label, eval_asr=False
):
"""
Inference with batch size = 1
"""
Inference with batch size = 1
"""
self.data_dir = data_dir
self.model_dir = model_dir
best_model_ckpt_dir = os.path.join(self.model_dir, "best-checkpoint")
best_model_ckpt_dir = os.path.join(self.model_dir)
self.model = DebertaForTokenClassification.from_pretrained(
best_model_ckpt_dir, output_loading_info=False
)
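
The `align_labels` docstring above describes mapping word-level tags onto sub-word tokens: either every sub-token of a word gets the word's label, or only the first one does and the rest get a padding index. A minimal sketch of that pattern with a Hugging Face fast tokenizer (the model name, example words, and tag set are illustrative, not the toolkit's exact code):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-base")  # any fast tokenizer with word_ids()
words = [["Barack", "Obama", "visited", "Paris"]]
tags = [["B-PER", "I-PER", "O", "B-LOC"]]
tag2id = {"O": 0, "B-PER": 1, "I-PER": 2, "B-LOC": 3}
label_all_tokens = False

enc = tokenizer(words, is_split_into_words=True, truncation=True, padding=True)
aligned = []
for i, doc_tags in enumerate(tags):
    word_ids = enc.word_ids(batch_index=i)
    prev, doc_labels = None, []
    for wid in word_ids:
        if wid is None:
            doc_labels.append(-100)  # special tokens and padding are ignored by the loss
        elif wid != prev:
            doc_labels.append(tag2id[doc_tags[wid]])  # first sub-token keeps the word's tag
        else:
            doc_labels.append(tag2id[doc_tags[wid]] if label_all_tokens else -100)
        prev = wid
    aligned.append(doc_labels)

print(aligned)
```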
