Skip to content

Commit

Permalink
feat(corprep): add dataset_extract_nouns configuration, add dataset_extract_tokens configuration
Browse files Browse the repository at this point in the history
  • Loading branch information
entelecheia committed Jul 25, 2023
1 parent 9ae964f commit 5ddf017
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/corprep/conf/pipe/dataset_extract_nouns.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Pipe config: the dataset_extract_tokens pipe with noun-only extraction
# switched on. The defaults list pulls in dataset_extract_tokens, so only the
# overriding value is stated here.
defaults:
  - dataset_extract_tokens

run_with:
  # Must be nested under run_with so it overrides the inherited
  # run_with.nouns_only value instead of creating a new top-level key.
  nouns_only: true
18 changes: 18 additions & 0 deletions src/corprep/conf/pipe/dataset_extract_tokens.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Pipe config that runs corprep.datasets.tokenize.extract_tokens.
defaults:
  - __general_external_funcs__

# Dotted path of the callable this pipe invokes.
run: corprep.datasets.tokenize.extract_tokens
# Arguments for the callable named in `run`
# (presumably passed as keyword arguments; confirm against the pipe runner).
run_with:
  tokenizer_config_name: simple
  num_proc: 1
  batched: true
  token_col: tokenizedText
  nouns_only: false
  # Unset options are written as explicit null rather than bare `key:` so the
  # "no value" intent is unambiguous to readers and linters.
  postags: null
  stop_postags: null
  strip_pos: true
  postag_delim: null
  postag_length: null
  # Parent-relative interpolation: resolves to the `verbose` key one level
  # above run_with, i.e. the pipe-level setting.
  verbose: ${..verbose}
# NOTE(review): placed at pipe level (siblings of run/run_with) because the
# names refer to the pipe object, not the tokenizer; the flattened source does
# not show their nesting, so confirm against the pipe schema.
use_pipe_obj: true
return_pipe_obj: false

0 comments on commit 5ddf017

Please sign in to comment.