Skip to content

Commit

Permalink
feat(pipeline): add extract tokens step with kakao tokenizer config
Browse files: browse the repository at this point in the history
  • Loading branch information
entelecheia committed Jul 25, 2023
1 parent 98e0228 commit 04e66d2
Showing 1 changed file with 8 additions and 0 deletions.
8 changes: 8 additions & 0 deletions config/pipeline/datasets-tokenize.yaml
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
# Configs composed into this pipeline. Each `/pipe@<alias>: <name>` entry
# binds a pipe config to an alias that the `steps` list refers to via `uses:`.
# NOTE(review): this looks like a Hydra defaults list -- confirm with the loader.
defaults:
# Base config named `datasets`.
- datasets
# Pipe config `dataset_sample`, exposed under the alias `pipe_sample`.
- /pipe@pipe_sample: dataset_sample
# Pipe config `dataset_extract_tokens`, exposed under the alias `pipe_extract`.
- /pipe@pipe_extract: dataset_extract_tokens

# NOTE(review): presumably makes the task object the initial input of the
# first step -- confirm against the pipeline runner.
use_task_as_initial_object: true
steps:
Expand All @@ -20,6 +21,13 @@ steps:
token_col: tokenizedText
load_from_cache_file: false
verbose: true
# Extract-tokens step: runs the pipe bound to the `pipe_extract` alias
# (declared in `defaults` as `dataset_extract_tokens`).
# `with` and its parameters are nested under the step so that they parse as
# arguments of this sequence item rather than as sibling top-level keys.
- uses: pipe_extract
  with:
    # Name of the tokenizer config to use.
    tokenizer_config_name: kakao
    # Column holding the tokens; same column name as the preceding step.
    token_col: tokenizedText
    # NOTE(review): presumably keeps only noun tokens -- confirm with the pipe.
    nouns_only: true
    # NOTE(review): presumably recomputes instead of reusing cached results
    # -- confirm with the pipe.
    load_from_cache_file: false
    verbose: true
- uses: pipe_save
with:
dataset_path: datasets/processed/kakao_tokenized
Expand Down

0 comments on commit 04e66d2

Please sign in to comment.