Skip to content

Commit

Permalink
feat(pipe): add dataset_sample and a second dataset_save to steps
Browse files Browse the repository at this point in the history
  • Loading branch information
entelecheia committed Jul 18, 2023
1 parent 4b64a40 commit 57cc25c
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/corprep/conf/pipe/dataset_load_raw.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
defaults:
- __general_external_funcs__

- run: corprep.datasets.raw.load_raw_dataset
+ run: corprep.datasets.io.load_raw_dataset
run_with:
raw_dataset_dir:
path: json
Expand Down
4 changes: 2 additions & 2 deletions src/corprep/conf/pipe/dataset_save.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
defaults:
- - __general_instance_methods__
+ - __general_external_funcs__

- run: save_to_disk
+ run: corprep.datasets.io.save_dataset
run_with:
dataset_path:
2 changes: 1 addition & 1 deletion src/corprep/conf/pipe/dataset_tokenize.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
defaults:
- __general_external_funcs__

- run: corprep.datasets.preprocessing.tokenize.tokenize_dataset
+ run: corprep.datasets.tokenize.tokenize_dataset
run_with:
num_proc: 1
batched: true
Expand Down
12 changes: 11 additions & 1 deletion src/corprep/conf/pipeline/datasets-kakao.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ defaults:
- __init__
- /pipe@pipe1: dataset_load_raw
- /pipe@pipe3: dataset_save
+ - /pipe@pipe4: dataset_sample
+ - /pipe@pipe5: dataset_save

use_task_as_initial_object: true
steps:
Expand All @@ -15,5 +17,13 @@ steps:
- uses: pipe3
with:
dataset_path: datasets/processed/kakao
# dataset_path: datasets/processed/daum_news_20230707
verbose: true
- uses: pipe4
with:
num_samples: 10
verbose: true
verbose: true
- uses: pipe5
with:
dataset_path: datasets/processed/kakao_sample
verbose: true

0 comments on commit 57cc25c

Please sign in to comment.