Skip to content

Commit

Permalink
feat(pipeline): add new pipeline for saving datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
entelecheia committed Aug 7, 2023
1 parent 686d782 commit eff2232
Showing 1 changed file with 20 additions and 0 deletions.
20 changes: 20 additions & 0 deletions config/pipeline/datasets-save.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Pipeline configuration for saving datasets: load a dataset, drop a set of
# columns, convert to pandas, preview the head, and save the result as parquet.
#
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been flattened. `verbose` is placed at step level (sibling of `uses`/`with`)
# because two steps carry `verbose` without any `with:` mapping -- confirm
# against the pipeline schema.

# Presumably a Hydra defaults list inheriting the `datasets` config -- verify.
defaults:
  - datasets

use_task_as_initial_object: true
steps:
  # Load the source dataset from disk.
  - uses: pipe_load
    with:
      dataset_path: datasets/absa_results/corprep-gpt4-sample
    verbose: true

  # Drop columns that should not be written to the saved file.
  - uses: pipe_rmcols
    with:
      column_names:
        - writers
        - lastModifiedDt
        - createdDt_int
        - serviceUrl
        - __index_level_0__
    verbose: true

  # Convert to a pandas DataFrame (required by the DataFrame-based steps
  # below, presumably).
  - uses: pipe_to_pandas
    verbose: true

  # Preview the first rows; no arguments are given, so the pipe's defaults
  # apply.
  - uses: pipe_head
    verbose: true

  # Write the DataFrame to a parquet file.
  - uses: pipe_save_df
    with:
      data_file: datasets/absa_results/corprep-gpt4-sample.parquet

0 comments on commit eff2232

Please sign in to comment.