Skip to content

Commit

Permalink
feat(corprep): add find_similar_docs configuration
Browse files Browse the repository at this point in the history
  • Loading branch information
entelecheia committed Jul 27, 2023
1 parent 3710f93 commit c468597
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
6 changes: 6 additions & 0 deletions src/corprep/conf/pipe/find_similar_docs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Pipe config for find_similar_docs.
# The `defaults` list names other configs to compose into this one
# (presumably a Hydra defaults list -- confirm against the config loader).
defaults:
  # Shared base config; presumably resolved within this same `pipe` group.
  - __general_external_funcs__
  # Pulls in the `find_similar_docs` entry from the `/run` config group.
  - /run: find_similar_docs

# NOTE(review): flag semantics are defined by the pipe runner, not visible
# here; the names suggest the pipe object is passed to the function but not
# returned -- confirm.
use_pipe_obj: true
return_pipe_obj: false
18 changes: 18 additions & 0 deletions src/corprep/conf/run/find_similar_docs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Run config for find_similar_docs.
# `_target_` is the dotted path of the callable; the remaining keys are
# presumably forwarded to it as keyword arguments (Hydra-style
# instantiation -- confirm).
_target_: corprep.datasets.similarity.find_similar_docs
num_workers: 2
min_num_docs: 5
percentile: 80
# Explicit null instead of a bare empty value: no fixed threshold is set here.
# NOTE(review): presumably the function derives a threshold (e.g. from
# `percentile`) when this is null -- confirm.
distance_threshold: null
linkage: average
# Grouping settings.
# NOTE(review): `W` looks like a weekly frequency alias -- confirm.
grouping_freq: W
grouping_name: Week
# Column-name settings (keys ending in `_col`).
date_col: createdDt
token_col: nouns
id_col: newsId
ordering_col: createdDt_int
duplicate_col: duplicate
fig_col: fig_filename
# Output and reporting options; `.` is the current directory.
output_dir: .
show_fig: false
save_fig: false
verbose: false

0 comments on commit c468597

Please sign in to comment.