Skip to content

Commit

Permalink
Merge branch 'master' of github.com:etalab/csv-detective
Browse files Browse the repository at this point in the history
  • Loading branch information
geoffreyaldebert committed Mar 3, 2022
2 parents 254b3c2 + 7c9c918 commit c26fbc6
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
Expand Up @@ -6,10 +6,10 @@
def _is(header):
    '''Score how well a column header matches a commune-code label.

    The header is first normalised with ``_process_text`` and then compared
    against a fixed list of expected word combinations.

    Args:
        header: raw column header to evaluate.

    Returns:
        1.0 if the processed header is exactly one of the word combinations,
        0.5 if ``full_word_strictly_inside_string`` reports one of them
        within the processed header, and 0.0 otherwise.
    '''
    words_combinations_list = ['code commune insee', 'code insee', 'codes insee', 'code commune']
    processed_header = _process_text(header)

    # An exact match is the strongest signal and outranks a partial match.
    if processed_header in words_combinations_list:
        return 1.0

    # A combination merely contained in a longer header only counts for half.
    if any(
        full_word_strictly_inside_string(words_combination, processed_header)
        for words_combination in words_combinations_list
    ):
        return 0.5

    return 0.0
Expand Up @@ -6,10 +6,10 @@
def _is(header):
    '''Score how well a column header matches a department-code label.

    The header is first normalised with ``_process_text`` and then compared
    against a fixed list of expected word combinations.

    Args:
        header: raw column header to evaluate.

    Returns:
        1.0 if the processed header is exactly one of the word combinations,
        0.5 if ``full_word_strictly_inside_string`` reports one of them
        within the processed header, and 0.0 otherwise.
    '''
    # 'dep': Possible confusion with dep name?
    # NOTE(review): the bare word 'departement' presumably carries the same
    # ambiguity with department-name columns -- confirm this is intended.
    words_combinations_list = ['code departement', 'dep', 'departement']
    processed_header = _process_text(header)

    # An exact match is the strongest signal and outranks a partial match.
    if processed_header in words_combinations_list:
        return 1.0

    # A combination merely contained in a longer header only counts for half.
    if any(
        full_word_strictly_inside_string(words_combination, processed_header)
        for words_combination in words_combinations_list
    ):
        return 0.5

    return 0.0
4 changes: 2 additions & 2 deletions csv_detective/detect_labels/FR/geo/code_region/__init__.py
Expand Up @@ -6,10 +6,10 @@
def _is(header):
    '''Score how well a column header matches a region-code label.

    The header is first normalised with ``_process_text`` and then compared
    against a fixed list of expected word combinations.

    Args:
        header: raw column header to evaluate.

    Returns:
        1.0 if the processed header is exactly one of the word combinations,
        0.5 if ``full_word_strictly_inside_string`` reports one of them
        within the processed header, and 0.0 otherwise.
    '''
    # 'reg' : possible confusion with region name?
    # NOTE(review): the bare word 'region' presumably carries the same
    # ambiguity with region-name columns -- confirm this is intended.
    words_combinations_list = ['code region', 'reg', 'code insee region', 'region']
    processed_header = _process_text(header)

    # An exact match is the strongest signal and outranks a partial match.
    if processed_header in words_combinations_list:
        return 1.0

    # A combination merely contained in a longer header only counts for half.
    if any(
        full_word_strictly_inside_string(words_combination, processed_header)
        for words_combination in words_combinations_list
    ):
        return 0.5

    return 0.0

0 comments on commit c26fbc6

Please sign in to comment.