diff --git a/csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py b/csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py index c4f7ddb..9638547 100644 --- a/csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +++ b/csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py @@ -6,10 +6,10 @@ def _is(header): '''Returns 1 if the (processed) header matches one of the expected words combination, else 0''' - words_combinations_list = ['code commune insee', 'code insee', 'codes insee'] + words_combinations_list = ['code commune insee', 'code insee', 'codes insee', 'code commune'] processed_header = _process_text(header) header_matches_words_combination = float(any([words_combination == processed_header for words_combination in words_combinations_list])) words_combination_in_header = 0.5*float(any([full_word_strictly_inside_string(words_combination, processed_header) for words_combination in words_combinations_list])) - return max(header_matches_words_combination, words_combination_in_header) \ No newline at end of file + return max(header_matches_words_combination, words_combination_in_header) diff --git a/csv_detective/detect_labels/FR/geo/code_departement/__init__.py b/csv_detective/detect_labels/FR/geo/code_departement/__init__.py index 055b802..458e39b 100644 --- a/csv_detective/detect_labels/FR/geo/code_departement/__init__.py +++ b/csv_detective/detect_labels/FR/geo/code_departement/__init__.py @@ -6,10 +6,10 @@ def _is(header): '''Returns 1 if the (processed) header matches one of the expected words combination, else 0''' - words_combinations_list = ['code departement', 'dep'] #'dep': Possible confusion with dep name? + words_combinations_list = ['code departement', 'dep', 'departement'] #'dep': Possible confusion with dep name? processed_header = _process_text(header) header_matches_words_combination = float(any([words_combination == processed_header for words_combination in words_combinations_list])) words_combination_in_header = 0.5*float(any([full_word_strictly_inside_string(words_combination, processed_header) for words_combination in words_combinations_list])) - return max(header_matches_words_combination, words_combination_in_header) \ No newline at end of file + return max(header_matches_words_combination, words_combination_in_header) diff --git a/csv_detective/detect_labels/FR/geo/code_region/__init__.py b/csv_detective/detect_labels/FR/geo/code_region/__init__.py index 4c090f2..08bc97d 100644 --- a/csv_detective/detect_labels/FR/geo/code_region/__init__.py +++ b/csv_detective/detect_labels/FR/geo/code_region/__init__.py @@ -6,10 +6,10 @@ def _is(header): '''Returns 1 if the (processed) header matches one of the expected words combination, else 0''' - words_combinations_list = ['code region', 'reg', 'code insee region'] #'reg' : possible confusion with region name? + words_combinations_list = ['code region', 'reg', 'code insee region', 'region'] #'reg' : possible confusion with region name? processed_header = _process_text(header) header_matches_words_combination = float(any([words_combination == processed_header for words_combination in words_combinations_list])) words_combination_in_header = 0.5*float(any([full_word_strictly_inside_string(words_combination, processed_header) for words_combination in words_combinations_list])) - return max(header_matches_words_combination, words_combination_in_header) \ No newline at end of file + return max(header_matches_words_combination, words_combination_in_header)