Skip to content

Commit

Permalink
update to add slovene and chinese indexation to elasticsearch and uni…
Browse files Browse the repository at this point in the history
…t test checking.
  • Loading branch information
jphilou committed May 9, 2024
1 parent f99a261 commit 68b8522
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 12 deletions.
57 changes: 57 additions & 0 deletions c2corg_api/search/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,20 @@ class Meta(BaseMeta):
description_eu = String(
analyzer='index_basque', search_analyzer='search_basque')

# sl
title_sl = default_title_field("slovene")
summary_sl = String(
analyzer='index_slovene', search_analyzer='search_slovene')
description_sl = String(
analyzer='index_slovene', search_analyzer='search_slovene')

# zh
title_zh = default_title_field("chinois")
summary_zh = String(
analyzer='index_chinois', search_analyzer='search_chinois')
description_zh = String(
analyzer='index_chinois', search_analyzer='search_chinois')

@staticmethod
def to_search_document(document, index, include_areas=True):
search_document = {
Expand Down Expand Up @@ -537,6 +551,49 @@ def copy_enum_range_fields(
"basque_stemmer",
"icu_folding"
]
},
"index_slovene": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"autocomplete_filter",
"lowercase"
]
},
"search_slovene": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase"
]
},
"slovene_heavy": {
"tokenizer": "icu_tokenizer",
"filter": [
"lowercase",
"icu_folding"
]
},
"index_chinois": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"autocomplete_filter"
]
},
"search_chinois": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase"
]
},
"chinois_heavy": {
"tokenizer": "icu_tokenizer",
"filter": [
"lowercase",
"icu_folding"
]
}
}
}
25 changes: 13 additions & 12 deletions scripts/esUpdateMapSettings.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/bin/bash

# Change localhost to the correct URL if Elasticsearch is not hosted on your computer or in Docker.
# If Elasticsearch is not dockerized, install the plugin manually on the server side.
# Install the ES plugin and restart Elasticsearch
#Install elastic plugin analysis-icu
echo 'Installing es plugin to elasticsearch docker'
Expand All @@ -14,7 +15,7 @@ curl -X POST -H "Content-Type: application/json" http://localhost:9200/c2corg/_c
sleep 2
#3. Updating analyser settings
echo -e '\n updating Settings'
curl -X PUT -H "Content-Type: application/json" -d @./esjson/settings.json http://localhost:9200/c2corg/_settings
curl -X PUT -H "Content-Type: application/json" -d @./scripts/esjson/settings.json http://localhost:9200/c2corg/_settings
sleep 2
#4. restarting the index
echo -e '\n restarting index'
Expand All @@ -23,25 +24,25 @@ sleep 2
#5. Docs Mappings update
echo 'updating doc type mappings'
echo 'doc type : a'
curl -X PUT -H "Content-Type: application/json" -d @./esjson/a.json http://localhost:9200/c2corg/_mapping/a
curl -X PUT -H "Content-Type: application/json" -d @./scripts/esjson/a.json http://localhost:9200/c2corg/_mapping/a
echo -e '\n doc type : b'
curl -X PUT -H "Content-Type: application/json" -d @./esjson/b.json http://localhost:9200/c2corg/_mapping/b
curl -X PUT -H "Content-Type: application/json" -d @./scripts/esjson/b.json http://localhost:9200/c2corg/_mapping/b
echo -e '\n doc type : c'
curl -X PUT -H "Content-Type: application/json" -d @./esjson/c.json http://localhost:9200/c2corg/_mapping/c
curl -X PUT -H "Content-Type: application/json" -d @./scripts/esjson/c.json http://localhost:9200/c2corg/_mapping/c
echo -e '\n doc type : i'
curl -X PUT -H "Content-Type: application/json" -d @./esjson/i.json http://localhost:9200/c2corg/_mapping/i
curl -X PUT -H "Content-Type: application/json" -d @./scripts/esjson/i.json http://localhost:9200/c2corg/_mapping/i
echo -e '\n doc type : m'
curl -X PUT -H "Content-Type: application/json" -d @./esjson/m.json http://localhost:9200/c2corg/_mapping/m
curl -X PUT -H "Content-Type: application/json" -d @./scripts/esjson/m.json http://localhost:9200/c2corg/_mapping/m
echo -e '\n doc type : o'
curl -X PUT -H "Content-Type: application/json" -d @./esjson/o.json http://localhost:9200/c2corg/_mapping/o
curl -X PUT -H "Content-Type: application/json" -d @./scripts/esjson/o.json http://localhost:9200/c2corg/_mapping/o
echo -e '\n doc type : r'
curl -X PUT -H "Content-Type: application/json" -d @./esjson/r.json http://localhost:9200/c2corg/_mapping/r
curl -X PUT -H "Content-Type: application/json" -d @./scripts/esjson/r.json http://localhost:9200/c2corg/_mapping/r
echo -e '\n doc type : u'
curl -X PUT -H "Content-Type: application/json" -d @./esjson/u.json http://localhost:9200/c2corg/_mapping/u
curl -X PUT -H "Content-Type: application/json" -d @./scripts/esjson/u.json http://localhost:9200/c2corg/_mapping/u
echo -e '\n doc type : w'
curl -X PUT -H "Content-Type: application/json" -d @./esjson/w.json http://localhost:9200/c2corg/_mapping/w
curl -X PUT -H "Content-Type: application/json" -d @./scripts/esjson/w.json http://localhost:9200/c2corg/_mapping/w
echo -e '\n doc type : x'
curl -X PUT -H "Content-Type: application/json" -d @./esjson/x.json http://localhost:9200/c2corg/_mapping/x
curl -X PUT -H "Content-Type: application/json" -d @./scripts/esjson/x.json http://localhost:9200/c2corg/_mapping/x
sleep 2
#6. Re-index the index to apply the mappings
echo -e '\n Indexing with new parameters - should take sometimes - pls wait server acknowlegment'
Expand Down
43 changes: 43 additions & 0 deletions scripts/esjson/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,49 @@
"basque_stemmer",
"icu_folding"
]
},
"index_slovene": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"autocomplete_filter",
"lowercase"
]
},
"search_slovene": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase"
]
},
"slovene_heavy": {
"tokenizer": "icu_tokenizer",
"filter": [
"lowercase",
"icu_folding"
]
},
"index_chinois": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"autocomplete_filter"
]
},
"search_chinois": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase"
]
},
"chinois_heavy": {
"tokenizer": "icu_tokenizer",
"filter": [
"lowercase",
"icu_folding"
]
}
}
}
Expand Down

0 comments on commit 68b8522

Please sign in to comment.