/
fts-filter-normalizer-icu.c
145 lines (123 loc) · 3.94 KB
/
fts-filter-normalizer-icu.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
/* Copyright (c) 2014-2018 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "array.h"
#include "str.h"
#include "unichar.h" /* unicode replacement char */
#include "fts-filter-common.h"
#include "fts-filter-private.h"
#include "fts-language.h"
#ifdef HAVE_LIBICU
#include "fts-icu.h"
/* Per-instance state of the ICU normalizer filter. */
struct fts_filter_normalizer_icu {
	/* Must remain the first member: the filter callbacks cast the
	   struct fts_filter pointer they are given back to this type. */
	struct fts_filter filter;
	/* Pool this struct and its buffers are allocated from. */
	pool_t pool;
	/* Transliterator ID string, copied into the pool at create time. */
	const char *transliterator_id;
	/* NULL until the first filter call creates it. */
	UTransliterator *transliterator;
	/* Input token converted to UTF-16. */
	ARRAY_TYPE(icu_utf16) utf16_token;
	/* Result of the transliteration, still UTF-16. */
	ARRAY_TYPE(icu_utf16) trans_token;
	/* UTF-8 result that *token is pointed at on success. */
	string_t *utf8_token;
};
/* Release everything owned by an ICU normalizer filter instance. */
static void fts_filter_normalizer_icu_destroy(struct fts_filter *filter)
{
	struct fts_filter_normalizer_icu *norm =
		(struct fts_filter_normalizer_icu *)filter;

	/* The transliterator is created lazily by the filter callback, so it
	   may never have been opened. */
	if (norm->transliterator != NULL) {
		utrans_close(norm->transliterator);
	}

	/* The instance itself was allocated from this pool, so drop the
	   reference last. */
	pool_unref(&norm->pool);
}
/*
 * Create an ICU normalizer filter.
 *
 * settings is a NULL-terminated list that is read as key/value pairs:
 *   "id"     - transliterator ID used instead of the built-in default
 *   "maxlen" - length limit later passed to fts_filter_truncate_token();
 *              must parse as a non-zero unsigned integer
 * Any other key is rejected.
 *
 * Returns 0 and stores the new filter in *filter_r on success, or -1 with
 * *error_r set when a setting is invalid or unknown. The transliterator
 * itself is not created here.
 */
static int
fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED,
				 const char *const *settings,
				 struct fts_filter **filter_r,
				 const char **error_r)
{
	const char *trans_id =
		"Any-Lower; NFKD; [: Nonspacing Mark :] Remove; NFC; [\\x20] Remove";
	unsigned int max_length = 250;
	const char *const *pair;
	struct fts_filter_normalizer_icu *norm;
	pool_t pool;

	/* Walk the list two entries at a time: pair[0] is the key and
	   pair[1] its value. */
	for (pair = settings; pair[0] != NULL; pair += 2) {
		const char *name = pair[0];
		const char *arg = pair[1];

		if (strcmp(name, "id") == 0) {
			trans_id = arg;
			continue;
		}
		if (strcmp(name, "maxlen") != 0) {
			*error_r = t_strdup_printf("Unknown setting: %s", name);
			return -1;
		}
		if (str_to_uint(arg, &max_length) < 0 || max_length == 0) {
			*error_r = t_strdup_printf("Invalid icu maxlen setting: %s", arg);
			return -1;
		}
	}

	pool = pool_alloconly_create(MEMPOOL_GROWING"fts_filter_normalizer_icu",
				     sizeof(struct fts_filter_normalizer_icu));
	norm = p_new(pool, struct fts_filter_normalizer_icu, 1);
	norm->pool = pool;

	/* Start from the class definition, then override the per-instance
	   length limit. */
	norm->filter = *fts_filter_normalizer_icu;
	norm->filter.max_length = max_length;

	norm->transliterator_id = p_strdup(pool, trans_id);
	p_array_init(&norm->utf16_token, pool, 64);
	p_array_init(&norm->trans_token, pool, 64);
	norm->utf8_token = buffer_create_dynamic(pool, 128);

	*filter_r = &norm->filter;
	return 0;
}
/*
 * Normalize *token with the configured ICU transliterator.
 *
 * Returns 1 and points *token at the normalized UTF-8 text (stored in the
 * filter's own buffer), 0 if the transliteration produced no output, or -1
 * if creating the transliterator or running it failed; both helpers are
 * handed error_r.
 */
static int
fts_filter_normalizer_icu_filter(struct fts_filter *filter, const char **token,
				 const char **error_r)
{
	struct fts_filter_normalizer_icu *norm =
		(struct fts_filter_normalizer_icu *)filter;

	/* Create the transliterator on first use. */
	if (norm->transliterator == NULL &&
	    fts_icu_transliterator_create(norm->transliterator_id,
					  &norm->transliterator,
					  error_r) < 0) {
		return -1;
	}

	fts_icu_utf8_to_utf16(&norm->utf16_token, *token);

	/* Append a zero element and immediately remove it from the array
	   again; presumably this leaves the UTF-16 data NUL-terminated
	   without counting the terminator as part of the token. */
	array_append_zero(&norm->utf16_token);
	array_delete(&norm->utf16_token,
		     array_count(&norm->utf16_token)-1, 1);

	array_clear(&norm->trans_token);
	if (fts_icu_translate(&norm->trans_token,
			      array_first(&norm->utf16_token),
			      array_count(&norm->utf16_token),
			      norm->transliterator, error_r) < 0) {
		return -1;
	}

	/* Nothing left after transliteration. */
	if (array_count(&norm->trans_token) == 0) {
		return 0;
	}

	fts_icu_utf16_to_utf8(norm->utf8_token,
			      array_first(&norm->trans_token),
			      array_count(&norm->trans_token));
	fts_filter_truncate_token(norm->utf8_token, norm->filter.max_length);
	*token = str_c(norm->utf8_token);
	return 1;
}
#else
/* Built without libicu: creating the filter always fails with an
   explanatory message in *error_r. */
static int
fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED,
				 const char *const *settings ATTR_UNUSED,
				 struct fts_filter **filter_r ATTR_UNUSED,
				 const char **error_r)
{
	const char *const reason = "libicu support not built in";

	*error_r = reason;
	return -1;
}
/* Built without libicu: filtering always fails. The sibling create stub
   always fails as well, so this is not expected to be reached through a
   successfully created filter; the error is still set so that a caller
   which reports *error_r on failure never reads an unset pointer. */
static int
fts_filter_normalizer_icu_filter(struct fts_filter *filter ATTR_UNUSED,
				 const char **token ATTR_UNUSED,
				 const char **error_r)
{
	*error_r = "libicu support not built in";
	return -1;
}
/* Built without libicu: the create stub never allocates anything, so there
   is nothing to release here. */
static void
fts_filter_normalizer_icu_destroy(struct fts_filter *filter ATTR_UNUSED)
{
	/* intentionally empty */
}
#endif
/* Class definition for the "normalizer-icu" filter. The callbacks resolve
   to either the libicu implementation or the always-failing stubs,
   depending on HAVE_LIBICU. */
static const struct fts_filter fts_filter_normalizer_icu_real = {
	.class_name = "normalizer-icu",
	/* Positional initializer: the create, filter and destroy callbacks
	   must stay in the member order of the callback struct, which is
	   declared outside this file. */
	.v = {
		fts_filter_normalizer_icu_create,
		fts_filter_normalizer_icu_filter,
		fts_filter_normalizer_icu_destroy
	}
};
/* Externally visible handle for the "normalizer-icu" filter class. In the
   libicu build, the create callback copies the pointed-to struct as the
   starting state of each new instance. */
const struct fts_filter *fts_filter_normalizer_icu =
	&fts_filter_normalizer_icu_real;