Skip to content

Commit

Permalink
Add support for exporting user phrases
Browse files Browse the repository at this point in the history
  • Loading branch information
epico committed Jan 22, 2015
1 parent 9926656 commit 71f0161
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 0 deletions.
119 changes: 119 additions & 0 deletions src/pinyin.cpp
Expand Up @@ -90,6 +90,13 @@ struct _import_iterator_t{
guint8 m_phrase_index;
};

/* Cursor state used by the pinyin_*_get_phrases functions to walk
 * the phrase/pronunciation pairs of one sub phrase index. */
struct _export_iterator_t{
/* context whose m_phrase_index is queried for ranges and items. */
pinyin_context_t * m_context;
/* sub phrase index passed to get_range() when probing tokens. */
guint8 m_phrase_index;
/* token of the next item to hand out;
 * null token means no next item. */
phrase_token_t m_next_token;
/* index of the next pronunciation to hand out for m_next_token. */
guint8 m_next_pronunciation;
};

static bool check_format(pinyin_context_t * context){
const char * userdir = context->m_user_dir;
Expand Down Expand Up @@ -484,6 +491,118 @@ void pinyin_end_add_phrases(import_iterator_t * iter){
delete iter;
}

/**
 * pinyin_begin_get_phrases:
 * @context: the pinyin context whose phrase index is walked.
 * @index: the sub phrase index to walk (stored in a guint8 field).
 * @returns: a newly allocated export iterator.
 *
 * Create an export iterator positioned on the first token of the
 * sub phrase index that has at least one pronunciation.  When the
 * range cannot be fetched, or no such token exists, the iterator is
 * returned with a null next token.
 */
export_iterator_t * pinyin_begin_get_phrases(pinyin_context_t * context,
                                             guint index){
    export_iterator_t * result = new export_iterator_t;
    result->m_context = context;
    result->m_phrase_index = index;
    result->m_next_token = null_token;
    result->m_next_pronunciation = 0;

    /* fetch the token range of the sub phrase index. */
    PhraseIndexRange bounds;
    if (ERROR_OK != context->m_phrase_index->get_range
        (result->m_phrase_index, bounds))
        return result;

    /* scan for the first token carrying a pronunciation. */
    PhraseItem entry;
    phrase_token_t candidate = bounds.m_range_begin;
    while (candidate < bounds.m_range_end) {
        int status = context->m_phrase_index->get_phrase_item
            (candidate, entry);
        if (ERROR_OK == status && entry.get_n_pronunciation() >= 1) {
            result->m_next_token = candidate;
            break;
        }
        ++candidate;
    }

    return result;
}

/**
 * pinyin_iterator_has_next_phrase:
 * @iter: the export iterator.
 * @returns: whether the iterator holds a next token.
 *
 * A null next token marks that there is no next item.
 */
bool pinyin_iterator_has_next_phrase(export_iterator_t * iter){
    return null_token != iter->m_next_token;
}

/**
 * pinyin_iterator_get_next_phrase:
 * @iter: the export iterator.
 * @phrase: receives the phrase as a newly allocated UTF-8 string.
 * @pinyin: receives the pronunciation, one pinyin string per
 *          character joined with "'".
 * @count: receives the stored frequency, or -1 (default count) when
 *         the stored frequency is zero.
 * @returns: false when the iterator has no pending item (the outputs
 *           are then NULL, NULL and -1), true otherwise.
 *
 * Hand out the pending phrase/pronunciation pair, then advance the
 * iterator to the next pronunciation of the same token or, when the
 * token is used up, to the next token having a pronunciation.
 *
 * phrase, pinyin should be freed by g_free().
 */
bool pinyin_iterator_get_next_phrase(export_iterator_t * iter,
                                     gchar ** phrase,
                                     gchar ** pinyin,
                                     gint * count){
    /* count "-1" means default count. */
    *phrase = NULL; *pinyin = NULL; *count = -1;

    /* nothing pending: do not look up the null token. */
    if (null_token == iter->m_next_token)
        return false;

    PhraseItem item;
    int retval = iter->m_context->m_phrase_index->get_phrase_item
        (iter->m_next_token, item);
    /* assume valid next token from previous call. */
    assert(ERROR_OK == retval);

    /* fill phrase and pronunciation pair.
     * the calls are kept outside assert() so that they are still
     * executed when the file is compiled with NDEBUG. */
    ucs4_t phrase_ucs4[MAX_PHRASE_LENGTH];
    guint8 len = item.get_phrase_length();
    bool filled = item.get_phrase_string(phrase_ucs4);
    assert(filled);
    gchar * phrase_utf8 = g_ucs4_to_utf8
        (phrase_ucs4, len, NULL, NULL, NULL);

    guint8 nth_pronun = iter->m_next_pronunciation;
    guint8 n_pronuns = item.get_n_pronunciation();
    /* assume valid pronunciation from previous call. */
    assert(nth_pronun < n_pronuns);
    ChewingKey keys[MAX_PHRASE_LENGTH];
    guint32 freq = 0;
    filled = item.get_nth_pronunciation(nth_pronun, keys, freq);
    assert(filled);
    /* silence the unused-value warning in NDEBUG builds. */
    (void) filled;

    /* collect one pinyin string per character, NULL terminated,
     * so that the array can be joined and freed as a string vector. */
    GPtrArray * array = g_ptr_array_new();
    for (size_t i = 0; i < len; ++i) {
        g_ptr_array_add(array, keys[i].get_pinyin_string());
    }
    g_ptr_array_add(array, NULL);

    gchar ** strings = (gchar **)g_ptr_array_free(array, FALSE);
    gchar * pinyins = g_strjoinv("'", strings);
    g_strfreev(strings);

    /* use default value. */
    *phrase = phrase_utf8; *pinyin = pinyins;
    if (freq > 0)
        *count = freq;

    /* probe next pronunciation. */
    nth_pronun ++;
    if (nth_pronun < n_pronuns) {
        iter->m_next_pronunciation = nth_pronun;
        return true;
    }

    iter->m_next_pronunciation = 0;
    /* probe next token. */
    PhraseIndexRange range;
    retval = iter->m_context->m_phrase_index->get_range
        (iter->m_phrase_index, range);
    if (retval != ERROR_OK) {
        iter->m_next_token = null_token;
        return true;
    }

    phrase_token_t token = iter->m_next_token + 1;
    /* reset first: when no further token is found the iteration must
     * end, otherwise the last item would be handed out forever. */
    iter->m_next_token = null_token;
    for (; token < range.m_range_end; ++token) {
        retval = iter->m_context->m_phrase_index->get_phrase_item
            (token, item);
        if (ERROR_OK == retval && item.get_n_pronunciation() >= 1) {
            iter->m_next_token = token;
            break;
        }
    }
    return true;
}

/**
 * pinyin_end_get_phrases:
 * @iter: the export iterator to release.
 *
 * Delete the iterator object; the context it points to is left
 * untouched.
 */
void pinyin_end_get_phrases(export_iterator_t * iter){
    delete iter;
}

bool pinyin_save(pinyin_context_t * context){
if (!context->m_user_dir)
return false;
Expand Down
13 changes: 13 additions & 0 deletions src/pinyin.h
Expand Up @@ -38,6 +38,7 @@ typedef struct _pinyin_instance_t pinyin_instance_t;
typedef struct _lookup_candidate_t lookup_candidate_t;

typedef struct _import_iterator_t import_iterator_t;
/* iterator handle used by the pinyin_*_get_phrases functions. */
typedef struct _export_iterator_t export_iterator_t;

typedef enum _lookup_candidate_type_t{
BEST_MATCH_CANDIDATE = 1,
Expand Down Expand Up @@ -120,6 +121,18 @@ bool pinyin_iterator_add_phrase(import_iterator_t * iter,
*/
void pinyin_end_add_phrases(import_iterator_t * iter);

/**
 * pinyin_begin_get_phrases:
 * @context: the pinyin context whose phrases are walked.
 * @index: the sub phrase index to walk.
 * @returns: a newly allocated export iterator.
 *
 * Begin walking the phrases of one sub phrase index.
 * Release the iterator with pinyin_end_get_phrases().
 */
export_iterator_t * pinyin_begin_get_phrases(pinyin_context_t * context,
guint index);

/**
 * pinyin_iterator_has_next_phrase:
 * @iter: the export iterator.
 * @returns: whether the iterator holds a next item.
 *
 * Check this before calling pinyin_iterator_get_next_phrase().
 */
bool pinyin_iterator_has_next_phrase(export_iterator_t * iter);

/**
 * pinyin_iterator_get_next_phrase:
 * @iter: the export iterator.
 * @phrase: receives the phrase as a UTF-8 string.
 * @pinyin: receives the pinyin strings joined with "'".
 * @count: receives the frequency, -1 means the default count.
 * @returns: whether a phrase and pinyin pair was handed out.
 *
 * Get the next phrase and pinyin pair and advance the iterator.
 * phrase and pinyin should be freed by g_free().
 */
bool pinyin_iterator_get_next_phrase(export_iterator_t * iter,
gchar ** phrase,
gchar ** pinyin,
gint * count);

/**
 * pinyin_end_get_phrases:
 * @iter: the export iterator to release.
 *
 * Free the iterator returned by pinyin_begin_get_phrases().
 */
void pinyin_end_get_phrases(export_iterator_t * iter);

/**
* pinyin_save:
* @context: the pinyin context to be saved into user directory.
Expand Down

0 comments on commit 71f0161

Please sign in to comment.