Skip to content

Commit

Permalink
re: pull request #364
Browse files Browse the repository at this point in the history
This is a follow-on change: the old utf8 code was still being
used in ncgen to convert utf8 -> utf16 when converting CDL to Java
(see genj.c).

The new code apparently has no utf16 support, but it does have
utf32 support. Converting utf32 -> utf16 can be approximated by
truncating the 32 bits to 16 bits, as long as the top 16 bits are
zero. Non-zero top bits are unlikely to be common because
they imply use of some rather obscure characters.

So the solution is to convert to utf32 and truncate to 16 bits to
get utf16. An error is reported if the high-order 16 bits that
would be discarded are not zero. If we get complaints, then I will
figure out how to convert full utf32 to a utf16 surrogate pair.

Other changes:
1. Removed the old code from ncgen.
2. Changed UTF8PROC_DLLEXPORT (in utf8proc) to EXTERNL
   and added appropriate includes. This should fix
   issue #404, but since we cannot duplicate the failure,
   I am not quite sure.
  • Loading branch information
DennisHeimbigner committed Jun 19, 2017
1 parent e2e7c20 commit 9cde916
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 22 deletions.
8 changes: 5 additions & 3 deletions include/ncutf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#ifndef NCUTF8_H
#define NCUTF8_H 1

#include "ncexternl.h"

/* Provide a wrapper around whatever utf8 library we use. */

/*
Expand All @@ -15,7 +17,7 @@
* NC_ENOMEM -- out of memory
* NC_EBADNAME-- not valid utf8
*/
extern int nc_utf8_validate(const unsigned char * name);
EXTERNL int nc_utf8_validate(const unsigned char * name);

/*
* Apply NFC normalization to a string.
Expand All @@ -28,7 +30,7 @@ extern int nc_utf8_validate(const unsigned char * name);
* NC_ENOMEM -- out of memory
* NC_EBADNAME -- other failure
*/
extern int nc_utf8_normalize(const unsigned char* str, unsigned char** normalp);
EXTERNL int nc_utf8_normalize(const unsigned char* str, unsigned char** normalp);

/*
* Convert a normalized utf8 string to utf16. This is approximate
Expand All @@ -41,6 +43,6 @@ extern int nc_utf8_normalize(const unsigned char* str, unsigned char** normalp);
* NC_EBADNAME-- not valid utf16
*/

extern int nc_utf8_to_utf16(const unsigned char* s8, unsigned short** utf16p, size_t* lenp);
EXTERNL int nc_utf8_to_utf16(const unsigned char* s8, unsigned short** utf16p, size_t* lenp);

#endif /*NCUTF8_H*/
25 changes: 6 additions & 19 deletions libdispatch/utf8proc.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
/** @} */

#include <stdlib.h>
#include "ncexternl.h"

#if defined(_MSC_VER) && _MSC_VER < 1800
// MSVC prior to 2013 lacked stdbool.h and inttypes.h
Expand Down Expand Up @@ -120,20 +121,6 @@ typedef bool nc_utf8proc_bool;
#endif
#include <limits.h>

#ifndef UTF8PROC_EXPORTS
#ifdef _WIN32
# ifdef UTF8PROC_EXPORTS
# define UTF8PROC_DLLEXPORT __declspec(dllexport)
# else
# define UTF8PROC_DLLEXPORT __declspec(dllimport)
# endif
#elif __GNUC__ >= 4
# define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default")))
#else
# define UTF8PROC_DLLEXPORT
#endif
#endif

#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -418,7 +405,7 @@ const char *nc_utf8proc_errmsg(nc_utf8proc_ssize_t errcode);
* In case of success, the number of bytes read is returned; otherwise, a
* negative error code is returned.
*/
UTF8PROC_DLLEXPORT nc_utf8proc_ssize_t nc_utf8proc_iterate(const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen, nc_utf8proc_int32_t *codepoint_ref);
EXTERNL nc_utf8proc_ssize_t nc_utf8proc_iterate(const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen, nc_utf8proc_int32_t *codepoint_ref);

/**
* Check if a codepoint is valid (regardless of whether it has been
Expand Down Expand Up @@ -685,13 +672,13 @@ nc_utf8proc_ssize_t nc_utf8proc_map_custom(
*/
/** @{ */
/** NFD normalization (@ref UTF8PROC_DECOMPOSE). */
UTF8PROC_DLLEXPORT nc_utf8proc_uint8_t *nc_utf8proc_NFD(const nc_utf8proc_uint8_t *str);
EXTERNL nc_utf8proc_uint8_t *nc_utf8proc_NFD(const nc_utf8proc_uint8_t *str);
/** NFC normalization (@ref UTF8PROC_COMPOSE). */
UTF8PROC_DLLEXPORT nc_utf8proc_uint8_t *nc_utf8proc_NFC(const nc_utf8proc_uint8_t *str);
EXTERNL nc_utf8proc_uint8_t *nc_utf8proc_NFC(const nc_utf8proc_uint8_t *str);
/** NFKD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */
UTF8PROC_DLLEXPORT nc_utf8proc_uint8_t *nc_utf8proc_NFKD(const nc_utf8proc_uint8_t *str);
EXTERNL nc_utf8proc_uint8_t *nc_utf8proc_NFKD(const nc_utf8proc_uint8_t *str);
/** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
UTF8PROC_DLLEXPORT nc_utf8proc_uint8_t *nc_utf8proc_NFKC(const nc_utf8proc_uint8_t *str);
EXTERNL nc_utf8proc_uint8_t *nc_utf8proc_NFKC(const nc_utf8proc_uint8_t *str);
/** @} */

#ifdef __cplusplus
Expand Down

0 comments on commit 9cde916

Please sign in to comment.