Skip to content
Find file
Fetching contributors…
Cannot retrieve contributors at this time
92 lines (84 sloc) 3.11 KB
/*****************************************************************
** encoding.h
**
** Copyright 1998 Clark Cooper
** All rights reserved.
**
** This program is free software; you can redistribute it and/or
** modify it under the same terms as Perl itself.
*/
#ifndef ENCODING_H
#define ENCODING_H 1
/* Magic number that the magic field of an Encmap_Header should match. */
#define ENCMAP_MAGIC 0xfeebface
/*
** PrefixMap: one entry of the prefix vector used by multiple byte
** encodings.  When a byte represents a prefix it indexes into this
** vector instead of mapping to a Unicode character.  If a byte is
** flagged in neither ispfx nor ischar, it is not mapped to Unicode.
*/
typedef struct prefixmap {
    unsigned char  min;         /* 1st byte mapped for this prefix */
    unsigned char  len;         /* number of bytes mapped; 0 => 256 */
    unsigned short bmap_start;  /* starting index of this prefix's map
                                   within the overall map */
    unsigned char  ispfx[32];   /* bitvector: byte being mapped is a prefix */
    unsigned char  ischar[32];  /* bitvector: byte being mapped is a character */
} PrefixMap;
/*
** Encinfo: bundles a 256-entry first-byte map with pointers to a
** PrefixMap vector and an unsigned short vector, plus a size for each.
** NOTE(review): this looks like the in-memory counterpart of an
** encoding map file (compare Encmap_Header) -- confirm against Expat.xs.
*/
typedef struct encinf {
    unsigned short  prefixes_size;  /* presumably the element count of prefixes -- TODO confirm */
    unsigned short  bytemap_size;   /* presumably the element count of bytemap -- TODO confirm */
    int             firstmap[256];  /* one int per possible byte value */
    PrefixMap      *prefixes;       /* PrefixMap vector */
    unsigned short *bytemap;        /* unsigned short vector */
} Encinfo;
/*
** Encmap_Header: header found at the start of an encoding map file.
** All shorts and ints in the file are stored in network order.
*/
typedef struct encmaphdr {
    unsigned int   magic;     /* should match ENCMAP_MAGIC */
    char           name[40];  /* IANA registered name for the encoding */
    unsigned short pfsize;    /* number of PrefixMaps; should be zero for
                                 single byte encodings */
    unsigned short bmsize;    /* number of shorts used for the overall map */
    int            map[256];  /* per 1st byte: the Unicode scalar it encodes,
                                 -n where n is the number of bytes that such
                                 a 1st byte implies, or -1 if the byte
                                 doesn't map to any Unicode character */
} Encmap_Header;
/*================================================================
** Structure of Encoding map binary encoding
**
** Note that all shorts and ints are in network order,
** so when packing or unpacking with perl, use 'n' and 'N' respectively.
** In C, use the htonl family of functions.
**
** The basic structure is:
**
** _______________________
** |Header (including map expat needs for 1st byte)
** |PrefixMap * pfsize
** | This section isn't included for single-byte encodings.
** | For multiple byte encodings, when a byte represents a prefix
** | then it indexes into this vector instead of mapping to a
** | Unicode character. The PrefixMap type is declared above. The
** | ispfx and ischar fields are bitvectors indicating whether
** | the byte being mapped is a prefix or character respectively.
** | If neither is set, then the character is not mapped to Unicode.
** |
** | The min field is the 1st byte mapped for this prefix; the
** | len field is the number of bytes mapped; and bmap_start is
** | the starting index of the map for this prefix in the overall
** | map (next section).
** |unsigned short * bmsize
** | This section also is omitted for single-byte encodings.
** | Each short is either a Unicode scalar or an index into the
** | PrefixMap vector.
**
** The header for these files is declared above as the Encmap_Header type.
** The magic field is a magic number which should match the ENCMAP_MAGIC
** macro above. The next 40 bytes store the IANA registered name for the
** encoding. The pfsize field holds the number of PrefixMaps, which should
** be zero for single byte encodings. The bmsize field holds the number of
** shorts used for the overall map.
**
** The map field contains either the Unicode scalar encoded by the 1st byte
** or -n where n is the number of bytes that such a 1st byte implies (Expat
** requires that the number of bytes to encode a character is indicated by
** the 1st byte) or -1 if the byte doesn't map to any Unicode character.
**
** If the encoding is a multiple byte encoding, then there will be PrefixMap
** and character map sections. The 1st PrefixMap (index 0) covers a range
** of bytes that includes all 1st byte prefixes.
**
** Look at convert_to_unicode in Expat.xs to see how this data structure
** is used.
*/
#endif /* ndef ENCODING_H */
Jump to Line
Something went wrong with that request. Please try again.