Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 485 lines (430 sloc) 15.233 kb
96ab900d » Laurent Sansonetti
2010-02-16 more work
1 /*
2 * MacRuby implementation of Ruby 1.9 String.
3 *
4 * This file is covered by the Ruby license. See COPYING for more details.
5 *
6 * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
7 * Copyright (C) 1993-2007 Yukihiro Matsumoto
8 * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
9 * Copyright (C) 2000 Information-technology Promotion Agency, Japan
10 */
11
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
12 #include <string.h>
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
13
39b55f12 » Laurent Sansonetti
2010-02-20 some work on string
14 #include "ruby.h"
15 #include "ruby/encoding.h"
16 #include "encoding.h"
17
96ab900d » Laurent Sansonetti
2010-02-16 more work
18 VALUE rb_cEncoding;
8b9745b6 » Laurent Sansonetti
2009-06-04 define Encoding::ASCII_8BIT as a shortcut to US_ASCII (for now)
19
d0ac5933 » vincentisambart
2010-05-09 an (incomplete) implementation of String#encode
20 rb_encoding_t *default_internal = NULL;
96ab900d » Laurent Sansonetti
2010-02-16 more work
21 static rb_encoding_t *default_external = NULL;
22 rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
23
96ab900d » Laurent Sansonetti
2010-02-16 more work
24 static void str_undefined_update_flags(rb_str_t *self) { abort(); }
25 static void str_undefined_make_data_binary(rb_str_t *self) { abort(); }
26 static bool str_undefined_try_making_data_uchars(rb_str_t *self) { abort(); }
27 static long str_undefined_length(rb_str_t *self, bool ucs2_mode) { abort(); }
28 static long str_undefined_bytesize(rb_str_t *self) { abort(); }
29 static character_boundaries_t str_undefined_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode) { abort(); }
30 static long str_undefined_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes, bool ucs2_mode) { abort(); }
d0ac5933 » vincentisambart
2010-05-09 an (incomplete) implementation of String#encode
31 static void str_undefined_transcode_to_utf16(struct rb_encoding *src_enc, rb_str_t *self, long *pos, UChar **utf16, long *utf16_length) { abort(); }
32 static void str_undefined_transcode_from_utf16(struct rb_encoding *dst_enc, UChar *utf16, long utf16_length, long *pos, char **bytes, long *bytes_length) { abort(); }
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
33
34 static VALUE
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
35 mr_enc_s_list(VALUE klass, SEL sel)
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
36 {
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
37 VALUE ary = rb_ary_new2(ENCODINGS_COUNT);
38 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900d » Laurent Sansonetti
2010-02-16 more work
39 rb_ary_push(ary, (VALUE)rb_encodings[i]);
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
40 }
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
41 return ary;
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
42 }
43
44 static VALUE
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
45 mr_enc_s_name_list(VALUE klass, SEL sel)
46 {
47 VALUE ary = rb_ary_new();
48 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900d » Laurent Sansonetti
2010-02-16 more work
49 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
50 // TODO: use US-ASCII strings
96ab900d » Laurent Sansonetti
2010-02-16 more work
51 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
52 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900d » Laurent Sansonetti
2010-02-16 more work
53 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[j]));
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
54 }
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
55 }
56 return ary;
57 }
58
59 static VALUE
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
60 mr_enc_s_aliases(VALUE klass, SEL sel)
61 {
62 VALUE hash = rb_hash_new();
63 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900d » Laurent Sansonetti
2010-02-16 more work
64 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
65 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900d » Laurent Sansonetti
2010-02-16 more work
66 rb_hash_aset(hash, rb_usascii_str_new2(encoding->aliases[j]),
67 rb_usascii_str_new2(encoding->public_name));
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
68 }
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
69 }
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
70 return hash;
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
71 }
72
73 static VALUE
4ede6523 » Laurent Sansonetti
2010-03-10 added #find
74 mr_enc_s_find(VALUE klass, SEL sel, VALUE name)
75 {
76 StringValue(name);
77 rb_encoding_t *enc = rb_enc_find(RSTRING_PTR(name));
78 if (enc == NULL) {
79 rb_raise(rb_eArgError, "unknown encoding name - %s",
80 RSTRING_PTR(name));
81 }
82 return (VALUE)enc;
83 }
84
85 static VALUE
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
86 mr_enc_s_default_internal(VALUE klass, SEL sel)
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
87 {
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
88 return (VALUE)default_internal;
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
89 }
90
91 static VALUE
50514138 » Laurent Sansonetti
2010-04-05 added #default_external=, #default_internal=
92 mr_enc_set_default_internal(VALUE klass, SEL sel, VALUE enc)
93 {
94 default_internal = rb_to_encoding(enc);
95 return (VALUE)default_internal;
96 }
97
98 static VALUE
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
99 mr_enc_s_default_external(VALUE klass, SEL sel)
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
100 {
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
101 return (VALUE)default_external;
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
102 }
103
104 static VALUE
50514138 » Laurent Sansonetti
2010-04-05 added #default_external=, #default_internal=
105 mr_enc_set_default_external(VALUE klass, SEL sel, VALUE enc)
106 {
107 default_external = rb_to_encoding(enc);
108 return (VALUE)default_external;
109 }
110
111 static VALUE
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
112 mr_enc_name(VALUE self, SEL sel)
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
113 {
96ab900d » Laurent Sansonetti
2010-02-16 more work
114 return rb_usascii_str_new2(RENC(self)->public_name);
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
115 }
116
117 static VALUE
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
118 mr_enc_inspect(VALUE self, SEL sel)
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
119 {
0382b34b » Laurent Sansonetti
2010-02-16 indented code, better type checking, removed rb_cCFString, started ad…
120 return rb_sprintf("#<%s:%s>", rb_obj_classname(self),
96ab900d » Laurent Sansonetti
2010-02-16 more work
121 RENC(self)->public_name);
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
122 }
123
124 static VALUE
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
125 mr_enc_names(VALUE self, SEL sel)
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
126 {
96ab900d » Laurent Sansonetti
2010-02-16 more work
127 rb_encoding_t *encoding = RENC(self);
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
128
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
129 VALUE ary = rb_ary_new2(encoding->aliases_count + 1);
96ab900d » Laurent Sansonetti
2010-02-16 more work
130 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
131 for (unsigned int i = 0; i < encoding->aliases_count; ++i) {
96ab900d » Laurent Sansonetti
2010-02-16 more work
132 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[i]));
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
133 }
134 return ary;
16235323 » Laurent Sansonetti
2009-11-10 added Encoding#default_external= and Encoding#default_internal= which…
135 }
136
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
137 static VALUE
138 mr_enc_ascii_compatible_p(VALUE self, SEL sel)
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
139 {
96ab900d » Laurent Sansonetti
2010-02-16 more work
140 return RENC(self)->ascii_compatible ? Qtrue : Qfalse;
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
141 }
142
143 static VALUE
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
144 mr_enc_dummy_p(VALUE self, SEL sel)
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
145 {
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
146 return Qfalse;
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
147 }
148
ffe45d2b » Patrick Thomson
2010-06-01 Add support for Encoding::Converter and move String#encode and String…
149 // For UTF-[8, 16, 32] it's /uFFFD, and for others it's '?'
150 rb_str_t *replacement_string_for_encoding(rb_encoding_t* destination)
151 {
152 rb_str_t *replacement_str = NULL;
153 if (destination == rb_encodings[ENCODING_UTF16BE]) {
154 replacement_str = RSTR(rb_enc_str_new("\xFF\xFD", 2, destination));
155 }
156 else if (destination == rb_encodings[ENCODING_UTF32BE]) {
157 replacement_str = RSTR(rb_enc_str_new("\0\0\xFF\xFD", 4, destination));
158 }
159 else if (destination == rb_encodings[ENCODING_UTF16LE]) {
160 replacement_str = RSTR(rb_enc_str_new("\xFD\xFF", 2, destination));
161 }
162 else if (destination == rb_encodings[ENCODING_UTF32LE]) {
163 replacement_str = RSTR(rb_enc_str_new("\xFD\xFF\0\0", 4, destination));
164 }
165 else if (destination == rb_encodings[ENCODING_UTF8]) {
166 replacement_str = RSTR(rb_enc_str_new("\xEF\xBF\xBD", 3, destination));
167 }
168 else {
169 replacement_str = RSTR(rb_enc_str_new("?", 1, rb_encodings[ENCODING_ASCII]));
170 replacement_str = str_simple_transcode(replacement_str, destination);
171 }
172 return replacement_str;
173 }
174
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
175 static void
96ab900d » Laurent Sansonetti
2010-02-16 more work
176 define_encoding_constant(const char *name, rb_encoding_t *encoding)
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
177 {
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
178 char c = name[0];
179 if ((c >= '0') && (c <= '9')) {
180 // constants can't start with a number
181 return;
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
182 }
183
325c0325 » vincentisambart
2010-05-21 also define the encoding constants in upper case
184 if (strcmp(name, "locale") == 0) {
185 // there is no constant for locale
186 return;
187 }
188
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
189 char *name_copy = strdup(name);
190 if ((c >= 'a') && (c <= 'z')) {
191 // the first character must be upper case
192 name_copy[0] = c - ('a' - 'A');
193 }
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
194
325c0325 » vincentisambart
2010-05-21 also define the encoding constants in upper case
195 bool has_lower_case = false;
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
196 // '.' and '-' must be transformed into '_'
197 for (int i = 0; name_copy[i]; ++i) {
198 if ((name_copy[i] == '.') || (name_copy[i] == '-')) {
199 name_copy[i] = '_';
023dd4df » Laurent Sansonetti
2009-06-08 fixed Encoding#name for 10.6
200 }
325c0325 » vincentisambart
2010-05-21 also define the encoding constants in upper case
201 else if ((name_copy[i] >= 'a') && (name_copy[i] <= 'z')) {
202 has_lower_case = true;
203 }
023dd4df » Laurent Sansonetti
2009-06-08 fixed Encoding#name for 10.6
204 }
b8818533 » Laurent Sansonetti
2010-02-16 s/MR//
205 rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
325c0325 » vincentisambart
2010-05-21 also define the encoding constants in upper case
206 // if the encoding name has lower case characters,
207 // also define it in upper case
208 if (has_lower_case) {
209 for (int i = 0; name_copy[i]; ++i) {
210 if ((name_copy[i] >= 'a') && (name_copy[i] <= 'z')) {
211 name_copy[i] = name_copy[i] - 'a' + 'A';
212 }
213 }
214 rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
215 }
216
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
217 free(name_copy);
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
218 }
219
96ab900d » Laurent Sansonetti
2010-02-16 more work
220 extern void enc_init_ucnv_encoding(rb_encoding_t *encoding);
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
221
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
222 enum {
223 ENCODING_TYPE_SPECIAL = 0,
224 ENCODING_TYPE_UCNV
225 };
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
226
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
227 static void
228 add_encoding(
0382b34b » Laurent Sansonetti
2010-02-16 indented code, better type checking, removed rb_cCFString, started ad…
229 unsigned int encoding_index, // index of the encoding in the encodings
230 // array
96ab900d » Laurent Sansonetti
2010-02-16 more work
231 unsigned int rb_encoding_type,
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
232 const char *public_name, // public name for the encoding
233 unsigned char min_char_size,
0382b34b » Laurent Sansonetti
2010-02-16 indented code, better type checking, removed rb_cCFString, started ad…
234 bool single_byte_encoding, // in the encoding a character takes only
235 // one byte
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
236 bool ascii_compatible, // is the encoding ASCII compatible or not
0382b34b » Laurent Sansonetti
2010-02-16 indented code, better type checking, removed rb_cCFString, started ad…
237 ... // aliases for the encoding (should no include the public name)
238 // - must end with a NULL
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
239 )
240 {
241 assert(encoding_index < ENCODINGS_COUNT);
242
243 // create an array for the aliases
244 unsigned int aliases_count = 0;
245 va_list va_aliases;
246 va_start(va_aliases, ascii_compatible);
247 while (va_arg(va_aliases, const char *) != NULL) {
248 ++aliases_count;
249 }
250 va_end(va_aliases);
0382b34b » Laurent Sansonetti
2010-02-16 indented code, better type checking, removed rb_cCFString, started ad…
251 const char **aliases = (const char **)
252 malloc(sizeof(const char *) * aliases_count);
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
253 va_start(va_aliases, ascii_compatible);
254 for (unsigned int i = 0; i < aliases_count; ++i) {
255 aliases[i] = va_arg(va_aliases, const char *);
256 }
257 va_end(va_aliases);
258
259 // create the MacRuby object
96ab900d » Laurent Sansonetti
2010-02-16 more work
260 NEWOBJ(encoding, rb_encoding_t);
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
261 encoding->basic.flags = 0;
b8818533 » Laurent Sansonetti
2010-02-16 s/MR//
262 encoding->basic.klass = rb_cEncoding;
96ab900d » Laurent Sansonetti
2010-02-16 more work
263 rb_encodings[encoding_index] = encoding;
264 GC_RETAIN(encoding); // it should never be deallocated
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
265
266 // fill the fields
267 encoding->index = encoding_index;
268 encoding->public_name = public_name;
269 encoding->min_char_size = min_char_size;
270 encoding->single_byte_encoding = single_byte_encoding;
271 encoding->ascii_compatible = ascii_compatible;
272 encoding->aliases_count = aliases_count;
273 encoding->aliases = aliases;
274
275 // fill the default implementations with aborts
276 encoding->methods.update_flags = str_undefined_update_flags;
277 encoding->methods.make_data_binary = str_undefined_make_data_binary;
0382b34b » Laurent Sansonetti
2010-02-16 indented code, better type checking, removed rb_cCFString, started ad…
278 encoding->methods.try_making_data_uchars =
279 str_undefined_try_making_data_uchars;
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
280 encoding->methods.length = str_undefined_length;
281 encoding->methods.bytesize = str_undefined_bytesize;
0382b34b » Laurent Sansonetti
2010-02-16 indented code, better type checking, removed rb_cCFString, started ad…
282 encoding->methods.get_character_boundaries =
283 str_undefined_get_character_boundaries;
284 encoding->methods.offset_in_bytes_to_index =
285 str_undefined_offset_in_bytes_to_index;
d0ac5933 » vincentisambart
2010-05-09 an (incomplete) implementation of String#encode
286 encoding->methods.transcode_to_utf16 =
287 str_undefined_transcode_to_utf16;
288 encoding->methods.transcode_from_utf16 =
289 str_undefined_transcode_from_utf16;
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
290
96ab900d » Laurent Sansonetti
2010-02-16 more work
291 switch (rb_encoding_type) {
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
292 case ENCODING_TYPE_SPECIAL:
022cd7ca » Laurent Sansonetti
2009-06-05 fixed ByteString#encoding to always return US_ASCII (for now)
293 break;
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
294 case ENCODING_TYPE_UCNV:
295 enc_init_ucnv_encoding(encoding);
296 break;
297 default:
298 abort();
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
299 }
300 }
301
ae4da82a » Laurent Sansonetti
2010-02-17 more work
302 // This Init function is called very early. Do not use any runtime method
303 // because things may not be initialized properly yet.
304 void
305 Init_PreEncoding(void)
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
306 {
0f755839 » vincentisambart
2010-05-09 added the encodings used in the specs to be able to remove tags
307 add_encoding(ENCODING_BINARY, ENCODING_TYPE_SPECIAL, "ASCII-8BIT", 1, true, true, "BINARY", NULL);
308 add_encoding(ENCODING_ASCII, ENCODING_TYPE_UCNV, "US-ASCII", 1, true, true, "ASCII", "ANSI_X3.4-1968", "646", NULL);
4e2db641 » Thibault Martin-Lagardette
2010-05-12 Improves core/env pass rate: add 'locale' as an alias of UTF-8
309 add_encoding(ENCODING_UTF8, ENCODING_TYPE_UCNV, "UTF-8", 1, false, true, "CP65001", "locale", NULL);
0f755839 » vincentisambart
2010-05-09 added the encodings used in the specs to be able to remove tags
310 add_encoding(ENCODING_UTF16BE, ENCODING_TYPE_UCNV, "UTF-16BE", 2, false, false, NULL);
311 add_encoding(ENCODING_UTF16LE, ENCODING_TYPE_UCNV, "UTF-16LE", 2, false, false, NULL);
312 add_encoding(ENCODING_UTF32BE, ENCODING_TYPE_UCNV, "UTF-32BE", 4, false, false, "UCS-4BE", NULL);
313 add_encoding(ENCODING_UTF32LE, ENCODING_TYPE_UCNV, "UTF-32LE", 4, false, false, "UCS-4LE", NULL);
314 add_encoding(ENCODING_ISO8859_1, ENCODING_TYPE_UCNV, "ISO-8859-1", 1, true, true, "ISO8859-1", NULL);
315 add_encoding(ENCODING_MACROMAN, ENCODING_TYPE_UCNV, "macRoman", 1, true, true, NULL);
316 add_encoding(ENCODING_MACCYRILLIC, ENCODING_TYPE_UCNV, "macCyrillic", 1, true, true, NULL);
317 add_encoding(ENCODING_BIG5, ENCODING_TYPE_UCNV, "Big5", 1, false, true, "CP950", NULL);
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
318 // FIXME: the ICU conversion tables do not seem to match Ruby's Japanese conversion tables
0f755839 » vincentisambart
2010-05-09 added the encodings used in the specs to be able to remove tags
319 add_encoding(ENCODING_EUCJP, ENCODING_TYPE_UCNV, "EUC-JP", 1, false, true, "eucJP", NULL);
ffe45d2b » Patrick Thomson
2010-06-01 Add support for Encoding::Converter and move String#encode and String…
320 add_encoding(ENCODING_SJIS, ENCODING_TYPE_UCNV, "Shift_JIS", 1, false, true, "SJIS", NULL);
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
321 //add_encoding(ENCODING_EUCJP, ENCODING_TYPE_RUBY, "EUC-JP", 1, false, true, "eucJP", NULL);
322 //add_encoding(ENCODING_SJIS, ENCODING_TYPE_RUBY, "Shift_JIS", 1, false, true, "SJIS", NULL);
323 //add_encoding(ENCODING_CP932, ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
324
96ab900d » Laurent Sansonetti
2010-02-16 more work
325 default_external = rb_encodings[ENCODING_UTF8];
326 default_internal = rb_encodings[ENCODING_UTF8];
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
327 }
328
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
329 void
b8818533 » Laurent Sansonetti
2010-02-16 s/MR//
330 Init_Encoding(void)
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
331 {
ae4da82a » Laurent Sansonetti
2010-02-17 more work
332 // rb_cEncoding is defined earlier in Init_PreVM().
333 rb_set_class_path(rb_cEncoding, rb_cObject, "Encoding");
334 rb_const_set(rb_cObject, rb_intern("Encoding"), rb_cEncoding);
335
b8818533 » Laurent Sansonetti
2010-02-16 s/MR//
336 rb_undef_alloc_func(rb_cEncoding);
337
338 rb_objc_define_method(rb_cEncoding, "to_s", mr_enc_name, 0);
339 rb_objc_define_method(rb_cEncoding, "inspect", mr_enc_inspect, 0);
340 rb_objc_define_method(rb_cEncoding, "name", mr_enc_name, 0);
341 rb_objc_define_method(rb_cEncoding, "names", mr_enc_names, 0);
342 rb_objc_define_method(rb_cEncoding, "dummy?", mr_enc_dummy_p, 0);
0382b34b » Laurent Sansonetti
2010-02-16 indented code, better type checking, removed rb_cCFString, started ad…
343 rb_objc_define_method(rb_cEncoding, "ascii_compatible?",
344 mr_enc_ascii_compatible_p, 0);
4ede6523 » Laurent Sansonetti
2010-03-10 added #find
345 rb_objc_define_method(*(VALUE *)rb_cEncoding, "list", mr_enc_s_list, 0);
346 rb_objc_define_method(*(VALUE *)rb_cEncoding, "name_list",
0382b34b » Laurent Sansonetti
2010-02-16 indented code, better type checking, removed rb_cCFString, started ad…
347 mr_enc_s_name_list, 0);
4ede6523 » Laurent Sansonetti
2010-03-10 added #find
348 rb_objc_define_method(*(VALUE *)rb_cEncoding, "aliases",
0382b34b » Laurent Sansonetti
2010-02-16 indented code, better type checking, removed rb_cCFString, started ad…
349 mr_enc_s_aliases, 0);
4ede6523 » Laurent Sansonetti
2010-03-10 added #find
350 rb_objc_define_method(*(VALUE *)rb_cEncoding, "find", mr_enc_s_find, 1);
351 rb_objc_define_method(*(VALUE *)rb_cEncoding, "compatible?",
39b55f12 » Laurent Sansonetti
2010-02-20 some work on string
352 mr_enc_s_is_compatible, 2); // in string.c
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
353
354 //rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
355 //rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
356
4ede6523 » Laurent Sansonetti
2010-03-10 added #find
357 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external",
0382b34b » Laurent Sansonetti
2010-02-16 indented code, better type checking, removed rb_cCFString, started ad…
358 mr_enc_s_default_external, 0);
50514138 » Laurent Sansonetti
2010-04-05 added #default_external=, #default_internal=
359 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external=",
360 mr_enc_set_default_external, 1);
4ede6523 » Laurent Sansonetti
2010-03-10 added #find
361 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal",
0382b34b » Laurent Sansonetti
2010-02-16 indented code, better type checking, removed rb_cCFString, started ad…
362 mr_enc_s_default_internal, 0);
50514138 » Laurent Sansonetti
2010-04-05 added #default_external=, #default_internal=
363 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal=",
364 mr_enc_set_default_internal, 1);
b8818533 » Laurent Sansonetti
2010-02-16 s/MR//
365 //rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
2b7d5d54 » Laurent Sansonetti
2010-02-16 import vincent's work
366
ae4da82a » Laurent Sansonetti
2010-02-17 more work
367 // Create constants.
368 for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
369 rb_encoding_t *enc = rb_encodings[i];
370 define_encoding_constant(enc->public_name, enc);
371 for (unsigned int j = 0; j < enc->aliases_count; j++) {
372 define_encoding_constant(enc->aliases[j], enc);
373 }
374 }
9c1d2307 » Laurent Sansonetti
2009-03-11 committing experimental branch content
375 }
96ab900d » Laurent Sansonetti
2010-02-16 more work
376
377 // MRI C-API compatibility.
378
379 rb_encoding_t *
380 rb_enc_find(const char *name)
381 {
382 for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
383 rb_encoding_t *enc = rb_encodings[i];
384 if (strcasecmp(enc->public_name, name) == 0) {
385 return enc;
386 }
387 for (unsigned int j = 0; j < enc->aliases_count; j++) {
388 const char *alias = enc->aliases[j];
389 if (strcasecmp(alias, name) == 0) {
390 return enc;
391 }
392 }
393 }
394 return NULL;
395 }
396
397 VALUE
398 rb_enc_from_encoding(rb_encoding_t *enc)
399 {
400 return (VALUE)enc;
401 }
402
403 rb_encoding_t *
404 rb_enc_get(VALUE obj)
405 {
406 if (IS_RSTR(obj)) {
407 return RSTR(obj)->encoding;
408 }
409 // TODO support symbols
410 return NULL;
411 }
412
413 rb_encoding_t *
414 rb_to_encoding(VALUE obj)
415 {
416 rb_encoding_t *enc;
417 if (CLASS_OF(obj) == rb_cEncoding) {
418 enc = RENC(obj);
419 }
420 else {
421 StringValue(obj);
422 enc = rb_enc_find(RSTRING_PTR(obj));
423 if (enc == NULL) {
424 rb_raise(rb_eArgError, "unknown encoding name - %s",
425 RSTRING_PTR(obj));
426 }
427 }
428 return enc;
429 }
430
431 const char *
432 rb_enc_name(rb_encoding_t *enc)
433 {
434 return RENC(enc)->public_name;
435 }
436
437 VALUE
438 rb_enc_name2(rb_encoding_t *enc)
439 {
440 return rb_usascii_str_new2(rb_enc_name(enc));
441 }
442
443 long
444 rb_enc_mbminlen(rb_encoding_t *enc)
445 {
446 return enc->min_char_size;
447 }
448
449 long
450 rb_enc_mbmaxlen(rb_encoding_t *enc)
451 {
452 return enc->single_byte_encoding ? 1 : 10; // XXX 10?
453 }
454
4cd5f5e1 » Laurent Sansonetti
2010-05-13 added missing MRI methods
455 rb_encoding *
456 rb_ascii8bit_encoding(void)
457 {
458 return rb_encodings[ENCODING_BINARY];
459 }
460
461 rb_encoding *
462 rb_utf8_encoding(void)
463 {
464 return rb_encodings[ENCODING_UTF8];
465 }
466
467 rb_encoding *
468 rb_usascii_encoding(void)
469 {
470 return rb_encodings[ENCODING_ASCII];
471 }
472
96ab900d » Laurent Sansonetti
2010-02-16 more work
473 rb_encoding_t *
474 rb_locale_encoding(void)
475 {
476 // XXX
477 return rb_encodings[ENCODING_UTF8];
478 }
479
480 void
481 rb_enc_set_default_external(VALUE encoding)
482 {
483 assert(CLASS_OF(encoding) == rb_cEncoding);
484 default_external = RENC(encoding);
485 }
486
Something went wrong with that request. Please try again.