Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 1879 lines (1697 sloc) 47.155 kb
5678ab5 encoding.c: added.
matz authored
1 /**********************************************************************
2
3 encoding.c -
4
b314d3e @nobu * encoding.c (rb_obj_encoding): rdoc update. a patch from David
nobu authored
5 $Author$
5678ab5 encoding.c: added.
matz authored
6 created at: Thu May 24 17:23:27 JST 2007
7
8 Copyright (C) 2007 Yukihiro Matsumoto
9
10 **********************************************************************/
11
12 #include "ruby/ruby.h"
13 #include "ruby/encoding.h"
e7996eb @akr * internal.h: declare internal functions here.
akr authored
14 #include "internal.h"
5678ab5 encoding.c: added.
matz authored
15 #include "regenc.h"
5daecbc @nobu * encoding.c (rb_enc_alias, rb_enc_find_index): changed
nobu authored
16 #include <ctype.h>
ef7057f @nobu * encoding.c (rb_filesystem_encoding, rb_locale_charmap): uses
nobu authored
17 #ifndef NO_LOCALE_CHARMAP
49ef25b @nobu * encoding.c (rb_filesystem_encoding): use ANSI codepage for file
nobu authored
18 #ifdef __CYGWIN__
ef7057f @nobu * encoding.c (rb_filesystem_encoding, rb_locale_charmap): uses
nobu authored
19 #include <windows.h>
49ef25b @nobu * encoding.c (rb_filesystem_encoding): use ANSI codepage for file
nobu authored
20 #endif
21 #ifdef HAVE_LANGINFO_H
0530cf9 @akr * encoding.c: include locale.h
akr authored
22 #include <langinfo.h>
f2515d9 @akr * configure.in: check langinfo.h and locale.h.
akr authored
23 #endif
ef7057f @nobu * encoding.c (rb_filesystem_encoding, rb_locale_charmap): uses
nobu authored
24 #endif
4d763fb @mame * encoding.c: include util.h. [ruby-dev:35715]
mame authored
25 #include "ruby/util.h"
5678ab5 encoding.c: added.
matz authored
26
0bd71ff @nobu * configure.in (XCFLAGS): use -fvisibility=hidden if possible.
nobu authored
27 #if defined __GNUC__ && __GNUC__ >= 4
28 #pragma GCC visibility push(default)
29 int rb_enc_register(const char *name, rb_encoding *encoding);
30 void rb_enc_set_base(const char *name, const char *orig);
31 void rb_encdb_declare(const char *name);
32 int rb_encdb_replicate(const char *name, const char *orig);
33 int rb_encdb_dummy(const char *name);
34 int rb_encdb_alias(const char *alias, const char *orig);
35 #pragma GCC visibility pop
36 #endif
37
b73891b @nurse * encoding.c (Encoding#base_encoding): removed. [ruby-dev:36270]
nurse authored
38 static ID id_encoding;
44cd8e4 * regparse.c (PINC): use optimized enclen() instead of
matz authored
39 VALUE rb_cEncoding;
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
40 static VALUE rb_encoding_list;
5678ab5 encoding.c: added.
matz authored
41
42 struct rb_encoding_entry {
43 const char *name;
44 rb_encoding *enc;
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
45 rb_encoding *base;
5678ab5 encoding.c: added.
matz authored
46 };
47
00fb802 @nobu * encoding.c (enc_table): packed all enc_table stuff.
nobu authored
48 static struct {
49 struct rb_encoding_entry *list;
50 int count;
51 int size;
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
52 st_table *names;
00fb802 @nobu * encoding.c (enc_table): packed all enc_table stuff.
nobu authored
53 } enc_table;
54
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
55 void rb_enc_init(void);
56
315196b @nurse * dmyencoding.c, encoding.c (enc_init_db, NO_ENCDB_H):
nurse authored
57 #define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
31c163e @nobu * encoding.c (rb_enc_set_default_internal): defines internal
nobu authored
58 #define UNSPECIFIED_ENCODING INT_MAX
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
59
a2393c3 @nobu * encoding.c (valid_encoding_name_p): rejects too long encoding
nobu authored
60 #define ENCODING_NAMELEN_MAX 63
61 #define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
62
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
63 #define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
5678ab5 encoding.c: added.
matz authored
64
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
65 static int load_encoding(const char *name);
5f2e5c0 @nobu * encoding.c (rb_enc_replicate): now creates first class encoding.
nobu authored
66
30995c6 @nobu * encoding.c (encoding_data_type): typed data.
nobu authored
/*
 * Size callback stored in encoding_data_type.
 * Always reports 0 bytes, whatever pointer it is given.
 */
static size_t
enc_memsize(const void *p)
{
    (void)p;
    return 0;
}
72
30995c6 @nobu * encoding.c (encoding_data_type): typed data.
nobu authored
73 static const rb_data_type_t encoding_data_type = {
25b9eb5 @nobu * include/ruby/ruby.h (rb_data_type_t): restructured. [ruby-dev:41862]
nobu authored
74 "encoding",
75 {0, 0, enc_memsize,},
30995c6 @nobu * encoding.c (encoding_data_type): typed data.
nobu authored
76 };
77
8b83fc2 @nurse * encoding.c (is_data_encoding): fix condition.
nurse authored
78 #define is_data_encoding(obj) (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type)
30995c6 @nobu * encoding.c (encoding_data_type): typed data.
nobu authored
79
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
80 static VALUE
81 enc_new(rb_encoding *encoding)
82 {
30995c6 @nobu * encoding.c (encoding_data_type): typed data.
nobu authored
83 return TypedData_Wrap_Struct(rb_cEncoding, &encoding_data_type, encoding);
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
84 }
85
614a842 @nurse * encoding.c (enc_replicate): add Encoding#replicate(name).
nurse authored
86 static VALUE
87 rb_enc_from_encoding_index(int idx)
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
88 {
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
89 VALUE list, enc;
90
91 if (!(list = rb_encoding_list)) {
614a842 @nurse * encoding.c (enc_replicate): add Encoding#replicate(name).
nurse authored
92 rb_bug("rb_enc_from_encoding_index(%d): no rb_encoding_list", idx);
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
93 }
94 enc = rb_ary_entry(list, idx);
95 if (NIL_P(enc)) {
614a842 @nurse * encoding.c (enc_replicate): add Encoding#replicate(name).
nurse authored
96 rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx);
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
97 }
98 return enc;
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
99 }
100
614a842 @nurse * encoding.c (enc_replicate): add Encoding#replicate(name).
nurse authored
101 VALUE
102 rb_enc_from_encoding(rb_encoding *encoding)
103 {
104 int idx;
105 if (!encoding) return Qnil;
106 idx = ENC_TO_ENCINDEX(encoding);
107 return rb_enc_from_encoding_index(idx);
108 }
109
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
110 static int enc_autoload(rb_encoding *);
111
3cb10b3 @nobu * encoding.c (enc_check_encoding): returns index now.
nobu authored
112 static int
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
113 check_encoding(rb_encoding *enc)
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
114 {
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
115 int index = rb_enc_to_index(enc);
116 if (rb_enc_from_index(index) != enc)
117 return -1;
118 if (enc_autoload_p(enc)) {
119 index = enc_autoload(enc);
120 }
121 return index;
122 }
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
123
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
124 static int
125 enc_check_encoding(VALUE obj)
126 {
30995c6 @nobu * encoding.c (encoding_data_type): typed data.
nobu authored
127 if (SPECIAL_CONST_P(obj) || !rb_typeddata_is_kind_of(obj, &encoding_data_type)) {
3cb10b3 @nobu * encoding.c (enc_check_encoding): returns index now.
nobu authored
128 return -1;
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
129 }
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
130 return check_encoding(RDATA(obj)->data);
131 }
132
133 static int
134 must_encoding(VALUE enc)
135 {
136 int index = enc_check_encoding(enc);
137 if (index < 0) {
138 rb_raise(rb_eTypeError, "wrong argument type %s (expected Encoding)",
139 rb_obj_classname(enc));
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
140 }
3cb10b3 @nobu * encoding.c (enc_check_encoding): returns index now.
nobu authored
141 return index;
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
142 }
143
144 int
145 rb_to_encoding_index(VALUE enc)
146 {
3cb10b3 @nobu * encoding.c (enc_check_encoding): returns index now.
nobu authored
147 int idx;
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
148
3cb10b3 @nobu * encoding.c (enc_check_encoding): returns index now.
nobu authored
149 idx = enc_check_encoding(enc);
150 if (idx >= 0) {
d0cbfbd @nobu * encoding.c (rb_to_encoding_index, rb_to_encoding): commit miss.
nobu authored
151 return idx;
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
152 }
4baea7d @nobu * encoding.c (rb_to_encoding_index): should return error instead of
nobu authored
153 else if (NIL_P(enc = rb_check_string_type(enc))) {
154 return -1;
155 }
c97a842 @nobu * encoding.c (rb_to_encoding_index, rb_to_encoding): check if the name
nobu authored
156 if (!rb_enc_asciicompat(rb_enc_get(enc))) {
157 return -1;
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
158 }
c97a842 @nobu * encoding.c (rb_to_encoding_index, rb_to_encoding): check if the name
nobu authored
159 return rb_enc_find_index(StringValueCStr(enc));
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
160 }
161
48bc63f @nurse Fix r32811: add UNSPECIFIED_ENCODING case.
nurse authored
162 /* Returns encoding index or UNSPECIFIED_ENCODING */
faf295f @nurse * encoding.c (str_to_encoding): rename from to_encoding and
nurse authored
163 static int
164 str_to_encindex(VALUE enc)
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
165 {
166 int idx;
167
c97a842 @nobu * encoding.c (rb_to_encoding_index, rb_to_encoding): check if the name
nobu authored
168 StringValue(enc);
169 if (!rb_enc_asciicompat(rb_enc_get(enc))) {
170 rb_raise(rb_eArgError, "invalid name encoding (non ASCII)");
171 }
172 idx = rb_enc_find_index(StringValueCStr(enc));
173 if (idx < 0) {
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
174 rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc));
175 }
faf295f @nurse * encoding.c (str_to_encoding): rename from to_encoding and
nurse authored
176 return idx;
177 }
178
179 static rb_encoding *
180 str_to_encoding(VALUE enc)
181 {
182 return rb_enc_from_index(str_to_encindex(enc));
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
183 }
184
c97a842 @nobu * encoding.c (rb_to_encoding_index, rb_to_encoding): check if the name
nobu authored
185 rb_encoding *
186 rb_to_encoding(VALUE enc)
187 {
188 if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
faf295f @nurse * encoding.c (str_to_encoding): rename from to_encoding and
nurse authored
189 return str_to_encoding(enc);
c97a842 @nobu * encoding.c (rb_to_encoding_index, rb_to_encoding): check if the name
nobu authored
190 }
191
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
/* Currently a no-op: the body is empty. */
void
rb_gc_mark_encodings(void)
{
}
196
b088414 @nobu * encoding.c (rb_enc_replicate): new function to replicate encoding.
nobu authored
197 static int
198 enc_table_expand(int newsize)
5678ab5 encoding.c: added.
matz authored
199 {
200 struct rb_encoding_entry *ent;
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
201 int count = newsize;
5678ab5 encoding.c: added.
matz authored
202
00fb802 @nobu * encoding.c (enc_table): packed all enc_table stuff.
nobu authored
203 if (enc_table.size >= newsize) return newsize;
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
204 newsize = (newsize + 7) / 8 * 8;
00fb802 @nobu * encoding.c (enc_table): packed all enc_table stuff.
nobu authored
205 ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize);
597974c @nobu * encoding.c (rb_enc_register): returns new index or -1 if failed.
nobu authored
206 if (!ent) return -1;
00fb802 @nobu * encoding.c (enc_table): packed all enc_table stuff.
nobu authored
207 memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
208 enc_table.list = ent;
209 enc_table.size = newsize;
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
210 return count;
b088414 @nobu * encoding.c (rb_enc_replicate): new function to replicate encoding.
nobu authored
211 }
212
213 static int
214 enc_register_at(int index, const char *name, rb_encoding *encoding)
215 {
00fb802 @nobu * encoding.c (enc_table): packed all enc_table stuff.
nobu authored
216 struct rb_encoding_entry *ent = &enc_table.list[index];
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
217 VALUE list;
b088414 @nobu * encoding.c (rb_enc_replicate): new function to replicate encoding.
nobu authored
218
a2393c3 @nobu * encoding.c (valid_encoding_name_p): rejects too long encoding
nobu authored
219 if (!valid_encoding_name_p(name)) return -1;
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
220 if (!ent->name) {
221 ent->name = name = strdup(name);
222 }
223 else if (STRCASECMP(name, ent->name)) {
224 return -1;
225 }
226 if (!ent->enc) {
72ba13a @ko1 * array.c, bignum.c, cont.c, dir.c, dln.c, encoding.c, enumerator.c,
ko1 authored
227 ent->enc = xmalloc(sizeof(rb_encoding));
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
228 }
229 if (encoding) {
230 *ent->enc = *encoding;
231 }
232 else {
233 memset(ent->enc, 0, sizeof(*ent->enc));
234 }
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
235 encoding = ent->enc;
236 encoding->name = name;
4e4d433 @akr * include/ruby/oniguruma.h (OnigEncodingType): new member
akr authored
237 encoding->ruby_encoding_index = index;
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
238 st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
239 list = rb_encoding_list;
240 if (list && NIL_P(rb_ary_entry(list, index))) {
4ab384b * test/ruby/test_basicinstructions.rb: updated for new class
matz authored
241 /* initialize encoding data */
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
242 rb_ary_store(list, index, enc_new(encoding));
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
243 }
b088414 @nobu * encoding.c (rb_enc_replicate): new function to replicate encoding.
nobu authored
244 return index;
245 }
246
8063edd @nobu * encoding.c (rb_enc_register, rb_enc_replicate, rb_enc_alias): check
nobu authored
247 static int
248 enc_register(const char *name, rb_encoding *encoding)
b088414 @nobu * encoding.c (rb_enc_replicate): new function to replicate encoding.
nobu authored
249 {
00fb802 @nobu * encoding.c (enc_table): packed all enc_table stuff.
nobu authored
250 int index = enc_table.count;
b088414 @nobu * encoding.c (rb_enc_replicate): new function to replicate encoding.
nobu authored
251
252 if ((index = enc_table_expand(index + 1)) < 0) return -1;
00fb802 @nobu * encoding.c (enc_table): packed all enc_table stuff.
nobu authored
253 enc_table.count = index;
b088414 @nobu * encoding.c (rb_enc_replicate): new function to replicate encoding.
nobu authored
254 return enc_register_at(index - 1, name, encoding);
255 }
256
ef0706a @nobu * encoding.c (rb_enc_register): set encoding constant.
nobu authored
257 static void set_encoding_const(const char *, rb_encoding *);
f2bd108 @nobu * configure.in (enc/Makefile): add external encoding objects list.
nobu authored
258 int rb_enc_registered(const char *name);
8063edd @nobu * encoding.c (rb_enc_register, rb_enc_replicate, rb_enc_alias): check
nobu authored
259
260 int
261 rb_enc_register(const char *name, rb_encoding *encoding)
262 {
263 int index = rb_enc_registered(name);
264
265 if (index >= 0) {
266 rb_encoding *oldenc = rb_enc_from_index(index);
041e829 @akr * include/ruby/encoding.h (rb_isascii): defined.
akr authored
267 if (STRCASECMP(name, rb_enc_name(oldenc))) {
ef0706a @nobu * encoding.c (rb_enc_register): set encoding constant.
nobu authored
268 index = enc_register(name, encoding);
8063edd @nobu * encoding.c (rb_enc_register, rb_enc_replicate, rb_enc_alias): check
nobu authored
269 }
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
270 else if (enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) {
ef0706a @nobu * encoding.c (rb_enc_register): set encoding constant.
nobu authored
271 enc_register_at(index, name, encoding);
8063edd @nobu * encoding.c (rb_enc_register, rb_enc_replicate, rb_enc_alias): check
nobu authored
272 }
273 else {
274 rb_raise(rb_eArgError, "encoding %s is already registered", name);
275 }
276 }
ef0706a @nobu * encoding.c (rb_enc_register): set encoding constant.
nobu authored
277 else {
278 index = enc_register(name, encoding);
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
279 set_encoding_const(name, rb_enc_from_index(index));
ef0706a @nobu * encoding.c (rb_enc_register): set encoding constant.
nobu authored
280 }
281 return index;
8063edd @nobu * encoding.c (rb_enc_register, rb_enc_replicate, rb_enc_alias): check
nobu authored
282 }
283
1369cfd @nobu * encoding.c (enc_init_db): moved to enc/encdb.c.
nobu authored
/*
 * Declares an encoding by name: registers a placeholder entry when the
 * name is not known yet, then defines the Encoding constant for it.
 */
void
rb_encdb_declare(const char *name)
{
    int slot = rb_enc_registered(name);

    if (slot < 0) slot = enc_register(name, 0);
    set_encoding_const(name, rb_enc_from_index(slot));
}
293
294 static void
5f2e5c0 @nobu * encoding.c (rb_enc_replicate): now creates first class encoding.
nobu authored
295 enc_check_duplication(const char *name)
296 {
297 if (rb_enc_registered(name) >= 0) {
298 rb_raise(rb_eArgError, "encoding %s is already registered", name);
299 }
300 }
301
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
302 static rb_encoding*
c3c5851 @nobu * encoding.c (set_base_encoding, enc_base_encoding): renamed
nobu authored
303 set_base_encoding(int index, rb_encoding *base)
5f2e5c0 @nobu * encoding.c (rb_enc_replicate): now creates first class encoding.
nobu authored
304 {
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
305 rb_encoding *enc = enc_table.list[index].enc;
5f2e5c0 @nobu * encoding.c (rb_enc_replicate): now creates first class encoding.
nobu authored
306
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
307 enc_table.list[index].base = base;
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
308 if (rb_enc_dummy_p(base)) ENC_SET_DUMMY(enc);
5f2e5c0 @nobu * encoding.c (rb_enc_replicate): now creates first class encoding.
nobu authored
309 return enc;
310 }
311
b3d7273 @nurse Add functions and macros for second encoding definitions.
nurse authored
/* for encdb.h
 * Sets the base encoding of encodings that are not replicas
 * and are not defined in their own files.
 * Both names are looked up with rb_enc_registered().
 */
void
rb_enc_set_base(const char *name, const char *orig)
{
    const int derived = rb_enc_registered(name);
    const int base = rb_enc_registered(orig);

    set_base_encoding(derived, rb_enc_from_index(base));
}
323
b088414 @nobu * encoding.c (rb_enc_replicate): new function to replicate encoding.
nobu authored
324 int
325 rb_enc_replicate(const char *name, rb_encoding *encoding)
326 {
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
327 int idx;
b088414 @nobu * encoding.c (rb_enc_replicate): new function to replicate encoding.
nobu authored
328
5f2e5c0 @nobu * encoding.c (rb_enc_replicate): now creates first class encoding.
nobu authored
329 enc_check_duplication(name);
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
330 idx = enc_register(name, encoding);
331 set_base_encoding(idx, encoding);
332 set_encoding_const(name, rb_enc_from_index(idx));
333 return idx;
334 }
335
07f81c0 @nurse Add rdoc and test of Encoding#replicate.
nurse authored
336 /*
337 * call-seq:
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
338 * enc.replicate(name) -> encoding
07f81c0 @nurse Add rdoc and test of Encoding#replicate.
nurse authored
339 *
1edc8d3 @znz fix rdoc
znz authored
340 * Returns a replicated encoding of _enc_ whose name is _name_.
07f81c0 @nurse Add rdoc and test of Encoding#replicate.
nurse authored
341 * The new encoding should have the same byte structure of _enc_.
342 * If _name_ is used by another encoding, raise ArgumentError.
343 *
344 */
614a842 @nurse * encoding.c (enc_replicate): add Encoding#replicate(name).
nurse authored
345 static VALUE
346 enc_replicate(VALUE encoding, VALUE name)
347 {
348 return rb_enc_from_encoding_index(
9f9ace8 @nobu * encoding.c (enc_replicate): new encoding name must be valid
nobu authored
349 rb_enc_replicate(StringValueCStr(name),
614a842 @nurse * encoding.c (enc_replicate): add Encoding#replicate(name).
nurse authored
350 rb_to_encoding(encoding)));
351 }
352
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
353 static int
614a842 @nurse * encoding.c (enc_replicate): add Encoding#replicate(name).
nurse authored
354 enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
355 {
356 if (idx < 0) {
357 idx = enc_register(name, origenc);
358 }
359 else {
360 idx = enc_register_at(idx, name, origenc);
361 }
362 if (idx >= 0) {
363 set_base_encoding(idx, origenc);
364 set_encoding_const(name, rb_enc_from_index(idx));
365 }
366 return idx;
367 }
368
1369cfd @nobu * encoding.c (enc_init_db): moved to enc/encdb.c.
nobu authored
/*
 * Declares name as a replica of orig.
 * When orig is not registered yet, a placeholder entry is registered for
 * it first.  Returns the result of enc_replicate_with_index().
 */
int
rb_encdb_replicate(const char *name, const char *orig)
{
    int base = rb_enc_registered(orig);
    const int slot = rb_enc_registered(name);

    if (base < 0) base = enc_register(orig, 0);
    return enc_replicate_with_index(name, rb_enc_from_index(base), slot);
}
380
381 int
382 rb_define_dummy_encoding(const char *name)
383 {
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
384 int index = rb_enc_replicate(name, rb_ascii8bit_encoding());
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
385 rb_encoding *enc = enc_table.list[index].enc;
5f2e5c0 @nobu * encoding.c (rb_enc_replicate): now creates first class encoding.
nobu authored
386
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
387 ENC_SET_DUMMY(enc);
388 return index;
389 }
390
1369cfd @nobu * encoding.c (enc_init_db): moved to enc/encdb.c.
nobu authored
391 int
392 rb_encdb_dummy(const char *name)
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
393 {
614a842 @nurse * encoding.c (enc_replicate): add Encoding#replicate(name).
nurse authored
394 int index = enc_replicate_with_index(name, rb_ascii8bit_encoding(),
395 rb_enc_registered(name));
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
396 rb_encoding *enc = enc_table.list[index].enc;
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
397
398 ENC_SET_DUMMY(enc);
b088414 @nobu * encoding.c (rb_enc_replicate): new function to replicate encoding.
nobu authored
399 return index;
5678ab5 encoding.c: added.
matz authored
400 }
401
0fc7dfe @akr add rdoc.
akr authored
402 /*
403 * call-seq:
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
404 * enc.dummy? -> true or false
0fc7dfe @akr add rdoc.
akr authored
405 *
ad3577b Mon Feb 25 10:41:41 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
duerst authored
406 * Returns true for dummy encodings.
407 * A dummy encoding is an encoding for which character handling is not properly
0fc7dfe @akr add rdoc.
akr authored
408 * implemented.
ad3577b Mon Feb 25 10:41:41 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
duerst authored
409 * It is used for stateful encodings.
0fc7dfe @akr add rdoc.
akr authored
410 *
411 * Encoding::ISO_2022_JP.dummy? #=> true
412 * Encoding::UTF_8.dummy? #=> false
413 *
414 */
5f2e5c0 @nobu * encoding.c (rb_enc_replicate): now creates first class encoding.
nobu authored
415 static VALUE
416 enc_dummy_p(VALUE enc)
417 {
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
418 return ENC_DUMMY_P(enc_table.list[must_encoding(enc)].enc) ? Qtrue : Qfalse;
5f2e5c0 @nobu * encoding.c (rb_enc_replicate): now creates first class encoding.
nobu authored
419 }
420
e56bf07 @nurse New API Encoding#ascii_compatible?.
nurse authored
421 /*
422 * call-seq:
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
423 * enc.ascii_compatible? -> true or false
e56bf07 @nurse New API Encoding#ascii_compatible?.
nurse authored
424 *
425 * Returns whether ASCII-compatible or not.
426 *
427 * Encoding::UTF_8.ascii_compatible? #=> true
428 * Encoding::UTF_16BE.ascii_compatible? #=> false
429 *
430 */
431 static VALUE
432 enc_ascii_compatible_p(VALUE enc)
433 {
434 return rb_enc_asciicompat(enc_table.list[must_encoding(enc)].enc) ? Qtrue : Qfalse;
435 }
436
f755413 @nurse * encoding.c (rb_enc_unicode_p): defined.
nurse authored
437 /*
438 * Returns 1 when the encoding is Unicode series other than UTF-7 else 0.
439 */
440 int
441 rb_enc_unicode_p(rb_encoding *enc)
442 {
d13e191 @nurse * encoding.c (rb_enc_unicode_p): check the encoding is Unicode
nurse authored
443 const char *name = rb_enc_name(enc);
444 return name[0] == 'U' && name[1] == 'T' && name[2] == 'F' && name[4] != '7';
f755413 @nurse * encoding.c (rb_enc_unicode_p): defined.
nurse authored
445 }
446
260e8ce @nurse * encoding.c (enc_alias_internal): free the copied key and
nurse authored
447 /*
448 * Returns copied alias name when the key is added for st_table,
449 * else returns NULL.
450 */
e59b9e9 @nurse * encoding.c (enc_alias_internal): use st_insert2 and change return
nurse authored
451 static int
84b5d04 @nobu * encoding.c (rb_locale_encoding): removed Encoding::LOCALE.
nobu authored
452 enc_alias_internal(const char *alias, int idx)
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
453 {
e59b9e9 @nurse * encoding.c (enc_alias_internal): use st_insert2 and change return
nurse authored
454 return st_insert2(enc_table.names, (st_data_t)alias, (st_data_t)idx,
455 (st_data_t(*)(st_data_t))strdup);
84b5d04 @nobu * encoding.c (rb_locale_encoding): removed Encoding::LOCALE.
nobu authored
456 }
457
/*
 * Adds alias as another name for the encoding at idx.
 * Returns -1 when alias is rejected by valid_encoding_name_p(), idx
 * otherwise.  The Encoding constant for alias is defined only when
 * enc_alias_internal() reports 0.
 */
static int
enc_alias(const char *alias, int idx)
{
    if (!valid_encoding_name_p(alias)) {
        return -1;
    }
    if (enc_alias_internal(alias, idx) == 0) {
        set_encoding_const(alias, rb_enc_from_index(idx));
    }
    return idx;
}
466
467 int
468 rb_enc_alias(const char *alias, const char *orig)
c351afc @nobu * encoding.c (rb_enc_alias): allow encodings multiple aliases.
nobu authored
469 {
597974c @nobu * encoding.c (rb_enc_register): returns new index or -1 if failed.
nobu authored
470 int idx;
471
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
472 enc_check_duplication(alias);
473 if (!enc_table.list) {
474 rb_enc_init();
c351afc @nobu * encoding.c (rb_enc_alias): allow encodings multiple aliases.
nobu authored
475 }
5daecbc @nobu * encoding.c (rb_enc_alias, rb_enc_find_index): changed
nobu authored
476 if ((idx = rb_enc_find_index(orig)) < 0) {
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
477 return -1;
597974c @nobu * encoding.c (rb_enc_register): returns new index or -1 if failed.
nobu authored
478 }
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
479 return enc_alias(alias, idx);
c351afc @nobu * encoding.c (rb_enc_alias): allow encodings multiple aliases.
nobu authored
480 }
481
1369cfd @nobu * encoding.c (enc_init_db): moved to enc/encdb.c.
nobu authored
/*
 * Declares alias as another name for orig.
 * When orig is not registered yet, a placeholder entry is registered for
 * it first.  Returns the result of enc_alias().
 */
int
rb_encdb_alias(const char *alias, const char *orig)
{
    int target = rb_enc_registered(orig);

    if (target < 0) target = enc_register(orig, 0);
    return enc_alias(alias, target);
}
492
12df6cf @nobu * encoding.c (rb_enc_init): use enc_register_at() directly.
nobu authored
/* Fixed table slots of the encodings registered by rb_enc_init(). */
enum {
    ENCINDEX_ASCII,       /* slot of OnigEncodingASCII */
    ENCINDEX_UTF_8,       /* slot of OnigEncodingUTF_8 */
    ENCINDEX_US_ASCII,    /* slot of OnigEncodingUS_ASCII */
    ENCINDEX_BUILTIN_MAX  /* number of built-in slots, not a slot itself */
};
499
edc61cf @nurse * encoding.c (ENC_REGISTER): use &OnigEncoding*.
nurse authored
500 extern rb_encoding OnigEncodingUTF_8;
d9ff499 * re.c (rb_char_to_option_kcode): use rb_enc_find_index() instead
matz authored
501 extern rb_encoding OnigEncodingUS_ASCII;
502
c351afc @nobu * encoding.c (rb_enc_alias): allow encodings multiple aliases.
nobu authored
503 void
5678ab5 encoding.c: added.
matz authored
504 rb_enc_init(void)
505 {
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
506 enc_table_expand(ENCODING_COUNT + 1);
507 if (!enc_table.names) {
508 enc_table.names = st_init_strcasetable();
509 }
edc61cf @nurse * encoding.c (ENC_REGISTER): use &OnigEncoding*.
nurse authored
510 #define ENC_REGISTER(enc) enc_register_at(ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
12df6cf @nobu * encoding.c (rb_enc_init): use enc_register_at() directly.
nobu authored
511 ENC_REGISTER(ASCII);
edc61cf @nurse * encoding.c (ENC_REGISTER): use &OnigEncoding*.
nurse authored
512 ENC_REGISTER(UTF_8);
513 ENC_REGISTER(US_ASCII);
c351afc @nobu * encoding.c (rb_enc_alias): allow encodings multiple aliases.
nobu authored
514 #undef ENC_REGISTER
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
515 enc_table.count = ENCINDEX_BUILTIN_MAX;
5678ab5 encoding.c: added.
matz authored
516 }
517
518 rb_encoding *
519 rb_enc_from_index(int index)
520 {
00fb802 @nobu * encoding.c (enc_table): packed all enc_table stuff.
nobu authored
521 if (!enc_table.list) {
5678ab5 encoding.c: added.
matz authored
522 rb_enc_init();
523 }
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
524 if (index < 0 || enc_table.count <= index) {
5678ab5 encoding.c: added.
matz authored
525 return 0;
526 }
00fb802 @nobu * encoding.c (enc_table): packed all enc_table stuff.
nobu authored
527 return enc_table.list[index].enc;
5678ab5 encoding.c: added.
matz authored
528 }
529
c351afc @nobu * encoding.c (rb_enc_alias): allow encodings multiple aliases.
nobu authored
530 int
f2bd108 @nobu * configure.in (enc/Makefile): add external encoding objects list.
nobu authored
531 rb_enc_registered(const char *name)
5678ab5 encoding.c: added.
matz authored
532 {
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
533 st_data_t idx = 0;
5678ab5 encoding.c: added.
matz authored
534
c351afc @nobu * encoding.c (rb_enc_alias): allow encodings multiple aliases.
nobu authored
535 if (!name) return -1;
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
536 if (!enc_table.list) return -1;
537 if (st_lookup(enc_table.names, (st_data_t)name, &idx)) {
538 return (int)idx;
c351afc @nobu * encoding.c (rb_enc_alias): allow encodings multiple aliases.
nobu authored
539 }
540 return -1;
541 }
542
f2bd108 @nobu * configure.in (enc/Makefile): add external encoding objects list.
nobu authored
543 static VALUE
544 require_enc(VALUE enclib)
545 {
fecda0d @nobu * encoding.c (require_enc): reject only loading from untrusted
nobu authored
546 int safe = rb_safe_level();
547 return rb_require_safe(enclib, safe > 3 ? 3 : safe);
f2bd108 @nobu * configure.in (enc/Makefile): add external encoding objects list.
nobu authored
548 }
549
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
550 static int
551 load_encoding(const char *name)
552 {
98aac1e @akr * ruby.c (process_options): don't specify .so for encdb here.
akr authored
553 VALUE enclib = rb_sprintf("enc/%s.so", name);
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
554 VALUE verbose = ruby_verbose;
555 VALUE debug = ruby_debug;
845ae33 @nobu * encoding.c (load_encoding): check if successfully loaded.
nobu authored
556 VALUE loaded;
98aac1e @akr * ruby.c (process_options): don't specify .so for encdb here.
akr authored
557 char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3;
845ae33 @nobu * encoding.c (load_encoding): check if successfully loaded.
nobu authored
558 int idx;
559
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
560 while (s < e) {
561 if (!ISALNUM(*s)) *s = '_';
562 else if (ISUPPER(*s)) *s = TOLOWER(*s);
563 ++s;
564 }
3a6c3a6 @nobu * encoding.c (load_encoding): predefined encoding names are safe.
nobu authored
565 FL_UNSET(enclib, FL_TAINT|FL_UNTRUSTED);
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
566 OBJ_FREEZE(enclib);
567 ruby_verbose = Qfalse;
568 ruby_debug = Qfalse;
845ae33 @nobu * encoding.c (load_encoding): check if successfully loaded.
nobu authored
569 loaded = rb_protect(require_enc, enclib, 0);
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
570 ruby_verbose = verbose;
571 ruby_debug = debug;
572 rb_set_errinfo(Qnil);
845ae33 @nobu * encoding.c (load_encoding): check if successfully loaded.
nobu authored
573 if (NIL_P(loaded)) return -1;
574 if ((idx = rb_enc_registered(name)) < 0) return -1;
575 if (enc_autoload_p(enc_table.list[idx].enc)) return -1;
576 return idx;
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
577 }
578
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
579 static int
580 enc_autoload(rb_encoding *enc)
581 {
582 int i;
583 rb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base;
584
585 if (base) {
586 i = 0;
587 do {
588 if (i >= enc_table.count) return -1;
589 } while (enc_table.list[i].enc != base && (++i, 1));
590 if (enc_autoload_p(base)) {
591 if (enc_autoload(base) < 0) return -1;
592 }
593 i = ENC_TO_ENCINDEX(enc);
594 enc_register_at(i, rb_enc_name(enc), base);
595 }
596 else {
597 i = load_encoding(rb_enc_name(enc));
598 }
599 return i;
600 }
601
48bc63f @nurse Fix r32811: add UNSPECIFIED_ENCODING case.
nurse authored
602 /* Return encoding index or UNSPECIFIED_ENCODING from encoding name */
f2bd108 @nobu * configure.in (enc/Makefile): add external encoding objects list.
nobu authored
603 int
604 rb_enc_find_index(const char *name)
605 {
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
606 int i = rb_enc_registered(name);
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
607 rb_encoding *enc;
608
f2bd108 @nobu * configure.in (enc/Makefile): add external encoding objects list.
nobu authored
609 if (i < 0) {
0052259 @nobu * common.mk (encdb.h): give output file name to make_encdb.rb.
nobu authored
610 i = load_encoding(name);
80a5699 @nurse * encoding.c (rb_enc_init): revert removing SJIS.
nurse authored
611 }
31c163e @nobu * encoding.c (rb_enc_set_default_internal): defines internal
nobu authored
612 else if (!(enc = rb_enc_from_index(i))) {
613 if (i != UNSPECIFIED_ENCODING) {
614 rb_raise(rb_eArgError, "encoding %s is not registered", name);
615 }
616 }
617 else if (enc_autoload_p(enc)) {
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
618 if (enc_autoload(enc) < 0) {
619 rb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
620 name);
621 return 0;
5b9739a @nurse * enc/make_encdb.rb: added. search enc/*.c and make encoding database.
nurse authored
622 }
f2bd108 @nobu * configure.in (enc/Makefile): add external encoding objects list.
nobu authored
623 }
624 return i;
625 }
626
c351afc @nobu * encoding.c (rb_enc_alias): allow encodings multiple aliases.
nobu authored
627 rb_encoding *
628 rb_enc_find(const char *name)
629 {
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
630 int idx = rb_enc_find_index(name);
631 if (idx < 0) idx = 0;
632 return rb_enc_from_index(idx);
5678ab5 encoding.c: added.
matz authored
633 }
634
19c4d26 * string.c (tr_setup_table): use C array for characters that fit
matz authored
635 static inline int
26adfc1 @nobu * encoding.c (rb_enc_associate_index, rb_enc_get_index): check if
nobu authored
636 enc_capable(VALUE obj)
637 {
30995c6 @nobu * encoding.c (encoding_data_type): typed data.
nobu authored
638 if (SPECIAL_CONST_P(obj)) return SYMBOL_P(obj);
26adfc1 @nobu * encoding.c (rb_enc_associate_index, rb_enc_get_index): check if
nobu authored
639 switch (BUILTIN_TYPE(obj)) {
640 case T_STRING:
641 case T_REGEXP:
642 case T_FILE:
e07cb85 @nobu * suppressed shorten-64-to-32 warnings.
nobu authored
643 return TRUE;
2d1d6c4 @nobu * encoding.c (rb_enc_from_encoding, rb_enc_register): associate index
nobu authored
644 case T_DATA:
8b83fc2 @nurse * encoding.c (is_data_encoding): fix condition.
nurse authored
645 if (is_data_encoding(obj)) return TRUE;
26adfc1 @nobu * encoding.c (rb_enc_associate_index, rb_enc_get_index): check if
nobu authored
646 default:
e07cb85 @nobu * suppressed shorten-64-to-32 warnings.
nobu authored
647 return FALSE;
26adfc1 @nobu * encoding.c (rb_enc_associate_index, rb_enc_get_index): check if
nobu authored
648 }
649 }
650
5d5d0b5 @nobu * encoding.c (rb_id_encoding): returns ID "encoding".
nobu authored
651 ID
652 rb_id_encoding(void)
653 {
5a647a3 @nobu * include/ruby/ruby.h (CONST_ID): constant ID cache for non-gcc.
nobu authored
654 CONST_ID(id_encoding, "encoding");
5d5d0b5 @nobu * encoding.c (rb_id_encoding): returns ID "encoding".
nobu authored
655 return id_encoding;
656 }
657
063beac @akr * encoding.c (rb_enc_internal_get_index): extracted from
akr authored
658 int
44cd8e4 * regparse.c (PINC): use optimized enclen() instead of
matz authored
659 rb_enc_get_index(VALUE obj)
063beac @akr * encoding.c (rb_enc_internal_get_index): extracted from
akr authored
660 {
5fc383f @nurse * encoding.c (rb_enc_compatible): accepst other than strings and
nurse authored
661 int i = -1;
662 VALUE tmp;
063beac @akr * encoding.c (rb_enc_internal_get_index): extracted from
akr authored
663
30995c6 @nobu * encoding.c (encoding_data_type): typed data.
nobu authored
664 if (SPECIAL_CONST_P(obj)) {
665 if (!SYMBOL_P(obj)) return -1;
666 obj = rb_id2str(SYM2ID(obj));
667 }
5fc383f @nurse * encoding.c (rb_enc_compatible): accepst other than strings and
nurse authored
668 switch (BUILTIN_TYPE(obj)) {
87c1a07 @nobu * encoding.c (rb_enc_get_index): fixed indent.
nobu authored
669 as_default:
670 default:
671 case T_STRING:
672 case T_REGEXP:
673 i = ENCODING_GET_INLINED(obj);
674 if (i == ENCODING_INLINE_MAX) {
675 VALUE iv;
676
677 iv = rb_ivar_get(obj, rb_id_encoding());
678 i = NUM2INT(iv);
679 }
680 break;
681 case T_FILE:
682 tmp = rb_funcall(obj, rb_intern("internal_encoding"), 0, 0);
683 if (NIL_P(tmp)) obj = rb_funcall(obj, rb_intern("external_encoding"), 0, 0);
684 else obj = tmp;
685 if (NIL_P(obj)) break;
686 case T_DATA:
30995c6 @nobu * encoding.c (encoding_data_type): typed data.
nobu authored
687 if (is_data_encoding(obj)) {
87c1a07 @nobu * encoding.c (rb_enc_get_index): fixed indent.
nobu authored
688 i = enc_check_encoding(obj);
689 }
690 else {
691 goto as_default;
692 }
693 break;
063beac @akr * encoding.c (rb_enc_internal_get_index): extracted from
akr authored
694 }
695 return i;
696 }
697
cfa0035 @nobu * encoding.c (rb_enc_set_index, rb_enc_associate_index): should
nobu authored
698 static void
699 enc_set_index(VALUE obj, int idx)
063beac @akr * encoding.c (rb_enc_internal_get_index): extracted from
akr authored
700 {
701 if (idx < ENCODING_INLINE_MAX) {
702 ENCODING_SET_INLINED(obj, idx);
703 return;
704 }
705 ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX);
706 rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx));
cfa0035 @nobu * encoding.c (rb_enc_set_index, rb_enc_associate_index): should
nobu authored
707 }
708
709 void
710 rb_enc_set_index(VALUE obj, int idx)
711 {
712 rb_check_frozen(obj);
713 enc_set_index(obj, idx);
063beac @akr * encoding.c (rb_enc_internal_get_index): extracted from
akr authored
714 }
715
fe8bb86 @nurse * encoding.c, include/ruby/encoding.h
nurse authored
716 VALUE
5678ab5 encoding.c: added.
matz authored
717 rb_enc_associate_index(VALUE obj, int idx)
718 {
fe8bb86 @nurse * encoding.c, include/ruby/encoding.h
nurse authored
719 /* enc_check_capable(obj);*/
cfa0035 @nobu * encoding.c (rb_enc_set_index, rb_enc_associate_index): should
nobu authored
720 rb_check_frozen(obj);
44cd8e4 * regparse.c (PINC): use optimized enclen() instead of
matz authored
721 if (rb_enc_get_index(obj) == idx)
4d786d2 @nobu * removed spaces just before tabs.
nobu authored
722 return obj;
a809077 @nobu * encoding.c (rb_enc_associate_index): cannot set encoding on
nobu authored
723 if (SPECIAL_CONST_P(obj)) {
724 rb_raise(rb_eArgError, "cannot set encoding");
725 }
e41b848 @nobu * encoding.c (rb_enc_associate_index): deal with ASCII compatible
nobu authored
726 if (!ENC_CODERANGE_ASCIIONLY(obj) ||
727 !rb_enc_asciicompat(rb_enc_from_index(idx))) {
728 ENC_CODERANGE_CLEAR(obj);
729 }
cfa0035 @nobu * encoding.c (rb_enc_set_index, rb_enc_associate_index): should
nobu authored
730 enc_set_index(obj, idx);
fe8bb86 @nurse * encoding.c, include/ruby/encoding.h
nurse authored
731 return obj;
5678ab5 encoding.c: added.
matz authored
732 }
733
fe8bb86 @nurse * encoding.c, include/ruby/encoding.h
nurse authored
734 VALUE
5678ab5 encoding.c: added.
matz authored
735 rb_enc_associate(VALUE obj, rb_encoding *enc)
736 {
fe8bb86 @nurse * encoding.c, include/ruby/encoding.h
nurse authored
737 return rb_enc_associate_index(obj, rb_enc_to_index(enc));
5678ab5 encoding.c: added.
matz authored
738 }
739
740 rb_encoding*
741 rb_enc_get(VALUE obj)
742 {
743 return rb_enc_from_index(rb_enc_get_index(obj));
744 }
745
746 rb_encoding*
747 rb_enc_check(VALUE str1, VALUE str2)
748 {
00befb4 @nobu * encoding.c (rb_enc_compatible): check if two objects have compatible
nobu authored
749 rb_encoding *enc = rb_enc_compatible(str1, str2);
750 if (!enc)
85c41f4 @nurse * error.c (rb_eEncCompatError): add Exception.
nurse authored
751 rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
8b09f70 * string.c (str_strlen): use search_nonascii() for performance.
matz authored
752 rb_enc_name(rb_enc_get(str1)),
753 rb_enc_name(rb_enc_get(str2)));
00befb4 @nobu * encoding.c (rb_enc_compatible): check if two objects have compatible
nobu authored
754 return enc;
755 }
756
757 rb_encoding*
758 rb_enc_compatible(VALUE str1, VALUE str2)
759 {
5678ab5 encoding.c: added.
matz authored
760 int idx1, idx2;
65a8185 * configure.in (MINIRUBY): remove -I$(EXTOUT)/$(arch) from
matz authored
761 rb_encoding *enc1, *enc2;
b1428ac @nurse * encoding.c (rb_enc_compatible): fix segv on symbols.
nurse authored
762 int isstr1, isstr2;
5678ab5 encoding.c: added.
matz authored
763
764 idx1 = rb_enc_get_index(str1);
765 idx2 = rb_enc_get_index(str2);
766
8bcfa69 @akr * encoding.c (rb_enc_compatible): check encoding incapable arguments.
akr authored
767 if (idx1 < 0 || idx2 < 0)
768 return 0;
769
5678ab5 encoding.c: added.
matz authored
770 if (idx1 == idx2) {
771 return rb_enc_from_index(idx1);
772 }
65a8185 * configure.in (MINIRUBY): remove -I$(EXTOUT)/$(arch) from
matz authored
773 enc1 = rb_enc_from_index(idx1);
774 enc2 = rb_enc_from_index(idx2);
7eeba5f @akr * encoding.c (rb_enc_compatible): empty strings are always compatible.
akr authored
775
b1428ac @nurse * encoding.c (rb_enc_compatible): fix segv on symbols.
nurse authored
776 isstr2 = RB_TYPE_P(str2, T_STRING);
777 if (isstr2 && RSTRING_LEN(str2) == 0)
a8f5a06 @nurse * encoding.c (rb_enc_compatible): change the rule for empty strings:
nurse authored
778 return enc1;
b1428ac @nurse * encoding.c (rb_enc_compatible): fix segv on symbols.
nurse authored
779 isstr1 = RB_TYPE_P(str1, T_STRING);
780 if (isstr1 && RSTRING_LEN(str1) == 0)
a8f5a06 @nurse * encoding.c (rb_enc_compatible): change the rule for empty strings:
nurse authored
781 return (rb_enc_asciicompat(enc1) && rb_enc_str_asciionly_p(str2)) ? enc1 : enc2;
7eeba5f @akr * encoding.c (rb_enc_compatible): empty strings are always compatible.
akr authored
782 if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) {
65a8185 * configure.in (MINIRUBY): remove -I$(EXTOUT)/$(arch) from
matz authored
783 return 0;
7eeba5f @akr * encoding.c (rb_enc_compatible): empty strings are always compatible.
akr authored
784 }
5fc383f @nurse * encoding.c (rb_enc_compatible): accepst other than strings and
nurse authored
785
786 /* objects whose encoding is the same of contents */
b1428ac @nurse * encoding.c (rb_enc_compatible): fix segv on symbols.
nurse authored
787 if (!isstr2 && idx2 == ENCINDEX_US_ASCII)
7fc16c2 @nurse * encoding.c (rb_enc_compatible): ASCII compatible string and US-ASCII
nurse authored
788 return enc1;
b1428ac @nurse * encoding.c (rb_enc_compatible): fix segv on symbols.
nurse authored
789 if (!isstr1 && idx1 == ENCINDEX_US_ASCII)
7fc16c2 @nurse * encoding.c (rb_enc_compatible): ASCII compatible string and US-ASCII
nurse authored
790 return enc2;
7eeba5f @akr * encoding.c (rb_enc_compatible): empty strings are always compatible.
akr authored
791
b1428ac @nurse * encoding.c (rb_enc_compatible): fix segv on symbols.
nurse authored
792 if (!isstr1) {
335fe1e * string.c (rb_str_comparable): need not to check asciicompat here.
matz authored
793 VALUE tmp = str1;
af69c73 @nobu * encoding.c (rb_enc_compatible): should swap encoding indexes too.
nobu authored
794 int idx0 = idx1;
335fe1e * string.c (rb_str_comparable): need not to check asciicompat here.
matz authored
795 str1 = str2;
796 str2 = tmp;
af69c73 @nobu * encoding.c (rb_enc_compatible): should swap encoding indexes too.
nobu authored
797 idx1 = idx2;
798 idx2 = idx0;
b1428ac @nurse * encoding.c (rb_enc_compatible): fix segv on symbols.
nurse authored
799 idx0 = isstr1;
800 isstr1 = isstr2;
801 isstr2 = idx0;
335fe1e * string.c (rb_str_comparable): need not to check asciicompat here.
matz authored
802 }
b1428ac @nurse * encoding.c (rb_enc_compatible): fix segv on symbols.
nurse authored
803 if (isstr1) {
335fe1e * string.c (rb_str_comparable): need not to check asciicompat here.
matz authored
804 int cr1, cr2;
805
806 cr1 = rb_enc_str_coderange(str1);
b1428ac @nurse * encoding.c (rb_enc_compatible): fix segv on symbols.
nurse authored
807 if (isstr2) {
335fe1e * string.c (rb_str_comparable): need not to check asciicompat here.
matz authored
808 cr2 = rb_enc_str_coderange(str2);
809 if (cr1 != cr2) {
810 /* may need to handle ENC_CODERANGE_BROKEN */
65a8185 * configure.in (MINIRUBY): remove -I$(EXTOUT)/$(arch) from
matz authored
811 if (cr1 == ENC_CODERANGE_7BIT) return enc2;
707f2fd * encoding.c (rb_enc_compatible): wrong compatibility condition.
matz authored
812 if (cr2 == ENC_CODERANGE_7BIT) return enc1;
335fe1e * string.c (rb_str_comparable): need not to check asciicompat here.
matz authored
813 }
a648fc8 * encoding.c (rb_enc_compatible): encoding should never fall back
matz authored
814 if (cr2 == ENC_CODERANGE_7BIT) {
65a8185 * configure.in (MINIRUBY): remove -I$(EXTOUT)/$(arch) from
matz authored
815 return enc1;
a648fc8 * encoding.c (rb_enc_compatible): encoding should never fall back
matz authored
816 }
5678ab5 encoding.c: added.
matz authored
817 }
65a8185 * configure.in (MINIRUBY): remove -I$(EXTOUT)/$(arch) from
matz authored
818 if (cr1 == ENC_CODERANGE_7BIT)
819 return enc2;
e41b848 @nobu * encoding.c (rb_enc_associate_index): deal with ASCII compatible
nobu authored
820 }
00befb4 @nobu * encoding.c (rb_enc_compatible): check if two objects have compatible
nobu authored
821 return 0;
5678ab5 encoding.c: added.
matz authored
822 }
823
824 void
825 rb_enc_copy(VALUE obj1, VALUE obj2)
826 {
827 rb_enc_associate_index(obj1, rb_enc_get_index(obj2));
828 }
829
830
19dee8a @nobu * encoding.c (rb_obj_encoding): returns encoding of the given object.
nobu authored
831 /*
832 * call-seq:
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
833 * obj.encoding -> encoding
19dee8a @nobu * encoding.c (rb_obj_encoding): returns encoding of the given object.
nobu authored
834 *
b314d3e @nobu * encoding.c (rb_obj_encoding): rdoc update. a patch from David
nobu authored
835 * Returns the Encoding object that represents the encoding of obj.
19dee8a @nobu * encoding.c (rb_obj_encoding): returns encoding of the given object.
nobu authored
836 */
837
838 VALUE
839 rb_obj_encoding(VALUE obj)
840 {
faf295f @nurse * encoding.c (str_to_encoding): rename from to_encoding and
nurse authored
841 int idx = rb_enc_get_index(obj);
842 if (idx < 0) {
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
843 rb_raise(rb_eTypeError, "unknown encoding");
844 }
faf295f @nurse * encoding.c (str_to_encoding): rename from to_encoding and
nurse authored
845 return rb_enc_from_encoding_index(idx);
19dee8a @nobu * encoding.c (rb_obj_encoding): returns encoding of the given object.
nobu authored
846 }
847
5678ab5 encoding.c: added.
matz authored
848 int
0980fe7 * encoding.c (rb_enc_fast_mbclen): faster mbclen for strings known
matz authored
849 rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
850 {
851 return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
852 }
853
854 int
edd7c78 * array.c (rb_ary_cycle): typo in rdoc. a patch from Yugui
matz authored
855 rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
4461bd2 * encoding.c (rb_enc_codelen): raises invalid sequence exception
matz authored
856 {
f1b7e60 @akr * encoding.c (rb_enc_mbclen): make it never fail.
akr authored
857 int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
fc208c1 @akr * include/ruby/oniguruma.h: precise mbclen API redesigned to avoid
akr authored
858 if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
859 return MBCLEN_CHARFOUND_LEN(n);
81fc1cf @akr * encoding.c (rb_enc_mbclen): return minlen instead of 1 when
akr authored
860 else {
861 int min = rb_enc_mbminlen(enc);
e07cb85 @nobu * suppressed shorten-64-to-32 warnings.
nobu authored
862 return min <= e-p ? min : (int)(e-p);
81fc1cf @akr * encoding.c (rb_enc_mbclen): return minlen instead of 1 when
akr authored
863 }
4461bd2 * encoding.c (rb_enc_codelen): raises invalid sequence exception
matz authored
864 }
865
866 int
69406aa @akr * encoding.c (rb_enc_precise_mbclen): new function for mbclen with
akr authored
867 rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
868 {
78eae5d @akr * encoding.c (rb_enc_precise_mbclen): return needmore if underlying
akr authored
869 int n;
f1b7e60 @akr * encoding.c (rb_enc_mbclen): make it never fail.
akr authored
870 if (e <= p)
871 return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
78eae5d @akr * encoding.c (rb_enc_precise_mbclen): return needmore if underlying
akr authored
872 n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
873 if (e-p < n)
e07cb85 @nobu * suppressed shorten-64-to-32 warnings.
nobu authored
874 return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p));
78eae5d @akr * encoding.c (rb_enc_precise_mbclen): return needmore if underlying
akr authored
875 return n;
69406aa @akr * encoding.c (rb_enc_precise_mbclen): new function for mbclen with
akr authored
876 }
877
ea1b9d5 * string.c (rb_str_index): check if substring is broken.
matz authored
878 int
879 rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
f1b7e60 @akr * encoding.c (rb_enc_mbclen): make it never fail.
akr authored
880 {
081c802 @akr * grapheme cluster implementation reverted. [ruby-dev:36375]
akr authored
881 unsigned int c, l;
f1b7e60 @akr * encoding.c (rb_enc_mbclen): make it never fail.
akr authored
882 if (e <= p)
883 return -1;
081c802 @akr * grapheme cluster implementation reverted. [ruby-dev:36375]
akr authored
884 if (rb_enc_asciicompat(enc)) {
885 c = (unsigned char)*p;
886 if (!ISASCII(c))
887 return -1;
888 if (len) *len = 1;
889 return c;
890 }
f1b7e60 @akr * encoding.c (rb_enc_mbclen): make it never fail.
akr authored
891 l = rb_enc_precise_mbclen(p, e, enc);
fc208c1 @akr * include/ruby/oniguruma.h: precise mbclen API redesigned to avoid
akr authored
892 if (!MBCLEN_CHARFOUND_P(l))
f1b7e60 @akr * encoding.c (rb_enc_mbclen): make it never fail.
akr authored
893 return -1;
081c802 @akr * grapheme cluster implementation reverted. [ruby-dev:36375]
akr authored
894 c = rb_enc_mbc_to_codepoint(p, e, enc);
5802768 @akr * encoding.c (rb_enc_get_ascii): add an argument to provide the
akr authored
895 if (!rb_enc_isascii(c, enc))
896 return -1;
897 if (len) *len = l;
898 return c;
f1b7e60 @akr * encoding.c (rb_enc_mbclen): make it never fail.
akr authored
899 }
900
d51b061 @nurse * include/ruby/oniguruma.h (OnigCodePoint): unsigned long to unsigned in...
nurse authored
901 unsigned int
91e5ba1 * encoding.c (rb_enc_codepoint_len): combine rb_enc_codepoint()
matz authored
902 rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
cf36df9 @akr * encoding.c (rb_enc_codepoint): implemented to raise invalid
akr authored
903 {
ead329f @akr * string.c (rb_str_inspect): don't call rb_enc_codepoint with empty
akr authored
904 int r;
905 if (e <= p)
906 rb_raise(rb_eArgError, "empty string");
081c802 @akr * grapheme cluster implementation reverted. [ruby-dev:36375]
akr authored
907 r = rb_enc_precise_mbclen(p, e, enc);
91e5ba1 * encoding.c (rb_enc_codepoint_len): combine rb_enc_codepoint()
matz authored
908 if (MBCLEN_CHARFOUND_P(r)) {
909 if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
081c802 @akr * grapheme cluster implementation reverted. [ruby-dev:36375]
akr authored
910 return rb_enc_mbc_to_codepoint(p, e, enc);
91e5ba1 * encoding.c (rb_enc_codepoint_len): combine rb_enc_codepoint()
matz authored
911 }
cf36df9 @akr * encoding.c (rb_enc_codepoint): implemented to raise invalid
akr authored
912 else
85c41f4 @nurse * error.c (rb_eEncCompatError): add Exception.
nurse authored
913 rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
cf36df9 @akr * encoding.c (rb_enc_codepoint): implemented to raise invalid
akr authored
914 }
915
91e5ba1 * encoding.c (rb_enc_codepoint_len): combine rb_enc_codepoint()
matz authored
916 #undef rb_enc_codepoint
917 unsigned int
918 rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
919 {
920 return rb_enc_codepoint_len(p, e, 0, enc);
921 }
922
69406aa @akr * encoding.c (rb_enc_precise_mbclen): new function for mbclen with
akr authored
923 int
4461bd2 * encoding.c (rb_enc_codelen): raises invalid sequence exception
matz authored
924 rb_enc_codelen(int c, rb_encoding *enc)
925 {
926 int n = ONIGENC_CODE_TO_MBCLEN(enc,c);
927 if (n == 0) {
85c41f4 @nurse * error.c (rb_eEncCompatError): add Exception.
nurse authored
928 rb_raise(rb_eArgError, "invalid codepoint 0x%x in %s", c, rb_enc_name(enc));
4461bd2 * encoding.c (rb_enc_codelen): raises invalid sequence exception
matz authored
929 }
930 return n;
931 }
932
933 int
5678ab5 encoding.c: added.
matz authored
934 rb_enc_toupper(int c, rb_encoding *enc)
935 {
936 return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_UPPER_CASE(c):(c));
937 }
938
939 int
940 rb_enc_tolower(int c, rb_encoding *enc)
941 {
942 return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_LOWER_CASE(c):(c));
943 }
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
944
0fc7dfe @akr add rdoc.
akr authored
945 /*
946 * call-seq:
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
947 * enc.inspect -> string
0fc7dfe @akr add rdoc.
akr authored
948 *
949 * Returns a string which represents the encoding for programmers.
950 *
951 * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
952 * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
953 */
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
954 static VALUE
5bb7c86 @akr * encoding.c (Init_Encoding): use enc_name as to_s.
akr authored
955 enc_inspect(VALUE self)
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
956 {
42dcda0 @nurse * string.c (rb_str_usascii_new{,2}: defined.
nurse authored
957 VALUE str = rb_sprintf("#<%s:%s%s>", rb_obj_classname(self),
5f2e5c0 @nobu * encoding.c (rb_enc_replicate): now creates first class encoding.
nobu authored
958 rb_enc_name((rb_encoding*)DATA_PTR(self)),
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
959 (enc_dummy_p(self) ? " (dummy)" : ""));
42dcda0 @nurse * string.c (rb_str_usascii_new{,2}: defined.
nurse authored
960 ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
961 return str;
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
962 }
963
0fc7dfe @akr add rdoc.
akr authored
964 /*
965 * call-seq:
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
966 * enc.name -> string
0fc7dfe @akr add rdoc.
akr authored
967 *
968 * Returns the name of the encoding.
969 *
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
970 * Encoding::UTF_8.name #=> "UTF-8"
0fc7dfe @akr add rdoc.
akr authored
971 */
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
972 static VALUE
973 enc_name(VALUE self)
974 {
42dcda0 @nurse * string.c (rb_str_usascii_new{,2}: defined.
nurse authored
975 return rb_usascii_str_new2(rb_enc_name((rb_encoding*)DATA_PTR(self)));
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
976 }
977
517a1c3 @nurse * encoding.c (Init_Encoding): new instance method Encoding#names,
nurse authored
978 static int
017ff9b @nobu * encoding.c (enc_names): minor improvement.
nobu authored
979 enc_names_i(st_data_t name, st_data_t idx, st_data_t args)
517a1c3 @nurse * encoding.c (Init_Encoding): new instance method Encoding#names,
nurse authored
980 {
017ff9b @nobu * encoding.c (enc_names): minor improvement.
nobu authored
981 VALUE *arg = (VALUE *)args;
982
983 if ((int)idx == (int)arg[0]) {
517a1c3 @nurse * encoding.c (Init_Encoding): new instance method Encoding#names,
nurse authored
984 VALUE str = rb_usascii_str_new2((char *)name);
985 OBJ_FREEZE(str);
017ff9b @nobu * encoding.c (enc_names): minor improvement.
nobu authored
986 rb_ary_push(arg[1], str);
517a1c3 @nurse * encoding.c (Init_Encoding): new instance method Encoding#names,
nurse authored
987 }
988 return ST_CONTINUE;
989 }
990
991 /*
992 * call-seq:
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
993 * enc.names -> array
517a1c3 @nurse * encoding.c (Init_Encoding): new instance method Encoding#names,
nurse authored
994 *
995 * Returns the list of name and aliases of the encoding.
996 *
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
997 * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"]
517a1c3 @nurse * encoding.c (Init_Encoding): new instance method Encoding#names,
nurse authored
998 */
999 static VALUE
1000 enc_names(VALUE self)
1001 {
017ff9b @nobu * encoding.c (enc_names): minor improvement.
nobu authored
1002 VALUE args[2];
1003
1004 args[0] = (VALUE)rb_to_encoding_index(self);
1005 args[1] = rb_ary_new2(0);
1006 st_foreach(enc_table.names, enc_names_i, (st_data_t)args);
1007 return args[1];
517a1c3 @nurse * encoding.c (Init_Encoding): new instance method Encoding#names,
nurse authored
1008 }
1009
0fc7dfe @akr add rdoc.
akr authored
1010 /*
1011 * call-seq:
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
1012 * Encoding.list -> [enc1, enc2, ...]
0fc7dfe @akr add rdoc.
akr authored
1013 *
1014 * Returns the list of loaded encodings.
1015 *
1016 * Encoding.list
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
1017 * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1018 * #<Encoding:ISO-2022-JP (dummy)>]
0fc7dfe @akr add rdoc.
akr authored
1019 *
1020 * Encoding.find("US-ASCII")
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
1021 * #=> #<Encoding:US-ASCII>
0fc7dfe @akr add rdoc.
akr authored
1022 *
1023 * Encoding.list
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
1024 * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1025 * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
0fc7dfe @akr add rdoc.
akr authored
1026 *
1027 */
b088414 @nobu * encoding.c (rb_enc_replicate): new function to replicate encoding.
nobu authored
1028 static VALUE
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
1029 enc_list(VALUE klass)
1030 {
28b216a @nobu * include/ruby/oniguruma.h (OnigEncoding): removed auxiliary_data.
nobu authored
1031 VALUE ary = rb_ary_new2(0);
1032 rb_ary_replace(ary, rb_encoding_list);
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
1033 return ary;
1034 }
1035
0fc7dfe @akr add rdoc.
akr authored
1036 /*
1037 * call-seq:
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
1038 * Encoding.find(string) -> enc
1039 * Encoding.find(symbol) -> enc
0fc7dfe @akr add rdoc.
akr authored
1040 *
1041 * Search the encoding with specified <i>name</i>.
1042 * <i>name</i> should be a string or symbol.
1043 *
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
1044 * Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII>
1045 * Encoding.find(:Shift_JIS) #=> #<Encoding:Shift_JIS>
5b1d121 @akr document updated.
akr authored
1046 *
7a884fe @nurse * encoding.c (get_filesystem_encoding): removed.
nurse authored
1047 * Names which this method accept are encoding names and aliases
1048 * including following special aliases
1049 *
feb3470 @nobu * encoding.c (enc_find): fixed rdoc formatting.
nobu authored
1050 * "external":: default external encoding
1051 * "internal":: default internal encoding
1052 * "locale":: locale encoding
1053 * "filesystem":: filesystem encoding
7a884fe @nurse * encoding.c (get_filesystem_encoding): removed.
nurse authored
1054 *
58740ef @yugui * encoding.c (rb_enc_set_default_external): default_internal can be
yugui authored
1055 * An ArgumentError is raised when no encoding with <i>name</i>.
feb3470 @nobu * encoding.c (enc_find): fixed rdoc formatting.
nobu authored
1056 * Only <code>Encoding.find("internal")</code> however returns nil
1057 * when no encoding named "internal", in other words, when Ruby has no
1058 * default internal encoding.
0fc7dfe @akr add rdoc.
akr authored
1059 */
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
1060 static VALUE
1061 enc_find(VALUE klass, VALUE enc)
1062 {
48bc63f @nurse Fix r32811: add UNSPECIFIED_ENCODING case.
nurse authored
1063 int idx;
a0b9f48 @nobu * encoding.c (enc_find): mistakenly remained !. [Bug #5150]
nobu authored
1064 if (RB_TYPE_P(enc, T_DATA) && is_data_encoding(enc))
faf295f @nurse * encoding.c (str_to_encoding): rename from to_encoding and
nurse authored
1065 return enc;
48bc63f @nurse Fix r32811: add UNSPECIFIED_ENCODING case.
nurse authored
1066 idx = str_to_encindex(enc);
1067 if (idx == UNSPECIFIED_ENCODING) return Qnil;
1068 return rb_enc_from_encoding_index(idx);
9c24fed @nobu * encoding.c (rb_cEncoding): new Encoding class.
nobu authored
1069 }
1070
0fc7dfe @akr add rdoc.
akr authored
1071 /*
1072 * call-seq:
58b14e7 @drbrain * encoding.c: Mention that Encoding.compatible? can work with more
drbrain authored
1073 * Encoding.compatible?(obj1, obj2) -> enc or nil
0fc7dfe @akr add rdoc.
akr authored
1074 *
58b14e7 @drbrain * encoding.c: Mention that Encoding.compatible? can work with more
drbrain authored
1075 * Checks the compatibility of two objects.
1076 *
1077 * If the objects are both strings they are compatible when they are
1078 * concatenatable. The encoding of the concatenated string will be returned
1079 * if they are compatible, nil if they are not.
0fc7dfe @akr add rdoc.
akr authored
1080 *
1081 * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
1082 * #=> #<Encoding:ISO-8859-1>
0fc7dfe @akr add rdoc.
akr authored
1083 *
1084 * Encoding.compatible?(
1085 * "\xa1".force_encoding("iso-8859-1"),
1086 * "\xa1\xa1".force_encoding("euc-jp"))
7729de4 @marcandre * array.c: Documentation: change => in call-seq to ->.
marcandre authored
1087 * #=> nil
0fc7dfe @akr add rdoc.
akr authored
1088 *
58b14e7 @drbrain * encoding.c: Mention that Encoding.compatible? can work with more
drbrain authored
1089 * If the objects are non-strings their encodings are compatible when they
1090 * have an encoding and:
b1428ac @nurse * encoding.c (rb_enc_compatible): fix segv on symbols.
nurse authored
1091 * * Either encoding is US-ASCII compatible
58b14e7 @drbrain * encoding.c: Mention that Encoding.compatible? can work with more
drbrain authored
1092 * * One of the encodings is a 7-bit encoding
1093 *
0fc7dfe @akr add rdoc.
akr authored
1094 */
00befb4 @nobu * encoding.c (rb_enc_compatible): check if two objects have compatible
nobu authored
1095 static VALUE
1096 enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
1097 {
44cd8e4 * regparse.c (PINC): use optimized enclen() instead of
matz authored
1098 rb_encoding *enc;
1099
1100 if (!enc_capable(str1)) return Qnil;
1101 if (!enc_capable(str2)) return Qnil;
1102 enc = rb_enc_compatible(str1, str2);
1103 if (!enc) return Qnil;
1104 return rb_enc_from_encoding(enc);
00befb4 @nobu * encoding.c (rb_enc_compatible): check if two objects have compatible
nobu authored
1105 }
1106
b314d3e @nobu * encoding.c (rb_obj_encoding): rdoc update. a patch from David
nobu authored
1107 /* :nodoc: */
1108 static VALUE
1109 enc_dump(int argc, VALUE *argv, VALUE self)
1110 {
1111 rb_scan_args(argc, argv, "01", 0);
1112 return enc_name(self);
1113 }
1114
1115 /* :nodoc: */
1116 static VALUE
1117 enc_load(VALUE klass, VALUE str)
1118 {
1119 return enc_find(klass, str);
1120 }
1121
b2c8174 @nobu * encoding.c (rb_enc_default, rb_enc_primary): return pointers to
nobu authored
1122 rb_encoding *
5b809a2 @akr * include/ruby/encoding.h, encoding.c, re.c, io.c, parse.y, numeric.c,
akr authored
1123 rb_ascii8bit_encoding(void)
b2c8174 @nobu * encoding.c (rb_enc_default, rb_enc_primary): return pointers to
nobu authored
1124 {
00fb802 @nobu * encoding.c (enc_table): packed all enc_table stuff.
nobu authored
1125 if (!enc_table.list) {
b2c8174 @nobu * encoding.c (rb_enc_default, rb_enc_primary): return pointers to
nobu authored
1126 rb_enc_init();
1127 }
13f20cc @nurse * encoding.c (rb_ascii8bit_encoding): use ENCINDEX_ASCII.
nurse authored
1128 return enc_table.list[ENCINDEX_ASCII].enc;
1129 }
1130
1131 int
1132 rb_ascii8bit_encindex(void)
1133 {
1134 return ENCINDEX_ASCII;
b2c8174 @nobu * encoding.c (rb_enc_default, rb_enc_primary): return pointers to
nobu authored
1135 }
1136
12df6cf @nobu * encoding.c (rb_enc_init): use enc_register_at() directly.
nobu authored
1137 rb_encoding *
1138 rb_utf8_encoding(void)
1139 {
00fb802 @nobu * encoding.c (enc_table): packed all enc_table stuff.
nobu authored
1140 if (!enc_table.list) {
12df6cf @nobu * encoding.c (rb_enc_init): use enc_register_at() directly.
nobu authored
1141 rb_enc_init();
1142 }
edc61cf @nurse * encoding.c (ENC_REGISTER): use &OnigEncoding*.
nurse authored
1143 return enc_table.list[ENCINDEX_UTF_8].enc;
12df6cf @nobu * encoding.c (rb_enc_init): use enc_register_at() directly.
nobu authored
1144 }
1145
04d8311 @nurse * encoding.c (rb_utf8_encindex): defined.
nurse authored
1146 int
1147 rb_utf8_encindex(void)
1148 {
1149 return ENCINDEX_UTF_8;
1150 }
1151
371977f @akr * encoding.c (rb_locale_encoding): defined.
akr authored
1152 rb_encoding *
1ccb064 @nurse * encoding.{c, h} (rb_usascii_encoding): added.
nurse authored
1153 rb_usascii_encoding(void)
1154 {
1155 if (!enc_table.list) {
1156 rb_enc_init();
1157 }
1158 return enc_table.list[ENCINDEX_US_ASCII].enc;
1159 }
1160
42dcda0 @nurse * string.c (rb_str_usascii_new{,2}: defined.
nurse authored
1161 int
1162 rb_usascii_encindex(void)
1163 {
1164 return ENCINDEX_US_ASCII;
1165 }
1166
f39ad4c @nurse * include/ruby/encoding.h (rb_filesystem_encindex): defined.
nurse authored
1167 int
2f2e845 @nurse Revert set locale and filesystem encindex in Init_Encoding.
nurse authored
1168 rb_locale_encindex(void)
371977f @akr * encoding.c (rb_locale_encoding): defined.
akr authored
1169 {
1170 VALUE charmap = rb_locale_charmap(rb_cEncoding);
2f2e845 @nurse Revert set locale and filesystem encindex in Init_Encoding.
nurse authored
1171 int idx;
eb1014e @unak * encoding.c (rb_locale_encoding): should check return value from
unak authored
1172
1173 if (NIL_P(charmap))
2f2e845 @nurse Revert set locale and filesystem encindex in Init_Encoding.
nurse authored
1174 idx = rb_usascii_encindex();
1175 else if ((idx = rb_enc_find_index(StringValueCStr(charmap))) < 0)
1176 idx = rb_ascii8bit_encindex();
2db25b6 @nobu * encoding.c (rb_locale_encoding): makes an alias for locale.
nobu authored
1177
2f2e845 @nurse Revert set locale and filesystem encindex in Init_Encoding.
nurse authored
1178 if (rb_enc_registered("locale") < 0) enc_alias_internal("locale", idx);
1179
1180 return idx;
371977f @akr * encoding.c (rb_locale_encoding): defined.
akr authored
1181 }
1182
2755479 @nurse * encoding.c (rb_filesystem_encoding): defined.
nurse authored
1183 rb_encoding *
7a884fe @nurse * encoding.c (get_filesystem_encoding): removed.
nurse authored
1184 rb_locale_encoding(void)
2755479 @nurse * encoding.c (rb_filesystem_encoding): defined.
nurse authored
1185 {
2f2e845 @nurse Revert set locale and filesystem encindex in Init_Encoding.
nurse authored
1186 return rb_enc_from_index(rb_locale_encindex());
7a884fe @nurse * encoding.c (get_filesystem_encoding): removed.
nurse authored
1187 }
1188
2f2e845 @nurse Revert set locale and filesystem encindex in Init_Encoding.
nurse authored
/*
 * Determines the filesystem encoding index, registers it under the alias
 * "filesystem", and returns it.
 *
 * When NO_LOCALE_CHARMAP is not defined and the target is _WIN32 or
 * __CYGWIN__, the index is looked up by the name "CP<n>", where <n> is
 * GetACP() if AreFileApisANSI() is true and GetOEMCP() otherwise.  If that
 * name is not found, the ASCII-8BIT index is used.
 *
 * In every other configuration the index of the default external encoding
 * is used.
 */
static int
enc_set_filesystem_encoding(void)
{
    int encindex;
#if !defined NO_LOCALE_CHARMAP && (defined _WIN32 || defined __CYGWIN__)
    /* Room for "CP", the decimal digits of an int, and the terminator. */
    char codepage[sizeof(int) * 8 / 3 + 4];

    snprintf(codepage, sizeof codepage, "CP%d",
             AreFileApisANSI() ? GetACP() : GetOEMCP());
    encindex = rb_enc_find_index(codepage);
    if (encindex < 0) {
        encindex = rb_ascii8bit_encindex();
    }
#else
    encindex = rb_enc_to_index(rb_default_external_encoding());
#endif

    enc_alias_internal("filesystem", encindex);

    return encindex;
}
2f2e845 @nurse Revert set locale and filesystem encindex in Init_Encoding.
nurse authored
1207
f39ad4c @nurse * include/ruby/encoding.h (rb_filesystem_encindex): defined.
nurse authored
1208 int
a7db2c1 @nurse * encoding.c (enc_set_default_encoding): reset filesytem
nurse authored
1209 rb_filesystem_encindex(void)
1210 {
1211 int idx = rb_enc_registered("filesystem");
1212 if (idx < 0)
2f9d028 @nurse * encoding.c (rb_filesystem_encindex): avoid infinite require
nurse authored
1213 idx = rb_ascii8bit_encindex();
2f2e845 @nurse Revert set locale and filesystem encindex in Init_Encoding.
nurse authored