Permalink
Browse files

Handle UTF16 endianness according to the documentation

  • Loading branch information...
1 parent 33ba3a4 commit 679bd5eb32f7957c0411901aeb429b8203e10439 @drbrain committed Mar 25, 2011
Showing with 66 additions and 9 deletions.
  1. +52 −8 ext/pasteboard/pasteboard.c
  2. +13 −0 lib/pasteboard.rb
  3. +1 −1 lib/pasteboard/type.rb
@@ -2,18 +2,29 @@
#include <Pasteboard.h>
#include "extconf.h"
-#if HAVE_RB_STR_ENCODE
-#include <ruby/encoding.h>
-#endif
-
#define BUFSIZE 128
static VALUE cPB;
static VALUE cPBType;
static VALUE cPBTypeEncodings;
static VALUE ePBError;
+
+#if HAVE_RB_STR_ENCODE
+#include <ruby/encoding.h>
+/* Encoding-related values used by this file; each one is assigned in Init_pasteboard. */
+static VALUE BE_BOM; /* BE_BOM constant of cPB; handle_bom picks UTF_16BE when data starts with it */
+static VALUE LE_BOM; /* LE_BOM constant of cPB; handle_bom picks UTF_16LE when data starts with it */
+/* Encoding objects. */
+static VALUE binary_encoding; /* Encoding::BINARY */
static VALUE usascii_encoding; /* wraps rb_usascii_encoding() */
static VALUE utf8_encoding; /* wraps rb_utf8_encoding() */
+static VALUE utf16be_encoding; /* Encoding::UTF_16BE */
+static VALUE utf16le_encoding; /* Encoding::UTF_16LE */
+static VALUE native_encoding; /* NATIVE_ENCODING constant of cPB */
+/* Flavor names whose data pb_copy_item_flavor_data hands to handle_bom. */
+static VALUE utf16_external_flavor; /* PLAIN_TEXT_UTF16_EXTERNAL constant of cPBType */
+static VALUE utf16_internal_flavor; /* PLAIN_TEXT_UTF16 constant of cPBType */
+#endif
static VALUE
string_ref_to_value(CFStringRef ref) {
@@ -50,6 +61,24 @@ value_to_utf8_cstr(VALUE string) {
return StringValueCStr(string);
}
+#if HAVE_RB_STR_ENCODE
+/*
+ * Chooses the encoding tag for UTF-16 pasteboard data by inspecting its
+ * first two bytes.
+ *
+ * data             - Ruby String to tag; only its associated encoding is
+ *                    changed, no bytes are added or removed (a byte-order
+ *                    mark, if present, stays in the string).
+ * default_encoding - Encoding object applied when the data starts with
+ *                    neither BE_BOM nor LE_BOM.
+ */
+static void
+handle_bom(VALUE data, VALUE default_encoding) {
+    VALUE leading_bytes;
+    VALUE chosen_encoding;
+
+    /* Tag as binary first so the two-character prefix below is exactly the
+     * first two bytes, whatever encoding the string carried before. */
+    rb_enc_associate(data, rb_to_encoding(binary_encoding));
+    leading_bytes = rb_str_substr(data, 0, 2);
+
+    if (rb_str_equal(leading_bytes, BE_BOM)) {
+        chosen_encoding = utf16be_encoding;
+    } else if (rb_str_equal(leading_bytes, LE_BOM)) {
+        chosen_encoding = utf16le_encoding;
+    } else {
+        /* No byte-order mark: fall back to what the caller supplied. */
+        chosen_encoding = default_encoding;
+    }
+
+    rb_enc_associate(data, rb_to_encoding(chosen_encoding));
+}
+#endif
+
static void pb_free(void *ptr) {
if (ptr)
CFRelease((PasteboardRef)ptr);
@@ -258,7 +287,12 @@ pb_copy_item_flavor_data(VALUE self, VALUE identifier, VALUE flavor) {
#if HAVE_RB_STR_ENCODE
encoding = rb_hash_aref(cPBTypeEncodings, flavor);
- rb_enc_associate(data, rb_to_encoding(encoding));
+ if (rb_str_equal(flavor, utf16_external_flavor) ||
+ rb_str_equal(flavor, utf16_internal_flavor)) {
+ handle_bom(data, encoding);
+ } else {
+ rb_enc_associate(data, rb_to_encoding(encoding));
+ }
#endif
return data;
@@ -411,10 +445,20 @@ Init_pasteboard(void) {
#if HAVE_RB_STR_ENCODE
utf8_encoding = rb_enc_from_encoding(rb_utf8_encoding());
+ binary_encoding = rb_const_get_at(rb_cEncoding, rb_intern("BINARY"));
+ utf16be_encoding = rb_const_get_at(rb_cEncoding, rb_intern("UTF_16BE"));
+ utf16le_encoding = rb_const_get_at(rb_cEncoding, rb_intern("UTF_16LE"));
+ native_encoding = rb_const_get_at(cPB, rb_intern("NATIVE_ENCODING"));
usascii_encoding = rb_enc_from_encoding(rb_usascii_encoding());
-#else
- utf8_encoding = Qnil;
- usascii_encoding = Qnil;
+
+ utf16_external_flavor = rb_const_get_at(cPBType,
+ rb_intern("PLAIN_TEXT_UTF16_EXTERNAL"));
+
+ utf16_internal_flavor = rb_const_get_at(cPBType,
+ rb_intern("PLAIN_TEXT_UTF16"));
+
+ BE_BOM = rb_const_get_at(cPB, rb_intern("BE_BOM"));
+ LE_BOM = rb_const_get_at(cPB, rb_intern("LE_BOM"));
#endif
rb_define_const(cPB, "MODIFIED", ULONG2NUM(kPasteboardModified));
View
@@ -18,6 +18,19 @@ class Error < RuntimeError
VERSION = '1.0'
+ if defined? Encoding then # byte-order constants exist only when this Ruby has Encoding
+ BE_BOM = "\xFE\xFF" # :nodoc:
+ LE_BOM = "\xFF\xFE" # :nodoc:
+ # Tag both marks as binary so they are compared as raw bytes.
+ BE_BOM.force_encoding Encoding::BINARY
+ LE_BOM.force_encoding Encoding::BINARY
+ # 'S' packs a 16-bit integer in native byte order; low byte first means little-endian.
+ little = [1].pack('S') == "\001\000" ? true : false
+ # Byte-order mark matching the byte order detected above.
+ NATIVE_BOM = little ? LE_BOM : BE_BOM
+ NATIVE_ENCODING = little ? Encoding::UTF_16LE : Encoding::UTF_16BE # UTF-16 encoding for the detected byte order
+ end
+
##
# General clipboard pasteboard type. Cut, copy and paste use this
# pasteboard.
View
@@ -91,7 +91,7 @@ module Pasteboard::Type
# Unicode-16, native byte order, with an optional byte-order mark (BOM).
PLAIN_TEXT_UTF16 = 'public.utf16-plain-text'
- Encodings[PLAIN_TEXT_UTF16] = Encoding::UTF_16LE if encoding
+ Encodings[PLAIN_TEXT_UTF16] = Pasteboard::NATIVE_ENCODING if encoding
# Classic Mac OS text.

0 comments on commit 679bd5e

Please sign in to comment.