Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

add r15 support #2

Open
wants to merge 18 commits into from

3 participants

@mihawk

with compatibility with older OTP versions

mihawk and others added some commits
@mihawk mihawk add R15 driver support 648a65a
@mihawk mihawk add comment R15 0de5ec7
@nbaronov nbaronov Update c_src/iconv_drv.c
nit: Fix incorrect prototype of driver_send_bin()
1b20fea
@mihawk mihawk Merge pull request #1 from nbaronov/patch-1
Update c_src/iconv_drv.c
2fb36e2
@eriksoe eriksoe Add a bunch of unit tests. Quite a few of them fail at present. f5d9296
@eriksoe eriksoe Add simple test case for the double-expand bug. 4d71431
@eriksoe eriksoe Fix for the double-expand bug (op is calculated wrongly the second time) 656f6b5
@eriksoe eriksoe Remove duplicate calculation by introducing 'newolen'.
Calculate oleft without recurrence.
29a40f1
@eriksoe eriksoe Remove NUL-termination stuff. Neither iconv nor Erlang cares about it.
- There is no special reason for the driver to do NUL termination.
- In fact, it may surprise that it doesn't return characters after the first NUL.
- Furthermore, this change does away with the strlen() call.
e76325b
@eriksoe eriksoe Fix tests: not all bytes are valid in all of the iso-8859-* encodings 89a5c46
@eriksoe eriksoe Fix type mismatch in test generator. d2ce256
@eriksoe eriksoe Remove debug output from test code. 0b8fcd3
@eriksoe eriksoe Fail instead of doing the wrong thing on large inputs.
Silently ignoring the last N*65536 bytes of the input is not a good idea...
8c45b61
@eriksoe eriksoe Handle input binaries up to 2^32 bytes, rather than 2^16 bytes. 4d7543e
@eriksoe eriksoe Fix the memory leak. dd6d731
@mihawk mihawk Merge pull request #2 from trifork/master
Fix a number of bugs, and add unit tests
bc855bd
@eriksoe eriksoe Bump application version number to 1.0.2.
I believe it is in order... :-)
5f0962d
@mihawk mihawk Merge pull request #3 from trifork/master
Bump application version number to 1.0.2.
195e464
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Jan 2, 2012
  1. @mihawk

    add R15 driver support

    mihawk authored
  2. @mihawk

    add comment R15

    mihawk authored
Commits on Jan 29, 2013
  1. @nbaronov

    Update c_src/iconv_drv.c

    nbaronov authored
    nit: Fix incorrect prototype of driver_send_bin()
  2. @mihawk

    Merge pull request #1 from nbaronov/patch-1

    mihawk authored
    Update c_src/iconv_drv.c
Commits on May 20, 2013
  1. @eriksoe
  2. @eriksoe
  3. @eriksoe
  4. @eriksoe

    Remove duplicate calculation by introducing 'newolen'.

    eriksoe authored
    Calculate oleft without recurrence.
  5. @eriksoe

    Remove NUL-termination stuff. Neither iconv nor Erlang cares about it.

    eriksoe authored
    - There is no special reason for the driver to do NUL termination.
    - In fact, it may surprise that it doesn't return characters after the first NUL.
    - Furthermore, this change does away with the strlen() call.
  6. @eriksoe
  7. @eriksoe
  8. @eriksoe
  9. @eriksoe

    Fail instead of doing the wrong thing on large inputs.

    eriksoe authored
    Silently ignoring the last N*65536 bytes of the input is not a good idea...
  10. @eriksoe
  11. @eriksoe

    Fix the memory leak.

    eriksoe authored
Commits on May 22, 2013
  1. @mihawk

    Merge pull request #2 from trifork/master

    mihawk authored
    Fix a number of bugs, and add unit tests
Commits on Aug 27, 2013
  1. @eriksoe

    Bump application version number to 1.0.2.

    eriksoe authored
    I believe it is in order... :-)
Commits on Aug 29, 2013
  1. @mihawk

    Merge pull request #3 from trifork/master

    mihawk authored
    Bump application version number to 1.0.2.
This page is out of date. Refresh to see the latest.
View
10 .gitignore
@@ -0,0 +1,10 @@
+*.beam
+*.app
+*.dump
+*.o
+*.so
+
+*.bak
+*~
+
+/.eunit
View
4 Rakefile
@@ -169,8 +169,8 @@ task :compile => [:contrib, 'ebin', 'priv'] + HEADERS + OBJ + ['priv/iconv_drv.s
end
task :install => [:compile] do
- sh "mkdir #{ROOTDIR}/lib/iconv-1.0.1" unless File.directory? "#{ROOTDIR}/lib/iconv-1.0.1"
- sh "cp -r src ebin c_src priv #{ROOTDIR}/lib/iconv-1.0.1"
+ sh "mkdir #{ROOTDIR}/lib/iconv-1.0.2" unless File.directory? "#{ROOTDIR}/lib/iconv-1.0.2"
+ sh "cp -r src ebin c_src priv #{ROOTDIR}/lib/iconv-1.0.2"
end
task :contrib do
View
91 c_src/iconv_drv.c
@@ -76,7 +76,17 @@
((vec)[(i)+1] = (size)), \
(i+2))
-static int driver_send_bin();
+/*
+ * R15B changed several driver callbacks to use ErlDrvSizeT and
+ * ErlDrvSSizeT typedefs instead of int.
+ * This provides missing typedefs on older OTP versions.
+ */
+#if ERL_DRV_EXTENDED_MAJOR_VERSION < 2
+typedef int ErlDrvSizeT;
+typedef int ErlDrvSSizeT;
+#endif
+
+static ErlDrvSSizeT driver_send_bin();
/* atoms which are sent to erlang */
static ErlDrvTermData am_ok;
@@ -115,7 +125,7 @@ static void iconvdrv_stop(ErlDrvData drv_data)
/* send {P, value, Bin} to caller */
-static int driver_send_bin(t_iconvdrv *iv, ErlDrvBinary *bin, int len)
+static ErlDrvSSizeT driver_send_bin(t_iconvdrv *iv, ErlDrvBinary *bin, ErlDrvSizeT len)
{
int i = 0;
ErlDrvTermData to, spec[10];
@@ -131,7 +141,7 @@ static int driver_send_bin(t_iconvdrv *iv, ErlDrvBinary *bin, int len)
}
/* send {P, ok} to caller */
-static int driver_send_ok(t_iconvdrv *iv)
+static ErlDrvSSizeT driver_send_ok(t_iconvdrv *iv)
{
int i = 0;
ErlDrvTermData to, spec[10];
@@ -146,7 +156,7 @@ static int driver_send_ok(t_iconvdrv *iv)
}
/* send {P, error, Error} to caller */
-static int driver_send_error(t_iconvdrv *iv, ErlDrvTermData *am)
+static ErlDrvSSizeT driver_send_error(t_iconvdrv *iv, ErlDrvTermData *am)
{
int i = 0;
ErlDrvTermData to, spec[8];
@@ -166,9 +176,10 @@ static int driver_send_error(t_iconvdrv *iv, ErlDrvTermData *am)
#define get_int16(s) ((((unsigned char*) (s))[0] << 8) | \
(((unsigned char*) (s))[1]))
-
-#define put_int16(i, s) {((unsigned char*)(s))[0] = ((i) >> 8) & 0xff; \
- ((unsigned char*)(s))[1] = (i) & 0xff;}
+#define get_int32(s) ((((unsigned char*) (s))[0] << 24) | \
+ (((unsigned char*) (s))[1] << 16) | \
+ (((unsigned char*) (s))[2] << 8) | \
+ (((unsigned char*) (s))[3]))
static void iv_open(t_iconvdrv *iv, char *tocode, char *fromcode)
{
@@ -208,17 +219,17 @@ static void iv_conv(t_iconvdrv *iv, iconv_t cd, char *ip, size_t ileft, char ign
{
size_t oleft=ileft;
char *op, *buf;
- int olen = ileft + 1;
+ int olen = ileft;
ErlDrvBinary *bin;
- /* malloc enough for the input size +1 (null terminated),
+ /* malloc enough for the input size,
* with the assumption that the output length will be close to the input
* length. This isn't always the case, but we realloc on E2BIG below. */
buf = malloc(olen);
if (!buf) {
driver_send_error(iv, &am_enomem);
- return;
+ return;
}
op = buf;
@@ -234,29 +245,28 @@ static void iv_conv(t_iconvdrv *iv, iconv_t cd, char *ip, size_t ileft, char ign
driver_send_error(iv, &am_einval);
} else if (errno == E2BIG) {
char *newbuf;
+ int newolen = olen + ileft + oleft;
/* allocate as much additional space as iconv says we need */
- newbuf = realloc(buf, olen + ileft + oleft);
+ newbuf = realloc(buf, newolen);
if (!newbuf) {
- free(buf); /* realloc failed, make sure we free the old buffer*/
driver_send_error(iv, &am_enomem);
- return;
+ goto free_and_return;
}
+ op = newbuf + (op - buf);
buf = newbuf;
- op = buf + (olen - oleft - 1);
- olen += ileft + oleft;
- oleft += ileft;
+ olen = newolen;
+ oleft = olen - (op - buf);
/* keep going */
continue;
} else {
driver_send_error(iv, &am_unknown);
}
- return;
+ goto free_and_return;
}
- *(op++) = 0; /* ensure we null terminate */
if (ileft == 0) {
- /* find the length of the result, minus the terminating NULL */
- olen = strlen(buf);
+ /* find the length of the result */
+ olen = op - buf;
if (!(bin = driver_alloc_binary(olen))) {
driver_send_error(iv, &am_enomem);
} else {
@@ -266,6 +276,9 @@ static void iv_conv(t_iconvdrv *iv, iconv_t cd, char *ip, size_t ileft, char ign
}
}
+free_and_return:
+ /* To ensure cleanup, this is the only exit point after an initial
+ * successful malloc. */
free(buf);
return;
@@ -278,7 +291,7 @@ static void iv_close(t_iconvdrv *iv, iconv_t cd)
return;
}
-static void iconvdrv_from_erlang(ErlDrvData drv_data, char *buf, int len)
+static void iconvdrv_from_erlang(ErlDrvData drv_data, char *buf, ErlDrvSSizeT len)
{
t_iconvdrv *iv = (t_iconvdrv *) drv_data;
char ignore = 0;
@@ -311,15 +324,15 @@ static void iconvdrv_from_erlang(ErlDrvData drv_data, char *buf, int len)
case IV_CONV: {
/*
- * Format: <cd-len:16><cd><ignore><buf-len:16><buf>
+ * Format: <cd-len:16><cd><ignore><buf-len:32><buf>
*/
i = get_int16(bp);
bp += 2;
memcpy(&cd, bp, i-1);
memcpy(&ignore, bp + i -1, 1);
bp += i;
- i = get_int16(bp);
- bp += 2;
+ i = get_int32(bp);
+ bp += 4;
iv_conv(iv, cd, bp, i, ignore);
break;
@@ -358,14 +371,26 @@ DRIVER_INIT(iconvdrv)
am_e2big = driver_mk_atom("e2big");
am_unknown = driver_mk_atom("unknown");
- iconvdrv_driver_entry.init = NULL; /* Not used */
- iconvdrv_driver_entry.start = iconvdrv_start;
- iconvdrv_driver_entry.stop = iconvdrv_stop;
- iconvdrv_driver_entry.output = iconvdrv_from_erlang;
- iconvdrv_driver_entry.ready_input = NULL;
- iconvdrv_driver_entry.ready_output = NULL;
- iconvdrv_driver_entry.driver_name = "iconv_drv";
- iconvdrv_driver_entry.finish = NULL;
- iconvdrv_driver_entry.outputv = NULL;
+ iconvdrv_driver_entry.init = NULL; /* Not used */
+ iconvdrv_driver_entry.start = iconvdrv_start;
+ iconvdrv_driver_entry.stop = iconvdrv_stop;
+ iconvdrv_driver_entry.output = iconvdrv_from_erlang;
+ iconvdrv_driver_entry.ready_input = NULL;
+ iconvdrv_driver_entry.ready_output = NULL;
+ iconvdrv_driver_entry.driver_name = "iconv_drv";
+ iconvdrv_driver_entry.finish = NULL;
+ iconvdrv_driver_entry.outputv = NULL;
+/* Added in Erlang/OTP R15B: */
+ iconvdrv_driver_entry.ready_async = NULL;
+ iconvdrv_driver_entry.flush = NULL;
+ iconvdrv_driver_entry.call = NULL;
+ iconvdrv_driver_entry.event = NULL;
+ iconvdrv_driver_entry.extended_marker = ERL_DRV_EXTENDED_MARKER;
+ iconvdrv_driver_entry.major_version = ERL_DRV_EXTENDED_MAJOR_VERSION;
+ iconvdrv_driver_entry.minor_version = ERL_DRV_EXTENDED_MINOR_VERSION;
+ iconvdrv_driver_entry.driver_flags = 0;
+ iconvdrv_driver_entry.handle2 = NULL;
+ iconvdrv_driver_entry.process_exit = NULL;
+ iconvdrv_driver_entry.stop_select = NULL;
return &iconvdrv_driver_entry;
}
View
2  src/iconv.app.src
@@ -1,6 +1,6 @@
{application,iconv,
[{description,"Interface to the iconv character set convertion library"},
- {vsn,"1.0.1"},
+ {vsn,"1.0.2"},
{modules,[]},
{registered,[iconv]},
{env, []},
View
10 src/iconv.erl
@@ -112,9 +112,13 @@ handle_call({open, To, From}, _, S) ->
handle_call({conv, Cd, Buf}, _, S) ->
CdLen = byte_size(Cd),
BufLen = byte_size(Buf),
- Msg = <<?IV_CONV,CdLen:16,Cd/binary,BufLen:16,Buf/binary>>,
- Reply = call_drv(S#state.port, Msg),
- {reply, Reply, S};
+ if BufLen >= (1 bsl 32) ->
+ {reply, {error, {cannot_handle_large_input, BufLen}}, S};
+ true ->
+ Msg = <<?IV_CONV,CdLen:16,Cd/binary,BufLen:32,Buf/binary>>,
+ Reply = call_drv(S#state.port, Msg),
+ {reply, Reply, S}
+ end;
%%
handle_call({close, Cd}, _, S) ->
View
151 test/iconv_test.erl
@@ -0,0 +1,151 @@
+-module(iconv_test).
+
+-include_lib("eunit/include/eunit.hrl").
+
+%%%========== Test collection ============================================
+iconv_test_() ->
+ {setup,
+ fun () -> {ok,_} = iconv:start() end,
+ fun(_) -> iconv:stop() end,
+ [
+ {"Convert from latin-1 to utf-8", fun latin1_to_utf8/0}
+ , {"Double-expand corruption", fun double_expand/0}
+ , {"Convert from utf-8 to latin-1 ", fun utf8_to_latin1/0}
+ , {"Big test", fun bigtest/0}
+ , {"Bad-input test", fun errortest/0}
+ , [{"Round-trip test "++CS++"->utf8->"++CS, fun() -> roundtrip(CS) end}
+ || CS <- ["latin1",
+ "ISO-8859-1",
+ "ISO-8859-2",
+ "ISO-8859-3",
+ "ISO-8859-4",
+ "ISO-8859-5",
+ "ISO-8859-6",
+ "ISO-8859-7",
+ "ISO-8859-8",
+ "ISO-8859-9",
+ "ISO-8859-10",
+ "ISO-8859-11",
+ "ISO-8859-13",
+ "ISO-8859-14",
+ "ISO-8859-15",
+ "ISO-8859-16"]]
+ ]}.
+
+-ifdef(WITH_LEAK_TEST).
+leak_test_() ->
+ {setup,
+ fun () -> {ok,_} = iconv:start() end,
+ fun(_) -> iconv:stop() end,
+ {timeout, 120,
+ fun leaktest/0}}.
+-endif.
+
+%%%============================================================
+
+test_strings() ->
+ Latin1Characters = lists:seq(0,255),
+ [%% Basics:
+ "", "Hello, World!",
+ %% Non-ASCII characters:
+ "Blåbærgrød",
+ "test æøå",
+ "æøåÅØÆ",
+ [128,255]] ++
+ %% All one-character and two-character strings:
+ [[X] || X <- Latin1Characters] ++
+ [[X,Y] || X <- Latin1Characters, Y <- Latin1Characters] ++
+ %% Random input:
+ [binary_to_list(crypto:rand_bytes(X)) || X <- lists:seq(1,200)].
+
+double_expand() ->
+ {ok, CD} = iconv:open("utf-8", "ISO-8859-1"),
+ latin1_to_utf8(CD, "Test æøå"),
+ iconv:close(CD).
+
+latin1_to_utf8() ->
+ {ok, CD} = iconv:open("utf-8", "ISO-8859-1"),
+ [latin1_to_utf8(CD, X) || X <- test_strings()],
+ iconv:close(CD).
+
+latin1_to_utf8(CD, S) ->
+ In = list_to_binary(S),
+ Out = unicode:characters_to_binary(S, latin1),
+ ?assertEqual({ok, Out}, iconv:conv(CD, In)).
+
+
+utf8_to_latin1() ->
+ {ok, CD} = iconv:open("ISO-8859-1", "utf-8"),
+ [utf8_to_latin1(CD, X) || X <- test_strings()],
+ iconv:close(CD).
+
+utf8_to_latin1(CD, S) ->
+ In = unicode:characters_to_binary(S, latin1),
+ Out = list_to_binary(S),
+ ?assertEqual({ok, Out}, iconv:conv(CD, In)).
+
+roundtrip(CS) ->
+ IllegalBytes = illegal_bytes_for_encoding(CS),
+ Bytes = lists:seq(0,255) -- IllegalBytes,
+ TestStrings =
+ %% All zero-, one-, and two-byte sequences:
+ [<<>>] ++
+ [<<X>> || X <- Bytes] ++
+ [<<X,Y>> || X <- Bytes, Y <- Bytes] ++
+ %% Random input:
+ [bytes_not_in(crypto:rand_bytes(X), IllegalBytes)
+ || X <- lists:seq(1,200)],
+
+ {ok, CD1} = iconv:open("utf-8", CS),
+ {ok, CD2} = iconv:open(CS, "utf-8"),
+ [roundtrip(CD1, CD2, X) || X <- TestStrings],
+ iconv:close(CD1),
+ iconv:close(CD2).
+
+roundtrip(CD1, CD2, In) ->
+ {ok, Tmp} = iconv:conv(CD1, In),
+ ?assertEqual({ok, In}, iconv:conv(CD2, Tmp)).
+
+bytes_not_in(Bin, Exclude) ->
+ << <<X>> || <<X>> <= Bin, not lists:member(X,Exclude)>>.
+
+illegal_bytes_for_encoding("ISO-8859-3") -> [165,174,190,195,208,227,240];
+illegal_bytes_for_encoding("ISO-8859-6") -> lists:seq(161,163)++lists:seq(165,171)++lists:seq(174,186)++lists:seq(188,190)++[192]++lists:seq(219,223)++lists:seq(243,255);
+illegal_bytes_for_encoding("ISO-8859-7") -> [174,210,255];
+illegal_bytes_for_encoding("ISO-8859-8") -> [161]++lists:seq(191,222)++[251,252,255];
+illegal_bytes_for_encoding("ISO-8859-11") -> lists:seq(219,222)++lists:seq(252,255);
+illegal_bytes_for_encoding(_) -> [].
+
+
+bigtest() ->
+ {ok, CD} = iconv:open("latin1", "utf-8"),
+ [begin
+ In = list_to_binary(string:copies("x",100*N)),
+ {ok,Out} = iconv:conv(CD, In),
+ %% io:format(user, "DB| ~w~n vs ~w~n", [In, iconv:conv(CD, In)]),
+ ?assertMatch({N,X,X}, {N,byte_size(In), byte_size(Out)}),
+ ?assertMatch({N,{ok,In}}, {N,iconv:conv(CD, In)})
+ end
+ || N <- lists:seq(655,1000)],
+ iconv:close(CD).
+
+errortest() ->
+ {ok, CD} = iconv:open("ISO-8859-1", "utf-8"),
+ ?assertEqual({ok, <<>>}, iconv:conv(CD, <<>>)),
+ ?assertEqual({error, eilseq}, iconv:conv(CD, <<2#10000000>>)),
+ ?assertEqual({error, einval}, iconv:conv(CD, <<2#11100000>>)),
+ ?assertEqual({error, einval}, iconv:conv(CD, <<2#11100000, 2#10000000>>)),
+ iconv:close(CD).
+
+leaktest() ->
+ In = list_to_binary(string:copies("x",60000)),
+ {ok, CD} = iconv:open("latin1", "utf-8"),
+ erlang:display(erlang:memory()),
+ [begin
+ ?assertMatch({error,eilseq}, iconv:conv(CD, <<In/binary, 16#80>>)),
+ ?assertMatch({error,einval}, iconv:conv(CD, <<In/binary, 16#E0>>))
+ %% timer:sleep(1)
+ end
+ || _ <- lists:seq(1,600000)],
+ erlang:display(erlang:memory()),
+ iconv:close(CD).
Something went wrong with that request. Please try again.