Fixed error reported by Alexander Wingard

devinus · Apr 10, 2010 · 787f596 · 787f596
1 parent c87a9ee
commit 787f596
Show file tree

Hide file tree

Showing 2 changed files with 74 additions and 48 deletions.
diff --git a/src/erlsom_pass2.erl b/src/erlsom_pass2.erl
@@ -71,8 +71,15 @@ secondPass(IntermediateStruct,
   Types2 = pass3(Types1),
   Types3 = pass4(Types2, Info),
   DocType = make_Document(GlobalElements, [], Info),
-  DocType2 =  pass3Type(DocType, Types3), %% this is a list
-  Types5 = DocType2 ++ Types3,
+  %% fiddle a bit more - replace references in the _document that point
+  %% to unknown types by {#PCDATA, ...}, assuming that they point to 
+  %% simple types that are no longer visible. Even if that assumption 
+  %% would be wrong, it wouldn't have worked otherwise either, so it won't 
+  %% break anything (but it fixes the error pointed out by Alexander Wingard 
+  %% for those cases).
+  DocType2 = removeDeadRefsFromDoc(DocType, Types3), 
+  DocType3 =  pass3Type(DocType2, Types2), %% this is a list
+  Types5 = DocType3 ++ Types3,
   Types6 = pass5(Types5, Info),
   #model{tps = Types6, nss = NS, tns = Tns,
          th = TypeHierarchy}.
@@ -176,7 +183,20 @@ make_Document([], Acc, _Info) ->
         els = [#el{alts = Acc, mn = 1,  mx = 1, nr = 1}], 
         atts = [], 
         nr = 1}.
-
+
+removeDeadRefsFromDoc(Type = #type{nm = '_document', els = [El = #el{alts = Alts}]}, Types) ->
+  F = fun(Alt = #alt{tp = {'#PCDATA', _}}) -> Alt;
+         (Alt = #alt{tp = AltType}) -> 
+           case lists:keysearch(AltType, #type.nm, Types) of
+             {value, _} -> Alt;
+             _ -> Alt#alt{tp = list_to_type("##string")}
+           end
+      end,
+  LivingAlts = [F(Alt) || Alt <- Alts],
+  Type#type{els=[El#el{alts = LivingAlts}]};
+removeDeadRefsFromDoc(Type, _Types) ->
+  Type.
+
 
 %% Each Type is of the form {TypeName, TypeType, Elements, Attributes, NrOfElements}.
 %% - TypeName is an atom.
@@ -382,8 +402,14 @@ translateAlternative(#alternative{tag=Tag, type=Type, real=Real, min=Min, max=Ma
       %% debug("Tag " ++ Tag),
       #alt{tag = list_to_atom(Tag), tp = list_to_type("##string"), rl = Real, mn = Min, mx = Max, anyInfo = AnyInfo};
     {value, #typeInfo{typeType = globalElementRefOnly, typeRef=Ref, elements=undefined, attributes=[]}} ->
-      %% debug("Tag: " ++ Tag ++ " Ref: " ++ Ref),
-      #alt{tag = list_to_atom(Tag), tp = list_to_type(Ref), rl = Real, mn = Min, mx = Max, anyInfo = AnyInfo};
+      %% (error reported by Alexander Wingard) this can also be a ref to a simple type (as above)
+      Tp = case lists:keysearch(Ref, #typeInfo.typeName, Types) of
+        {value, #typeInfo{typeRef="##string"}} ->
+          list_to_type("##string");
+        _ -> 
+          list_to_type(Ref)
+      end,
+      #alt{tag = list_to_atom(Tag), tp = Tp, rl = Real, mn = Min, mx = Max, anyInfo = AnyInfo};
     %% If Type has only 1 element and this element has only 1 alternative with tag = '#text' and 'real'  = false
     %% *and* the element has no attributes, then there is no point in referring to that type.
     %% However, this remains on the TODO list for now.

diff --git a/src/ucs.erl b/src/ucs.erl
@@ -53,7 +53,7 @@
 
 
 %%% Test if Ch is a legitimate ISO-10646 character code
-is_iso10646(Ch) when integer(Ch), Ch >= 0 ->
+is_iso10646(Ch) when is_integer(Ch), Ch >= 0 ->
     if Ch  < 16#D800 -> true;
        Ch  < 16#E000 -> false;	% Surrogates
        Ch  < 16#FFFE -> true;
@@ -70,7 +70,7 @@ is_unicode(_) -> false.
 
 %%% Test if Ch is a legitimate ISO-10646 character code belonging to
 %%% the basic multi-lingual plane (BMP).
-is_bmpchar(Ch) when integer(Ch), Ch >= 0 ->
+is_bmpchar(Ch) when is_integer(Ch), Ch >= 0 ->
     if Ch < 16#D800 -> true;
        Ch < 16#E000 -> false;	% Surrogates
        Ch < 16#FFFE -> true;
@@ -79,15 +79,15 @@ is_bmpchar(Ch) when integer(Ch), Ch >= 0 ->
 is_bmpchar(_) -> false.
 
 %%% Test for legitimate Latin-1 code
-is_latin1(Ch) when integer(Ch), Ch >= 0, Ch =< 255 -> true;
+is_latin1(Ch) when is_integer(Ch), Ch >= 0, Ch =< 255 -> true;
 is_latin1(_) -> false.
 
 %%% Test for legitimate ASCII code
-is_ascii(Ch) when integer(Ch), Ch >= 0, Ch =< 127 -> true;
+is_ascii(Ch) when is_integer(Ch), Ch >= 0, Ch =< 127 -> true;
 is_ascii(_) -> false.
 
 %%% Test for char an element of ISO-646.basic set
-is_iso646_basic(Ch) when integer(Ch), Ch >= $\s ->
+is_iso646_basic(Ch) when is_integer(Ch), Ch >= $\s ->
     if Ch =< $Z ->
 	    %% Everything in this range except $# $$ and $@
 	    if Ch > $$ -> Ch =/= $@;
@@ -103,7 +103,7 @@ is_iso646_basic(_) ->
 
 %%% Test for char a visible Latin-1 char, i.e. a non-control Latin-1 char,
 %%% excepting non-break space (but including space).
-is_visible_latin1(Ch) when integer(Ch), Ch >= $\s ->
+is_visible_latin1(Ch) when is_integer(Ch), Ch >= $\s ->
     if Ch =< $~ -> true;
        Ch >= 161 -> Ch =< 255
     end;
@@ -112,77 +112,77 @@ is_visible_latin1(_) ->
 
 %%% Test for char a visible ASCII char, i.e. a non-control ASCII char
 %%% (including space).
-is_visible_ascii(Ch) when integer(Ch), Ch >= $\s -> Ch =< $~;
+is_visible_ascii(Ch) when is_integer(Ch), Ch >= $\s -> Ch =< $~;
 is_visible_ascii(_) -> false.
 
 
 %%% UCS-4, big and little endian versions, encoding and decoding
-to_ucs4be(List) when list(List) -> lists:flatmap(fun to_ucs4be/1, List);
+to_ucs4be(List) when is_list(List) -> lists:flatmap(fun to_ucs4be/1, List);
 to_ucs4be(Ch) -> char_to_ucs4be(Ch).
 
-from_ucs4be(Bin) when binary(Bin) -> from_ucs4be(Bin,[],[]);
+from_ucs4be(Bin) when is_binary(Bin) -> from_ucs4be(Bin,[],[]);
 from_ucs4be(List) -> from_ucs4be(list_to_binary(List),[],[]).
 
-from_ucs4be(Bin,Tail) when binary(Bin) -> from_ucs4be(Bin,[],Tail);
+from_ucs4be(Bin,Tail) when is_binary(Bin) -> from_ucs4be(Bin,[],Tail);
 from_ucs4be(List,Tail) -> from_ucs4be(list_to_binary(List),[],Tail).
 
-to_ucs4le(List) when list(List) -> lists:flatmap(fun to_ucs4le/1, List);
+to_ucs4le(List) when is_list(List) -> lists:flatmap(fun to_ucs4le/1, List);
 to_ucs4le(Ch) -> char_to_ucs4le(Ch).
 
-from_ucs4le(Bin) when binary(Bin) -> from_ucs4le(Bin,[],[]);
+from_ucs4le(Bin) when is_binary(Bin) -> from_ucs4le(Bin,[],[]);
 from_ucs4le(List) -> from_ucs4le(list_to_binary(List),[],[]).
 
-from_ucs4le(Bin,Tail) when binary(Bin) -> from_ucs4le(Bin,[],Tail);
+from_ucs4le(Bin,Tail) when is_binary(Bin) -> from_ucs4le(Bin,[],Tail);
 from_ucs4le(List,Tail) -> from_ucs4le(list_to_binary(List),[],Tail).
 
 %%% UCS-2, big and little endian versions, encoding and decoding
-to_ucs2be(List) when list(List) -> lists:flatmap(fun to_ucs2be/1, List);
+to_ucs2be(List) when is_list(List) -> lists:flatmap(fun to_ucs2be/1, List);
 to_ucs2be(Ch) -> char_to_ucs2be(Ch).
 
-from_ucs2be(Bin) when binary(Bin) -> from_ucs2be(Bin,[],[]);
+from_ucs2be(Bin) when is_binary(Bin) -> from_ucs2be(Bin,[],[]);
 from_ucs2be(List) -> from_ucs2be(list_to_binary(List),[],[]).
 
-from_ucs2be(Bin,Tail) when binary(Bin) -> from_ucs2be(Bin,[],Tail);
+from_ucs2be(Bin,Tail) when is_binary(Bin) -> from_ucs2be(Bin,[],Tail);
 from_ucs2be(List,Tail) -> from_ucs2be(list_to_binary(List),[],Tail).
 
-to_ucs2le(List) when list(List) -> lists:flatmap(fun to_ucs2le/1, List);
+to_ucs2le(List) when is_list(List) -> lists:flatmap(fun to_ucs2le/1, List);
 to_ucs2le(Ch) -> char_to_ucs2le(Ch).
 
-from_ucs2le(Bin) when binary(Bin) -> from_ucs2le(Bin,[],[]);
+from_ucs2le(Bin) when is_binary(Bin) -> from_ucs2le(Bin,[],[]);
 from_ucs2le(List) -> from_ucs2le(list_to_binary(List),[],[]).
 
-from_ucs2le(Bin,Tail) when binary(Bin) -> from_ucs2le(Bin,[],Tail);
+from_ucs2le(Bin,Tail) when is_binary(Bin) -> from_ucs2le(Bin,[],Tail);
 from_ucs2le(List,Tail) -> from_ucs2le(list_to_binary(List),[],Tail).
 
 
 %%% UTF-16, big and little endian versions, encoding and decoding
-to_utf16be(List) when list(List) -> lists:flatmap(fun to_utf16be/1, List);
+to_utf16be(List) when is_list(List) -> lists:flatmap(fun to_utf16be/1, List);
 to_utf16be(Ch) -> char_to_utf16be(Ch).
 
-from_utf16be(Bin) when binary(Bin) -> from_utf16be(Bin,[],[]);
+from_utf16be(Bin) when is_binary(Bin) -> from_utf16be(Bin,[],[]);
 from_utf16be(List) -> from_utf16be(list_to_binary(List),[],[]).
 
-from_utf16be(Bin,Tail) when binary(Bin) -> from_utf16be(Bin,[],Tail);
+from_utf16be(Bin,Tail) when is_binary(Bin) -> from_utf16be(Bin,[],Tail);
 from_utf16be(List,Tail) -> from_utf16be(list_to_binary(List),[],Tail).
 
-to_utf16le(List) when list(List) -> lists:flatmap(fun to_utf16le/1, List);
+to_utf16le(List) when is_list(List) -> lists:flatmap(fun to_utf16le/1, List);
 to_utf16le(Ch) -> char_to_utf16le(Ch).
 
-from_utf16le(Bin) when binary(Bin) -> from_utf16le(Bin,[],[]);
+from_utf16le(Bin) when is_binary(Bin) -> from_utf16le(Bin,[],[]);
 from_utf16le(List) -> from_utf16le(list_to_binary(List),[],[]).
 
-from_utf16le(Bin,Tail) when binary(Bin) -> from_utf16le(Bin,[],Tail);
+from_utf16le(Bin,Tail) when is_binary(Bin) -> from_utf16le(Bin,[],Tail);
 from_utf16le(List,Tail) -> from_utf16le(list_to_binary(List),[],Tail).
 
 
 %%% UTF-8 encoding and decoding
-to_utf8(List) when list(List) -> lists:flatmap(fun to_utf8/1, List);
+to_utf8(List) when is_list(List) -> lists:flatmap(fun to_utf8/1, List);
 to_utf8(Ch) -> char_to_utf8(Ch).
 
-from_utf8(Bin) when binary(Bin) -> from_utf8(Bin,[],[]);
+from_utf8(Bin) when is_binary(Bin) -> from_utf8(Bin,[],[]);
 from_utf8(List) -> from_utf8(list_to_binary(List),[],[]).
 
-from_utf8(Bin,Tail) when binary(Bin) -> from_utf8(Bin,[],Tail);
+from_utf8(Bin,Tail) when is_binary(Bin) -> from_utf8(Bin,[],Tail);
 from_utf8(List,Tail) -> from_utf8(list_to_binary(List),[],Tail).
 
 
@@ -365,7 +365,7 @@ from_ucs2le(Bin,Acc,Tail) ->
 %%% Possible errors decoding UTF-16:
 %%%	- Unmatched surrogate-pair code in string.
 %%%	- 16#FFFE or 16#FFFF character in string.
-char_to_utf16be(Ch) when integer(Ch), Ch >= 0 ->
+char_to_utf16be(Ch) when is_integer(Ch), Ch >= 0 ->
     if Ch =< 16#FFFF ->
 	    if Ch < 16#D800; Ch >= 16#E000, Ch < 16#FFFE ->
 		    [Ch bsr 8, Ch band 16#FF]
@@ -397,7 +397,7 @@ from_utf16be(Bin,Acc,Tail) ->
     io:format("ucs Error: Bin=~p~n     Acc=~p~n     Tail=~p~n",[Bin,Acc,Tail]),
     {error,not_utf16be}.
 
-char_to_utf16le(Ch) when integer(Ch), Ch >= 0 ->
+char_to_utf16le(Ch) when is_integer(Ch), Ch >= 0 ->
     if Ch =< 16#FFFF ->
 	    if Ch < 16#D800; Ch >= 16#E000, Ch < 16#FFFE ->
 		    [Ch band 16#FF, Ch bsr 8]
@@ -442,7 +442,7 @@ from_utf16le(Bin,Acc,Tail) ->
 %%% 	- Non-canonical encoding used.
 %%%	- Surrogate-pair code encoded as UTF-8.
 %%%	- 16#FFFE or 16#FFFF character in string.
-char_to_utf8(Ch) when integer(Ch), Ch >= 0 ->
+char_to_utf8(Ch) when is_integer(Ch), Ch >= 0 ->
     if Ch < 128 ->
 	    %% 0yyyyyyy
 	    [Ch];
@@ -559,43 +559,43 @@ is_incharset(In,Cs) when Cs=='ansi_x3.4-1968';Cs=='iso-ir-6';
 			 Cs=='ascii';Cs=='iso646-us';Cs=='us-ascii';Cs=='us';
 			 Cs=='ibm367';Cs=='cp367';Cs=='csascii' -> % US-ASCII
     if
-	integer(In) -> is_ascii(In);
-	list(In) -> test_charset(fun is_ascii/1,In)
+	is_integer(In) -> is_ascii(In);
+	is_list(In) -> test_charset(fun is_ascii/1,In)
     end;
 is_incharset(In,Cs) when Cs=='iso-10646-utf-1';Cs=='csiso10646utf1' ->
     if
-	integer(In) -> is_unicode(In);
-	list(In) -> test_charset(fun is_unicode/1, In)
+	is_integer(In) -> is_unicode(In);
+	is_list(In) -> test_charset(fun is_unicode/1, In)
     end;
 is_incharset(In,Cs) when Cs=='iso_646.basic:1983';Cs=='ref';
 			 Cs=='csiso646basic1983' ->
     if
-	integer(In) -> is_iso646_basic(In);
-	list(In) -> test_charset(fun is_iso646_basic/1, In)
+	is_integer(In) -> is_iso646_basic(In);
+	is_list(In) -> test_charset(fun is_iso646_basic/1, In)
     end;
 is_incharset(In,Cs) when Cs=='iso_8859-1:1987';Cs=='iso-ir-100';
 			 Cs=='iso_8859-1';Cs=='latin1';Cs=='l1';Cs=='ibm819';
 			 Cs=='cp819';Cs=='csisolatin1' ->
     if
-	integer(In) -> is_latin1(In);
-	list(In) -> test_charset(fun is_latin1/1, In)
+	is_integer(In) -> is_latin1(In);
+	is_list(In) -> test_charset(fun is_latin1/1, In)
     end;
-is_incharset(In,Charset) when integer(In) ->
+is_incharset(In,Charset) when is_integer(In) ->
     case to_unicode([In],Charset) of
 	{error,unsupported_charset} ->
 	    {error,unsupported_charset};
 	{error,_} ->
 	    false;
-	[Int] when integer(Int) ->
+	[Int] when is_integer(Int) ->
 	    true
     end;
-is_incharset(In,Charset) when list(In) ->
+is_incharset(In,Charset) when is_list(In) ->
     case to_unicode(In,Charset) of
 	{error,unsupported_charset} ->
 	    {error,unsupported_charset};
 	{error,_} ->
 	    false;
-	[Int] when integer(Int) ->
+	[Int] when is_integer(Int) ->
 	    true
     end.