Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
  • 4 commits
  • 5 files changed
  • 0 comments
  • 1 contributor
100  ext/nokogiri/xml_sax_parser.c
@@ -177,6 +177,102 @@ static void cdata_block(void * ctx, const xmlChar * value, int len)
177 177
   rb_funcall(doc, rb_intern("cdata_block"), 1, string);
178 178
 }
179 179
 
  180
+static void entity_declaration(void * ctx,
  181
+    const xmlChar *name,
  182
+    int type,
  183
+    const xmlChar *publicId,
  184
+    const xmlChar *systemId,
  185
+    xmlChar *content)
  186
+{
  187
+  VALUE self = (VALUE)ctx;
  188
+  VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
  189
+  VALUE doc = rb_funcall(self, rb_intern("document"), 0);
  190
+
  191
+  char * MAYBE_UNUSED(encoding) = RTEST(enc) ? StringValuePtr(enc) : NULL;
  192
+
  193
+  rb_funcall(doc,
  194
+      rb_intern("entity_declaration"),
  195
+      5,
  196
+      NOKOGIRI_STR_NEW2(name, encoding),
  197
+      INT2NUM(type),
  198
+      publicId == NULL ? Qnil : NOKOGIRI_STR_NEW2(publicId, encoding),
  199
+      systemId == NULL ? Qnil : NOKOGIRI_STR_NEW2(systemId, encoding),
  200
+      content == NULL ? Qnil : NOKOGIRI_STR_NEW2(content, enc)
  201
+  );
  202
+}
  203
+
  204
+static void notation_declaration(void * ctx,
  205
+    const xmlChar *name,
  206
+    const xmlChar *publicId,
  207
+    const xmlChar *systemId)
  208
+{
  209
+  VALUE self = (VALUE)ctx;
  210
+  VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
  211
+  VALUE doc = rb_funcall(self, rb_intern("document"), 0);
  212
+
  213
+  char * MAYBE_UNUSED(encoding) = RTEST(enc) ? StringValuePtr(enc) : NULL;
  214
+
  215
+  rb_funcall(doc,
  216
+      rb_intern("notation_declaration"),
  217
+      3,
  218
+      NOKOGIRI_STR_NEW2(name, encoding),
  219
+      publicId == NULL ? Qnil : NOKOGIRI_STR_NEW2(publicId, encoding),
  220
+      systemId == NULL ? Qnil : NOKOGIRI_STR_NEW2(systemId, encoding)
  221
+  );
  222
+}
  223
+
  224
+static void attribute_declaration(void * ctx,
  225
+    const xmlChar *elem,
  226
+    const xmlChar *fullname,
  227
+    int type,
  228
+    int def,
  229
+    const xmlChar *default_value,
  230
+    xmlEnumerationPtr tree)
  231
+{
  232
+  VALUE self = (VALUE)ctx;
  233
+  VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
  234
+  VALUE doc = rb_funcall(self, rb_intern("document"), 0);
  235
+
  236
+  char * MAYBE_UNUSED(encoding) = RTEST(enc) ? StringValuePtr(enc) : NULL;
  237
+
  238
+  VALUE value_set = rb_ary_new();
  239
+  while(NULL != tree) {
  240
+    rb_ary_push(value_set, NOKOGIRI_STR_NEW2(tree->name, encoding));
  241
+    tree = tree->next;
  242
+  }
  243
+
  244
+  rb_funcall(doc,
  245
+      rb_intern("attribute_declaration"),
  246
+      6,
  247
+      elem == NULL ? Qnil : NOKOGIRI_STR_NEW2(elem, encoding),
  248
+      fullname == NULL ? Qnil : NOKOGIRI_STR_NEW2(fullname, encoding),
  249
+      INT2NUM(type),
  250
+      INT2NUM(def),
  251
+      default_value == NULL ? Qnil : NOKOGIRI_STR_NEW2(default_value, encoding),
  252
+      value_set
  253
+  );
  254
+}
  255
+
  256
+static void internal_subset(void * ctx,
  257
+    const xmlChar *name,
  258
+    const xmlChar *externalId,
  259
+    const xmlChar *systemId)
  260
+{
  261
+  VALUE self = (VALUE)ctx;
  262
+  VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
  263
+  VALUE doc = rb_funcall(self, rb_intern("document"), 0);
  264
+
  265
+  char * MAYBE_UNUSED(encoding) = RTEST(enc) ? StringValuePtr(enc) : NULL;
  266
+
  267
+  rb_funcall(doc,
  268
+      rb_intern("internal_subset"),
  269
+      3,
  270
+      NOKOGIRI_STR_NEW2(name, encoding),
  271
+      externalId == NULL ? Qnil : NOKOGIRI_STR_NEW2(externalId, encoding),
  272
+      systemId == NULL ? Qnil : NOKOGIRI_STR_NEW2(systemId, encoding)
  273
+  );
  274
+}
  275
+
180 276
 static void deallocate(xmlSAXHandlerPtr handler)
181 277
 {
182 278
   NOKOGIRI_DEBUG_START(handler);
@@ -190,6 +286,9 @@ static VALUE allocate(VALUE klass)
190 286
 
191 287
   handler->startDocument = start_document;
192 288
   handler->endDocument = end_document;
  289
+  handler->entityDecl = entity_declaration;
  290
+  handler->notationDecl = notation_declaration;
  291
+  handler->attributeDecl = attribute_declaration;
193 292
   handler->startElement = start_element;
194 293
   handler->endElement = end_element;
195 294
   handler->characters = characters_func;
@@ -197,6 +296,7 @@ static VALUE allocate(VALUE klass)
197 296
   handler->warning = warning_func;
198 297
   handler->error = error_func;
199 298
   handler->cdataBlock = cdata_block;
  299
+  handler->internalSubset = internal_subset;
200 300
 
201 301
   return Data_Wrap_Struct(klass, NULL, deallocate, handler);
202 302
 }
37  lib/nokogiri/xml/sax/document.rb
@@ -119,6 +119,43 @@ def error string
119 119
         # +string+ contains the cdata content
120 120
         def cdata_block string
121 121
         end
  122
+
  123
+        ###
  124
+        # Called when an entity declaration is encountered.
  125
+        # +name+ is the entity name
  126
+        # +type+ is the entity type
  127
+        # +public_id+ is the public ID of the entity
  128
+        # +system_id+ is the system ID of the entity
  129
+        # +content+ is the entity value (without processing)
  130
+        def entity_declaration name, type, public_id, system_id, content
  131
+        end
  132
+
  133
+        ###
  134
+        # Called when a notation declaration is encountered.
  135
+        # +name+ is the name of the notation
  136
+        # +public_id+ is the public ID of the notation
  137
+        # +system_id+ is the system ID of the notation
  138
+        def notation_declaration name, public_id, system_id
  139
+        end
  140
+
  141
+        ###
  142
+        # Called when an attribute declaration is parsed
  143
+        # +element_name+ is the name of the element
  144
+        # +attribute_name+ is the name of the attribute declared
  145
+        # +type+ is the type of attribute
  146
+        # +default_type+ is the default value type for the attribute
  147
+        # +default_value+ is the default value for the attribute
  148
+        # +value_set+ is a list of possible values (may be empty)
  149
+        def attribute_declaration element_name, attribute_name, type, default_type, default_value, value_set
  150
+        end
  151
+
  152
+        ###
  153
+        # Called when parsing an internal subset declaration
  154
+        # +name+ is the root element name
  155
+        # +external_id+ is the external id
  156
+        # +system_id+ is the system id, filename or URL
  157
+        def internal_subset name, external_id, system_id
  158
+        end
122 159
       end
123 160
     end
124 161
   end
2  lib/nokogiri/xslt.rb
@@ -24,7 +24,7 @@ class << self
24 24
       def parse string
25 25
         Stylesheet.parse_stylesheet_doc(XML.parse(string))
26 26
       end
27  
-      
  27
+
28 28
       ###
29 29
       # Quote parameters in +params+ for stylesheet safety
30 30
       def quote_params params
52  test/helper.rb
@@ -56,8 +56,25 @@ class TestCase < Nokogiri::TestCase
56 56
       class Doc < XML::SAX::Document
57 57
         attr_reader :start_elements, :start_document_called
58 58
         attr_reader :end_elements, :end_document_called
59  
-        attr_reader :data, :comments, :cdata_blocks
60  
-        attr_reader :errors, :warnings
  59
+        attr_reader :data, :comments, :cdata_blocks, :entity_declarations
  60
+        attr_reader :errors, :warnings, :notation_declarations
  61
+        attr_reader :attribute_declarations, :internal_subsets
  62
+
  63
+        def initialize
  64
+          @start_document_called = nil
  65
+          @end_document_called = nil
  66
+          @errors = []
  67
+          @warning = []
  68
+          @start_elements = []
  69
+          @end_elements = []
  70
+          @data = []
  71
+          @comments = []
  72
+          @cdata_blocks = []
  73
+          @entity_declarations = []
  74
+          @notation_declarations = []
  75
+          @attribute_declarations = []
  76
+          @internal_subsets = []
  77
+        end
61 78
 
62 79
         def start_document
63 80
           @start_document_called = true
@@ -70,42 +87,59 @@ def end_document
70 87
         end
71 88
 
72 89
         def error error
73  
-          (@errors ||= []) << error
  90
+          @errors << error
74 91
           super
75 92
         end
76 93
 
77 94
         def warning warning
78  
-          (@warning ||= []) << warning
  95
+          @warning << warning
79 96
           super
80 97
         end
81 98
 
82 99
         def start_element *args
83  
-          (@start_elements ||= []) << args
  100
+          @start_elements << args
84 101
           super
85 102
         end
86 103
 
87 104
         def end_element *args
88  
-          (@end_elements ||= []) << args
  105
+          @end_elements << args
89 106
           super
90 107
         end
91 108
 
92 109
         def characters string
93  
-          @data ||= []
94 110
           @data += [string]
95 111
           super
96 112
         end
97 113
 
98 114
         def comment string
99  
-          @comments ||= []
100 115
           @comments += [string]
101 116
           super
102 117
         end
103 118
 
104 119
         def cdata_block string
105  
-          @cdata_blocks ||= []
106 120
           @cdata_blocks += [string]
107 121
           super
108 122
         end
  123
+
  124
+        def entity_declaration name, type, public_id, system_id, content
  125
+          @entity_declarations << [name, type, public_id, system_id, content]
  126
+          super
  127
+        end
  128
+
  129
+        def notation_declaration name, public_id, system_id
  130
+          @notation_declarations << [name, public_id, system_id]
  131
+          super
  132
+        end
  133
+
  134
+        def attribute_declaration *args
  135
+          @attribute_declarations << args
  136
+          super
  137
+        end
  138
+
  139
+        def internal_subset name, external_id, system_id
  140
+          @internal_subsets << [name, external_id, system_id]
  141
+          super
  142
+        end
109 143
       end
110 144
     end
111 145
   end
38  test/xml/sax/test_parser.rb
@@ -9,6 +9,44 @@ def setup
9 9
           @parser = XML::SAX::Parser.new(Doc.new)
10 10
         end
11 11
 
  12
+        def test_internal_subset
  13
+          File.open(XML_FILE, 'rb') { |f| @parser.parse(f) }
  14
+          assert_equal ['staff', nil, 'staff.dtd'],
  15
+            @parser.document.internal_subsets.first
  16
+        end
  17
+
  18
+        def test_entity_declaration
  19
+          File.open(XML_FILE, 'rb') { |f|
  20
+            @parser.parse(f)
  21
+          }
  22
+          assert_equal 5, @parser.document.entity_declarations.length
  23
+          assert_equal %w{ ent1 ent2 ent3 ent4 ent1 },
  24
+            @parser.document.entity_declarations.map { |x| x.first }
  25
+        end
  26
+
  27
+        def test_notation_declaration
  28
+          File.open(XML_FILE, 'rb') { |f|
  29
+            @parser.parse(f)
  30
+          }
  31
+          assert_equal 2, @parser.document.notation_declarations.length
  32
+          assert_equal %w{ notation1 notation2 },
  33
+            @parser.document.notation_declarations.map { |x| x.first }
  34
+        end
  35
+
  36
+        def test_attribute_declaration
  37
+          @parser.parse(<<-eoxml)
  38
+<?xml version="1.0"?>
  39
+<!DOCTYPE staff SYSTEM "staff.dtd" [
  40
+   <!ELEMENT payment EMPTY>
  41
+   <!ATTLIST payment type (cash|check) "cash">
  42
+]>
  43
+<staff />
  44
+          eoxml
  45
+          assert_equal 1, @parser.document.attribute_declarations.length
  46
+          assert_equal ["payment", "type", 9, 1, "cash", ["cash", "check"]],
  47
+            @parser.document.attribute_declarations.first
  48
+        end
  49
+
12 50
         def test_bad_document_calls_error_handler
13 51
           @parser.parse('<foo><bar></foo>')
14 52
           assert @parser.document.errors

No commit comments for this range

Something went wrong with that request. Please try again.