Skip to content

Commit

Permalink
xml-motor ~ 1st commit ~ version 0.0.1 ~ a (non-native installation) …
Browse files Browse the repository at this point in the history
…XML Parsing Gem with a new algo
  • Loading branch information
abhishekkr committed Nov 5, 2011
0 parents commit cd3f70e
Show file tree
Hide file tree
Showing 6 changed files with 321 additions and 0 deletions.
4 changes: 4 additions & 0 deletions Gemfile
@@ -0,0 +1,4 @@
source "http://rubygems.org"

# Specify your gem's dependencies in xml-motor.gemspec
gemspec
1 change: 1 addition & 0 deletions Rakefile
@@ -0,0 +1 @@
require "bundler/gem_tasks"
267 changes: 267 additions & 0 deletions lib/xml-motor.rb
@@ -0,0 +1,267 @@
#!/usr/bin/env ruby

module XMLIndexHandler
def self.get_node_indexes(xml_motor, tag)
xml_idx_to_find = []
xml_motor.xmltags[tag.split(".")[0]].each_value do |v|
xml_idx_to_find.push v
end
xml_idx_to_find = xml_idx_to_find.flatten

tag.split(".")[1..-1].each do |tag_i|
outer_idx = xml_idx_to_find
osize=outer_idx.size/2 -1
x_curr=[]
xml_motor.xmltags[tag_i].each_value do |v|
x_curr.push v
end
x_curr = x_curr.flatten
xsize=x_curr.size/2 -1

xml_idx_to_find = expand_node_indexes outer_idx, osize, x_curr, xsize
end
xml_idx_to_find
end

def self.expand_node_indexes(outer_idx, osize, x_curr, xsize)
exapnded_node_indexes = []
0.upto osize do |o|
o1=outer_idx[o*2]
o2=outer_idx[o*2 +1]
0.upto xsize do |x|
x1=x_curr[x*2]
x2=x_curr[x*2 +1]
unless o1>x1 or o2<x2
exapnded_node_indexes.push x1
exapnded_node_indexes.push x2
end
end
end
exapnded_node_indexes.flatten
end
end

module XMLChopper
def self.get_tag_attrib_value(tag_value)
tag_value_split = tag_value.split(/>/)
in_tag = tag_value_split.first
out_tag = tag_value_split[1..-1].join
in_tag_split = in_tag.split(/[ \t]/)
tag_name = in_tag_split.first
attribzone = in_tag_split[1..-1].flatten.join(' ')
attrs = get_attribute_hash attribzone
[[tag_name,attrs],out_tag]
end

def self.get_attribute_hash(attribzone)
attribzone = attribzone.strip unless attribzone.nil?
return nil if attribzone.nil? or attribzone==""
attrs = {}
broken_attrib = attribzone.split(/=/)
attribs = broken_attrib.first.strip
values = nil
broken_attrib[1..-2].each do |attrib_part|
value_n_attrib = attrib_part.split(/[ \t]/)
values = value_n_attrib[0..-2].join(' ')
attrs[attribs] = values
attribs = value_n_attrib[-1].strip
end
values = broken_attrib.last.strip
attrs[attribs] = values
attrs
end
end

module XMLJoiner
def self.dejavu_attributes(attrib_hash)
return nil if attrib_hash.nil?
attributes=""
attrib_hash.each_key do |hash_key|
attributes += " " + hash_key + "=" + attrib_hash[hash_key]
end
attributes
end
end

##
# main class
##

module XMLMotorHandler
def self._splitter_(xmldata)
@xmlnodes=[xmldata.split(/</)[0]]
xmldata.split(/</)[1..-1].each do |x1|
@xmlnodes.push XMLChopper.get_tag_attrib_value(x1)
end
end

def self._indexify_
@xmltags = {}
idx = 1
depth = 0
@xmlnodes[1..-1].each do |xnode|
tag_name = xnode[0][0]
unless tag_name.match(/^\/.*/) then
@xmltags[tag_name] ||= {}
@xmltags[tag_name][depth] ||= []
@xmltags[tag_name][depth].push idx
depth += 1
else
depth -= 1
@xmltags[tag_name[1..-1]][depth] ||= []
@xmltags[tag_name[1..-1]][depth].push idx
end
idx +=1
end
end

def self._grab_my_node_ (tag)
xml_to_find = XMLIndexHandler.get_node_indexes self, tag
nodes = []
node_count = xml_to_find.size/2 -1
0.upto node_count do |ncount|
nodes[ncount] = ""
node_start = xml_to_find[ncount*2]
node_stop = xml_to_find[ncount*2 +1]
nodes[ncount] += @xmlnodes[node_start][1] unless @xmlnodes[node_start][1].nil?
(node_start+1).upto (node_stop-1) do |node_idx|
any_attrib ||= ""
any_attrib = XMLJoiner.dejavu_attributes(@xmlnodes[node_idx][0][1]).to_s unless @xmlnodes[node_idx][0][1].nil?
nodes[ncount] += "<" + @xmlnodes[node_idx][0][0] + any_attrib + ">"
nodes[ncount] += @xmlnodes[node_idx][1] unless @xmlnodes[node_idx][1].nil?
end
end
nodes
end

def self.xml_handler(xmldata, tag_to_find)
_splitter_ xmldata
_indexify_
my_nodes = _grab_my_node_ tag_to_find
my_nodes
end

def self.xmlnodes
@xmlnodes
end

def self.xmltags
@xmltags
end
end

##
# XMLMotor_EXECUTIONER ;)
##

module XMLMotor
def self.if_no_args(argv=[])
if argv.size==0
puts <<-eof
XMLMotor got no OIL to run :)
No Arguments Provided.
[As A Ruby Gem] How To Use:
Loading:
+ $ gem install xml-motor
+ 'require' the 'xml-motor'
[As A Code Library] How To Use:
Loading:
+ 'require' the 'parser.rb'
Usage:
+ To find values of an xml node from an xml file
XMLMotor.get_node_from_file <file_with_path>, <node>
+ To find values of an xml node from an xml string
XMLMotor.get_node_from_content <xml_string>, <node>
Example Calls As Code:
+ XMLMotor.new.get_node_from_content "<A>a</A><B><A>ba</A></B>", "A"
RETURNS: ["a", "ba"]
+ XMLMotor.new.get_node_from_content "<A>a</A><B><A>ba</A></B>", "B.A"
RETURNS: ["ba"]
[Directly As a Tool] How To Use:
Syntax:
+ To find values of an xml node from an xml file
$ ruby run_as_tool.rb -f <xml_file> -n <node_to_find>
+ To find values of an xml node from an xml string
$ ruby run_as_tool.rb -s <xml_string> -n <node_to_find>
+ To find values of an xml node from an xml file & string, both
$ ruby run_as_tool.rb -f <xml_file> -s <xml_string> -n <node_to_find>
Example Run As Tool:
$ ruby run_as_tool.rb -n "A" -s "<A>a</A><B><A>ba</A></B>"
DISPLAYS:
a
ba
$ ruby run_as_tool.rb -n "B.A" -s "<A>a</A><B><A>ba</A></B>"
DISPLAYS:
ba
eof
return false
end
true
end

def self.get_file_str_node(argv)
switch=nil
file_str_node = {}
argv.each do |args|
if ["-f","-s","-n"].include?(switch) and ["-f","-s","-n"].include?(args)
puts "You provided an empty switch #{switch}, still trying to execute with rest of information provided."
switch = args
next
end

case switch
when "-f"
file_str_node["file"] = args
when "-s"
file_str_node["content"] = args
when "-n"
file_str_node["my_node"] = args
end
switch = args
end

if file_str_node["file"].nil? and file_str_node["content"].nil?
puts "No xml string or file provided. Provide it with '-s|-f' switch.\n"
return if_no_args
elsif file_str_node["my_node"].nil?
puts "No node provided to search. Provide it with '-n' switch."
return if_no_args
end

file_str_node
end

def self.get_node_from_file(file, my_node)
unless file.nil?
if File.readable? file
begin
return XMLMotorHandler.xml_handler File.read(file), my_node
rescue
puts "Error: problem parsing File Content"
end
else
puts "Error: #{file} is not readable."
end
end
return ""
end

def self.get_node_from_content(content, my_node)
unless content.nil?
begin
return XMLMotorHandler.xml_handler content, my_node
rescue
puts "Error problem parsing String Content #{content}"
end
end
return ""
end
end

5 changes: 5 additions & 0 deletions lib/xml-motor/version.rb
@@ -0,0 +1,5 @@
module Xml
module Motor
VERSION = "0.0.1"
end
end
2 changes: 2 additions & 0 deletions make_my_gem.sh
@@ -0,0 +1,2 @@
echo "Building GEM File"
gem build xml-motor.gemspec
42 changes: 42 additions & 0 deletions xml-motor.gemspec
@@ -0,0 +1,42 @@
# -*- encoding: utf-8 -*-
$:.push File.expand_path("../lib", __FILE__)
require "xml-motor/version"

Gem::Specification.new do |s|
s.name = "xml-motor"
s.version = Xml::Motor::VERSION
s.authors = ["abhishekkr"]
s.email = ["abhishk@thoughtworks.com"]
s.homepage = "http://github.com/abhishekkr/rubygem_xml_motor"
s.summary = %q{An easy-to-use XML Parser without any Native Dependencies}
s.description = %q{A new short XML Parsing Algorithm implemented directly in >500 lines. An easy-to-use XML Parser without any Native Dependencies.
[How To Use]:
Loading:
+ $ gem install xml-motor
+ 'require' the 'xml-motor'
Usage:
+ To find values of an xml node from an xml file
XMLMotor.get_node_from_file <file_with_path>, <node>
+ To find values of an xml node from an xml string
XMLMotor.get_node_from_content <xml_string>, <node>
Example Calls As Code:
+ XMLMotor.get_node_from_content "<A>a</A><B><A>ba</A></B>", "A"
RETURNS: ["a", "ba"]
+ XMLMotor.get_node_from_content "<A>a</A><B><A>ba</A></B>", "B.A"
RETURNS: ["ba"]
}

s.rubyforge_project = "xml-motor"

s.files = `git ls-files`.split("\n")
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
s.require_paths = ["lib"]

# specify any dependencies here; for example:
# s.add_development_dependency "rspec"
# s.add_runtime_dependency "rest-client"
end

0 comments on commit cd3f70e

Please sign in to comment.