-
Notifications
You must be signed in to change notification settings - Fork 2
/
parser.rb
138 lines (116 loc) · 4.1 KB
/
parser.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# Public: Org File format parser
#
# A simple regexp-based parser for org files. It works by dividing
# the file into a beginning, nodes, and an ending. After this it
# parses the individual nodes, extracting the remaining detailed information.
#
# The parser is decoupled from the object model to make it easy to write updated
# parsers or to use a database to serialize an org-mode file out of.
require 'org_mode'
require 'org_mode/node_utils'
require 'date'
module OrgMode
class FileParser
  # Matches one complete headline line: leading stars, whitespace, then
  # the rest of the line. The capture group makes String#split keep the
  # headline text in its result.
  #
  # NOTE: deliberately compiled WITHOUT the `s` flag. In Ruby `s` selects
  # the Windows-31J encoding (it is not a "dot matches newline" switch),
  # which turns this into a fixed-encoding regexp that raises
  # Encoding::CompatibilityError on UTF-8 buffers with non-ASCII text.
  RxNodeTitle = %r{
    ^         # beginning of line
    (
      \*+     # multiple stars
      \s+     # one or more whitespace
      .*      # anything
    )
    $         # until end of line
  }x

  class << self
    # Public: parses buffer into nodes and collects them in an
    # OrgMode::File object.
    #
    # buffer - String holding org-mode data.
    #
    # Returns an OrgMode::File object containing all information of
    # the file.
    def parse(buffer)
      beginning, raw_nodes, ending = parse_buffer(buffer)
      parsed_nodes = parse_nodes(raw_nodes)
      root_nodes = NodeUtils.convert_sequential_nodelist_into_tree(parsed_nodes)
      File.new(beginning, root_nodes, ending)
    end

    # Public: runs NodeParser over every [title, content] pair.
    #
    # nodes - Array of [title, content] String pairs as produced by
    #         parse_into_tokens.
    #
    # Returns an Array with one NodeParser.parse result per pair.
    def parse_nodes(nodes)
      nodes.map { |title, content| NodeParser.parse(title, content) }
    end

    # Public: splits the buffer into its three top-level parts.
    #
    # buffer - String holding org-mode data.
    #
    # Returns the same [beginning_of_file, nodes, ending_of_file] Array
    # as parse_into_tokens.
    def parse_buffer(buffer)
      parse_into_tokens(buffer)
    end

    # Private: splits buffer into different parts.
    # First part is the beginning of the file.
    # Second part are the node titles in combination with their content.
    # Third part is the ending of the file.
    #
    # buffer - org mode data
    #
    # Returns [beginning_of_file, nodes, ending] where nodes is an Array
    # of [title, content] pairs. If the beginning is not present an empty
    # string is returned; the ending is always an empty string. This
    # function never returns nil for any of the parts.
    def parse_into_tokens(buffer)
      # Because RxNodeTitle has a capture group, split yields
      # [beginning, title, content, title, content, ...].
      tokens = buffer.split(RxNodeTitle).map(&:rstrip)
      beginning_of_file = tokens.shift || ''

      nodes = tokens.each_slice(2).map do |title, content|
        # split drops trailing empty strings, so the last title may come
        # without content. The first character of a content chunk is
        # dropped (the newline that ended the title line, since `.`
        # cannot match it).
        [title, (content || '')[1..-1] || '']
      end

      [beginning_of_file, nodes, '']
    end
  end
end
class NodeParser
  class << self
    # Public: Parses a node in the org-mode file format.
    #
    # title   - an org-mode title; can contain a date, a todo status and
    #           whatever else the headline carries
    # content - the content block belonging to the title, or nil
    #
    # Returns an OrgMode::Node containing all parsable information.
    # Raises ArgumentError if title is not a headline (no leading stars).
    def parse(title, content)
      node = OrgMode::Node.new
      parse_title(node, title)
      parse_extract_dates(node)
      parse_content(node, content)
      node
    end

    private

    # Headline: stars, then optionally whitespace, a TODO/DONE keyword and
    # the remaining title text. The keyword only counts when followed by
    # whitespace or end of line, so "DONEgal" stays part of the title.
    # Everything after the stars is optional so a bare "*" still parses.
    RxTitle = /^(\*+)(?:\s+(?:(TODO|DONE)(?=\s|$))?(.*))?$/

    # Private: fills in stars, todo_state and the raw title on node.
    #
    # node  - the node being built
    # title - headline String starting with one or more stars
    #
    # Raises ArgumentError if title does not look like a headline.
    def parse_title(node, title)
      matches = title.match(RxTitle)
      raise ArgumentError, "not an org-mode headline: #{title.inspect}" unless matches

      node.stars      = matches[1].length
      node.todo_state = matches[2]
      node.title      = matches[3] || ''
    end

    # Timestamp such as <2012-02-03>, <2012-02-03 Wed>,
    # <2012-02-03 Wed 15:15> or <2012-02-03 Wed 15:15-16:15>.
    # Captures: 1 = date, 2 = start time, 3 = end time.
    # The day name and both times are each optional.
    RxDateRegexp = /<(\d+-\d+-\d+)(?:\s(?:\w{3})?)?(?:\s(\d+:\d+))?(?:-(\d+:\d+))?>/

    # Private: extracts the first timestamp from node.title into
    # node.date / node.date_start_time / node.date_end_time, and removes
    # every timestamp from the title text.
    #
    # node - the node being built; its title must already be set
    def parse_extract_dates(node)
      _, extracted_date, start_time, end_time = node.title.match(RxDateRegexp).to_a
      node.title = node.title.gsub(RxDateRegexp, '').strip

      node.date            = DateTime.parse(extracted_date) if extracted_date
      node.date_start_time = DateTime.parse("#{extracted_date} #{start_time}") if start_time
      node.date_end_time   = DateTime.parse("#{extracted_date} #{end_time}") if end_time
    end

    RxEmptyLine = /^\s*$/

    # Private: stores content on the node with the common leading
    # indentation removed and blank lines trimmed from both ends.
    #
    # node    - the node being built; node.indent caps how much
    #           indentation is removed
    # content - content String or nil (nil leaves node.content untouched)
    #
    # The passed-in content String is not modified.
    def parse_content(node, content)
      return unless content

      # Column of the first non-whitespace character of every non-blank line.
      text_columns = content.lines.map { |line| line =~ /\S/ }.compact
      minimum_indent = (text_columns + [node.indent]).min

      # [^\S\n] = whitespace except newline. Plain \s would also consume
      # line breaks, swallowing blank lines and part of the next line.
      dedented = content.gsub(/^[^\S\n]{#{minimum_indent}}/, '')

      # remove empty lines at beginning and ending
      node.content = dedented.lines.
        drop_while { |line| line =~ RxEmptyLine }.
        reverse.
        drop_while { |line| line =~ RxEmptyLine }.
        reverse.
        join
    end
  end
end
end