-
Notifications
You must be signed in to change notification settings - Fork 1
/
bible.go
132 lines (116 loc) · 2.87 KB
/
bible.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
package main
import (
"database/sql"
"encoding/xml"
_ "github.com/mattn/go-sqlite3"
"io"
"log"
"os"
"strconv"
"strings"
"unicode/utf8"
)
// dbPath is the location of the SQLite database file that main deletes and
// then rebuilds from the USFX source.
var dbPath = "../bible.db"
// Passage is a single verse as emitted by Parse and stored as one row of the
// bible table. Field order matters: Parse builds values with an unkeyed
// composite literal.
type Passage struct {
	// book is the value of the enclosing <book> element's "id" attribute.
	book string
	// chapter and verse are the integer "id" attributes of the most recent
	// <c> and <v> elements.
	chapter, verse int
	// content is the whitespace-trimmed text collected for the verse.
	content string
}
// main rebuilds the SQLite database at dbPath from eng-kjv_usfx.xml and exits
// with status 1 if any step fails.
func main() {
	// The work lives in run so that its deferred cleanups execute before the
	// process exits; os.Exit does not run deferred functions.
	os.Exit(run())
}

// run performs the actual import and returns the process exit code: 0 on
// success, 1 after logging the first error encountered.
//
// Steps: open the USFX file, delete any existing database file, create the
// bible table, insert every Passage produced by Parse inside one transaction,
// then create the (book, chapter) index.
func run() int {
	// Open the input first so that a missing source file does not destroy an
	// existing database and replace it with an empty one.
	xmlFile, e := os.Open("eng-kjv_usfx.xml")
	if e != nil {
		log.Printf("Got error opening usfx file: %+v.", e)
		return 1
	}
	defer xmlFile.Close()

	// Best effort: the file normally does not exist yet. If a stale file
	// survives, the CREATE TABLE below fails and reports the problem.
	_ = os.Remove(dbPath)

	db, e := sql.Open("sqlite3", dbPath)
	if e != nil {
		log.Printf("Got error opening database: %+v.", e)
		return 1
	}
	defer db.Close()

	_, e = db.Exec(`CREATE TABLE bible (book, chapter INTEGER, verse INTEGER, content TEXT, PRIMARY KEY(book, chapter, verse))`)
	if e != nil {
		log.Printf("Got error creating table: %+v.", e)
		return 1
	}

	// Batch all inserts in a single transaction with one prepared statement
	// instead of committing every row individually.
	tx, e := db.Begin()
	if e != nil {
		log.Printf("Got error starting transaction: %+v.", e)
		return 1
	}
	// After a successful Commit this returns sql.ErrTxDone, which is ignored.
	defer tx.Rollback()

	stmt, e := tx.Prepare(`INSERT INTO bible VALUES (?, ?, ?, ?)`)
	if e != nil {
		log.Printf("Got error preparing insert: %+v.", e)
		return 1
	}
	defer stmt.Close()

	// Parse closes c when it reaches the end of the input or hits a parse
	// error, which ends the range loop.
	c := make(chan Passage)
	go Parse(xmlFile, c)
	for passage := range c {
		_, e = stmt.Exec(passage.book, passage.chapter, passage.verse, passage.content)
		if e != nil {
			log.Printf("Got error inserting row: %+v.", e)
			return 1
		}
	}

	if e = tx.Commit(); e != nil {
		log.Printf("Got error committing rows: %+v.", e)
		return 1
	}

	// Inserts are slightly faster if we do this after the table is built
	_, e = db.Exec(`CREATE INDEX bible_book_chapter ON bible(book, chapter)`)
	if e != nil {
		log.Printf("Got error creating index: %+v.", e)
		return 1
	}
	return 0
}
// Parse reads a USFX XML document from reader and sends one Passage on c for
// every <ve> (verse end) element, then closes c.
//
// The current book, chapter and verse come from the "id" attributes of the
// most recent <book>, <c> and <v> elements. Character data is collected only
// between a <v> and its <ve>, and never inside an <f> (footnote) element.
// Text chunks are joined with a single space, except that no space is placed
// before . , ? ! : ; ) or after an opening parenthesis. The pilcrow (¶) is
// removed.
//
// An XML error other than io.EOF is logged; in every case c is closed before
// Parse returns, so a receiver ranging over c always terminates.
func Parse(reader io.Reader, c chan Passage) {
	// The sender owns the channel: close it on every exit path.
	defer close(c)

	var book, content string
	var chapter, verse int
	// inVerse and inFootnote are tracked separately so that a footnote which
	// appears outside a verse does not switch text collection on when it ends.
	var inVerse, inFootnote bool

	decoder := xml.NewDecoder(reader)
	for {
		token, e := decoder.Token()
		if e != nil {
			if e != io.EOF {
				log.Printf("Error parsing usfx file: %+v", e)
			}
			return
		}

		switch element := token.(type) {
		case xml.StartElement:
			switch element.Name.Local {
			case "book":
				book = getAttribute(element, "id")
			case "c":
				chapter = parseID(element, "chapter")
			case "v":
				verse = parseID(element, "verse")
				inVerse = true
			case "ve":
				// Verse end
				c <- Passage{book, chapter, verse, strings.TrimSpace(content)}
				content = ""
				inVerse = false
			case "f":
				// ignore footnotes
				inFootnote = true
			}
		case xml.EndElement:
			if element.Name.Local == "f" {
				inFootnote = false
			}
		case xml.CharData:
			if !inVerse || inFootnote {
				continue
			}
			// Strip the pilcrow before trimming so the whitespace that
			// followed it cannot end up as a double space inside the verse.
			text := strings.TrimSpace(strings.Replace(string(element), "¶", "", -1))
			if text == "" {
				// Whitespace-only chunk between elements: nothing to add.
				continue
			}
			content = strings.TrimSpace(content)
			if content == "" {
				content = text
				continue
			}
			first, _ := utf8.DecodeRuneInString(text)
			last, _ := utf8.DecodeLastRuneInString(content)
			if strings.ContainsRune(".,?!:;)", first) || last == '(' {
				content += text
			} else {
				content += " " + text
			}
		}
	}
}

// parseID converts the "id" attribute of element to an int. kind names the
// element in the log message. A conversion failure is logged and the value
// strconv.Atoi returned (0 for non-numeric input) is still used, so parsing
// continues.
func parseID(element xml.StartElement, kind string) int {
	raw := getAttribute(element, "id")
	n, e := strconv.Atoi(raw)
	if e != nil {
		log.Printf("Got error parsing %s id %q: %+v", kind, raw, e)
	}
	return n
}
// getAttribute returns the value of the first attribute on element whose
// local name equals attribute. The namespace part of the attribute name is
// not compared. It returns "" when no attribute matches.
func getAttribute(element xml.StartElement, attribute string) string {
	for _, attr := range element.Attr {
		if attr.Name.Local != attribute {
			continue
		}
		return attr.Value
	}
	return ""
}