forked from andreaskoch/allmark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.go
138 lines (103 loc) · 3.36 KB
/
parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
// Copyright 2014 Andreas Koch. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package parser
import (
"bufio"
"bytes"
"fmt"
"io"
"github.com/andreaskoch/allmark/common/logger"
"github.com/andreaskoch/allmark/dataaccess"
"github.com/andreaskoch/allmark/model"
"github.com/andreaskoch/allmark/services/parser/cleanup"
"github.com/andreaskoch/allmark/services/parser/document"
"github.com/andreaskoch/allmark/services/parser/presentation"
"github.com/andreaskoch/allmark/services/parser/typedetection"
)
// Parser turns data-access items and files into their model counterparts
// (see ParseItem and ParseFile).
type Parser struct {
// logger receives the debug output that ParseItem emits.
logger logger.Logger
}
// New returns a Parser that reports through the supplied logger.
// The returned error is always nil.
func New(logger logger.Logger) (Parser, error) {
	instance := Parser{logger: logger}
	return instance, nil
}
// ParseItem builds a model item from the supplied data-access item.
//
// The item's content is stored as Markdown on the model, split into lines
// with getLines, passed through cleanup.Cleanup and then handed to the
// document or the presentation parser, depending on the type reported by
// typedetection.DetectType. Finally the item's hash is copied onto the model.
//
// An error is returned if item is nil, if its last modified date, content or
// hash cannot be obtained, if the detected type is not handled here, or if
// the type-specific parser fails.
func (parser *Parser) ParseItem(item dataaccess.Item) (*model.Item, error) {
	if item == nil {
		return nil, fmt.Errorf("Cannot parse an empty item.")
	}

	parser.logger.Debug("Parsing item %q", item.String())

	// Assemble the model from the item's route, its converted files and its type.
	result := model.NewItem(item.Route(), parser.convertFiles(item.Files()), item.Type())

	// The modification date is passed on to the type-specific parsers below.
	modified, err := item.LastModified()
	if err != nil {
		return nil, fmt.Errorf("Cannot determine last modified date for item %q. Error: %s", item, err.Error())
	}

	// Load the raw content of the item.
	content, err := getItemData(item)
	if err != nil {
		return nil, fmt.Errorf("Cannot get data from item %q. Error: %s", item, err.Error())
	}

	// Keep the unprocessed content on the model.
	result.Markdown = string(content)

	// Break the content into lines and clean them up before parsing.
	contentLines := getLines(bytes.NewReader(content))
	contentLines = cleanup.Cleanup(contentLines)

	// The type detected from the content decides which parser is used.
	result.Type = typedetection.DetectType(contentLines)
	switch result.Type {
	case model.TypeDocument, model.TypeRepository:
		_, parseErr := document.Parse(result, modified, contentLines)
		if parseErr != nil {
			return nil, fmt.Errorf("Unable to parse item %q (Type: %s, Error: %s)", item, result.Type, parseErr.Error())
		}

	case model.TypePresentation:
		parseErr := presentation.Parse(result, modified, contentLines)
		if parseErr != nil {
			return nil, fmt.Errorf("Unable to parse item %q (Type: %s, Error: %s)", item, result.Type, parseErr.Error())
		}

	default:
		return nil, fmt.Errorf("Cannot parse item %q. Unknown item type.", item)
	}

	// Attach the item's hash as the last step.
	itemHash, err := item.Hash()
	if err != nil {
		return nil, fmt.Errorf("Unable to determine the hash for item %q. Error: %s", item, err.Error())
	}
	result.Hash = itemHash

	return result, nil
}
// ParseFile wraps the supplied data-access file in a model.File and returns
// a pointer to it. The returned error is always nil.
func (parser *Parser) ParseFile(file dataaccess.File) (*model.File, error) {
	fileModel := model.File{file}
	return &fileModel, nil
}
// convertFiles converts every data-access file into its model counterpart
// by calling ParseFile.
//
// Files for which ParseFile reports an error are left out of the result; the
// error is written to the parser's logger instead of being dropped silently.
// The returned slice is never nil and keeps the order of the input.
func (parser *Parser) convertFiles(dataaccessFiles []dataaccess.File) []*model.File {
	convertedFiles := make([]*model.File, 0, len(dataaccessFiles))

	for _, file := range dataaccessFiles {
		convertedFile, err := parser.ParseFile(file)
		if err != nil {
			// Conversion stays best-effort: report the failure and carry on.
			parser.logger.Debug("Skipping file %v. Error: %s", file, err.Error())
			continue
		}

		convertedFiles = append(convertedFiles, convertedFile)
	}

	return convertedFiles
}
// getItemData reads the content of the supplied item into memory.
//
// The content is obtained through item.Data, which is handed a callback that
// copies the reader it receives into an in-memory buffer. If item.Data
// returns an error, that error is passed on unchanged and the returned slice
// is nil.
func getItemData(item dataaccess.Item) ([]byte, error) {
	byteBuffer := new(bytes.Buffer)
	dataWriter := bufio.NewWriter(byteBuffer)

	contentReader := func(content io.ReadSeeker) error {
		_, copyErr := io.Copy(dataWriter, content)

		// Always flush so that everything copied so far reaches the buffer,
		// but do not let a flush failure go unnoticed.
		flushErr := dataWriter.Flush()

		if copyErr != nil {
			return copyErr
		}
		return flushErr
	}

	if err := item.Data(contentReader); err != nil {
		return nil, err
	}

	return byteBuffer.Bytes(), nil
}