forked from kkyr/go-recipe
-
Notifications
You must be signed in to change notification settings - Fork 0
/
recipe.go
138 lines (114 loc) · 3.07 KB
/
recipe.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
package ld
import (
"encoding/json"
"fmt"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/piprate/json-gold/ld"
)
// JSON-LD keywords that are looked up as keys in decoded documents.
const (
	typeKey    = "@type"
	graphKey   = "@graph"
	contextKey = "@context"
)

// Schema.org identifiers used to recognise and compact a Recipe node.
const (
	schemaURL  = "http://schema.org/"
	recipeType = "Recipe"
)

// jsonLdSelector is the CSS selector matching <script> elements that embed JSON-LD.
const jsonLdSelector = `script[type="application/ld+json"]`
// NewRecipeProcessor builds a RecipeProcessor wired with a fresh JSON-LD
// processor, default JSON-LD options (empty base) and a compaction context
// that targets the Schema.org Recipe type.
func NewRecipeProcessor() *RecipeProcessor {
	rp := new(RecipeProcessor)
	rp.proc = ld.NewJsonLdProcessor()
	rp.opts = ld.NewJsonLdOptions("")
	rp.ctx = map[string]any{
		contextKey: schemaURL,
		typeKey:    recipeType,
	}

	return rp
}
// RecipeProcessor is a json-ld Schema Recipe processor.
//
// It extracts a Schema.org Recipe node from JSON-LD embedded in an HTML
// document and compacts it. Use NewRecipeProcessor to obtain a ready-to-use
// value; the zero value has a nil processor and is not usable.
type RecipeProcessor struct {
// proc is the JSON-LD processor used to compact the Recipe node.
proc *ld.JsonLdProcessor
// opts are the JSON-LD options handed to proc on every compaction.
opts *ld.JsonLdOptions
// ctx is the context the Recipe node is compacted against.
ctx map[string]any
}
// GetRecipeNode searches doc to find a Schema.org Recipe node encoded in ld+json format.
// If found, the Recipe is serialized into a map. Individual recipe fields can be accessed
// in the map using the field names defined in https://schema.org/Recipe.
//
// Every <script type="application/ld+json"> element in doc is inspected in
// document order and the first Recipe node that parses and compacts
// successfully is returned. A script whose top-level JSON value is an array
// is treated as a list of independent candidates, each tried in turn.
//
// An error is returned when doc contains no ld+json script at all, or when
// none of the candidates yields a Recipe node; in the latter case the error
// describes the last failure encountered.
func (rp *RecipeProcessor) GetRecipeNode(doc *goquery.Document) (map[string]any, error) {
	jsonLdDocs := doc.Find(jsonLdSelector).Map(func(_ int, sel *goquery.Selection) string {
		return sel.Text()
	})
	if len(jsonLdDocs) == 0 {
		return nil, fmt.Errorf("no ld+json document found")
	}

	var lastErr error

	for _, raw := range jsonLdDocs {
		raw = strings.TrimSpace(raw)
		candidates := []string{raw}

		// Some websites (e.g. AllRecipes.com) have their schema wrapped in a list.
		// Decode the list properly instead of stripping the brackets textually,
		// so that lists holding more than one element are handled as well.
		if strings.HasPrefix(raw, "[") {
			var items []json.RawMessage
			if err := json.Unmarshal([]byte(raw), &items); err != nil {
				lastErr = fmt.Errorf("unmarshal data failed: %w", err)
				continue
			}

			candidates = candidates[:0]
			for _, item := range items {
				candidates = append(candidates, string(item))
			}
		}

		for _, candidate := range candidates {
			node, err := rp.parseJSON(candidate)
			if err == nil {
				return node, nil
			}
			lastErr = err
		}
	}

	// Only reachable without an error when every script was an empty list;
	// never report "no recipe" as a nil map with a nil error.
	if lastErr == nil {
		lastErr = fmt.Errorf("could not find Recipe node")
	}

	return nil, lastErr
}
// parseJSON decodes data as a JSON object, locates the Schema.org Recipe node
// in it and returns that node compacted against the processor's context.
//
// When the decoded object has a top-level "@graph" key, the Recipe is looked
// up in the value of that key; otherwise the decoded object itself is the
// only candidate. An error is returned when data cannot be unmarshalled into
// an object, when no candidate is typed as Recipe, or when compaction fails.
func (rp *RecipeProcessor) parseJSON(data string) (map[string]any, error) {
	var root map[string]any
	if err := json.Unmarshal([]byte(data), &root); err != nil {
		return nil, fmt.Errorf("unmarshal data failed: %w", err)
	}

	var source any = root
	if isGraphNode(root) {
		source = root[graphKey]
	}

	recipe, found := findRecipeNode(ld.Arrayify(source))
	if !found {
		return nil, fmt.Errorf("could not find Recipe node")
	}

	// The selected node may lack its own @context (for instance when it was
	// taken out of a @graph), so the default one is filled in before compacting.
	addSchemaCtx(recipe)

	compacted, err := rp.proc.Compact(recipe, rp.ctx, rp.opts)
	if err != nil {
		return nil, fmt.Errorf("could not compact Recipe node: %w", err)
	}

	return compacted, nil
}
// isGraphNode reports whether v is a JSON object (map[string]any) that has an
// "@graph" key. The value stored under the key is not inspected.
func isGraphNode(v any) bool {
	obj, ok := v.(map[string]any)
	if !ok {
		return false
	}

	_, hasGraph := obj[graphKey]

	return hasGraph
}
// addSchemaCtx sets the "@context" of v to the Schema.org URL when v is a JSON
// object (map[string]any) that has no "@context" key yet. An existing
// "@context", whatever its value, is left untouched, and any other kind of
// value is ignored. The map is modified in place.
func addSchemaCtx(v any) {
	obj, ok := v.(map[string]any)
	if !ok {
		return
	}

	if _, present := obj[contextKey]; present {
		return
	}

	obj[contextKey] = schemaURL
}
// findRecipeNode returns the first element of nodes that is a JSON object
// whose "@type" is either the string "Recipe" or a list containing "Recipe".
// The boolean result is false, and the map nil, when no element qualifies.
// Elements that are not objects are skipped.
func findRecipeNode(nodes []any) (map[string]any, bool) {
	for _, candidate := range nodes {
		obj, isObj := candidate.(map[string]any)
		if !isObj {
			continue
		}

		switch kind := obj[typeKey].(type) {
		case string:
			if kind == recipeType {
				return obj, true
			}
		case []any:
			// A node may declare several types at once.
			for _, entry := range kind {
				if entry == recipeType {
					return obj, true
				}
			}
		}
	}

	return nil, false
}