forked from readium/readium-lcp-server
/
reader.go
211 lines (184 loc) · 5.17 KB
/
reader.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
// Copyright 2019 European Digital Reading Lab. All rights reserved.
// Licensed to the Readium Foundation under one or more contributor license agreements.
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file exposed on Github (readium) in the project repository.
package epub
import (
"archive/zip"
"encoding/xml"
"fmt"
"io"
"path/filepath"
"sort"
"strings"
"github.com/fubaydullaev/readium-lcp-server/epub/opf"
"github.com/fubaydullaev/readium-lcp-server/xmlenc"
"golang.org/x/net/html/charset"
)
// RootFileElement is the local name of the XML element that findRootFiles
// collects from the container document; each such element references one
// package (opf) file through its full-path attribute.
const RootFileElement = "rootfile"
// rootFile holds the attributes decoded from one rootfile element
// (see findRootFiles).
type rootFile struct {
// FullPath is decoded from the "full-path" attribute; Read uses it to
// locate the package file inside the zip archive.
FullPath string `xml:"full-path,attr"`
// MediaType is decoded from the "media-type" attribute.
MediaType string `xml:"media-type,attr"`
}
// findRootFiles scans the XML document read from r and returns one rootFile
// for every element whose local name is RootFileElement, in document order.
//
// Documents which are not UTF-8 encoded are handled through the charset
// reader installed on the decoder.
//
// Any tokenizing or decoding error is returned to the caller together with a
// nil slice. Only io.EOF, which marks the normal end of the document, ends
// the scan without error.
func findRootFiles(r io.Reader) ([]rootFile, error) {
	xd := xml.NewDecoder(r)
	// deal with non utf-8 xml files
	xd.CharsetReader = charset.NewReaderLabel

	var roots []rootFile
	for {
		tok, err := xd.Token()
		if err == io.EOF {
			// regular end of the document
			return roots, nil
		}
		if err != nil {
			// malformed XML or unsupported encoding: do not hide it
			return nil, err
		}

		start, ok := tok.(xml.StartElement)
		if !ok || start.Name.Local != RootFileElement {
			continue
		}

		var file rootFile
		if err := xd.DecodeElement(&file, &start); err != nil {
			return nil, err
		}
		roots = append(roots, file)
	}
}
// addCleartextResource appends name to the list of resources which mustn't
// be encrypted.
func (ep *Epub) addCleartextResource(name string) {
	// append allocates the backing array itself when the list is still nil
	ep.cleartextResources = append(ep.cleartextResources, name)
}
// Read builds an Epub from the content of the zip archive r.
//
// It locates the container file, parses every package (opf) file the
// container references, selects the resources which mustn't be encrypted,
// loads the encryption manifest when the archive contains one, and creates a
// Resource for every file entry except the encryption file and "mimetype".
// Every entry whose name starts with "META-INF" is also flagged as cleartext.
//
// The reader stored in the Contents field of each Resource is opened here and
// is not closed by Read.
//
// An error is returned when the container or a package file cannot be found,
// opened or parsed, when the encryption file exists but cannot be opened or
// parsed, or when a resource cannot be opened. The returned Epub must not be
// used when the error is non-nil.
func Read(r *zip.Reader) (Epub, error) {
	var ep Epub

	container, err := findFileInZip(r, ContainerFile)
	if err != nil {
		return ep, err
	}
	fd, err := container.Open()
	if err != nil {
		return ep, err
	}
	defer fd.Close()

	rootFiles, err := findRootFiles(fd)
	if err != nil {
		return ep, err
	}

	// one package per root file declared in the container
	packages := make([]opf.Package, len(rootFiles))
	for i, root := range rootFiles {
		// the package file itself is never encrypted
		ep.addCleartextResource(root.FullPath)

		file, err := findFileInZip(r, root.FullPath)
		if err != nil {
			return ep, err
		}
		packageFile, err := file.Open()
		if err != nil {
			return ep, err
		}
		// closed when Read returns
		defer packageFile.Close()

		packages[i], err = opf.Parse(packageFile)
		if err != nil {
			fmt.Println("Error parsing the opf file")
			return ep, err
		}
		packages[i].BasePath = filepath.Dir(root.FullPath)
		addCleartextResources(&ep, packages[i])
	}

	// The encryption file is optional: a lookup failure only means that the
	// archive has none. Once found, failing to open or parse it is an error.
	var encryption *xmlenc.Manifest
	f, err := findFileInZip(r, EncryptionFile)
	if err == nil {
		encFile, err := f.Open()
		if err != nil {
			return Epub{}, err
		}
		defer encFile.Close()

		m, err := xmlenc.Read(encFile)
		if err != nil {
			return Epub{}, err
		}
		encryption = &m
	}

	var resources []*Resource
	for _, file := range r.File {
		// EPUBs do not require us to keep directory entries and we cannot process them
		if file.FileInfo().IsDir() {
			continue
		}

		if file.Name != EncryptionFile && file.Name != "mimetype" {
			rc, err := file.Open()
			if err != nil {
				return Epub{}, err
			}

			// A resource is flagged as compressed when the encryption
			// manifest carries, for this file, a property whose compression
			// method is 8.
			// NOTE(review): 8 equals zip.Deflate; presumably it means the
			// resource was deflated before encryption - confirm.
			compressed := false
			if encryption != nil {
				if data, ok := encryption.DataForFile(file.Name); ok && data.Properties != nil {
					for _, prop := range data.Properties.Properties {
						if prop.Compression.Method == 8 {
							compressed = true
							break
						}
					}
				}
			}

			resource := &Resource{
				Path:          file.Name,
				Contents:      rc,
				StorageMethod: file.Method,
				OriginalSize:  file.FileHeader.UncompressedSize64,
				Compressed:    compressed,
			}
			// the content type comes from the manifest of the package which lists the file
			if item, ok := findResourceInPackages(resource, packages); ok {
				resource.ContentType = item.MediaType
			}
			resources = append(resources, resource)
		}

		if strings.HasPrefix(file.Name, "META-INF") {
			ep.addCleartextResource(file.Name)
		}
	}

	ep.Package = packages
	ep.Resource = resources
	ep.Encryption = encryption
	sort.Strings(ep.cleartextResources)

	return ep, nil
}
// addCleartextResources registers in ep the manifest items of p which must
// not be encrypted, i.e. the cover image, the nav document and the NCX.
func addCleartextResources(ep *Epub, p opf.Package) {
	// Default id of the cover item; a meta entry named "cover" overrides it
	// (the last such entry wins).
	coverID := "cover-image"
	for _, m := range p.Metadata.Metas {
		if m.Name == "cover" {
			coverID = m.Content
		}
	}

	for _, it := range p.Manifest.Items {
		isCover := strings.Contains(it.Properties, "cover-image") || it.ID == coverID
		isNav := strings.Contains(it.Properties, "nav")
		isNCX := it.MediaType == ContentType_NCX
		if !isCover && !isNav && !isNCX {
			continue
		}

		// rebuild the path from the package base path, keeping slashes as
		// separators even on Windows
		joined := filepath.Join(p.BasePath, it.Href)
		ep.addCleartextResource(filepath.ToSlash(joined))
	}
}
// findResourceInPackages returns the opf item which corresponds to the path
// of the resource r, searching the manifests of packages in order.
//
// For each package, the resource path is made relative to the package base
// path before the manifest lookup. A package for which no relative path can
// be computed is skipped, so that the remaining packages are still searched.
//
// The boolean result is false, and the item is the zero value, when no
// package lists the resource.
func findResourceInPackages(r *Resource, packages []opf.Package) (opf.Item, bool) {
	for _, p := range packages {
		relative, err := filepath.Rel(p.BasePath, r.Path)
		if err != nil {
			// the resource cannot be addressed from this package; try the next one
			continue
		}
		// avoid insertion of backslashes as separator on Windows
		relative = filepath.ToSlash(relative)
		if item, ok := p.Manifest.ItemWithPath(relative); ok {
			return item, true
		}
	}

	return opf.Item{}, false
}