Skip to content

Commit

Permalink
web ui: index and basic details page, api refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
derfenix committed Apr 4, 2023
1 parent 2a8b941 commit f47dbef
Show file tree
Hide file tree
Showing 21 changed files with 821 additions and 58 deletions.
18 changes: 11 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,17 @@ variables:
* **LOGGING_DEBUG** — enable debug logs (default `false`)
* **API**
* **API_ADDRESS** — address the API server will listen on (default `0.0.0.0:5001`)
* **UI**
* **UI_ENABLED** — Enable builtin web UI (default `true`)
* **UI_PREFIX** — Prefix for the web UI (default `/`)
* **UI_THEME** — UI theme name (default `basic`). No other values are available yet
* **PDF**
* **PDF_LANDSCAPE** — use landscape page orientation instead of portrait (default `false`)
* **PDF_GRAYSCALE** — use grayscale filter for the output pdf (default `false`)
* **PDF_MEDIA_PRINT** — use media type `print` for the request (default `true`)
* **PDF_ZOOM** — zoom page (default `1.0` i.e. no actual zoom)
* **PDF_VIEWPORT** — use specified viewport value (default `1920x1080`)
* **PDF_DPI** — use specified DPI value for the output pdf (default `300`)
* **PDF_VIEWPORT** — use specified viewport value (default `1280x720`)
* **PDF_DPI** — use specified DPI value for the output pdf (default `150`)
* **PDF_FILENAME** — use specified name for output pdf file (default `page.pdf`)


Expand Down Expand Up @@ -60,7 +64,7 @@ docker compose up -d webarchive
### 2. Add a page

```shell
curl -X POST --location "http://localhost:5001/pages" \
curl -X POST --location "http://localhost:5001/api/v1/pages" \
-H "Content-Type: application/json" \
-d "{
\"url\": \"https://github.com/wkhtmltopdf/wkhtmltopdf/issues/1937\",
Expand All @@ -75,13 +79,13 @@ or

```shell
curl -X POST --location \
"http://localhost:5001/pages?url=https%3A%2F%2Fgithub.com%2Fwkhtmltopdf%2Fwkhtmltopdf%2Fissues%2F1937&formats=pdf%2Cheaders&description=Foo+Bar"
"http://localhost:5001/api/v1/pages?url=https%3A%2F%2Fgithub.com%2Fwkhtmltopdf%2Fwkhtmltopdf%2Fissues%2F1937&formats=pdf%2Cheaders&description=Foo+Bar"
```

### 3. Get the page's info

```shell
curl -X GET --location "http://localhost:5001/pages/$page_id" | jq .
curl -X GET --location "http://localhost:5001/api/v1/pages/$page_id" | jq .
```
where `$page_id` is the value of the `id` field from the previous command's response.
If the `status` field in the response is `success` (or `with_errors`), the `results` field
Expand All @@ -90,15 +94,15 @@ will contain all processed formats with ids of the stored files.
### 4. Open file in browser

```shell
xdg-open "http://localhost:5001/pages/$page_id/file/$file_id"
xdg-open "http://localhost:5001/api/v1/pages/$page_id/file/$file_id"
```
Where `$page_id` is the value of the `id` field from the previous command's response, and
`$file_id` is the id of the file you want to open.

### 5. List all stored pages

```shell
curl -X GET --location "http://localhost:5001/pages" | jq .
curl -X GET --location "http://localhost:5001/api/v1/pages" | jq .
```

## Roadmap
Expand Down
1 change: 1 addition & 0 deletions adapters/processors/pdf.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ func (p *PDF) Process(_ context.Context, url string) ([]entity.File, error) {
page.FooterFontSize.Set(10)
page.Zoom.Set(p.cfg.Zoom)
page.ViewportSize.Set(p.cfg.Viewport)
page.NoBackground.Set(true)

gen.AddPage(page)

Expand Down
63 changes: 63 additions & 0 deletions adapters/processors/processors.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"net/http/cookiejar"
"time"

"golang.org/x/net/html"

"github.com/derfenix/webarchive/config"
"github.com/derfenix/webarchive/entity"
)
Expand Down Expand Up @@ -52,6 +54,7 @@ func NewProcessors(cfg config.Config) (*Processors, error) {
}

procs := Processors{
client: httpClient,
processors: map[entity.Format]processor{
entity.FormatHeaders: NewHeaders(httpClient),
entity.FormatPDF: NewPDF(cfg.PDF),
Expand All @@ -64,6 +67,7 @@ func NewProcessors(cfg config.Config) (*Processors, error) {

// Processors groups the per-format page processors together with the HTTP
// client that GetMeta uses to download pages.
type Processors struct {
// processors maps an output format to the processor that produces it.
processors map[entity.Format]processor
// client is the HTTP client set by NewProcessors and used by GetMeta.
client *http.Client
}

func (p *Processors) Process(ctx context.Context, format entity.Format, url string) entity.Result {
Expand Down Expand Up @@ -93,3 +97,62 @@ func (p *Processors) OverrideProcessor(format entity.Format, proc processor) err

return nil
}

// GetMeta downloads the page at url and extracts its metadata (title and
// description) from the HTML markup.
//
// The request is bound to ctx and sent through the Processors' HTTP client.
// An error is returned when the request cannot be built or executed, when the
// server answers with a status other than 200 OK, when the response has no
// body, or when the body cannot be parsed as HTML. On error the returned
// entity.Meta is always the zero value.
func (p *Processors) GetMeta(ctx context.Context, url string) (entity.Meta, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return entity.Meta{}, fmt.Errorf("new request: %w", err)
	}

	response, err := p.client.Do(req)
	if err != nil {
		return entity.Meta{}, fmt.Errorf("do request: %w", err)
	}

	// Register the close right after a successful Do so the body is released
	// on every return path, including the non-200 early return below;
	// otherwise the underlying connection leaks.
	defer func() {
		if response.Body != nil {
			// Nothing useful can be done with a close error on a read-only body.
			_ = response.Body.Close()
		}
	}()

	if response.StatusCode != http.StatusOK {
		return entity.Meta{}, fmt.Errorf("want status 200, got %d", response.StatusCode)
	}

	if response.Body == nil {
		return entity.Meta{}, fmt.Errorf("empty response body")
	}

	htmlNode, err := html.Parse(response.Body)
	if err != nil {
		return entity.Meta{}, fmt.Errorf("parse response body: %w", err)
	}

	meta := entity.Meta{}
	getMetaData(htmlNode, &meta)

	return meta, nil
}

// getMetaData walks the HTML tree rooted at n depth-first and fills meta with
// the first child node's data of each <title> element and the "content"
// attribute of each <meta name="description"> element. When several matching
// elements exist, the last one visited wins. A nil n is a no-op.
func getMetaData(n *html.Node, meta *entity.Meta) {
	if n == nil {
		return
	}

	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if c.Type == html.ElementNode {
			switch c.Data {
			case "title":
				// An empty <title></title> has no child node; dereferencing
				// FirstChild unconditionally would panic on such pages.
				if c.FirstChild != nil {
					meta.Title = c.FirstChild.Data
				}
			case "meta":
				attrs := make(map[string]string, len(c.Attr))
				for _, attr := range c.Attr {
					attrs[attr.Key] = attr.Val
				}

				if name, ok := attrs["name"]; ok && name == "description" {
					meta.Description = attrs["content"]
				}
			}
		}

		getMetaData(c, meta)
	}
}
64 changes: 61 additions & 3 deletions adapters/repository/badger/page.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,18 +64,18 @@ func (p *Page) GetFile(_ context.Context, pageID, fileID uuid.UUID) (*entity.Fil
return file, nil
}

func (p *Page) Save(_ context.Context, site *entity.Page) error {
func (p *Page) Save(_ context.Context, page *entity.Page) error {
if p.db.IsClosed() {
return ErrDBClosed
}

marshaled, err := marshal(site)
marshaled, err := marshal(page)
if err != nil {
return fmt.Errorf("marshal data: %w", err)
}

if err := p.db.Update(func(txn *badger.Txn) error {
if err := txn.Set(p.key(site), marshaled); err != nil {
if err := txn.Set(p.key(page), marshaled); err != nil {
return fmt.Errorf("put data: %w", err)
}

Expand Down Expand Up @@ -151,6 +151,64 @@ func (p *Page) ListAll(ctx context.Context) ([]*entity.Page, error) {
Formats: page.Formats,
Version: page.Version,
Status: page.Status,
Meta: page.Meta,
})
}

return nil
})

if err != nil {
return nil, fmt.Errorf("view: %w", err)
}

sort.Slice(pages, func(i, j int) bool {
return pages[i].Created.After(pages[j].Created)
})

return pages, nil
}

func (p *Page) ListUnprocessed(ctx context.Context) ([]*entity.Page, error) {
pages := make([]*entity.Page, 0, 100)

err := p.db.View(func(txn *badger.Txn) error {
iterator := txn.NewIterator(badger.DefaultIteratorOptions)

defer iterator.Close()

for iterator.Seek(p.prefix); iterator.ValidForPrefix(p.prefix); iterator.Next() {
if err := ctx.Err(); err != nil {
return fmt.Errorf("context canceled: %w", err)
}

var page entity.Page

err := iterator.Item().Value(func(val []byte) error {
if err := unmarshal(val, &page); err != nil {
return fmt.Errorf("unmarshal: %w", err)
}

return nil
})

if err != nil {
return fmt.Errorf("get item: %w", err)
}

if page.Status != entity.StatusProcessing {
continue
}

pages = append(pages, &entity.Page{
ID: page.ID,
URL: page.URL,
Description: page.Description,
Created: page.Created,
Formats: page.Formats,
Version: page.Version,
Status: page.Status,
Meta: page.Meta,
})
}

Expand Down
15 changes: 14 additions & 1 deletion api/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ info:
description: API description in Markdown.
version: 1.0.0
servers:
- url: 'https://api.example.com'
- url: 'https://api.example.com/api/v1'
paths:
/pages:
get:
Expand Down Expand Up @@ -183,12 +183,25 @@ components:
$ref: '#/components/schemas/format'
status:
$ref: '#/components/schemas/status'
meta:
type: object
properties:
title:
type: string
description:
type: string
error:
type: string
required:
- title
- description
required:
- id
- url
- formats
- status
- created
- meta
result:
type: object
properties:
Expand Down

0 comments on commit f47dbef

Please sign in to comment.