Skip to content

Commit 0e30ea5

Browse files
committed
Make URL fetching concurrent
1 parent 72293ed commit 0e30ea5

File tree

8 files changed

+227
-108
lines changed

8 files changed

+227
-108
lines changed

README.mdwn

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -310,11 +310,11 @@ a single root for all files in the zipfile.
310310

311311
### `rss`
312312

313-
`rss` is a minimalist rss client. Outputs links as markdown on STDOUT. Takes url
314-
to feed and path to state file. Example usage:
313+
`rss` is a minimalist rss client. Outputs links as markdown on STDOUT. Takes urls
314+
to feeds and path to state file. Example usage:
315315

316316
```bash
317-
$ rss https://blog.afoolishmanifesto.com/index.xml afm.json
317+
$ rss -state feed.json https://blog.afoolishmanifesto.com/index.xml
318318
[Announcing shellquote](https://blog.afoolishmanifesto.com/posts/announcing-shellquote/)
319319
[Detecting who used the EC2 metadata server with BCC](https://blog.afoolishmanifesto.com/posts/detecting-who-used-ec2-metadata-server-bcc/)
320320
[Centralized known_hosts for ssh](https://blog.afoolishmanifesto.com/posts/centralized-known-hosts-for-ssh/)

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ require (
1414
github.com/pierrec/lz4/v3 v3.1.0
1515
github.com/stretchr/testify v1.4.0
1616
golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297 // indirect
17+
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f
1718
golang.org/x/sys v0.0.0-20190904005037-43c01164e931 // indirect
1819
golang.org/x/text v0.3.2
1920
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect

go.sum

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73r
5959
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
6060
golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297 h1:k7pJ2yAPLPgbskkFdhRCsA77k2fySZ1zf2zCjvQCiIM=
6161
golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
62+
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f h1:wMNYb4v58l5UBM7MYRLPG6ZhfOqbKu7X5eyFl8ZhKvA=
6263
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
6364
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
6465
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=

help_generated.go

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -311,11 +311,11 @@ a single root for all files in the zipfile.
311311
312312
### ` + "`" + `rss` + "`" + `
313313
314-
` + "`" + `rss` + "`" + ` is a minimalist rss client. Outputs links as markdown on STDOUT. Takes url
315-
to feed and path to state file. Example usage:
314+
` + "`" + `rss` + "`" + ` is a minimalist rss client. Outputs links as markdown on STDOUT. Takes urls
315+
to feeds and path to state file. Example usage:
316316
317317
` + "`" + `` + "`" + `` + "`" + `bash
318-
$ rss https://blog.afoolishmanifesto.com/index.xml afm.json
318+
$ rss -state feed.json https://blog.afoolishmanifesto.com/index.xml
319319
[Announcing shellquote](https://blog.afoolishmanifesto.com/posts/announcing-shellquote/)
320320
[Detecting who used the EC2 metadata server with BCC](https://blog.afoolishmanifesto.com/posts/detecting-who-used-ec2-metadata-server-bcc/)
321321
[Centralized known_hosts for ssh](https://blog.afoolishmanifesto.com/posts/centralized-known-hosts-for-ssh/)
@@ -466,22 +466,22 @@ func init() {
466466

467467
"replace-unzip": readme[8780:9010],
468468

469-
"rss": readme[9010:9758],
469+
"rss": readme[9010:9768],
470470

471-
"slack-deaddrop": readme[9758:9978],
471+
"slack-deaddrop": readme[9768:9988],
472472

473-
"slack-open": readme[9978:10112],
473+
"slack-open": readme[9988:10122],
474474

475-
"sm-list": readme[10112:10388],
475+
"sm-list": readme[10122:10398],
476476

477-
"srv": readme[10388:10571],
477+
"srv": readme[10398:10581],
478478

479-
"toml2json": readme[10571:10746],
479+
"toml2json": readme[10581:10756],
480480

481-
"undefer": readme[10746:11054],
481+
"undefer": readme[10756:11064],
482482

483-
"uni": readme[11054:11216],
483+
"uni": readme[11064:11226],
484484

485-
"yaml2json": readme[11216:11293],
485+
"yaml2json": readme[11226:11303],
486486
}
487487
}

internal/tool/rss/rss.go

Lines changed: 78 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,24 @@
11
package rss
22

33
import (
4-
"encoding/json"
4+
"context"
5+
"flag"
56
"fmt"
67
"io"
78
"net/url"
89
"os"
9-
"sort"
1010

1111
"github.com/frioux/leatherman/internal/lmhttp"
1212
"github.com/mmcdole/gofeed"
13+
"golang.org/x/sync/errgroup"
1314
)
1415

1516
/*
16-
Run is a minimalist rss client. Outputs links as markdown on STDOUT. Takes url
17-
to feed and path to state file. Example usage:
17+
Run is a minimalist rss client. Outputs links as markdown on STDOUT. Takes urls
18+
to feeds and path to state file. Example usage:
1819
1920
```bash
20-
$ rss https://blog.afoolishmanifesto.com/index.xml afm.json
21+
$ rss -state feed.json https://blog.afoolishmanifesto.com/index.xml
2122
[Announcing shellquote](https://blog.afoolishmanifesto.com/posts/announcing-shellquote/)
2223
[Detecting who used the EC2 metadata server with BCC](https://blog.afoolishmanifesto.com/posts/detecting-who-used-ec2-metadata-server-bcc/)
2324
[Centralized known_hosts for ssh](https://blog.afoolishmanifesto.com/posts/centralized-known-hosts-for-ssh/)
@@ -28,44 +29,102 @@ $ rss https://blog.afoolishmanifesto.com/index.xml afm.json
2829
Command: rss
2930
*/
3031
func Run(args []string, _ io.Reader) error {
31-
if len(args) != 3 {
32-
fmt.Fprintf(os.Stderr, "Usage: %s feedURL statefile\n", args[0])
32+
flags := flag.NewFlagSet("rss", flag.ExitOnError)
33+
34+
var statePath string
35+
36+
flags.StringVar(&statePath, "state", "", "location to store state")
37+
if err := flags.Parse(args[1:]); err != nil {
38+
return fmt.Errorf("flags.Parse: %w", err)
39+
}
40+
41+
if len(flags.Args()) == 0 {
42+
fmt.Fprintf(os.Stderr, "Usage: %s -state rss.json <url> [<url>...]\n", args[0])
3343
os.Exit(1)
3444
}
3545

36-
return run(args[1], args[2], os.Stdout)
46+
if statePath == "" {
47+
fmt.Fprintln(os.Stderr, "-state is required")
48+
os.Exit(1)
49+
}
50+
51+
return run(statePath, flags.Args(), os.Stdout)
3752
}
3853

39-
func run(urlString, statePath string, w io.Writer) error {
40-
fp := gofeed.NewParser()
54+
func loadFeed(fp *gofeed.Parser, urlString string) ([]*gofeed.Item, error) {
4155
feedURL, err := url.Parse(urlString)
4256
if err != nil {
43-
return fmt.Errorf("Couldn't parse feed url (%s): %w", feedURL, err)
57+
return nil, fmt.Errorf("Couldn't parse feed url (%s): %w", urlString, err)
4458
}
4559

4660
resp, err := lmhttp.Get(urlString)
4761
if err != nil {
48-
return fmt.Errorf("Couldn't get feed: %w", err)
62+
return nil, fmt.Errorf("Couldn't get feed: %w", err)
4963
}
5064

5165
f, err := fp.Parse(resp.Body)
5266
if err != nil {
53-
return fmt.Errorf("Couldn't fetch feed (%s): %w", feedURL, err)
67+
return nil, fmt.Errorf("Couldn't fetch feed (%s): %w", feedURL, err)
5468
}
5569
fixItems(feedURL, f.Items)
5670

57-
seen, err := syncRead(statePath, f.Items)
58-
if err != nil {
59-
return fmt.Errorf("Couldn't sync read (%s): %w", feedURL, err)
71+
return f.Items, nil
72+
}
73+
74+
func syncFeed(state indexedStates, items []*gofeed.Item, urlString string, w io.Writer) error {
75+
if state[urlString] == nil {
76+
state[urlString] = make(map[string]bool, len(items))
6077
}
6178

62-
items := newItems(seen, f.Items)
79+
items = newItems(state[urlString], items)
80+
81+
for _, i := range items {
82+
state[urlString][i.GUID] = true
83+
}
6384

6485
renderItems(w, items)
6586

66-
err = os.Rename(statePath+".tmp", statePath)
87+
return nil
88+
}
89+
90+
func run(statePath string, urls []string, w io.Writer) error {
91+
state, err := readState(statePath)
6792
if err != nil {
68-
return fmt.Errorf("Couldn't rename state file (%s): %w", feedURL, err)
93+
return fmt.Errorf("couldn't read state: %w", err)
94+
}
95+
fp := gofeed.NewParser()
96+
97+
results := make([][]*gofeed.Item, len(urls))
98+
g, _ := errgroup.WithContext(context.Background())
99+
100+
for i, urlString := range urls {
101+
i, urlString := i, urlString
102+
g.Go(func() error { // O(n) goroutines
103+
items, err := loadFeed(fp, urlString)
104+
if err != nil {
105+
return err
106+
}
107+
results[i] = items
108+
return nil
109+
})
110+
}
111+
112+
if err := g.Wait(); err != nil {
113+
fmt.Fprintf(os.Stderr, "%s\n", err)
114+
os.Exit(1)
115+
}
116+
for i, items := range results {
117+
if err := syncFeed(state, items, urls[i], w); err != nil {
118+
fmt.Fprintf(os.Stderr, "%s\n", err)
119+
os.Exit(1)
120+
}
121+
}
122+
123+
if err := writeState(statePath, state); err != nil {
124+
return fmt.Errorf("Couldn't save state file: %w", err)
125+
}
126+
if err := os.Rename(statePath+".tmp", statePath); err != nil {
127+
return fmt.Errorf("Couldn't rename state file: %w", err)
69128
}
70129

71130
return nil
@@ -108,76 +167,3 @@ func newItems(seen map[string]bool, items []*gofeed.Item) []*gofeed.Item {
108167

109168
return ret
110169
}
111-
112-
// Store JSON containing seen GUIDs for the current feed.
113-
func syncRead(state string, items []*gofeed.Item) (map[string]bool, error) {
114-
ret := make(map[string]bool, len(items))
115-
116-
guids, err := readState(state)
117-
if err != nil {
118-
return nil, fmt.Errorf("couldn't read state: %w", err)
119-
}
120-
121-
for _, g := range guids {
122-
ret[g] = true
123-
}
124-
125-
// Generate news state
126-
newState := make(map[string]bool, len(items)+len(guids))
127-
128-
for _, g := range guids {
129-
newState[g] = true
130-
}
131-
for _, i := range items {
132-
newState[i.GUID] = true
133-
}
134-
toStore := make([]string, 0, len(newState))
135-
136-
for k := range newState {
137-
toStore = append(toStore, k)
138-
}
139-
sort.Strings(toStore)
140-
141-
err = writeState(state, toStore)
142-
if err != nil {
143-
return nil, fmt.Errorf("couldn't write state: %w", err)
144-
}
145-
return ret, nil
146-
}
147-
148-
func readState(state string) ([]string, error) {
149-
file, err := os.Open(state)
150-
if err != nil && !os.IsNotExist(err) {
151-
return nil, fmt.Errorf("couldn't open state file: %w", err)
152-
}
153-
154-
var guids []string
155-
156-
if err == nil {
157-
decoder := json.NewDecoder(file)
158-
err = decoder.Decode(&guids)
159-
if err != nil && !os.IsNotExist(err) {
160-
return nil, fmt.Errorf("couldn't decode state file: %w", err)
161-
}
162-
}
163-
164-
return guids, nil
165-
}
166-
167-
func writeState(state string, guids []string) error {
168-
tmp, err := os.Create(state + ".tmp")
169-
if err != nil {
170-
return fmt.Errorf("couldn't create state file: %w", err)
171-
}
172-
encoder := json.NewEncoder(tmp)
173-
encoder.SetIndent("", "\t")
174-
err = encoder.Encode(guids)
175-
if err != nil {
176-
return fmt.Errorf("couldn't encode state file: %w", err)
177-
}
178-
err = tmp.Close()
179-
if err != nil {
180-
return fmt.Errorf("couldn't write state file: %w", err)
181-
}
182-
return nil
183-
}

internal/tool/rss/rss_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ might realize.&lt;/p&gt;
429429
}
430430

431431
buf := &bytes.Buffer{}
432-
err = run(ts.URL, f.Name(), buf)
432+
err = run(f.Name(), []string{ts.URL}, buf)
433433
assert.NoError(t, err)
434434
assert.Equal(t, `[Sorting Books](https://blog.afoolishmanifesto.com/posts/sorting-books/)
435435
[Automating Email](https://blog.afoolishmanifesto.com/posts/automating-email/)

0 commit comments

Comments
 (0)