Add raw file and pdf fix for http
boyter committed Mar 9, 2020
1 parent bd75a19 commit 5e6e828
Showing 2 changed files with 40 additions and 14 deletions.
18 changes: 7 additions & 11 deletions README.md
@@ -20,15 +20,16 @@ try using cs and it's right at the top

https://github.com/BurntSushi/ripgrep/issues/95

active bugs
search for cs --hidden --no-gitignore --no-ignore 英文 has highlight issues due to multiple byte ending matches
```
BUGS
search for cs --hidden --no-gitignore --no-ignore 英文 cuts in the middle of a rune
clicking on a file in PDF mode shows the binary, not the extracted text
TODO
search by filename
search by filename (currently only ranks by it) as additional OR search
clean up parser so multiple spaces aren't tokens, or flag them to be ignored
if someone enables the --pdf flag outside terminal mode, spawn a background process to extract the text so it's pre-cached (see the sketch below this block)
if in http mode with --pdf, pull the extracted text when displaying, and possibly add a link to the raw file

add "raw" link on the http page
```
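
A minimal sketch in Go of the pre-caching TODO above — the helper name, the cache shape, the sample file name, and the use of an external pdftotext-style tool are all assumptions for illustration, not the project's actual API:

```
package main

import (
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
)

// precachePdfText is a hypothetical helper: it walks the given paths in a
// background goroutine and stores extracted text in cache, so later lookups
// never have to touch the PDF bytes themselves.
func precachePdfText(paths []string, cache map[string]string, mu *sync.Mutex, done chan<- struct{}) {
	go func() {
		defer close(done)
		for _, p := range paths {
			if strings.ToLower(filepath.Ext(p)) != ".pdf" {
				continue
			}
			// Assumes a pdftotext-style tool on PATH; "-" sends the text to stdout.
			out, err := exec.Command("pdftotext", p, "-").Output()
			if err != nil {
				continue // skip PDFs that fail extraction
			}
			mu.Lock()
			cache[p] = string(out)
			mu.Unlock()
		}
	}()
}

func main() {
	cache := map[string]string{}
	var mu sync.Mutex
	done := make(chan struct{})
	precachePdfText([]string{"paper.pdf"}, cache, &mu, done)
	<-done // block so the process doesn't exit before extraction finishes
}
```

Doing extraction off the request path keeps the HTTP handlers down to a map lookup, which is what the __pdfCache read in processor/http.go below relies on.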

Snippet generation

@@ -55,11 +56,6 @@ https://github.com/sourcegraph/src-cli

https://arxiv.org/pdf/1904.03061.pdf

hyperfine './cs "/(i?)test/"' './cs test' 'rg -i test' 'cs test' 'ag -i test'
hyperfine './cs "/([A-Z][a-z]+)\s+([A-Z][a-z]+)/"' 'rg -uu "([A-Z][a-z]+)\s+([A-Z][a-z]+)"'
hyperfine './cs "/[ab]+/"' 'rg -uu "[ab]+"'




https://www.researchgate.net/publication/4004411_Topic_extraction_from_news_archive_using_TFPDF_algorithm
36 changes: 33 additions & 3 deletions processor/http.go
@@ -3,6 +3,7 @@ package processor
import (
"crypto/md5"
"encoding/hex"
"errors"
"fmt"
"github.com/boyter/cs/file"
str "github.com/boyter/cs/string"
@@ -49,6 +50,18 @@ type facet struct {
}

func StartHttpServer() {
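// serve the raw on-disk bytes for whatever path follows /file/raw/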
http.HandleFunc("/file/raw/", func(w http.ResponseWriter, r *http.Request) {
path := strings.Replace(r.URL.Path, "/file/raw/", "", 1)

log.Info().
Str("unique_code", "f24a4b1d").
Str("path", path).
Msg("raw page")

http.ServeFile(w, r, path)
return
})

http.HandleFunc("/file/", func(w http.ResponseWriter, r *http.Request) {
startTime := makeTimestampMilli()
startPos := tryParseInt(r.URL.Query().Get("sp"), 0)
@@ -63,15 +76,30 @@
Str("path", path).
Msg("file view page")

content, err := ioutil.ReadFile(path)
var content []byte
var err error

// if it's a PDF we should go to the cache to fetch it
extension := file.GetExtension(path)
if strings.ToLower(extension) == "pdf" {
c, ok := __pdfCache[path]
if ok {
content = []byte(c)
} else {
err = errors.New("pdf not found in cache")
}
} else {
content, err = ioutil.ReadFile(path)
}

if err != nil {
log.Error().
Str("unique_code", "d063c1fd").
Int("startpos", startPos).
Int("endpos", endPos).
Str("path", path).
Msg("error reading file")
panic(err)
http.Redirect(w, r, "/", http.StatusTemporaryRedirect)
return
}

// Create a random string to define where the start and end of
@@ -130,6 +158,7 @@ func StartHttpServer() {
</div>
<div>
<h4>{{ .Location }}</h4>
<small>[<a href="/file/raw/{{ .Location }}">raw file</a>]</small>
<pre>{{ .Content }}</pre>
</div>
</body>
@@ -149,6 +178,7 @@ func StartHttpServer() {
panic(err)
}

return
})

http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
@@ -384,7 +414,7 @@ func StartHttpServer() {
if err != nil {
panic(err)
}

return
})

log.Info().
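
A side note on the new /file/raw/ endpoint: it hands the remainder of the URL straight to http.ServeFile. A hardened variant might clean the path and reject absolute or escaping paths first — a minimal sketch under that assumption (the handler name, port, and working-directory restriction are illustrative, not the project's actual code):

```
package main

import (
	"log"
	"net/http"
	"path/filepath"
	"strings"
)

// rawFileHandler is a hypothetical hardened take on the /file/raw/ handler
// above: it cleans the requested path and refuses anything absolute or
// climbing out of the serving directory before delegating to ServeFile.
func rawFileHandler(w http.ResponseWriter, r *http.Request) {
	path := strings.TrimPrefix(r.URL.Path, "/file/raw/")
	clean := filepath.Clean(path)
	if filepath.IsAbs(clean) || clean == ".." || strings.HasPrefix(clean, "../") {
		http.Error(w, "invalid path", http.StatusBadRequest)
		return
	}
	http.ServeFile(w, r, clean)
}

func main() {
	http.HandleFunc("/file/raw/", rawFileHandler)
	log.Fatal(http.ListenAndServe(":8080", nil))
}
```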
