Permalink
Browse files

Use golangplus

  • Loading branch information...
1 parent fe8dec8 commit d595bd14fa9a40491f3b99443d121865ce7e9a59 @daviddengcn committed Aug 19, 2015
Showing with 139 additions and 118 deletions.
  1. +4 −2 config.go
  2. +14 −11 crawler.go
  3. +1 −1 crawler/cmain.go
  4. +4 −3 crawler/package.go
  5. +2 −1 crawler_test.go
  6. +16 −14 data.go
  7. +4 −2 db.go
  8. +21 −19 index.go
  9. +10 −11 index_test.go
  10. +10 −11 ranking.go
  11. +5 −3 segment.go
  12. +2 −2 server/api.go
  13. +8 −5 server/db.go
  14. +15 −13 server/search.go
  15. +10 −8 server/smain.go
  16. +5 −4 text.go
  17. +1 −1 tocrawl/tocrawl.go
  18. +6 −6 tokenize_test.go
  19. +1 −1 tools/datacnv.go
View
@@ -30,6 +30,8 @@ import (
"log"
"time"
+ "github.com/golangplus/strings"
+
"github.com/daviddengcn/go-ljson-conf"
"github.com/daviddengcn/go-villa"
)
@@ -99,7 +101,7 @@ var (
*/
CrawlerVersion = 5
- NonCrawlHosts = villa.StrSet{}
+ NonCrawlHosts = stringsp.Set{}
NonStorePackageRegexps = []string{}
)
@@ -135,7 +137,7 @@ func init() {
CrawlerDuePerRun = conf.Duration("crawler.due_per_run", CrawlerDuePerRun)
ncHosts := conf.StringList("crawler.noncrawl_hosts", nil)
- NonCrawlHosts.Put(ncHosts...)
+ NonCrawlHosts.Add(ncHosts...)
CrawlerGithubClientID = conf.String("crawler.github.clientid", "")
CrawlerGithubClientSecret = conf.String("crawler.github.clientsecret", "")
View
@@ -18,6 +18,9 @@ import (
"time"
"unicode/utf8"
+ "github.com/golangplus/bytes"
+ "github.com/golangplus/strings"
+
"github.com/daviddengcn/gddo/doc"
"github.com/daviddengcn/go-index"
"github.com/daviddengcn/go-villa"
@@ -72,7 +75,7 @@ func (br *BlackRequest) Do(req *http.Request) (*http.Response, error) {
br.RUnlock()
if ok {
log.Printf("%s was found in 500 blacklist, return it directly", u)
- r.Body = villa.NewPByteSlice(nil)
+ r.Body = bytesp.NewPSlice(nil)
return &r, nil
}
resp, err := br.client.Do(req)
@@ -198,7 +201,7 @@ func ReadmeToText(fn, data string) string {
func Plusone(httpClient doc.HttpClient, url string) (int, error) {
req, err := http.NewRequest("POST",
"https://clients6.google.com/rpc?key=AIzaSyCKSbrvQasunBoV16zDH9R33D88CeLr9gQ",
- villa.NewPByteSlice([]byte(
+ bytesp.NewPSlice([]byte(
`[{"method":"pos.plusones.get","id":"p","params":{"nolog":true,"id": "`+
url+`","source":"widget","userId":"@viewer","groupId":"@self"},"jsonrpc":"2.0","key":"p","apiVersion":"v1"}]`)))
if err != nil {
@@ -389,20 +392,20 @@ func CrawlPackage(httpClient doc.HttpClient, pkg string,
readmeData = readmeData[:100*1024]
}
- importsSet := villa.NewStrSet(pdoc.Imports...)
+ importsSet := stringsp.NewSet(pdoc.Imports...)
importsSet.Delete(pdoc.ImportPath)
imports := importsSet.Elements()
- testImports := villa.NewStrSet(pdoc.TestImports...)
- testImports.Put(pdoc.XTestImports...)
+ testImports := stringsp.NewSet(pdoc.TestImports...)
+ testImports.Add(pdoc.XTestImports...)
testImports.Delete(imports...)
testImports.Delete(pdoc.ImportPath)
- var exported villa.StrSet
+ var exported stringsp.Set
for _, f := range pdoc.Funcs {
- exported.Put(f.Name)
+ exported.Add(f.Name)
}
for _, t := range pdoc.Types {
- exported.Put(t.Name)
+ exported.Add(t.Name)
}
return &Package{
@@ -536,7 +539,7 @@ type PackedDocDB struct {
}
func (db PackedDocDB) Get(key string, data interface{}) bool {
- var bs villa.ByteSlice
+ var bs bytesp.Slice
if ok := db.MemDB.Get(key, (*[]byte)(&bs)); !ok {
return false
}
@@ -549,7 +552,7 @@ func (db PackedDocDB) Get(key string, data interface{}) bool {
}
func (db PackedDocDB) Put(key string, data interface{}) {
- var bs villa.ByteSlice
+ var bs bytesp.Slice
enc := gob.NewEncoder(&bs)
if err := enc.Encode(data); err != nil {
log.Printf("Put %s failed: %v", key, err)
@@ -562,7 +565,7 @@ func (db PackedDocDB) Put(key string, data interface{}) {
func (db PackedDocDB) Iterate(
output func(key string, val interface{}) error) error {
return db.MemDB.Iterate(func(key string, val interface{}) error {
- dec := gob.NewDecoder(villa.NewPByteSlice(val.([]byte)))
+ dec := gob.NewDecoder(bytesp.NewPSlice(val.([]byte)))
var info DocInfo
if err := dec.Decode(&info); err != nil {
log.Printf("Decode %s failed: %v", key, err)
View
@@ -62,7 +62,7 @@ func loadAllDocsPkgs(in kv.DirInput) error {
}
return err
}
- allDocsPkgs.Put(string(key))
+ allDocsPkgs.Add(string(key))
// value is ignored
}
}
View
@@ -7,9 +7,10 @@ import (
"strings"
"time"
+ "github.com/golangplus/strings"
+
"github.com/daviddengcn/gcse"
"github.com/daviddengcn/gddo/doc"
- "github.com/daviddengcn/go-villa"
"github.com/daviddengcn/sophie"
"github.com/daviddengcn/sophie/kv"
"github.com/daviddengcn/sophie/mr"
@@ -20,7 +21,7 @@ const (
)
var (
- allDocsPkgs villa.StrSet
+ allDocsPkgs stringsp.Set
)
// Schedule a package for next crawling cycle, commonly after a successful
@@ -32,7 +33,7 @@ func schedulePackageNextCrawl(pkg string, etag string) {
}
func appendPackage(pkg string) {
- cDB.AppendPackage(pkg, allDocsPkgs.In)
+ cDB.AppendPackage(pkg, allDocsPkgs.Contain)
}
func packageToDoc(p *gcse.Package) gcse.DocInfo {
View
@@ -7,6 +7,7 @@ import (
"testing"
"time"
+ "github.com/golangplus/bytes"
"github.com/golangplus/testing/assert"
"github.com/daviddengcn/gddo/doc"
@@ -161,7 +162,7 @@ func TestCrawlingEntry(t *testing.T) {
Etag: "Hello",
}
- var buf villa.ByteSlice
+ var buf bytesp.Slice
assert.NoError(t, src.WriteTo(&buf))
var dst CrawlingEntry
View
@@ -7,9 +7,11 @@ import (
"time"
"unicode"
+ "github.com/golangplus/bytes"
+ "github.com/golangplus/strings"
+
"github.com/agonopol/go-stem"
"github.com/daviddengcn/go-index"
- "github.com/daviddengcn/go-villa"
"github.com/daviddengcn/sophie"
)
@@ -96,7 +98,7 @@ func NormWord(word string) string {
return word
}
-var stopWords = villa.NewStrSet(
+var stopWords = stringsp.NewSet(
"the", "on", "in", "as",
)
@@ -155,40 +157,40 @@ func CheckCamel(last, current rune) index.RuneType {
}
// a block does not contain blanks
-func appendTokensOfBlock(tokens villa.StrSet, block []byte) villa.StrSet {
+func appendTokensOfBlock(tokens stringsp.Set, block []byte) stringsp.Set {
lastToken := ""
- index.Tokenize(CheckRuneType, (*villa.ByteSlice)(&block),
+ index.Tokenize(CheckRuneType, (*bytesp.Slice)(&block),
func(token []byte) error {
tokenStr := string(token)
if isCamel(tokenStr) {
last := ""
- index.Tokenize(CheckCamel, villa.NewPByteSlice(token),
+ index.Tokenize(CheckCamel, bytesp.NewPSlice(token),
func(token []byte) error {
tokenStr := string(token)
tokenStr = NormWord(tokenStr)
- if !stopWords.In(tokenStr) {
- tokens.Put(tokenStr)
+ if !stopWords.Contain(tokenStr) {
+ tokens.Add(tokenStr)
}
if last != "" {
- tokens.Put(last + string(tokenStr))
+ tokens.Add(last + string(tokenStr))
}
last = tokenStr
return nil
})
}
tokenStr = NormWord(tokenStr)
- if !stopWords.In(tokenStr) {
- tokens.Put(tokenStr)
+ if !stopWords.Contain(tokenStr) {
+ tokens.Add(tokenStr)
}
if lastToken != "" {
if tokenStr[0] > 128 && lastToken[0] > 128 {
// Chinese bigrams
- tokens.Put(lastToken + tokenStr)
+ tokens.Add(lastToken + tokenStr)
} else if tokenStr[0] <= 128 && lastToken[0] <= 128 {
- tokens.Put(lastToken + "-" + tokenStr)
+ tokens.Add(lastToken + "-" + tokenStr)
}
}
@@ -198,12 +200,12 @@ func appendTokensOfBlock(tokens villa.StrSet, block []byte) villa.StrSet {
return tokens
}
-func AppendTokens(tokens villa.StrSet, text []byte) villa.StrSet {
+func AppendTokens(tokens stringsp.Set, text []byte) stringsp.Set {
textBuf := filterURLs(text)
textBuf = filterEmails(textBuf)
index.Tokenize(index.SeparatorFRuneTypeFunc(unicode.IsSpace),
- (*villa.ByteSlice)(&textBuf), func(block []byte) error {
+ (*bytesp.Slice)(&textBuf), func(block []byte) error {
tokens = appendTokensOfBlock(tokens, block)
return nil
})
View
@@ -9,6 +9,8 @@ import (
"sync"
"time"
+ "github.com/golangplus/strings"
+
"github.com/daviddengcn/go-index"
"github.com/daviddengcn/go-villa"
)
@@ -313,11 +315,11 @@ func (ti *TokenIndexer) Export(root villa.Path, kind string) error {
return ti.TokenIndexer.Save(f)
}
-func (ti *TokenIndexer) Put(id string, tokens villa.StrSet) {
+func (ti *TokenIndexer) Put(id string, tokens stringsp.Set) {
ti.Lock()
defer ti.Unlock()
- ti.TokenIndexer.Put(id, tokens)
+ ti.TokenIndexer.PutTokens(id, tokens)
ti.lastModified = time.Now()
ti.modified = true
}
View
@@ -5,6 +5,8 @@ import (
"log"
"time"
+ "github.com/golangplus/strings"
+
"github.com/daviddengcn/go-index"
"github.com/daviddengcn/go-villa"
"github.com/daviddengcn/sophie"
@@ -22,14 +24,14 @@ var errNotDocInfo = errors.New("Value is not DocInfo")
// Excludes packages in src that have the same full project as any element in excl.
func excludeImports(src, excl []string) (dst []string) {
- exclPrjsSets := villa.NewStrSet()
+ exclPrjsSets := stringsp.NewSet()
for _, pkg := range excl {
- exclPrjsSets.Put(FullProjectOfPackage(pkg))
+ exclPrjsSets.Add(FullProjectOfPackage(pkg))
}
for _, pkg := range src {
prj := FullProjectOfPackage(string(pkg))
- if !exclPrjsSets.In(prj) {
+ if !exclPrjsSets.Contain(prj) {
dst = append(dst, pkg)
}
}
@@ -88,21 +90,21 @@ func Index(docDB mr.Input) (*index.TokenSetSearcher, error) {
}
filterDocInfo(&docInfo)
- importsDB.Put(string(pkg), villa.NewStrSet(docInfo.Imports...))
- testImportsDB.Put(string(pkg),
- villa.NewStrSet(docInfo.TestImports...))
+ importsDB.PutTokens(string(pkg), stringsp.NewSet(docInfo.Imports...))
+ testImportsDB.PutTokens(string(pkg),
+ stringsp.NewSet(docInfo.TestImports...))
- var projects villa.StrSet
+ var projects stringsp.Set
for _, imp := range docInfo.Imports {
- projects.Put(FullProjectOfPackage(imp))
+ projects.Add(FullProjectOfPackage(imp))
}
for _, imp := range docInfo.TestImports {
- projects.Put(FullProjectOfPackage(imp))
+ projects.Add(FullProjectOfPackage(imp))
}
prj := FullProjectOfPackage(string(pkg))
orgProjects := prjImportsDB.TokensOfId(prj)
- projects.Put(orgProjects...)
- prjImportsDB.Put(prj, projects)
+ projects.Add(orgProjects...)
+ prjImportsDB.PutTokens(prj, projects)
// update stars
if cur, ok := prjStars[prj]; !ok ||
@@ -157,12 +159,12 @@ func Index(docDB mr.Input) (*index.TokenSetSearcher, error) {
perStarCount :=
float64(prjStars[prj].StarCount) / float64(impPrjsCnt)
- var projects villa.StrSet
+ var projects stringsp.Set
for _, imp := range hitInfo.Imported {
- projects.Put(FullProjectOfPackage(imp))
+ projects.Add(FullProjectOfPackage(imp))
}
for _, imp := range hitInfo.TestImported {
- projects.Put(FullProjectOfPackage(imp))
+ projects.Add(FullProjectOfPackage(imp))
}
assignedStarCount = perStarCount * float64(len(projects))
}
@@ -209,11 +211,11 @@ func Index(docDB mr.Input) (*index.TokenSetSearcher, error) {
}
hit.StaticRank = rank
- var nameTokens villa.StrSet
+ var nameTokens stringsp.Set
nameTokens = AppendTokens(nameTokens, []byte(hit.Name))
- var tokens villa.StrSet
- tokens.Put(nameTokens.Elements()...)
+ var tokens stringsp.Set
+ tokens.Add(nameTokens.Elements()...)
tokens = AppendTokens(tokens, []byte(hit.Package))
tokens = AppendTokens(tokens, []byte(hit.Description))
tokens = AppendTokens(tokens, []byte(hit.ReadmeData))
@@ -222,10 +224,10 @@ func Index(docDB mr.Input) (*index.TokenSetSearcher, error) {
AppendTokens(tokens, []byte(word))
}
- ts.AddDoc(map[string]villa.StrSet{
+ ts.AddDoc(map[string]stringsp.Set{
IndexTextField: tokens,
IndexNameField: nameTokens,
- IndexPkgField: villa.NewStrSet(hit.Package),
+ IndexPkgField: stringsp.NewSet(hit.Package),
}, *hit)
}
Oops, something went wrong.

0 comments on commit d595bd1

Please sign in to comment.