Permalink
Browse files

Fix some bugs, and do some refactoring

  • Loading branch information...
1 parent d73baf4 commit 9caa6de1c1a9eb15812dd9bc3284e8eb8b259eca @daviddengcn committed Aug 22, 2016
View
@@ -8,6 +8,7 @@ import (
"github.com/golangplus/strings"
+ "github.com/daviddengcn/gcse/utils"
"github.com/daviddengcn/go-easybi"
"github.com/daviddengcn/go-ljson-conf"
"github.com/daviddengcn/go-villa"
@@ -41,10 +42,6 @@ var (
// producer: crawler, consumer: indexer
DBOutPath villa.Path
- // producer: indexer, consumer: server.
- // server never delete index segments, indexer clear updated segments.
- IndexPath villa.Path
-
// configuration of the crawler
CrawlByGodocApi = true
CrawlGithubUpdate = true
@@ -80,9 +77,6 @@ func init() {
DBOutPath = DataRoot.Join("dbout")
DBOutPath.MkdirAll(0755)
- IndexPath = DataRoot.Join("index")
- IndexPath.MkdirAll(0755)
-
CrawlByGodocApi = conf.Bool("crawler.godoc", CrawlByGodocApi)
CrawlGithubUpdate = conf.Bool("crawler.github_update", CrawlGithubUpdate)
CrawlerDuePerRun = conf.Duration("crawler.due_per_run", CrawlerDuePerRun)
@@ -108,6 +102,10 @@ func DocsDBPath() villa.Path {
return DataRoot.Join(FnDocs)
}
+func IndexPath() villa.Path {
+ return DataRoot.Join("index")
+}
+
func StoreBoltPath() string {
return DataRoot.Join("store.bolt").S()
}
@@ -118,3 +116,16 @@ func SetTestingDataPath() {
DataRoot.MkdirAll(0755)
log.Printf("DataRoot: %v", DataRoot)
}
+
+// ImportSegments returns the segments imported from the web site.
+func ImportSegments() utils.Segments {
+ return utils.Segments(ImportPath)
+}
+
+func DBOutSegments() utils.Segments {
+ return utils.Segments(DBOutPath)
+}
+
+func IndexSegments() utils.Segments {
+ return utils.Segments(IndexPath())
+}
View
@@ -21,8 +21,10 @@ import (
"github.com/golangplus/strings"
"github.com/golangplus/time"
+ "github.com/daviddengcn/gcse/configs"
"github.com/daviddengcn/gcse/spider/github"
"github.com/daviddengcn/gcse/store"
+ "github.com/daviddengcn/gcse/utils"
"github.com/daviddengcn/gddo/doc"
"github.com/daviddengcn/go-easybi"
"github.com/daviddengcn/go-index"
@@ -56,13 +58,13 @@ const (
// AppendPackages appends a list of packages to the imports folder for the
// crawler backend to read.
func AppendPackages(pkgs []string) bool {
- segm, err := ImportSegments.GenNewSegment()
+ segm, err := configs.ImportSegments().GenNewSegment()
if err != nil {
log.Printf("genImportSegment failed: %v", err)
return false
}
log.Printf("Import to %v", segm)
- if err := WriteJsonFile(segm.Join(fnLinks), pkgs); err != nil {
+ if err := utils.WriteJsonFile(segm.Join(fnLinks), pkgs); err != nil {
log.Printf("WriteJsonFile failed: %v", err)
return false
}
@@ -73,8 +75,9 @@ func AppendPackages(pkgs []string) bool {
return true
}
-func ReadPackages(segm Segment) (pkgs []string, err error) {
- err = ReadJsonFile(segm.Join(fnLinks), &pkgs)
+func ReadPackages(segm utils.Segment) ([]string, error) {
+ var pkgs []string
+ err := utils.ReadJsonFile(segm.Join(fnLinks), &pkgs)
return pkgs, err
}
@@ -294,7 +297,7 @@ func newDocGet(httpClient doc.HttpClient, pkg string, etag string) (p *doc.Packa
gp, err := glgddo.Get(httpClient.(*BlackRequest).client.(*http.Client),
pkg, etag)
if err != nil {
- if err == gosrc.ErrNotModified {
+ if _, ok := err.(gosrc.NotModifiedError); ok {
err = doc.ErrNotModified
}
return nil, err
@@ -689,37 +692,6 @@ func (nda *NewDocAction) ReadFrom(r sophie.Reader, l int) error {
return errorsp.WithStacks(nda.DocInfo.ReadFrom(r, -1))
}
-const (
- godocApiUrl = "http://api.godoc.org/packages"
-)
-
-// FetchAllPackagesInGodoc fetches the list of all packages on godoc.org
-func FetchAllPackagesInGodoc(httpClient doc.HttpClient) ([]string, error) {
- req, err := http.NewRequest("GET", godocApiUrl, nil)
- if err != nil {
- return nil, errorsp.WithStacksAndMessage(err, "new request for %v failed", godocApiUrl)
- }
- resp, err := httpClient.Do(req)
- if err != nil {
- return nil, errorsp.WithStacksAndMessage(err, "fetching %v failed", godocApiUrl)
- }
- defer resp.Body.Close()
- if resp.StatusCode != 200 {
- return nil, errorsp.NewWithStacks("StatusCode: %d", resp.StatusCode)
- }
- var results map[string][]map[string]string
- dec := json.NewDecoder(resp.Body)
-
- if err := dec.Decode(&results); err != nil {
- return nil, errorsp.WithStacks(err)
- }
- list := make([]string, 0, len(results["results"]))
- for _, res := range results["results"] {
- list = append(list, res["path"])
- }
- return list, nil
-}
-
func init() {
gob.RegisterName("main.CrawlingEntry", CrawlingEntry{})
}
View
@@ -2,40 +2,5 @@
Package gcse is the core supporting library for go-code-search-engine (GCSE).
Its exported types and functions are mainly for sub packages. If you want
some of the functionality, copy the code out.
-
-Sub-projects
-
-crawler crawling packages
-
-indexer creating index data for web-server
-
-server providing web services, including home/top/search services.
-
-
-Data-flows
-
-project Read Write
-------- ---- -----
-crawler fnCrawlerDB fnCrawlerDB
- fnDocDB fnDocDB
- DBOutSegments
-indexer DBOutSegments IndexSegments
-
-server IndexSegments
-
*/
package gcse
-
-import "github.com/daviddengcn/gcse/configs"
-
-var (
- ImportSegments Segments
- DBOutSegments Segments
- IndexSegments Segments
-)
-
-func init() {
- ImportSegments = segments(configs.ImportPath)
- DBOutSegments = segments(configs.DBOutPath)
- IndexSegments = segments(configs.IndexPath)
-}
View
@@ -14,6 +14,7 @@ import (
"golang.org/x/crypto/ssh/terminal"
"github.com/cheggaaa/pb"
+ "github.com/daviddengcn/gcse/utils"
"github.com/daviddengcn/go-index"
"github.com/daviddengcn/sophie"
"github.com/daviddengcn/sophie/mr"
@@ -108,12 +109,12 @@ func indexAndSaveHits(ts *index.TokenSetSearcher, hits []HitInfo, idxs []int, sa
if bar != nil {
bar.FinishPrint("Indexing finished!")
}
- DumpMemStats()
+ utils.DumpMemStats()
return nil
}
func Index(docDB mr.Input, outDir string) (*index.TokenSetSearcher, error) {
- DumpMemStats()
+ utils.DumpMemStats()
docPartCnt, err := docDB.PartCount()
if err != nil {
@@ -178,7 +179,7 @@ func Index(docDB mr.Input, outDir string) (*index.TokenSetSearcher, error) {
it.Close()
}
- DumpMemStats()
+ utils.DumpMemStats()
log.Printf("Making HitInfos ...")
hits := make([]HitInfo, 0, docCount)
for i := 0; i < docPartCnt; i++ {
@@ -239,18 +240,18 @@ func Index(docDB mr.Input, outDir string) (*index.TokenSetSearcher, error) {
it.Close()
}
- DumpMemStats()
+ utils.DumpMemStats()
importsDB = nil
testImportsDB = nil
- DumpMemStats()
+ utils.DumpMemStats()
log.Printf("%d hits collected, sorting static-scores in descending order", len(hits))
idxs := sortp.IndexSortF(len(hits), func(i, j int) bool {
return hits[i].StaticScore > hits[j].StaticScore
})
ts := &index.TokenSetSearcher{}
- DumpMemStats()
+ utils.DumpMemStats()
log.Printf("Indexing %d packages to TokenSetSearcher ...", len(idxs))
hitsArr, err := index.CreateConstArray(path.Join(outDir, HitsArrFn))
if err != nil {
@@ -18,6 +18,7 @@ import (
"github.com/daviddengcn/gcse/configs"
"github.com/daviddengcn/gcse/spider"
"github.com/daviddengcn/gcse/spider/github"
+ "github.com/daviddengcn/gcse/utils"
"github.com/daviddengcn/gddo/doc"
"github.com/daviddengcn/go-easybi"
"github.com/daviddengcn/go-villa"
@@ -40,14 +41,14 @@ func init() {
}
func syncDatabases() {
- gcse.DumpMemStats()
+ utils.DumpMemStats()
log.Printf("Synchronizing databases to disk...")
if err := cDB.Sync(); err != nil {
log.Fatalf("cdb.Sync() failed: %v", err)
}
- gcse.DumpMemStats()
+ utils.DumpMemStats()
runtime.GC()
- gcse.DumpMemStats()
+ utils.DumpMemStats()
}
func loadAllDocsPkgs(in kv.DirInput) error {
@@ -4,11 +4,12 @@ import (
"log"
"github.com/daviddengcn/gcse"
+ "github.com/daviddengcn/gcse/configs"
)
// processing submitted packages (from the go-search.org/add path)
func processImports() error {
- dones, err := gcse.ImportSegments.ListDones()
+ dones, err := configs.ImportSegments().ListDones()
if err != nil {
return err
}
@@ -4,14 +4,14 @@ import (
"log"
"runtime"
- "github.com/daviddengcn/gcse"
+ "github.com/daviddengcn/gcse/configs"
)
func main() {
runtime.GOMAXPROCS(2)
log.Println("indexer started...")
- if err := gcse.IndexSegments.ClearUndones(); err != nil {
+ if err := configs.IndexSegments().ClearUndones(); err != nil {
log.Printf("ClearUndones failed: %v", err)
}
@@ -2,62 +2,61 @@ package main
import (
"log"
+ "os"
"runtime"
"github.com/daviddengcn/gcse"
"github.com/daviddengcn/gcse/configs"
"github.com/daviddengcn/gcse/store"
+ "github.com/daviddengcn/gcse/utils"
"github.com/daviddengcn/go-easybi"
"github.com/daviddengcn/sophie"
"github.com/daviddengcn/sophie/kv"
)
func clearOutdatedIndex() error {
- segm, err := gcse.IndexSegments.FindMaxDone()
+ segm, err := configs.IndexSegments().FindMaxDone()
if err != nil {
return err
}
- all, err := gcse.IndexSegments.ListAll()
+ all, err := configs.IndexSegments().ListAll()
if err != nil {
return err
}
-
for _, s := range all {
if s == segm {
continue
}
-
err := s.Remove()
if err != nil {
return err
}
log.Printf("Outdated segment %v removed!", s)
}
-
return nil
}
func doIndex() bool {
- idxSegm, err := gcse.IndexSegments.GenMaxSegment()
+ idxSegm, err := configs.IndexSegments().GenMaxSegment()
if err != nil {
log.Printf("GenMaxSegment failed: %v", err)
return false
}
runtime.GC()
- gcse.DumpMemStats()
+ utils.DumpMemStats()
log.Printf("Indexing to %v ...", idxSegm)
fpDocDB := sophie.LocalFsPath(configs.DocsDBPath().S())
- ts, err := gcse.Index(kv.DirInput(fpDocDB), idxSegm.Join("").S())
+ ts, err := gcse.Index(kv.DirInput(fpDocDB), string(idxSegm))
if err != nil {
log.Printf("Indexing failed: %v", err)
return false
}
if !func() bool {
- f, err := idxSegm.Join(gcse.IndexFn).Create()
+ f, err := os.Create(idxSegm.Join(gcse.IndexFn))
if err != nil {
log.Printf("Create index file failed: %v", err)
return false
@@ -74,11 +73,11 @@ func doIndex() bool {
return false
}
runtime.GC()
- gcse.DumpMemStats()
+ utils.DumpMemStats()
storePath := idxSegm.Join(configs.FnStore)
log.Printf("Saving store snapshot to %v", storePath)
- if err := store.SaveSnapshot(storePath.S()); err != nil {
+ if err := store.SaveSnapshot(storePath); err != nil {
log.Printf("SaveSnapshot %v failed: %v", storePath, err)
}
@@ -91,9 +90,9 @@ func doIndex() bool {
gcse.AddBiValueAndProcess(bi.Average, "index.doc-count", ts.DocCount())
ts = nil
- gcse.DumpMemStats()
+ utils.DumpMemStats()
runtime.GC()
- gcse.DumpMemStats()
+ utils.DumpMemStats()
return true
}
Oops, something went wrong.

0 comments on commit 9caa6de

Please sign in to comment.