Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Add fork detection and removal to admin tool.

Add new subcommand 'prune' to gddo-admin. This subcommand looks for and
removes forks. The subcommand also removes subtrees where all packages
have search rank = 0.
  • Loading branch information...
commit 5b70929f3c6b2af09e90a4a437ce2b0f696e8894 1 parent b5a2d43
@garyburd garyburd authored
View
72 database/database.go
@@ -19,7 +19,7 @@
// terms: space separated search terms
// path: import path
// synopsis: synopsis
-// glob: snappy compressed gob encoded doc.Package
+// gob: snappy compressed gob encoded doc.Package
// rank: document search rank
// etag:
// kind: p=package, c=command, d=directory with no go files
@@ -388,6 +388,12 @@ func (db *Database) Get(path string) (*doc.Package, []Package, time.Time, error)
return pdoc, subdirs, lastCrawl, nil
}
+func (db *Database) GetDoc(path string) (*doc.Package, time.Time, error) {
+ c := db.Pool.Get()
+ defer c.Close()
+ return db.getDoc(c, path)
+}
+
var deleteScript = redis.NewScript(0, `
local path = ARGV[1]
@@ -581,8 +587,15 @@ func (db *Database) Query(q string) ([]Package, error) {
return pkgs, err
}
+type PackageInfo struct {
+ PDoc *doc.Package
+ Pkgs []Package
+ Rank float64
+ Kind string
+}
+
// Do executes function f for each document in the database.
-func (db *Database) Do(f func(*doc.Package, []Package) error) error {
+func (db *Database) Do(f func(*PackageInfo) error) error {
c := db.Pool.Get()
defer c.Close()
keys, err := redis.Values(c.Do("KEYS", "pkg:*"))
@@ -590,58 +603,37 @@ func (db *Database) Do(f func(*doc.Package, []Package) error) error {
return err
}
for _, key := range keys {
- p, err := redis.Bytes(c.Do("HGET", key, "gob"))
- if err == redis.ErrNil {
- continue
- }
+ values, err := redis.Values(c.Do("HMGET", key, "gob", "rank", "kind"))
if err != nil {
return err
}
- p, err = snappy.Decode(nil, p)
- if err != nil {
+
+ var (
+ pi PackageInfo
+ p []byte
+ )
+
+ if _, err := redis.Scan(values, &p, &pi.Rank, &pi.Kind); err != nil {
return err
}
- var pdoc doc.Package
- if err := gob.NewDecoder(bytes.NewReader(p)).Decode(&pdoc); err != nil {
- return err
+
+ if p == nil {
+ continue
}
- pkgs, err := db.getSubdirs(c, pdoc.ImportPath, &pdoc)
+
+ p, err = snappy.Decode(nil, p)
if err != nil {
return err
}
- if err := f(&pdoc, pkgs); err != nil {
+
+ if err := gob.NewDecoder(bytes.NewReader(p)).Decode(&pi.PDoc); err != nil {
return err
}
- }
- return nil
-}
-
-// ProjectDo executes function f for each document in the database.
-func (db *Database) ProjectDo(f func(string, []*doc.Package) error) error {
- c := db.Pool.Get()
- defer c.Close()
-
- keys, err := redis.Values(c.Do("KEYS", "index:project:*"))
- if err != nil {
- return err
- }
-
- var pdocs []*doc.Package
- for _, key := range keys {
- projectRoot := string(key.([]byte)[len("index:project:"):])
- pkgs, err := db.Project(projectRoot)
+ pi.Pkgs, err = db.getSubdirs(c, pi.PDoc.ImportPath, pi.PDoc)
if err != nil {
return err
}
- pdocs = pdocs[:0]
- for _, pkg := range pkgs {
- pdoc, _, err := db.getDoc(c, pkg.Path)
- if err != nil {
- return err
- }
- pdocs = append(pdocs, pdoc)
- }
- if err := f(projectRoot, pdocs); err != nil {
+ if err := f(&pi); err != nil {
return err
}
}
View
8 database/index.go
@@ -92,10 +92,16 @@ func documentTerms(pdoc *doc.Package, rank float64) []string {
}
func documentRank(pdoc *doc.Package) float64 {
- if pdoc.Name == "" || pdoc.IsCmd || len(pdoc.Errors) > 0 {
+ if pdoc.Name == "" || pdoc.IsCmd || len(pdoc.Errors) > 0 || strings.HasSuffix(pdoc.ImportPath, ".go") {
return 0
}
+ for _, p := range pdoc.Imports {
+ if strings.HasSuffix(p, ".go") {
+ return 0
+ }
+ }
+
if !pdoc.Truncated &&
len(pdoc.Consts) == 0 &&
len(pdoc.Vars) == 0 &&
View
41 gddo-admin/delete.go
@@ -0,0 +1,41 @@
+// Copyright 2013 Gary Burd
+//
+// Licensed under the Apache License, Version 2.0 (the "License"): you may
+// not use this file except in compliance with the License. You may obtain
+// a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations
+// under the License.
+
+package main
+
+import (
+ "github.com/garyburd/gopkgdoc/database"
+ "log"
+ "os"
+)
+
+var deleteCommand = &command{
+ name: "delete",
+ run: del,
+ usage: "delete path",
+}
+
+func del(c *command) {
+ if len(c.flag.Args()) != 1 {
+ c.printUsage()
+ os.Exit(1)
+ }
+ db, err := database.New()
+ if err != nil {
+ log.Fatal(err)
+ }
+ if err := db.Delete(c.flag.Args()[0]); err != nil {
+ log.Fatal(err)
+ }
+}
View
2  gddo-admin/main.go
@@ -37,6 +37,8 @@ func (c *command) printUsage() {
var commands = []*command{
blockCommand,
reindexCommand,
+ pruneCommand,
+ deleteCommand,
}
func printUsage() {
View
170 gddo-admin/prune.go
@@ -0,0 +1,170 @@
+// Copyright 2013 Gary Burd
+//
+// Licensed under the Apache License, Version 2.0 (the "License"): you may
+// not use this file except in compliance with the License. You may obtain
+// a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations
+// under the License.
+
+package main
+
+import (
+ "github.com/garyburd/gopkgdoc/database"
+ "github.com/garyburd/gopkgdoc/doc"
+ "log"
+ "os"
+ "strings"
+)
+
+func jaccardIndex(a, b *doc.Package) float64 {
+ set := make(map[string]int)
+ for i, pdoc := range []*doc.Package{a, b} {
+ mask := 1 << uint(i)
+ for _, f := range pdoc.Files {
+ set[f.Name] |= mask
+ }
+ for _, p := range pdoc.Imports {
+ set[p] |= mask
+ }
+ for _, p := range pdoc.TestImports {
+ set[p] |= mask
+ }
+ for _, f := range pdoc.Funcs {
+ set[f.Name] |= mask
+ }
+ for _, t := range pdoc.Types {
+ set[t.Name] |= mask
+ for _, f := range t.Funcs {
+ set[f.Name] |= mask
+ }
+ for _, f := range t.Methods {
+ set[f.Recv+"."+f.Name] |= mask
+ }
+ }
+ }
+ n := 0
+ for _, bits := range set {
+ if bits == 3 {
+ n += 1
+ }
+ }
+ return float64(n) / float64(len(set))
+}
+
+var (
+ pruneCommand = &command{
+ name: "prune",
+ usage: "prune",
+ }
+ pruneDryRun = pruneCommand.flag.Bool("n", false, "Dry run.")
+)
+
+func init() {
+ pruneCommand.run = prune
+}
+
+func prune(c *command) {
+ if len(c.flag.Args()) != 0 {
+ c.printUsage()
+ os.Exit(1)
+ }
+ db, err := database.New()
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ paths := make(map[string]bool)
+
+ err = db.Do(func(pi *database.PackageInfo) error {
+ pdoc := pi.PDoc
+ if pdoc.ProjectRoot == "" {
+ return nil
+ }
+
+ i := strings.LastIndex(pdoc.ImportPath, "/")
+ if i < 0 {
+ return nil
+ }
+ suffix := pdoc.ImportPath[i:]
+
+ imports := make(map[string]bool)
+ for _, p := range pdoc.Imports {
+ imports[p] = true
+ }
+
+ pathLists := [][]string{pdoc.TestImports, pdoc.XTestImports, pdoc.References}
+ if pdoc.ProjectRoot != pdoc.ImportPath {
+ if pdocRoot, _, _ := db.GetDoc(pdoc.ProjectRoot); pdocRoot != nil {
+ pathLists = append(pathLists, pdocRoot.References)
+ }
+ }
+
+ fork := ""
+
+ forkCheck:
+ for _, list := range pathLists {
+ for _, p := range list {
+ if p != pdoc.ImportPath && strings.HasSuffix(p, suffix) && !imports[p] {
+ pdocTest, _, _ := db.GetDoc(p)
+ if pdocTest != nil && pdocTest.Name == pdoc.Name && jaccardIndex(pdocTest, pdoc) > 0.75 {
+ fork = pdocTest.ImportPath
+ break forkCheck
+ }
+ }
+ }
+ }
+
+ if fork != "" {
+ log.Printf("%s is fork of %s", pdoc.ImportPath, fork)
+ if !*pruneDryRun {
+ for _, pkg := range pi.Pkgs {
+ if err := db.Delete(pkg.Path); err != nil {
+ log.Printf("Error deleting %s, %v", pkg.Path, err)
+ }
+ }
+ if err := db.Delete(pdoc.ImportPath); err != nil {
+ log.Printf("Error deleting %s, %v", pdoc.ImportPath, err)
+ }
+ }
+ } else {
+ keep := pi.Rank > 0
+ if pdoc.IsCmd && pdoc.Synopsis != "" && len(pdoc.Doc) > len(pdoc.Synopsis) {
+ // Keep a command if there's actually some documentation.
+ keep = true
+ }
+ p := pdoc.ImportPath
+ for {
+ paths[p] = paths[p] || keep
+ if len(p) <= len(pdoc.ProjectRoot) {
+ break
+ } else if i := strings.LastIndex(p, "/"); i < 0 {
+ break
+ } else {
+ p = p[:i]
+ }
+ }
+ }
+ return nil
+ })
+
+ for p, keep := range paths {
+ if !keep {
+ log.Printf("%s has rank 0", p)
+ if !*pruneDryRun {
+ if err := db.Delete(p); err != nil {
+ log.Printf("Error deleting %s, %v", p, err)
+ }
+ }
+ }
+ }
+
+ if err != nil {
+ log.Fatal(err)
+ }
+}
View
5 gddo-admin/reindex.go
@@ -16,7 +16,6 @@ package main
import (
"github.com/garyburd/gopkgdoc/database"
- "github.com/garyburd/gopkgdoc/doc"
"log"
"os"
)
@@ -37,9 +36,9 @@ func reindex(c *command) {
log.Fatal(err)
}
var n int
- err = db.Do(func(pdoc *doc.Package, pkgs []database.Package) error {
+ err = db.Do(func(pi *database.PackageInfo) error {
n += 1
- return db.Put(pdoc)
+ return db.Put(pi.PDoc)
})
if err != nil {
log.Fatal(err)
Please sign in to comment.
Something went wrong with that request. Please try again.