Skip to content

Commit

Permalink
Remove index management (over manual management), update index documentation for ES7.
Browse files Browse the repository at this point in the history

* Add term vectors to allow large content fields.
* Add MinHash/fingerprint to content.
* Default query fields, prevents “Field expansion matches too many fields”.
  • Loading branch information
dokterbob committed Sep 2, 2020
1 parent fd55b07 commit fe00cae
Show file tree
Hide file tree
Showing 18 changed files with 42,789 additions and 1,222 deletions.
28 changes: 3 additions & 25 deletions config/indexes.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package config

import (
"encoding/json"
"github.com/ipfs-search/ipfs-search/index/elasticsearch"
)

Expand All @@ -10,36 +9,15 @@ type Indexes map[string]*elasticsearch.Config

// IndexesDefaults returns the default indexes, keyed by "files",
// "directories" and "invalids".
//
// NOTE(review): this listing looks like a rendered diff with removed and
// added lines interleaved: every Config literal below carries two Name
// entries (a "_v0" name and an unsuffixed one), which cannot compile as
// written. Presumably the JSON decoding, the "_v0" names and the
// Settings/Mapping fields are the removed side -- confirm against the
// repository before relying on either variant.
func IndexesDefaults() Indexes {
var indexSettings, fileMapping, dirMapping, invalidMapping map[string]interface{}

// Decode the shared settings and the three mappings from their JSON string
// constants (declared outside this view); any decode error panics.
if err := json.Unmarshal([]byte(indexSettingsJSON), &indexSettings); err != nil {
panic(err)
}
if err := json.Unmarshal([]byte(fileMappingJSON), &fileMapping); err != nil {
panic(err)
}
if err := json.Unmarshal([]byte(dirMappingJSON), &dirMapping); err != nil {
panic(err)
}
if err := json.Unmarshal([]byte(invalidMappingJSON), &invalidMapping); err != nil {
panic(err)
}

// The same indexSettings value is referenced by all three entries; only the
// name and the mapping differ per index.
return Indexes{
"files": &elasticsearch.Config{
Name: "ipfs_files_v0",
Settings: indexSettings,
Mapping: fileMapping,
Name: "ipfs_files",
},
"directories": &elasticsearch.Config{
Name: "ipfs_directories_v0",
Settings: indexSettings,
Mapping: dirMapping,
Name: "ipfs_directories",
},
"invalids": &elasticsearch.Config{
Name: "ipfs_invalids_v0",
Settings: indexSettings,
Mapping: invalidMapping,
Name: "ipfs_invalids",
},
}
}
231 changes: 0 additions & 231 deletions config/indexes_json.go

This file was deleted.

4 changes: 0 additions & 4 deletions crawler/factory/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ func New(ctx context.Context, config *Config, errc chan<- error) (*Factory, erro
log.Printf("Connected to ElasticSearch.")

indexes := elasticsearch.NewMulti(es, config.Indexes["files"], config.Indexes["directories"], config.Indexes["invalids"])
if err := index.EnsureExistsAndUpdatedMulti(ctx, indexes...); err != nil {
return nil, err
}
log.Printf("Index configuration up to date.")

return &Factory{
crawlerConfig: config.CrawlerConfig,
Expand Down
File renamed without changes.
60 changes: 60 additions & 0 deletions docs/indices/directories.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{
"settings": {
"index": {
"refresh_interval": "15m",
"number_of_shards": "20"
}
},
"mappings": {
"dynamic": "strict",
"properties": {
"first-seen": {
"type": "date",
"format": "date_time_no_millis"
},
"last-seen": {
"type": "date",
"format": "date_time_no_millis"
},
"links": {
"dynamic": true,
"properties": {
"Hash": {
"type": "keyword",
"index": true
},
"Name": {
"type": "text"
},
"Size": {
"type": "long",
"ignore_malformed": true
},
"Type": {
"type": "keyword"
}
}
},
"size": {
"type": "long",
"ignore_malformed": true
},
"references": {
"properties": {
"name": {
"type": "text",
"index": true
},
"hash": {
"type": "keyword",
"index": true
},
"parent_hash": {
"type": "keyword",
"index": true
}
}
}
}
}
}
Loading

0 comments on commit fe00cae

Please sign in to comment.