Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

download boltdb files parallelly during reads #2483

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 76 additions & 6 deletions pkg/storage/stores/shipper/downloads/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ import (
)

// timeout for downloading the initial files for a table, so that a download cannot hold on to resources indefinitely.
const downloadTimeout = 5 * time.Minute
const (
downloadTimeout = 5 * time.Minute
downloadParallelism = 50
)

type BoltDBIndexClient interface {
QueryDB(ctx context.Context, db *bbolt.DB, query chunk.IndexQuery, callback func(chunk.IndexQuery, chunk.ReadBatch) (shouldContinue bool)) error
Expand Down Expand Up @@ -123,6 +126,13 @@ func (t *Table) init(ctx context.Context) (err error) {
return
}

// download the dbs in parallel
err = t.doParallelDownload(ctx, objects, folderPath)
if err != nil {
return err
}

// open all the downloaded dbs
for _, object := range objects {
dbName, err := getDBNameFromObjectKey(object.Key)
if err != nil {
Expand All @@ -132,11 +142,6 @@ func (t *Table) init(ctx context.Context) (err error) {
filePath := path.Join(folderPath, dbName)
df := downloadedFile{}

err = t.getFileFromStorage(ctx, object.Key, filePath)
if err != nil {
return err
}

df.mtime = object.ModifiedAt
df.boltdb, err = local.OpenBoltdbFile(filePath)
if err != nil {
Expand Down Expand Up @@ -413,3 +418,68 @@ func getDBNameFromObjectKey(objectKey string) (string, error) {
}
return ss[1], nil
}

// doParallelDownload downloads objects (dbs) in parallel into folderPathForTable, using at most
// downloadParallelism worker goroutines. It is up to the caller to open the dbs after the download
// finishes successfully.
//
// It returns the first non-nil error reported by any worker, or nil when every worker finished
// without error. On the first error the derived context is cancelled so that the remaining work is
// abandoned. The function returns only after every worker has reported its result.
func (t *Table) doParallelDownload(ctx context.Context, objects []chunk.StorageObject, folderPathForTable string) error {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	queue := make(chan chunk.StorageObject)
	n := util.Min(len(objects), downloadParallelism)
	incomingErrors := make(chan error)

	// Run n parallel goroutines fetching objects to download from the queue.
	for i := 0; i < n; i++ {
		go func() {
			// On error, stop consuming and report the error; a nil error is reported once the
			// queue is closed and drained. Exactly one value is sent per worker.
			var err error
			for object := range queue {
				var dbName string
				dbName, err = getDBNameFromObjectKey(object.Key)
				if err != nil {
					break
				}

				filePath := path.Join(folderPathForTable, dbName)
				err = t.getFileFromStorage(ctx, object.Key, filePath)
				if err != nil {
					break
				}
			}

			incomingErrors <- err
		}()
	}

	// Send all the objects to download into the queue.
	go func() {
		// Closing the queue tells idle workers that there is no more work.
		defer close(queue)

		for _, object := range objects {
			select {
			case queue <- object:
			case <-ctx.Done():
				// A bare `break` here would only leave the select, not the loop, so the
				// producer would keep iterating over the remaining objects. Return instead
				// to stop feeding the queue as soon as the operation is cancelled.
				return
			}
		}
	}()

	// Receive one result per worker, which also ensures all the workers have stopped.
	var firstErr error
	for i := 0; i < n; i++ {
		err := <-incomingErrors
		if err != nil && firstErr == nil {
			// Cancel the download operation in case of error.
			cancel()
			firstErr = err
		}
	}

	return firstErr
}
46 changes: 41 additions & 5 deletions pkg/storage/stores/shipper/downloads/table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package downloads

import (
"context"
"fmt"
"io/ioutil"
"os"
"path/filepath"
Expand Down Expand Up @@ -35,11 +36,11 @@ func buildTestClients(t *testing.T, path string) (*local.BoltIndexClient, *local
return boltDBIndexClient, fsObjectClient
}

func buildTestTable(t *testing.T, path string) (*Table, *local.BoltIndexClient, stopFunc) {
func buildTestTable(t *testing.T, tableName, path string) (*Table, *local.BoltIndexClient, stopFunc) {
boltDBIndexClient, fsObjectClient := buildTestClients(t, path)
cachePath := filepath.Join(path, cacheDirName)

table := NewTable("test", cachePath, fsObjectClient, boltDBIndexClient, newMetrics(nil))
table := NewTable(tableName, cachePath, fsObjectClient, boltDBIndexClient, newMetrics(nil))

// wait for either table to get ready or a timeout hits
select {
Expand Down Expand Up @@ -84,7 +85,7 @@ func TestTable_Query(t *testing.T) {

testutil.SetupDBTablesAtPath(t, "test", objectStoragePath, testDBs)

table, _, stopFunc := buildTestTable(t, tempDir)
table, _, stopFunc := buildTestTable(t, "test", tempDir)
defer func() {
stopFunc()
}()
Expand Down Expand Up @@ -129,7 +130,7 @@ func TestTable_Sync(t *testing.T) {
testutil.SetupDBTablesAtPath(t, tableName, objectStoragePath, testDBs)

// create table instance
table, boltdbClient, stopFunc := buildTestTable(t, tempDir)
table, boltdbClient, stopFunc := buildTestTable(t, "test", tempDir)
defer func() {
stopFunc()
}()
Expand Down Expand Up @@ -172,7 +173,7 @@ func TestTable_LastUsedAt(t *testing.T) {
tempDir, err := ioutil.TempDir("", "table-writes")
require.NoError(t, err)

table, _, stopFunc := buildTestTable(t, tempDir)
table, _, stopFunc := buildTestTable(t, "test", tempDir)
defer func() {
stopFunc()
require.NoError(t, os.RemoveAll(tempDir))
Expand All @@ -194,3 +195,38 @@ func TestTable_LastUsedAt(t *testing.T) {
// check whether last used at got update to now.
require.InDelta(t, time.Now().Unix(), table.LastUsedAt().Unix(), 1)
}

// TestTable_doParallelDownload builds tables backed by 0, 10, downloadParallelism and
// 2*downloadParallelism dbs in object storage and checks that each table ends up with that many
// dbs downloaded and opened, and that a query over the table returns all the records.
func TestTable_doParallelDownload(t *testing.T) {
	tempDir, err := ioutil.TempDir("", "table-parallel-download")
	require.NoError(t, err)

	defer func() {
		require.NoError(t, os.RemoveAll(tempDir))
	}()

	objectStoragePath := filepath.Join(tempDir, objectsStorageDirName)
	dbCounts := []int{0, 10, downloadParallelism, downloadParallelism * 2}

	for _, numDBs := range dbCounts {
		t.Run(fmt.Sprintf("%d dbs", numDBs), func(t *testing.T) {
			// Each sub-test uses its own table, named after the number of dbs it holds.
			tableName := fmt.Sprint(numDBs)

			// Every db holds 10 records; record ranges of consecutive dbs do not overlap.
			testDBs := make(map[string]testutil.DBRecords, numDBs)
			for dbIdx := 0; dbIdx < numDBs; dbIdx++ {
				testDBs[fmt.Sprint(dbIdx)] = testutil.DBRecords{
					Start:      dbIdx * 10,
					NumRecords: 10,
				}
			}

			testutil.SetupDBTablesAtPath(t, tableName, objectStoragePath, testDBs)

			table, _, stopFunc := buildTestTable(t, tableName, tempDir)
			defer stopFunc()

			// ensure that we have `numDBs` number of files downloaded and opened.
			require.Len(t, table.dbs, numDBs)
			testutil.TestSingleQuery(t, chunk.IndexQuery{}, table, 0, numDBs*10)
		})
	}
}