Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gitextractor supports incremental collection #7319

Merged
merged 6 commits into from
Apr 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backend/core/errors/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ var (
Forbidden = register(&Type{httpCode: http.StatusForbidden, meta: "forbidden"})
NotFound = register(&Type{httpCode: http.StatusNotFound, meta: "not-found"})
Conflict = register(&Type{httpCode: http.StatusConflict, meta: "internal"})
NotModified = register(&Type{httpCode: http.StatusNotModified, meta: "not-modified"})

//500+
Internal = register(&Type{httpCode: http.StatusInternalServerError, meta: "internal"})
Expand Down
2 changes: 1 addition & 1 deletion backend/core/models/blueprint.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func (BlueprintScope) TableName() string {

type SyncPolicy struct {
SkipOnFail bool `json:"skipOnFail"`
FullSync bool `json:"fullSync"`
SkipCollectors bool `json:"skipCollectors"`
FullSync bool `json:"fullSync"`
TimeAfter *time.Time `json:"timeAfter"`
}
9 changes: 5 additions & 4 deletions backend/core/runner/run_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,12 +312,13 @@ func RunPluginSubTasks(
}
subtaskFinsied := false
if !subtaskMeta.ForceRunOnResume {
sfc := errors.Must1(
basicRes.GetDal().Count(
if task.ID > 0 {
sfc := errors.Must1(basicRes.GetDal().Count(
dal.From(&models.Subtask{}), dal.Where("task_id = ? AND name = ? AND finished_at IS NOT NULL", task.ID, subtaskMeta.Name),
),
)
subtaskFinsied = sfc > 0
)
subtaskFinsied = sfc > 0
}
}
if subtaskFinsied {
logger.Info("subtask %s already finished previously", subtaskMeta.Name)
Expand Down
119 changes: 119 additions & 0 deletions backend/helpers/pluginhelper/api/collector_state_manager.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package api

import (
"time"

"github.com/apache/incubator-devlake/core/context"
"github.com/apache/incubator-devlake/core/dal"
"github.com/apache/incubator-devlake/core/errors"
"github.com/apache/incubator-devlake/core/models"
)

// CollectorStateManager manages the state of the collector. It is used to determine whether
// the collector should run in incremental mode or full sync mode and what time range to collect.
// Instances are built by NewCollectorStateManager; Close writes the updated state back.
type CollectorStateManager struct {
// db is the data access layer used to load the previous state and to persist the new one
db dal.Dal
// state is the collector state loaded from the database, or a fresh record when none was found
state *models.CollectorLatestState
// syncPolicy is the policy supplied by the caller; the constructor replaces nil with an empty policy
syncPolicy *models.SyncPolicy
// isIncremental indicates whether the collector should run in incremental mode (true) or full sync mode (false)
isIncremental bool
// since is the start time of the time range to collect; nil when no lower bound is known
since *time.Time
// until is the end time of the time range to collect; set to the construction time
until *time.Time
}

// NewCollectorStateManager loads the previous collector state identified by rawTable/rawParams
// and decides the collection mode and time range for the current run.
//
// A nil syncPolicy is treated as an empty policy. When no previous state exists, a fresh one is
// prepared in memory for the given rawTable/rawParams. Any other load failure is returned wrapped.
//
// Mode selection:
//   - full sync when syncPolicy.FullSync is set or there is no previous successful run;
//     since is syncPolicy.TimeAfter, falling back to the previously stored TimeAfter
//   - full sync as well when syncPolicy.TimeAfter is earlier than the stored TimeAfter
//   - incremental otherwise; since becomes the start time of the previous successful run
//
// In every case until is the time at which the manager was created.
func NewCollectorStateManager(basicRes context.BasicRes, syncPolicy *models.SyncPolicy, rawTable, rawParams string) (stateManager *CollectorStateManager, err errors.Error) {
	// a missing policy behaves exactly like an empty one
	if syncPolicy == nil {
		syncPolicy = &models.SyncPolicy{}
	}

	// fetch whatever was recorded by the previous run
	db := basicRes.GetDal()
	state := &models.CollectorLatestState{}
	err = db.First(state, dal.Where(`raw_data_table = ? AND raw_data_params = ?`, rawTable, rawParams))
	switch {
	case err == nil:
		// previous state found, nothing to fix up
	case db.IsErrorNotFound(err):
		// first run for this table/params pair: start from a blank record
		state = &models.CollectorLatestState{
			RawDataTable:  rawTable,
			RawDataParams: rawParams,
		}
		err = nil
	default:
		return nil, errors.Default.Wrap(err, "failed to load the previous collector state")
	}

	// assume full sync until proven otherwise
	createdAt := time.Now()
	lowerBound := syncPolicy.TimeAfter
	if lowerBound == nil {
		// no new timeAfter requested: reuse the one stored by an earlier run
		lowerBound = state.TimeAfter
	}
	stateManager = &CollectorStateManager{
		db:            db,
		state:         state,
		syncPolicy:    syncPolicy,
		isIncremental: false,
		since:         lowerBound,
		until:         &createdAt,
	}

	// an explicit full sync, or the absence of a previous successful run, settles it
	if syncPolicy.FullSync || state.LatestSuccessStart == nil {
		return stateManager, nil
	}

	// moving timeAfter backwards requires re-collecting the older range, hence a full sync;
	// in every other situation only the data since the previous run is needed
	movedEarlier := syncPolicy.TimeAfter != nil &&
		state.TimeAfter != nil &&
		syncPolicy.TimeAfter.Before(*state.TimeAfter)
	if !movedEarlier {
		stateManager.isIncremental = true
		stateManager.since = state.LatestSuccessStart
	}

	return stateManager, nil
}

// IsIncremental reports whether the collector should run in incremental mode;
// false means full sync mode.
func (c *CollectorStateManager) IsIncremental() bool {
return c.isIncremental
}

// GetSince returns the start time of the time range to collect.
// The result may be nil when no lower bound was configured or stored.
func (c *CollectorStateManager) GetSince() *time.Time {
return c.since
}

// GetUntil returns the end time of the time range to collect, which is the
// time captured when the manager was created by NewCollectorStateManager.
func (c *CollectorStateManager) GetUntil() *time.Time {
return c.until
}

// Close records the outcome of the current run in the collector state and saves it
// through the data access layer. It returns whatever error the save reports.
func (c *CollectorStateManager) Close() errors.Error {
	// only a full sync may redefine timeAfter, and a nil policy value never
	// overwrites the one already stored
	if fullSync := !c.isIncremental; fullSync && c.syncPolicy.TimeAfter != nil {
		c.state.TimeAfter = c.syncPolicy.TimeAfter
	}
	// regardless of mode, the upper bound of this run is stored as the latest
	// success start, which the next incremental run uses as its lower bound
	c.state.LatestSuccessStart = c.until
	return c.db.Update(c.state)
}
160 changes: 160 additions & 0 deletions backend/helpers/pluginhelper/api/collector_state_manager_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
/*
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package api

import (
"testing"
"time"

"github.com/apache/incubator-devlake/core/errors"
"github.com/apache/incubator-devlake/core/models"
"github.com/apache/incubator-devlake/helpers/unithelper"
mockcontext "github.com/apache/incubator-devlake/mocks/core/context"
mockdal "github.com/apache/incubator-devlake/mocks/core/dal"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
)

// TestCollectorStateManager is a table-driven test for NewCollectorStateManager and Close.
// Each case feeds a previously stored state (through a mocked dal) and a sync policy to the
// constructor, then checks the chosen mode, the computed since value, and the TimeAfter /
// LatestSuccessStart values left on the state after Close.
func TestCollectorStateManager(t *testing.T) {
// three fixed instants in ascending order: time0 < time1 < time2
time0 := errors.Must1(time.Parse(time.RFC3339, "2020-01-01T00:00:00Z"))
time1 := errors.Must1(time.Parse(time.RFC3339, "2021-01-01T00:00:00Z"))
time2 := errors.Must1(time.Parse(time.RFC3339, "2022-01-01T00:00:00Z"))
for _, tc := range []struct {
// name is the subtest name
name string
// state is what the mocked First call copies into the destination record
state *models.CollectorLatestState
// syncPolicy is the policy handed to NewCollectorStateManager
syncPolicy *models.SyncPolicy
// expectedIsIncremental and expectedSince are checked right after construction
expectedIsIncremental bool
expectedSince *time.Time
// expectedNewStateTimeAfter is the state's TimeAfter expected after Close
expectedNewStateTimeAfter *time.Time
}{
// --- policy without timeAfter ---
{
name: "syncPolicy has no timeAfter - First run",
state: &models.CollectorLatestState{LatestSuccessStart: nil},
syncPolicy: &models.SyncPolicy{TimeAfter: nil},
expectedIsIncremental: false,
expectedSince: nil,
expectedNewStateTimeAfter: nil,
},
{
name: "syncPolicy has no timeAfter - Second run",
state: &models.CollectorLatestState{LatestSuccessStart: &time1},
syncPolicy: &models.SyncPolicy{TimeAfter: nil},
expectedIsIncremental: true,
expectedSince: &time1,
expectedNewStateTimeAfter: nil,
},
{
// stored TimeAfter is nil, so the run stays incremental and Close leaves TimeAfter untouched
name: "syncPolicy has no timeAfter - Third run with timeAfter specified",
state: &models.CollectorLatestState{LatestSuccessStart: &time1},
syncPolicy: &models.SyncPolicy{TimeAfter: &time1},
expectedIsIncremental: true,
expectedSince: &time1,
expectedNewStateTimeAfter: nil,
},
// --- policy with timeAfter ---
{
name: "syncPolicy has timeAfter - First run",
state: &models.CollectorLatestState{LatestSuccessStart: nil},
syncPolicy: &models.SyncPolicy{TimeAfter: &time1},
expectedIsIncremental: false,
expectedSince: &time1,
expectedNewStateTimeAfter: &time1,
},
{
// a later timeAfter keeps incremental mode; stored TimeAfter is not advanced
name: "syncPolicy has timeAfter - Second run with a later timeAfter",
state: &models.CollectorLatestState{TimeAfter: &time1, LatestSuccessStart: &time2},
syncPolicy: &models.SyncPolicy{TimeAfter: &time2},
expectedIsIncremental: true,
expectedSince: &time2,
expectedNewStateTimeAfter: &time1,
},
{
// an earlier timeAfter forces a full sync from the new, earlier bound
name: "syncPolicy has timeAfter - Third run with a earlier timeAfter",
state: &models.CollectorLatestState{TimeAfter: &time1, LatestSuccessStart: &time1},
syncPolicy: &models.SyncPolicy{TimeAfter: &time0},
expectedIsIncremental: false,
expectedSince: &time0,
expectedNewStateTimeAfter: &time0,
},
{
name: "syncPolicy has timeAfter - Fourth run with a same timeAfter",
state: &models.CollectorLatestState{TimeAfter: &time1, LatestSuccessStart: &time2},
syncPolicy: &models.SyncPolicy{TimeAfter: &time1},
expectedIsIncremental: true,
expectedSince: &time2,
expectedNewStateTimeAfter: &time1,
},
// --- explicit full sync ---
{
// no timeAfter in the policy: since falls back to the stored TimeAfter
name: "Full sync - with timeAfter",
state: &models.CollectorLatestState{TimeAfter: &time1, LatestSuccessStart: &time1},
syncPolicy: &models.SyncPolicy{FullSync: true},
expectedIsIncremental: false,
expectedSince: &time1,
expectedNewStateTimeAfter: &time1,
},
{
name: "Full sync - with newer timeAfter",
state: &models.CollectorLatestState{TimeAfter: &time1, LatestSuccessStart: &time1},
syncPolicy: &models.SyncPolicy{TimeAfter: &time2, FullSync: true},
expectedIsIncremental: false,
expectedSince: &time2,
expectedNewStateTimeAfter: &time2,
},
{
name: "Full sync - with older timeAfter",
state: &models.CollectorLatestState{TimeAfter: &time1, LatestSuccessStart: &time1},
syncPolicy: &models.SyncPolicy{TimeAfter: &time0, FullSync: true},
expectedIsIncremental: false,
expectedSince: &time0,
expectedNewStateTimeAfter: &time0,
},
{
name: "Full sync - without timeAfter",
state: &models.CollectorLatestState{TimeAfter: nil, LatestSuccessStart: &time1},
syncPolicy: &models.SyncPolicy{FullSync: true},
expectedIsIncremental: false,
expectedSince: nil,
expectedNewStateTimeAfter: nil,
},
} {
// captured before the manager is built so LatestSuccessStart can be compared against it
started := time.Now()
t.Run(tc.name, func(t *testing.T) {
mockBasicRes := newMockBasicRes(tc.state)
stateManager, err := NewCollectorStateManager(mockBasicRes, tc.syncPolicy, "table", "params")
assert.Nil(t, err)
assert.Equal(t, tc.expectedSince, stateManager.since)
assert.Equal(t, tc.expectedIsIncremental, stateManager.isIncremental)
assert.Nil(t, stateManager.Close())
assert.Equal(t, tc.expectedNewStateTimeAfter, stateManager.state.TimeAfter)
// LatestSuccessStart should be updated
assert.GreaterOrEqual(t, stateManager.state.LatestSuccessStart.Unix(), started.Unix())
// First and update should both be called once
mockBasicRes.AssertExpectations(t)
})
}
}

// newMockBasicRes returns a mocked BasicRes whose dal expects exactly one First call,
// which copies the given state into the destination record and reports no error,
// and exactly one Update call, which also reports no error.
func newMockBasicRes(state *models.CollectorLatestState) *mockcontext.BasicRes {
	// emulate loading the stored record by overwriting the caller's destination
	fillDestination := func(args mock.Arguments) {
		target := args.Get(0).(*models.CollectorLatestState)
		*target = *state
	}
	configureDal := func(mockDal *mockdal.Dal) {
		mockDal.On("First", mock.Anything, mock.Anything).Run(fillDestination).Return(nil).Once()
		mockDal.On("Update", mock.Anything, mock.Anything).Return(nil).Once()
	}
	return unithelper.DummyBasicRes(configureDal)
}
4 changes: 2 additions & 2 deletions backend/helpers/unithelper/dummy_baesres.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ func DummyBasicRes(callback func(mockDal *mockdal.Dal)) *mockcontext.BasicRes {
callback(mockDal)

mockRes.On("GetDal").Return(mockDal)
mockRes.On("GetLogger").Return(mockLog)
mockRes.On("GetConfig", mock.Anything).Return("")
mockRes.On("GetLogger").Return(mockLog).Maybe()
mockRes.On("GetConfig", mock.Anything).Return("").Maybe()
mockDal.On("AllTables").Return(nil, nil)
return mockRes
}
Loading
Loading