Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix:proving:post check sector handles snap deals replica faults #8177

Merged
merged 1 commit into from
Feb 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/api_storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ type StorageMiner interface {
// the path specified when calling CreateBackup is within the base path
CreateBackup(ctx context.Context, fpath string) error //perm:admin

CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, expensive bool) (map[abi.SectorNumber]string, error) //perm:admin
CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, update []bool, expensive bool) (map[abi.SectorNumber]string, error) //perm:admin

ComputeProof(ctx context.Context, ssi []builtin.ExtendedSectorInfo, rand abi.PoStRandomness, poStEpoch abi.ChainEpoch, nv abinetwork.Version) ([]builtin.PoStProof, error) //perm:read
}
Expand Down
8 changes: 4 additions & 4 deletions api/proxy_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion api/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ var (
FullAPIVersion0 = newVer(1, 5, 0)
FullAPIVersion1 = newVer(2, 2, 0)

MinerAPIVersion0 = newVer(1, 3, 0)
MinerAPIVersion0 = newVer(1, 4, 0)
WorkerAPIVersion0 = newVer(1, 5, 0)
)

Expand Down
Binary file modified build/openrpc/miner.json.gz
Binary file not shown.
4 changes: 3 additions & 1 deletion cmd/lotus-miner/proving.go
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,7 @@ var provingCheckProvableCmd = &cli.Command{
}

var tocheck []storage.SectorRef
var update []bool
for _, info := range sectorInfos {
si := abi.SectorID{
Miner: abi.ActorID(mid),
Expand All @@ -454,9 +455,10 @@ var provingCheckProvableCmd = &cli.Command{
ProofType: info.SealProof,
ID: si,
})
update = append(update, info.SectorKeyCID != nil)
}

bad, err := sapi.CheckProvable(ctx, info.WindowPoStProofType, tocheck, cctx.Bool("slow"))
bad, err := sapi.CheckProvable(ctx, info.WindowPoStProofType, tocheck, update, cctx.Bool("slow"))
if err != nil {
return err
}
Expand Down
3 changes: 3 additions & 0 deletions documentation/en/api-v0-methods-miner.md
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,9 @@ Inputs:
"ProofType": 8
}
],
[
true
],
true
]
```
Expand Down
96 changes: 50 additions & 46 deletions extern/sector-storage/faults.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ import (

// FaultTracker TODO: Track things more actively
type FaultTracker interface {
CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error)
CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, update []bool, rg storiface.RGetter) (map[abi.SectorID]string, error)
}

// CheckProvable returns unprovable sectors
func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error) {
func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, update []bool, rg storiface.RGetter) (map[abi.SectorID]string, error) {
var bad = make(map[abi.SectorID]string)

ssize, err := pp.SectorSize()
Expand All @@ -32,72 +32,76 @@ func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof,
}

// TODO: More better checks
for _, sector := range sectors {
for i, sector := range sectors {
err := func() error {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
var fReplica string
var fCache string

locked, err := m.index.StorageTryLock(ctx, sector.ID, storiface.FTSealed|storiface.FTCache, storiface.FTNone)
if err != nil {
return xerrors.Errorf("acquiring sector lock: %w", err)
}

if !locked {
log.Warnw("CheckProvable Sector FAULT: can't acquire read lock", "sector", sector)
bad[sector.ID] = fmt.Sprint("can't acquire read lock")
return nil
}

lp, _, err := m.localStore.AcquireSector(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: acquire sector in checkProvable", "sector", sector, "error", err)
bad[sector.ID] = fmt.Sprintf("acquire sector failed: %s", err)
return nil
}

// temporary hack to make the check work with snapdeals
// will go away in https://github.com/filecoin-project/lotus/pull/7971
if lp.Sealed == "" || lp.Cache == "" {
// maybe it's update
if update[i] {
lockedUpdate, err := m.index.StorageTryLock(ctx, sector.ID, storiface.FTUpdate|storiface.FTUpdateCache, storiface.FTNone)
if err != nil {
return xerrors.Errorf("acquiring sector lock: %w", err)
}
if lockedUpdate {
lp, _, err = m.localStore.AcquireSector(ctx, sector, storiface.FTUpdate|storiface.FTUpdateCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: acquire sector in checkProvable", "sector", sector, "error", err)
bad[sector.ID] = fmt.Sprintf("acquire sector failed: %s", err)
return nil
}
lp.Sealed, lp.Cache = lp.Update, lp.UpdateCache
if !lockedUpdate {
log.Warnw("CheckProvable Sector FAULT: can't acquire read lock on update replica", "sector", sector)
bad[sector.ID] = fmt.Sprint("can't acquire read lock")
return nil
}
lp, _, err := m.localStore.AcquireSector(ctx, sector, storiface.FTUpdate|storiface.FTUpdateCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: acquire sector update replica in checkProvable", "sector", sector, "error", err)
bad[sector.ID] = fmt.Sprintf("acquire sector failed: %s", err)
return nil
}
fReplica, fCache = lp.Update, lp.UpdateCache
} else {
locked, err := m.index.StorageTryLock(ctx, sector.ID, storiface.FTSealed|storiface.FTCache, storiface.FTNone)
if err != nil {
return xerrors.Errorf("acquiring sector lock: %w", err)
}

if !locked {
log.Warnw("CheckProvable Sector FAULT: can't acquire read lock", "sector", sector)
bad[sector.ID] = fmt.Sprint("can't acquire read lock")
return nil
}

lp, _, err := m.localStore.AcquireSector(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: acquire sector in checkProvable", "sector", sector, "error", err)
bad[sector.ID] = fmt.Sprintf("acquire sector failed: %s", err)
return nil
}
fReplica, fCache = lp.Sealed, lp.Cache

}

if lp.Sealed == "" || lp.Cache == "" {
log.Warnw("CheckProvable Sector FAULT: cache and/or sealed paths not found", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache)
bad[sector.ID] = fmt.Sprintf("cache and/or sealed paths not found, cache %q, sealed %q", lp.Cache, lp.Sealed)
if fReplica == "" || fCache == "" {
log.Warnw("CheckProvable Sector FAULT: cache and/or sealed paths not found", "sector", sector, "sealed", fReplica, "cache", fCache)
bad[sector.ID] = fmt.Sprintf("cache and/or sealed paths not found, cache %q, sealed %q", fCache, fReplica)
return nil
}

toCheck := map[string]int64{
lp.Sealed: 1,
filepath.Join(lp.Cache, "p_aux"): 0,
fReplica: 1,
filepath.Join(fCache, "p_aux"): 0,
}

addCachePathsForSectorSize(toCheck, lp.Cache, ssize)
addCachePathsForSectorSize(toCheck, fCache, ssize)

for p, sz := range toCheck {
st, err := os.Stat(p)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: sector file stat error", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "file", p, "err", err)
log.Warnw("CheckProvable Sector FAULT: sector file stat error", "sector", sector, "sealed", fReplica, "cache", fCache, "file", p, "err", err)
bad[sector.ID] = fmt.Sprintf("%s", err)
return nil
}

if sz != 0 {
if st.Size() != int64(ssize)*sz {
log.Warnw("CheckProvable Sector FAULT: sector file is wrong size", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "file", p, "size", st.Size(), "expectSize", int64(ssize)*sz)
log.Warnw("CheckProvable Sector FAULT: sector file is wrong size", "sector", sector, "sealed", fReplica, "cache", fCache, "file", p, "size", st.Size(), "expectSize", int64(ssize)*sz)
bad[sector.ID] = fmt.Sprintf("%s is wrong size (got %d, expect %d)", p, st.Size(), int64(ssize)*sz)
return nil
}
Expand All @@ -118,14 +122,14 @@ func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof,
sector.ID.Number,
})
if err != nil {
log.Warnw("CheckProvable Sector FAULT: generating challenges", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "err", err)
log.Warnw("CheckProvable Sector FAULT: generating challenges", "sector", sector, "sealed", fReplica, "cache", fCache, "err", err)
bad[sector.ID] = fmt.Sprintf("generating fallback challenges: %s", err)
return nil
}

commr, err := rg(ctx, sector.ID)
if err != nil {
log.Warnw("CheckProvable Sector FAULT: getting commR", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "err", err)
log.Warnw("CheckProvable Sector FAULT: getting commR", "sector", sector, "sealed", fReplica, "cache", fCache, "err", err)
bad[sector.ID] = fmt.Sprintf("getting commR: %s", err)
return nil
}
Expand All @@ -136,12 +140,12 @@ func (m *Manager) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof,
SectorNumber: sector.ID.Number,
SealedCID: commr,
},
CacheDirPath: lp.Cache,
CacheDirPath: fCache,
PoStProofType: wpp,
SealedSectorPath: lp.Sealed,
SealedSectorPath: fReplica,
}, ch.Challenges[sector.ID.Number])
if err != nil {
log.Warnw("CheckProvable Sector FAULT: generating vanilla proof", "sector", sector, "sealed", lp.Sealed, "cache", lp.Cache, "err", err)
log.Warnw("CheckProvable Sector FAULT: generating vanilla proof", "sector", sector, "sealed", fReplica, "cache", fCache, "err", err)
bad[sector.ID] = fmt.Sprintf("generating vanilla proof: %s", err)
return nil
}
Expand Down
2 changes: 1 addition & 1 deletion extern/sector-storage/mock/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ func (mgr *SectorMgr) Remove(ctx context.Context, sector storage.SectorRef) erro
return nil
}

func (mgr *SectorMgr) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, ids []storage.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error) {
func (mgr *SectorMgr) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, ids []storage.SectorRef, update []bool, rg storiface.RGetter) (map[abi.SectorID]string, error) {
bad := map[abi.SectorID]string{}

for _, sid := range ids {
Expand Down
4 changes: 2 additions & 2 deletions node/impl/storminer.go
Original file line number Diff line number Diff line change
Expand Up @@ -1127,7 +1127,7 @@ func (sm *StorageMinerAPI) CreateBackup(ctx context.Context, fpath string) error
return backup(ctx, sm.DS, fpath)
}

func (sm *StorageMinerAPI) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []sto.SectorRef, expensive bool) (map[abi.SectorNumber]string, error) {
func (sm *StorageMinerAPI) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []sto.SectorRef, update []bool, expensive bool) (map[abi.SectorNumber]string, error) {
var rg storiface.RGetter
if expensive {
rg = func(ctx context.Context, id abi.SectorID) (cid.Cid, error) {
Expand All @@ -1143,7 +1143,7 @@ func (sm *StorageMinerAPI) CheckProvable(ctx context.Context, pp abi.RegisteredP
}
}

bad, err := sm.StorageMgr.CheckProvable(ctx, pp, sectors, rg)
bad, err := sm.StorageMgr.CheckProvable(ctx, pp, sectors, update, rg)
if err != nil {
return nil, err
}
Expand Down
4 changes: 3 additions & 1 deletion storage/wdpost_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ func (s *WindowPoStScheduler) checkSectors(ctx context.Context, check bitfield.B

sectors := make(map[abi.SectorNumber]struct{})
var tocheck []storage.SectorRef
var update []bool
for _, info := range sectorInfos {
sectors[info.SectorNumber] = struct{}{}
tocheck = append(tocheck, storage.SectorRef{
Expand All @@ -215,9 +216,10 @@ func (s *WindowPoStScheduler) checkSectors(ctx context.Context, check bitfield.B
Number: info.SectorNumber,
},
})
update = append(update, info.SectorKeyCID != nil)
}

bad, err := s.faultTracker.CheckProvable(ctx, s.proofType, tocheck, nil)
bad, err := s.faultTracker.CheckProvable(ctx, s.proofType, tocheck, update, nil)
if err != nil {
return bitfield.BitField{}, xerrors.Errorf("checking provable sectors: %w", err)
}
Expand Down
2 changes: 1 addition & 1 deletion storage/wdpost_run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ func (m mockVerif) GenerateWinningPoStSectorChallenge(context.Context, abi.Regis
type mockFaultTracker struct {
}

func (m mockFaultTracker) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error) {
func (m mockFaultTracker) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storage.SectorRef, update []bool, rg storiface.RGetter) (map[abi.SectorID]string, error) {
// Returns "bad" sectors so just return empty map meaning all sectors are good
return map[abi.SectorID]string{}, nil
}
Expand Down