Skip to content

Commit e12f28e

Browse files
committed
[FAB-13238] handle container exit during launch
When a chaincode container has successfully started, it is possible for the chaincode inside the container to fail before registration is complete. Without this change, the container termination is not observed by the peer and so, instead of failing quickly, the launch must time out. This commit watches for container termination during launch and will report container exit before registration. This results in better failure semantics and easier diagnostics. Change-Id: If3071ee9e406b55a54d4d49e69dac9796e565e5d Signed-off-by: Matthew Sykes <sykesmat@us.ibm.com>
1 parent 1661082 commit e12f28e

14 files changed

+498
-14
lines changed

core/chaincode/chaincode_support.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
// Runtime groups the operations the chaincode launcher performs on a
// chaincode runtime: starting it, stopping it, and waiting for it to exit.
type Runtime interface {
2828
// Start is given the chaincode container info and the code package bytes;
// RuntimeLauncher.Launch treats a non-nil error as a failed container start.
Start(ccci *ccprovider.ChaincodeContainerInfo, codePackage []byte) error
2929
// Stop is given the chaincode container info of the runtime to stop.
Stop(ccci *ccprovider.ChaincodeContainerInfo) error
30+
// Wait returns an int and an error. RuntimeLauncher.Launch calls it after a
// successful Start and reports the int as the container's exit code.
Wait(ccci *ccprovider.ChaincodeContainerInfo) (int, error)
3031
}
3132

3233
// Launcher is used to launch chaincode runtimes.

core/chaincode/container_runtime.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,33 @@ func (c *ContainerRuntime) Stop(ccci *ccprovider.ChaincodeContainerInfo) error {
9999
return nil
100100
}
101101

102+
// Wait waits for the container runtime to terminate.
103+
func (c *ContainerRuntime) Wait(ccci *ccprovider.ChaincodeContainerInfo) (int, error) {
104+
type result struct {
105+
exitCode int
106+
err error
107+
}
108+
109+
resultCh := make(chan result, 1)
110+
wcr := container.WaitContainerReq{
111+
CCID: ccintf.CCID{
112+
Name: ccci.Name,
113+
Version: ccci.Version,
114+
},
115+
Exited: func(exitCode int, err error) {
116+
resultCh <- result{exitCode: exitCode, err: err}
117+
close(resultCh)
118+
},
119+
}
120+
121+
if err := c.Processor.Process(ccci.ContainerType, wcr); err != nil {
122+
return -1, err
123+
}
124+
r := <-resultCh
125+
126+
return r.exitCode, r.err
127+
}
128+
102129
const (
103130
// Mutual TLS auth client key and cert paths in the chaincode container
104131
TLSClientKeyPath string = "/etc/hyperledger/fabric/client.key"

core/chaincode/container_runtime_test.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,3 +280,37 @@ func TestContainerRuntimeStopErrors(t *testing.T) {
280280
assert.NoError(t, err)
281281
}
282282
}
283+
284+
func TestContainerRuntimeWait(t *testing.T) {
285+
fakeProcessor := &mock.Processor{}
286+
fakeProcessor.ProcessStub = func(containerType string, req container.VMCReq) error {
287+
waitReq := req.(container.WaitContainerReq)
288+
waitReq.Exited(0, nil)
289+
return nil
290+
}
291+
cr := &chaincode.ContainerRuntime{
292+
Processor: fakeProcessor,
293+
}
294+
295+
ccci := &ccprovider.ChaincodeContainerInfo{
296+
Type: pb.ChaincodeSpec_GOLANG.String(),
297+
Name: "chaincode-id-name",
298+
Version: "chaincode-version",
299+
ContainerType: "container-type",
300+
}
301+
302+
exitCode, err := cr.Wait(ccci)
303+
assert.NoError(t, err)
304+
assert.Equal(t, 0, exitCode)
305+
306+
assert.Equal(t, 1, fakeProcessor.ProcessCallCount())
307+
vmType, req := fakeProcessor.ProcessArgsForCall(0)
308+
assert.Equal(t, vmType, "container-type")
309+
waitReq, ok := req.(container.WaitContainerReq)
310+
assert.True(t, ok)
311+
assert.Equal(t, ccintf.CCID{Name: "chaincode-id-name", Version: "chaincode-version"}, waitReq.CCID)
312+
313+
fakeProcessor.ProcessReturns(errors.New("moles-and-trolls"))
314+
_, err = cr.Wait(ccci)
315+
assert.EqualError(t, err, "moles-and-trolls")
316+
}

core/chaincode/mock/runtime.go

Lines changed: 78 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

core/chaincode/runtime_launcher.go

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ func (r *RuntimeLauncher) Launch(ccci *ccprovider.ChaincodeContainerInfo) error
4141

4242
startTime := time.Now()
4343
cname := ccci.Name + ":" + ccci.Version
44-
launchState, started := r.Registry.Launching(cname)
45-
if !started {
44+
launchState, alreadyStarted := r.Registry.Launching(cname)
45+
if !alreadyStarted {
4646
startFailCh = make(chan error, 1)
4747
timeoutCh = time.NewTimer(r.StartupTimeout).C
4848

@@ -54,7 +54,13 @@ func (r *RuntimeLauncher) Launch(ccci *ccprovider.ChaincodeContainerInfo) error
5454
go func() {
5555
if err := r.Runtime.Start(ccci, codePackage); err != nil {
5656
startFailCh <- errors.WithMessage(err, "error starting container")
57+
return
5758
}
59+
exitCode, err := r.Runtime.Wait(ccci)
60+
if err != nil {
61+
launchState.Notify(errors.Wrap(err, "failed to wait on container exit"))
62+
}
63+
launchState.Notify(errors.Errorf("container exited with %d", exitCode))
5864
}()
5965
}
6066

@@ -64,19 +70,15 @@ func (r *RuntimeLauncher) Launch(ccci *ccprovider.ChaincodeContainerInfo) error
6470
err = errors.WithMessage(launchState.Err(), "chaincode registration failed")
6571
case err = <-startFailCh:
6672
launchState.Notify(err)
67-
r.Metrics.LaunchFailures.With(
68-
"chaincode", cname,
69-
).Add(1)
73+
r.Metrics.LaunchFailures.With("chaincode", cname).Add(1)
7074
case <-timeoutCh:
7175
err = errors.Errorf("timeout expired while starting chaincode %s for transaction", cname)
7276
launchState.Notify(err)
73-
r.Metrics.LaunchTimeouts.With(
74-
"chaincode", cname,
75-
).Add(1)
77+
r.Metrics.LaunchTimeouts.With("chaincode", cname).Add(1)
7678
}
7779

7880
success := true
79-
if err != nil && !started {
81+
if err != nil && !alreadyStarted {
8082
success = false
8183
chaincodeLogger.Debugf("stopping due to error while launching: %+v", err)
8284
defer r.Registry.Deregister(cname)

core/chaincode/runtime_launcher_test.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ var _ = Describe("RuntimeLauncher", func() {
2828
fakeLaunchDuration *metricsfakes.Histogram
2929
fakeLaunchFailures *metricsfakes.Counter
3030
fakeLaunchTimeouts *metricsfakes.Counter
31+
exitedCh chan int
3132

3233
ccci *ccprovider.ChaincodeContainerInfo
3334

@@ -44,6 +45,11 @@ var _ = Describe("RuntimeLauncher", func() {
4445
launchState.Notify(nil)
4546
return nil
4647
}
48+
exitedCh = make(chan int)
49+
waitExitCh := exitedCh // shadow to avoid race
50+
fakeRuntime.WaitStub = func(*ccprovider.ChaincodeContainerInfo) (int, error) {
51+
return <-waitExitCh, nil
52+
}
4753

4854
fakePackageProvider = &mock.PackageProvider{}
4955
fakePackageProvider.GetChaincodeCodePackageReturns([]byte("code-package"), nil)
@@ -77,6 +83,10 @@ var _ = Describe("RuntimeLauncher", func() {
7783
}
7884
})
7985

86+
AfterEach(func() {
87+
close(exitedCh)
88+
})
89+
8090
It("registers the chaincode as launching", func() {
8191
err := runtimeLauncher.Launch(ccci)
8292
Expect(err).NotTo(HaveOccurred())
@@ -172,6 +182,34 @@ var _ = Describe("RuntimeLauncher", func() {
172182
})
173183
})
174184

185+
Context("when the contaienr terminates before registration", func() {
186+
BeforeEach(func() {
187+
fakeRuntime.StartReturns(nil)
188+
fakeRuntime.WaitReturns(-99, nil)
189+
})
190+
191+
It("returns an error", func() {
192+
err := runtimeLauncher.Launch(ccci)
193+
Expect(err).To(MatchError("chaincode registration failed: container exited with -99"))
194+
})
195+
196+
It("attempts to stop the runtime", func() {
197+
runtimeLauncher.Launch(ccci)
198+
199+
Expect(fakeRuntime.StopCallCount()).To(Equal(1))
200+
ccciArg := fakeRuntime.StopArgsForCall(0)
201+
Expect(ccciArg).To(Equal(ccci))
202+
})
203+
204+
It("deregisters the chaincode", func() {
205+
runtimeLauncher.Launch(ccci)
206+
207+
Expect(fakeRegistry.DeregisterCallCount()).To(Equal(1))
208+
cname := fakeRegistry.DeregisterArgsForCall(0)
209+
Expect(cname).To(Equal("chaincode-name:chaincode-version"))
210+
})
211+
})
212+
175213
Context("when handler registration fails", func() {
176214
BeforeEach(func() {
177215
fakeRuntime.StartStub = func(*ccprovider.ChaincodeContainerInfo, []byte) error {

core/container/controller.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ type Builder interface {
3030
// VM is the set of operations a container request can invoke on a virtual
// machine implementation.
type VM interface {
3131
// Start is given the chaincode ID, args, env, a map of files to upload and a
// Builder; the exact semantics are defined by the implementation.
Start(ccid ccintf.CCID, args []string, env []string, filesToUpload map[string][]byte, builder Builder) error
3232
// Stop is given the chaincode ID, a timeout and the dontkill/dontremove flags.
Stop(ccid ccintf.CCID, timeout uint, dontkill bool, dontremove bool) error
33+
// Wait returns an int and an error for the given chaincode ID.
// WaitContainerReq.Do forwards both to the Exited callback as the exit code
// and error.
Wait(ccid ccintf.CCID) (int, error)
3334
// HealthCheck takes a context and returns an error.
HealthCheck(context.Context) error
3435
}
3536

@@ -172,6 +173,31 @@ func (si StopContainerReq) GetCCID() ccintf.CCID {
172173
return si.CCID
173174
}
174175

176+
//go:generate counterfeiter -o mock/exitedfunc.go --fake-name ExitedFunc ExitedFunc
177+
178+
// ExitedFunc is the prototype for the function called when a container exits.
// WaitContainerReq.Do calls it with the exit code and error returned by VM.Wait.
179+
type ExitedFunc func(exitCode int, err error)
180+
181+
// WaitContainerReq provides the chaincode ID of the container to wait on and a
182+
// callback to call upon chaincode termination.
183+
type WaitContainerReq struct {
184+
// CCID is the chaincode ID handed to VM.Wait by Do and returned by GetCCID.
CCID ccintf.CCID
185+
// Exited is called by Do, from a separate goroutine, with the results of VM.Wait.
Exited ExitedFunc
186+
}
187+
188+
func (w WaitContainerReq) Do(v VM) error {
189+
exited := w.Exited
190+
go func() {
191+
exitCode, err := v.Wait(w.CCID)
192+
exited(exitCode, err)
193+
}()
194+
return nil
195+
}
196+
197+
// GetCCID returns the chaincode ID carried by the request; VMController.Process
// obtains the ID of the request it is handling through this method.
func (w WaitContainerReq) GetCCID() ccintf.CCID {
198+
return w.CCID
199+
}
200+
175201
func (vmc *VMController) Process(vmtype string, req VMCReq) error {
176202
v := vmc.newVM(vmtype)
177203
ccid := req.GetCCID()

core/container/controller_test.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,11 @@ import (
1313
"github.com/hyperledger/fabric/core/chaincode/platforms"
1414
"github.com/hyperledger/fabric/core/chaincode/platforms/golang"
1515
"github.com/hyperledger/fabric/core/container"
16+
"github.com/hyperledger/fabric/core/container/ccintf"
17+
"github.com/hyperledger/fabric/core/container/mock"
1618
pb "github.com/hyperledger/fabric/protos/peer"
19+
. "github.com/onsi/gomega"
20+
"github.com/pkg/errors"
1721
"github.com/stretchr/testify/assert"
1822
)
1923

@@ -32,3 +36,32 @@ func TestVM_GetChaincodePackageBytes(t *testing.T) {
3236
assert.Error(t, err,
3337
"GetChaincodePackageBytes did not return error when chaincode ID is nil")
3438
}
39+
40+
func TestWaitContainerReq(t *testing.T) {
41+
gt := NewGomegaWithT(t)
42+
43+
exited := &mock.ExitedFunc{}
44+
done := make(chan struct{})
45+
exited.Stub = func(int, error) { close(done) }
46+
47+
req := container.WaitContainerReq{
48+
CCID: ccintf.CCID{Name: "the-name", Version: "the-version"},
49+
Exited: exited.Spy,
50+
}
51+
gt.Expect(req.GetCCID()).To(Equal(ccintf.CCID{Name: "the-name", Version: "the-version"}))
52+
53+
fakeVM := &mock.VM{}
54+
fakeVM.WaitReturns(99, errors.New("boing-boing"))
55+
56+
err := req.Do(fakeVM)
57+
gt.Expect(err).NotTo(HaveOccurred())
58+
gt.Eventually(done).Should(BeClosed())
59+
60+
gt.Expect(fakeVM.WaitCallCount()).To(Equal(1))
61+
ccid := fakeVM.WaitArgsForCall(0)
62+
gt.Expect(ccid).To(Equal(ccintf.CCID{Name: "the-name", Version: "the-version"}))
63+
64+
ec, exitErr := exited.ArgsForCall(0)
65+
gt.Expect(ec).To(Equal(99))
66+
gt.Expect(exitErr).To(MatchError("boing-boing"))
67+
}

0 commit comments

Comments
 (0)