Skip to content

Commit

Permalink
[FABG-911] PKCS11 context resilience for errors
Browse files Browse the repository at this point in the history
- The current logic validates session handles returned from the pool or
opened recently. This recovery logic kicks in if opening a session fails
with an error due to technical or connectivity issues.
- After this fix, the context handle will not panic if recovery attempts
are exhausted; instead it will report an error so that recovery can be
retried periodically, or later once the systems are available.

Change-Id: Ie90914bb4c9011a66cebc705039f05c476b6a5c9
Signed-off-by: sudesh.shetty <sudesh.shetty@securekey.com>
  • Loading branch information
sudeshrshetty committed Sep 24, 2019
1 parent 5d7ae7a commit 4a6db41
Show file tree
Hide file tree
Showing 2 changed files with 208 additions and 55 deletions.
152 changes: 97 additions & 55 deletions pkg/core/cryptosuite/common/pkcs11/contextHandle.go
Expand Up @@ -82,18 +82,7 @@ func (handle *ContextHandle) OpenSession() (mPkcs11.SessionHandle, error) {
handle.lock.RLock()
defer handle.lock.RUnlock()

var session mPkcs11.SessionHandle
var err error
for i := 0; i < handle.opts.openSessionRetry; i++ {
session, err = handle.ctx.OpenSession(handle.slot, mPkcs11.CKF_SERIAL_SESSION|mPkcs11.CKF_RW_SESSION)
if err != nil {
logger.Warnf("OpenSession failed, retrying [%s]\n", err)
} else {
logger.Debug("OpenSession succeeded")
break
}
}
return session, err
return handle.ctx.OpenSession(handle.slot, mPkcs11.CKF_SERIAL_SESSION|mPkcs11.CKF_RW_SESSION)
}

// Login logs a user into a token
Expand All @@ -119,13 +108,20 @@ func (handle *ContextHandle) ReturnSession(session mPkcs11.SessionHandle) {
handle.lock.RLock()
defer handle.lock.RUnlock()

_, e := handle.ctx.GetSessionInfo(session)
e := isEmpty(session)
if e != nil {
logger.Warnf("not returning session [%d], due to error [%s]. Discarding it", session, e)
return
}

_, e = handle.ctx.GetSessionInfo(session)
if e != nil {
logger.Warnf("not returning session [%d], due to error [%s]. Discarding it", session, e)
e = handle.ctx.CloseSession(session)
if e != nil {
logger.Warn("unable to close session:", e)
}
cachebridge.ClearSession(fmt.Sprintf("%d", session))
return
}

Expand All @@ -137,6 +133,7 @@ func (handle *ContextHandle) ReturnSession(session mPkcs11.SessionHandle) {
default:
// have plenty of sessions in cache, dropping
e = handle.ctx.CloseSession(session)
cachebridge.ClearSession(fmt.Sprintf("%d", session))
if e != nil {
logger.Warn("unable to close session: ", e)
}
Expand All @@ -151,20 +148,35 @@ func (handle *ContextHandle) GetSession() (session mPkcs11.SessionHandle) {
select {
case session = <-handle.sessions:
logger.Debugf("Reusing existing pkcs11 session %+v on slot %d\n", session, handle.slot)

handle.lock.RUnlock()
default:
handle.lock.RUnlock()
logger.Debug("opening a new session since cache is empty/full")
// cache is empty (or completely in use), create a new session
s, err := handle.OpenSession()
if err != nil {
handle.lock.RUnlock()
panic(fmt.Errorf("OpenSession failed [%s]", err))
logger.Debugf("opening a new session failed [%v], will retry %d times", err, handle.opts.openSessionRetry)
handle.lock.Lock()
defer handle.lock.Unlock()
for i := 0; i < handle.opts.openSessionRetry; i++ {
logger.Debugf("Trying re-login and open session attempt[%v]", i+1)
s, err = handle.reLogin()
if err != nil {
logger.Debugf("Failed to re-login, attempt[%d], error[%s], trying again now", i+1, err)
continue
} else {
logger.Debugf("Successfully able to re-login and open session[%d], attempt[%d], clearing cache now for new session", s, i+1)
cachebridge.ClearSession(fmt.Sprintf("%d", s))
return s
}
}
logger.Debugf("Exhausted all attempts to recover session, failed with error [%s], returning 0 session", err)
return s
}
logger.Debugf("Created new pkcs11 session %+v on slot %d", s, handle.slot)
session = s
cachebridge.ClearSession(fmt.Sprintf("%d", session))
}
handle.lock.RUnlock()
return handle.validateSession(session)
}

Expand Down Expand Up @@ -192,6 +204,11 @@ func (handle *ContextHandle) GenerateKeyPair(session mPkcs11.SessionHandle, m []
handle.lock.RLock()
defer handle.lock.RUnlock()

err := isEmpty(session)
if err != nil {
return 0, 0, errors.Wrap(err, "failed to generate key pair")
}

return handle.ctx.GenerateKeyPair(session, m, public, private)
}

Expand Down Expand Up @@ -336,6 +353,11 @@ func (handle *ContextHandle) FindKeyPairFromSKI(session mPkcs11.SessionHandle, s
handle.lock.RLock()
defer handle.lock.RUnlock()

err := isEmpty(session)
if err != nil {
return nil, errors.Wrap(err, "Failed to find key pair from SKI")
}

return cachebridge.GetKeyPairFromSessionSKI(&cachebridge.KeyPairCacheKey{Mod: handle.ctx, Session: session, SKI: ski, KeyType: keyType})
}

Expand Down Expand Up @@ -364,45 +386,10 @@ func (handle *ContextHandle) validateSession(currentSession mPkcs11.SessionHandl
handle.lock.Lock()
defer handle.lock.Unlock()

handle.disposePKCS11Ctx()

//create new context
newCtx := handle.createNewPKCS11Ctx()
if newCtx == nil {
logger.Warn("Failed to recreate new pkcs11 context for given library")
return 0
}

//find slot
slot, found := handle.findSlot(newCtx)
if !found {
logger.Warnf("Unable to find slot for label :%s", handle.label)
return 0
}
logger.Debug("got the slot ", slot)

//open new session for given slot
newSession, err := createNewSession(newCtx, slot)
newSession, err := handle.reLogin()
if err != nil {
logger.Fatalf("OpenSession [%s]\n", err)
return 0
}
logger.Debugf("Recreated new pkcs11 session %+v on slot %d\n", newSession, slot)

//login with new session
err = newCtx.Login(newSession, mPkcs11.CKU_USER, handle.pin)
if err != nil && err != mPkcs11.Error(mPkcs11.CKR_USER_ALREADY_LOGGED_IN) {
logger.Warnf("Unable to login with new session :%s", newSession)
return 0
logger.Warnf("Re-login Failed : %s,", err)
}

handle.sendNotification()

handle.ctx = newCtx
handle.slot = slot
handle.sessions = make(chan mPkcs11.SessionHandle, handle.opts.sessionCacheSize)

logger.Infof("Able to login with recreated session successfully")
return newSession

case mPkcs11.Error(mPkcs11.CKR_DEVICE_MEMORY),
Expand All @@ -417,6 +404,52 @@ func (handle *ContextHandle) validateSession(currentSession mPkcs11.SessionHandl
}
}

// reLogin disposes the current pkcs11 context, creates a new one, looks up the
// slot for handle.label, opens a new session on that slot and logs in with
// handle.pin.
//
// On success it stores the new slot, replaces handle.sessions with a fresh
// channel of capacity handle.opts.sessionCacheSize, calls
// handle.sendNotification and returns the new session. On failure it returns a
// zero session handle and an error; for a failed login the underlying pkcs11
// error is preserved as the cause of the returned error.
//
// handle.ctx is replaced as soon as the new context has been created, so it
// already points at the new context even if a later step fails.
//
// Note: this function isn't thread safe, recommended to use write lock for calling this function
func (handle *ContextHandle) reLogin() (mPkcs11.SessionHandle, error) {

	// dispose existing pkcs11 ctx
	handle.disposePKCS11Ctx()

	// create new context
	newCtx := handle.createNewPKCS11Ctx()
	if newCtx == nil {
		logger.Warn("Failed to recreate new pkcs11 context for given library")
		return 0, errors.New("failed to recreate new pkcs11 context for given library")
	}
	handle.ctx = newCtx

	// find slot
	slot, found := handle.findSlot(handle.ctx)
	if !found {
		logger.Warnf("Unable to find slot for label :%s", handle.label)
		return 0, errors.Errorf("unable to find slot for label :%s", handle.label)
	}
	logger.Debugf("Able to find slot : %d ", slot)

	// open new session for given slot
	newSession, err := createNewSession(handle.ctx, slot)
	if err != nil {
		logger.Errorf("Failed to open session with given slot [%s]\n", err)
		return 0, errors.Errorf("failed to open session with given slot :%s", err)
	}
	logger.Debugf("Recreated new pkcs11 session %+v on slot %d\n", newSession, slot)

	// login with new session; an "already logged in" response is treated as success
	err = handle.ctx.Login(newSession, mPkcs11.CKU_USER, handle.pin)
	if err != nil && err != mPkcs11.Error(mPkcs11.CKR_USER_ALREADY_LOGGED_IN) {
		logger.Warnf("Unable to login with new session :%d", newSession)
		// keep the login error as the cause so callers can see why the login failed
		return 0, errors.Wrapf(err, "unable to login with new session :%d", newSession)
	}

	handle.sendNotification()
	handle.slot = slot
	handle.sessions = make(chan mPkcs11.SessionHandle, handle.opts.sessionCacheSize)

	logger.Infof("Able to login with recreated session successfully")
	return newSession, nil
}

//detectErrorCondition checks if given session handle has errors
func (handle *ContextHandle) detectErrorCondition(currentSession mPkcs11.SessionHandle) error {
var e error
Expand Down Expand Up @@ -455,7 +488,7 @@ func (handle *ContextHandle) disposePKCS11Ctx() {
//ignore error on close all sessions
err := handle.ctx.CloseAllSessions(handle.slot)
if err != nil {
logger.Warnf("Unable to close session", err)
logger.Warn("Unable to close session", err)
}

//clear cache
Expand Down Expand Up @@ -645,3 +678,12 @@ func loadLibInitializer() lazycache.EntryInitializer {
return &ContextHandle{ctx: ctx, slot: slot, pin: ctxKey.pin, lib: ctxKey.lib, label: ctxKey.label, sessions: sessions, opts: ctxKey.opts}, nil
}
}

// isEmpty reports whether the given session handle is unusable. It returns an
// error for the default zero handle and nil for any handle greater than zero.
func isEmpty(session mPkcs11.SessionHandle) error {
	if session <= 0 {
		return errors.New("invalid session detected")
	}
	return nil
}
111 changes: 111 additions & 0 deletions pkg/core/cryptosuite/common/pkcs11/contextHandle_test.go
Expand Up @@ -492,6 +492,117 @@ func TestContextHandlerConcurrency(t *testing.T) {
assert.Equal(t, concurrency, testsReturned)
}

// TestSessionHandle checks that GetSession hands back a zero (invalid) session
// while the handle's pin and slot are tampered with, and that a valid session
// is returned again once the original values are restored.
func TestSessionHandle(t *testing.T) {
	ctxHandle, loadErr := LoadPKCS11ContextHandle(lib, label, pin)
	assert.NoError(t, loadErr)
	assert.NotNil(t, ctxHandle)
	assert.NotNil(t, ctxHandle.ctx)

	// drain the session pool so that GetSession cannot reuse a pooled session
	for len(ctxHandle.sessions) > 0 {
		<-ctxHandle.sessions
	}

	// with untouched credentials a valid session is expected
	assert.NoError(t, isEmpty(ctxHandle.GetSession()))

	// remember the real pin and slot, then replace them with bogus values
	originalPin, originalSlot := ctxHandle.pin, ctxHandle.slot
	ctxHandle.pin = "9999"
	ctxHandle.slot = 8888

	// two consecutive attempts must both yield an invalid session
	for attempt := 0; attempt < 2; attempt++ {
		assert.Error(t, isEmpty(ctxHandle.GetSession()))
	}

	// put the real pin and slot back
	ctxHandle.pin = originalPin
	ctxHandle.slot = originalSlot

	// a valid session must be available again
	assert.NoError(t, isEmpty(ctxHandle.GetSession()))
}

// TestGetSessionResilience checks that GetSession can recover on a later call:
// with a tampered pin and slot it returns a zero session, and once the original
// values are restored a background retry loop obtains a valid session before
// ctxReloadTimeout elapses.
func TestGetSessionResilience(t *testing.T) {

handle, err := LoadPKCS11ContextHandle(lib, label, pin)
assert.NoError(t, err)
assert.NotNil(t, handle)
assert.NotNil(t, handle.ctx)

// make sure session pool is empty
for len(handle.sessions) > 0 {
<-handle.sessions
}

// get session; with untouched credentials it must be valid
session := handle.GetSession()
err = isEmpty(session)
assert.NoError(t, err)

// back up pin and slot, then tamper with both so that get session should fail
pinBackup := handle.pin
slotBackup := handle.slot

// resetPinAndSlot restores the original pin and slot while holding the
// handle's write lock; it is run while the retry goroutine below may be
// calling GetSession.
resetPinAndSlot := func() {
handle.lock.Lock()
defer handle.lock.Unlock()
handle.pin = pinBackup
handle.slot = slotBackup
}

handle.pin = "1111"
handle.slot = 8888

// make sure get session should fail
session = handle.GetSession()
err = isEmpty(session)
assert.Error(t, err)

// up to 5 attempts, 200ms apart
const retry = 5
interval := 200 * time.Millisecond
// unbuffered: the retry goroutine blocks on send until the select below receives
done := make(chan bool)

// launch get session with retry; signals done on the first valid session
go func() {
for i := 0; i < retry; i++ {
session = handle.GetSession()
if err := isEmpty(session); err == nil {
done <- true
break
}
time.Sleep(interval)
continue
}
}()

// let the retry loop run against the tampered values before restoring them
time.Sleep(500 * time.Millisecond)

go resetPinAndSlot()

// wait for the retry loop to report a valid session, or give up after ctxReloadTimeout
select {
case <-done:
t.Log("session recovered")
handle.pin = pinBackup
handle.slot = slotBackup
case <-time.After(ctxReloadTimeout):
t.Fatal("couldn't recover session")
}

}

func TestMain(m *testing.M) {

possibilities := strings.Split(allLibs, ",")
Expand Down

0 comments on commit 4a6db41

Please sign in to comment.