
Commit

Add kube credentials lockfile to prevent possibility of excessive login attempts

AntonAM committed Jun 5, 2023
1 parent e21e62c commit d03d33e
Showing 5 changed files with 268 additions and 1 deletion.
10 changes: 10 additions & 0 deletions api/utils/keypaths/keypaths.go
@@ -52,6 +52,8 @@ const (
kubeDirSuffix = "-kube"
// kubeConfigSuffix is the suffix of a kubeconfig file stored under the keys directory.
kubeConfigSuffix = "-kubeconfig"
// fileNameKubeCredLock is the name of the lockfile used to prevent excessive login attempts.
fileNameKubeCredLock = "kube_credentials.lock"
// casDir is the directory name for where clusters certs are stored.
casDir = "cas"
// fileExtPem is the extension of a file where a public certificate is stored.
@@ -76,6 +78,7 @@ const (
// │ ├── foo --> Private Key for user "foo"
// │ ├── foo.pub --> Public Key
// │ ├── foo.ppk --> PuTTY PPK-formatted keypair for user "foo"
// │ ├── kube_credentials.lock --> Kube credential lockfile, used to prevent excessive relogin attempts
// │ ├── foo-x509.pem --> TLS client certificate for Auth Server
// │ ├── foo-ssh --> SSH certs for user "foo"
// │ │ ├── root-cert.pub --> SSH cert for Teleport cluster "root"
@@ -311,6 +314,13 @@ func KubeConfigPath(baseDir, proxy, username, cluster, kubename string) string {
return filepath.Join(KubeCertDir(baseDir, proxy, username, cluster), kubename+kubeConfigSuffix)
}

// KubeCredLockfilePath returns the kube credentials lockfile path for the given proxy.
//
// <baseDir>/keys/<proxy>/kube_credentials.lock
func KubeCredLockfilePath(baseDir, proxy string) string {
return filepath.Join(ProxyKeyDir(baseDir, proxy), fileNameKubeCredLock)
}

// IsProfileKubeConfigPath makes a best effort attempt to check if the given
// path is a profile specific kubeconfig path generated by this package.
func IsProfileKubeConfigPath(path string) (bool, error) {
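
For reference, a minimal sketch (not part of the commit) of how the new helper resolves the lockfile location; the base directory and proxy host below are made-up values:

package main

import (
	"fmt"

	"github.com/gravitational/teleport/api/utils/keypaths"
)

func main() {
	// Hypothetical values; tsh normally passes its profile directory
	// (for example ~/.tsh) and the proxy host taken from the profile name.
	baseDir := "/home/alice/.tsh"
	proxy := "teleport.example.com"

	// Prints /home/alice/.tsh/keys/teleport.example.com/kube_credentials.lock,
	// matching the <baseDir>/keys/<proxy>/kube_credentials.lock layout documented above.
	fmt.Println(keypaths.KubeCredLockfilePath(baseDir, proxy))
}
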
15 changes: 14 additions & 1 deletion lib/client/keystore.go
@@ -17,6 +17,8 @@ limitations under the License.
package client

import (
"errors"
iofs "io/fs"
"os"
"path/filepath"
"runtime"
@@ -122,6 +124,11 @@ func (fs *FSKeyStore) ppkFilePath(idx KeyIndex) string {
return keypaths.PPKFilePath(fs.KeyDir, idx.ProxyHost, idx.Username)
}

// kubeCredLockfilePath returns the kube credentials lockfile path for the given KeyIndex.
func (fs *FSKeyStore) kubeCredLockfilePath(idx KeyIndex) string {
return keypaths.KubeCredLockfilePath(fs.KeyDir, idx.ProxyHost)
}

// publicKeyPath returns the public key path for the given KeyIndex.
func (fs *FSKeyStore) publicKeyPath(idx KeyIndex) string {
return keypaths.PublicKeyPath(fs.KeyDir, idx.ProxyHost, idx.Username)
@@ -236,7 +243,13 @@ func (fs *FSKeyStore) DeleteKey(idx KeyIndex) error {
// but it may not exist when upgrading from v9 -> v10 and logging into an existing cluster.
// as such, deletion should be best-effort and not generate an error if it fails.
if runtime.GOOS == constants.WindowsOS {
os.Remove(fs.ppkFilePath(idx))
_ = os.Remove(fs.ppkFilePath(idx))
}

// Also try to delete the kube credentials lockfile in case it exists.
err := os.Remove(fs.kubeCredLockfilePath(idx))
if err != nil && !errors.Is(err, iofs.ErrNotExist) {
log.Debugf("Could not remove kube credentials file: %v", err)
}

// Clear ClusterName to delete the user certs stored for all clusters.
8 changes: 8 additions & 0 deletions lib/utils/fs_windows.go
@@ -26,8 +26,16 @@ limitations under the License.
// locking. Repeatedly removing them on unlock when acquiring dozens of locks in a short timespan
// was causing flock.Flock.TryRLock to return either "access denied" or "The process cannot access
// the file because it is being used by another process".

import "strings"

const lockPostfix = ".lock.tmp"

func getPlatformLockFilePath(path string) string {
// If the target file is itself a dedicated lockfile, we don't create another lockfile, since
// we don't intend to read/write the target file itself.
if strings.HasSuffix(path, ".lock") {
return path
}
return path + lockPostfix
}
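
A quick illustration of the two branches, written as a hypothetical table-driven test that could sit next to this function (assuming the same utils package as fs_windows.go); it is not part of the commit and the paths are made up:

//go:build windows

package utils

import "testing"

// TestGetPlatformLockFilePath is an illustrative sketch only.
func TestGetPlatformLockFilePath(t *testing.T) {
	cases := []struct {
		path, want string
	}{
		// A dedicated lockfile is locked directly; no companion *.lock.tmp file is created.
		{`C:\Users\alice\.tsh\keys\proxy\kube_credentials.lock`, `C:\Users\alice\.tsh\keys\proxy\kube_credentials.lock`},
		// Any other file gets a companion lockfile with the .lock.tmp postfix.
		{`C:\Users\alice\.tsh\proxy.example.com.yaml`, `C:\Users\alice\.tsh\proxy.example.com.yaml.lock.tmp`},
	}
	for _, tc := range cases {
		if got := getPlatformLockFilePath(tc.path); got != tc.want {
			t.Errorf("getPlatformLockFilePath(%q) = %q, want %q", tc.path, got, tc.want)
		}
	}
}
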
70 changes: 70 additions & 0 deletions tool/tsh/kube.go
@@ -18,8 +18,10 @@ package main

import (
"context"
"errors"
"fmt"
"io"
"net"
"net/url"
"os"
"sort"
@@ -594,6 +596,51 @@ func newKubeCredentialsCommand(parent *kingpin.CmdClause) *kubeCredentialsComman
return c
}

func getKubeCredLockfilePath(homePath, proxy string) (string, error) {
profilePath := profile.FullProfilePath(homePath)
// tsh stores the profiles using the proxy host as the profile name.
profileName, err := utils.Host(proxy)
if err != nil {
return "", trace.Wrap(err)
}

return keypaths.KubeCredLockfilePath(profilePath, profileName), nil
}

// errKubeCredLockfileFound is returned when the kube credentials lockfile is found and the user should resolve login problems manually.
var errKubeCredLockfileFound = trace.AlreadyExists("Having problems with relogin, please use 'tsh login/tsh kube login' manually")

func takeKubeCredLock(ctx context.Context, homePath, proxy string) (func(bool), error) {
kubeCredLockfilePath, err := getKubeCredLockfilePath(homePath, proxy)
if err != nil {
return nil, trace.Wrap(err)
}

// If the kube credentials lockfile already exists, it means the last time kube credentials was called
// we had an error while trying to issue a certificate; return an error asking the user to log in manually.
if _, err := os.Stat(kubeCredLockfilePath); err == nil {
log.Debugf("Kube credentials lockfile was found at %q, aborting.", kubeCredLockfilePath)
return nil, trace.Wrap(errKubeCredLockfileFound)
}

if _, err := utils.EnsureLocalPath(kubeCredLockfilePath, "", ""); err != nil {
return nil, trace.Wrap(err)
}
// Take a lock while we're trying to issue a certificate and possibly relogin.
unlock, err := utils.FSTryWriteLockTimeout(ctx, kubeCredLockfilePath, 5*time.Second)
if err != nil {
log.Debugf("could not take kube credentials lock: %v", err.Error())
return nil, trace.Wrap(errKubeCredLockfileFound)
}

return func(removeFile bool) {
if removeFile {
os.Remove(kubeCredLockfilePath)
}
unlock()
}, nil
}

func (c *kubeCredentialsCommand) run(cf *CLIConf) error {
profile, err := cf.GetProfile()
if err != nil {
@@ -660,6 +707,7 @@ func (c *kubeCredentialsCommand) issueCert(cf *CLIConf) error {
}
if crt != nil && time.Until(crt.NotAfter) > time.Minute {
log.Debugf("Re-using existing TLS cert for Kubernetes cluster %q", c.kubeCluster)

return c.writeKeyResponse(cf.Stdout(), k, c.kubeCluster)
}
// Otherwise, cert for this k8s cluster is missing or expired. Request
@@ -668,6 +716,15 @@ func (c *kubeCredentialsCommand) issueCert(cf *CLIConf) error {

log.Debugf("Requesting TLS cert for Kubernetes cluster %q", c.kubeCluster)

unlockKubeCred, err := takeKubeCredLock(cf.Context, cf.HomePath, cf.Proxy)
if err != nil {
return trace.Wrap(err)
}
deleteKubeCredsLock := false
defer func() {
unlockKubeCred(deleteKubeCredsLock) // by default (in case of an error) we don't delete the lockfile.
}()

ctx, span := tc.Tracer.Start(cf.Context, "tsh.kubeCredentials/RetryWithRelogin")
err = client.RetryWithRelogin(ctx, tc, func() error {
// The requirement may change after a new login so check again just in
@@ -685,6 +742,11 @@ func (c *kubeCredentialsCommand) issueCert(cf *CLIConf) error {
})
span.End()
if err != nil {
// If we got a network error, we remove the lockfile so we can recover from temporary connection
// problems without requiring user intervention.
if isNetworkError(err) {
deleteKubeCredsLock = true
}
return trace.Wrap(err)
}
// Make sure the cert is allowed to access the cluster.
@@ -700,9 +762,17 @@ func (c *kubeCredentialsCommand) issueCert(cf *CLIConf) error {
return trace.Wrap(err)
}

// Remove the lockfile so subsequent tsh kube credentials calls don't exit early
deleteKubeCredsLock = true

return c.writeKeyResponse(cf.Stdout(), k, c.kubeCluster)
}

func isNetworkError(err error) bool {
var opErr *net.OpError
return errors.As(err, &opErr) || trace.IsConnectionProblem(err)
}

func (c *kubeCredentialsCommand) checkLocalProxyRequirement(connUpgradeRequired bool) error {
if connUpgradeRequired {
return trace.BadParameter("Cannot connect Kubernetes clients to Teleport Proxy directly. Please use `tsh proxy kube` or `tsh kubectl` instead.")
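
Taken together, the locking flow in issueCert above boils down to the sketch below. This is a simplified, hypothetical condensation — the helper name and the issue callback are made up, and error handling, tracing, and the certificate checks are trimmed — not code from the commit:

// issueCertWithLock sketches the flow: take the lock before any relogin attempt,
// keep the lockfile around on most failures so concurrent 'tsh kube credentials'
// calls bail out early, and remove it on success or on a plain network error.
func issueCertWithLock(cf *CLIConf, issue func(ctx context.Context) error) error {
	unlock, err := takeKubeCredLock(cf.Context, cf.HomePath, cf.Proxy)
	if err != nil {
		// Lockfile already present or could not be locked: errKubeCredLockfileFound.
		return trace.Wrap(err)
	}
	removeLockfile := false
	defer func() { unlock(removeLockfile) }()

	if err := issue(cf.Context); err != nil {
		// Temporary connectivity problems shouldn't force a manual relogin,
		// so drop the lockfile and let the next call retry.
		if isNetworkError(err) {
			removeLockfile = true
		}
		return trace.Wrap(err)
	}

	// Success: remove the lockfile so subsequent calls proceed normally.
	removeLockfile = true
	return nil
}
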
166 changes: 166 additions & 0 deletions tool/tsh/tsh_test.go
@@ -53,7 +53,10 @@ import (
"github.com/gravitational/teleport/api/types"
"github.com/gravitational/teleport/api/types/events"
"github.com/gravitational/teleport/api/types/wrappers"
apiutils "github.com/gravitational/teleport/api/utils"
"github.com/gravitational/teleport/api/utils/keypaths"
"github.com/gravitational/teleport/api/utils/keys"
"github.com/gravitational/teleport/integration/kube"
"github.com/gravitational/teleport/lib"
"github.com/gravitational/teleport/lib/auth"
"github.com/gravitational/teleport/lib/auth/mocku2f"
@@ -1927,6 +1930,169 @@ func tryCreateTrustedCluster(t *testing.T, authServer *auth.Server, trustedClust
require.FailNow(t, "Timeout creating trusted cluster")
}

func TestKubeCredentialsLock(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
const kubeClusterName = "kube-cluster"

t.Run("failed client creation doesn't create lockfile", func(t *testing.T) {
tmpHomePath := t.TempDir()

firstErr := Run(ctx, []string{
"kube",
"credentials",
"--proxy", "fake-proxy",
"--teleport-cluster", "teleport",
"--kube-cluster", kubeClusterName,
}, setHomePath(tmpHomePath))
require.Error(t, firstErr) // Fails because fake proxy doesn't exist
require.NoFileExists(t, keypaths.KubeCredLockfilePath(tmpHomePath, "fake-proxy"))
})

t.Run("kube credentials called multiple times, SSO login called only once", func(t *testing.T) {
tmpHomePath := t.TempDir()
connector := mockConnector(t)
alice, err := types.NewUser("alice@example.com")
require.NoError(t, err)

kubeRole, err := types.NewRole("kube-access", types.RoleSpecV6{
Allow: types.RoleConditions{
KubernetesLabels: types.Labels{types.Wildcard: apiutils.Strings{types.Wildcard}},
KubeGroups: []string{kube.TestImpersonationGroup},
KubeUsers: []string{alice.GetName()},
KubernetesResources: []types.KubernetesResource{
{
Kind: types.KindKubePod, Name: types.Wildcard, Namespace: types.Wildcard,
},
},
},
})
require.NoError(t, err)
alice.SetRoles([]string{"access", kubeRole.GetName()})

require.NoError(t, err)
authProcess, proxyProcess := makeTestServers(t, withBootstrap(connector, alice, kubeRole))
authServer := authProcess.GetAuthServer()
require.NotNil(t, authServer)
proxyAddr, err := proxyProcess.ProxyWebAddr()
require.NoError(t, err)

teleportClusterName, err := authServer.GetClusterName()
require.NoError(t, err)

kubeCluster, err := types.NewKubernetesClusterV3(types.Metadata{
Name: kubeClusterName,
Labels: map[string]string{},
},
types.KubernetesClusterSpecV3{},
)
require.NoError(t, err)
kubeServer, err := types.NewKubernetesServerV3FromCluster(kubeCluster, kubeClusterName, kubeClusterName)
require.NoError(t, err)
_, err = authServer.UpsertKubernetesServer(context.Background(), kubeServer)
require.NoError(t, err)

var ssoCalls atomic.Int32
mockSSO := mockSSOLogin(t, authServer, alice)
ssoFunc := func(ctx context.Context, connectorID string, priv *keys.PrivateKey, protocol string) (*auth.SSHLoginResponse, error) {
ssoCalls.Add(1)
return mockSSO(ctx, connectorID, priv, protocol)
}

err = Run(context.Background(), []string{
"login",
"--insecure",
"--debug",
"--auth", connector.GetName(),
"--proxy", proxyAddr.String(),
}, setHomePath(tmpHomePath), func(cf *CLIConf) error {
cf.mockSSOLogin = ssoFunc
return nil
})
require.NoError(t, err)
_, err = profile.FromDir(tmpHomePath, "")
require.NoError(t, err)
ssoCalls.Store(0) // Reset number of calls after setup login

// Overwrite profile data to simulate an expired user certificate.
expiredSSHCert := `
ssh-rsa-cert-v01@openssh.com AAAAHHNzaC1yc2EtY2VydC12MDFAb3BlbnNzaC5jb20AAAAgefu/ZQ70TbBMZfGUFHluE7PCu6PiWN0SsA5xrKbkzCkAAAADAQABAAABAQCyGzVvW7vgsK1P2Rtg55DTjL4We0WjSYYdzXJnVbyTxqrEYDOkhSnw4tZTS9KgALb698g0vrqy5bSJXB90d8uLdTmCmPngPbYpSN+p3P2SbIdkB5cRIMspB22qSkfHUARQlYM4PrMYIznWwQRFBvrRNOVdTdbMywlQGMUb0jdxK7JFBx1LC76qfHJhrD7jZS+MtygFIqhAJS9CQXW314p3FmL9s1cPV5lQfY527np8580qMKPkdeowPd/hVGcPA/C+ZxLcN9LqnuTZEFoDvYtwjfofOGUpANwtENBNZbNTxHDk7shYCRN9aZJ50zdFq3rMNdzFlEyJwm2ca+7aRDLlAAAAAAAAAAAAAAABAAAAEWFsaWNlQGV4YW1wbGUuY29tAAAAVQAAADYtdGVsZXBvcnQtbm9sb2dpbi1hZDVhYTViMi00MDBlLTQ2ZmUtOThjOS02ZjRhNDA2YzdlZGMAAAAXLXRlbGVwb3J0LWludGVybmFsLWpvaW4AAAAAZG9PKAAAAABkb0+gAAAAAAAAAQkAAAAXcGVybWl0LWFnZW50LWZvcndhcmRpbmcAAAAAAAAAFnBlcm1pdC1wb3J0LWZvcndhcmRpbmcAAAAAAAAACnBlcm1pdC1wdHkAAAAAAAAAEnByaXZhdGUta2V5LXBvbGljeQAAAAgAAAAEbm9uZQAAAA50ZWxlcG9ydC1yb2xlcwAAADUAAAAxeyJ2ZXJzaW9uIjoidjEiLCJyb2xlcyI6WyJhY2Nlc3MiLCJrdWJlLWFjY2VzcyJdfQAAABl0ZWxlcG9ydC1yb3V0ZS10by1jbHVzdGVyAAAADQAAAAlsb2NhbGhvc3QAAAAPdGVsZXBvcnQtdHJhaXRzAAAACAAAAARudWxsAAAAAAAAARcAAAAHc3NoLXJzYQAAAAMBAAEAAAEBAK/vBVOnf+QLSF0aKsEpQuof1o/5EJJ25C07tljSWvF2wNixHOyHZj8kAwO3f2XmWQd/XBddvZtLETvTbdBum8T37oOLepnDR32TzTV7cR7XVvo0pSqwrg0jWuAxt67b2n2BnWOCULdV9mPM8X9q4wRhqQHFGB3+7dD24x5YmVIBFUFJKYfFYh516giKAcNPFSK6eD381+cNXYx3yDO6i/iyrsuhbYVcTlWSV2Zhc0Gytf83QRmpM6hXW8b8hGCui36ffXSYu/9nWHcK7OeaHZzePT7jHptyqcrYSs52VuzikO74jpw8htU6maUeEcR5TBbeBlB+hmHQfwl8bEUeszMAAAEUAAAADHJzYS1zaGEyLTUxMgAAAQAwb7OpVkJP8pz9M4VIoG0DzXe5W2GN2pH0eWq+n/+YgshzcWPyHsPbckCu9IleHhrp6IK8ZyCt2qLi77o9XJxJUCiJxmsnfJYTs5DtWoCqiIRWKtYvSNpML1PH/badQsS/Stg7VUs48Yftg4eJOo4PYfJqDoHRfGimMwTNQ+aIWAYek3QwydlDdEtJ6X8kkHhNnZb0fzUbUyQLzFDXPu++di+AHNOpMOWDBtNZ1Lm3WWja/t5zIv7j6L67ZTzS5JtV7TlcD+lJ7RcBfWow8OtEW5RyyI8918A2q/zbe3OhGD4D2dZxPUhyGPHLLgy9NJqJoueR8qLPQQyMdDl4JKUq
`
privKey := `
-----BEGIN RSA PRIVATE KEY-----
MIIEowIBAAKCAQEAshs1b1u74LCtT9kbYOeQ04y+FntFo0mGHc1yZ1W8k8aqxGAz
pIUp8OLWU0vSoAC2+vfINL66suW0iVwfdHfLi3U5gpj54D22KUjfqdz9kmyHZAeX
ESDLKQdtqkpHx1AEUJWDOD6zGCM51sEERQb60TTlXU3WzMsJUBjFG9I3cSuyRQcd
Swu+qnxyYaw+42UvjLcoBSKoQCUvQkF1t9eKdxZi/bNXD1eZUH2Odu56fOfNKjCj
5HXqMD3f4VRnDwPwvmcS3DfS6p7k2RBaA72LcI36HzhlKQDcLRDQTWWzU8Rw5O7I
WAkTfWmSedM3Rat6zDXcxZRMicJtnGvu2kQy5QIDAQABAoIBAFBPIn4PAB1lrRBX
FhhQ8iXhzYi3lwP00Cu6Cr77kueTakbYFhE2Fl5O+lNe2h9ZkyiA996IrgiiuRBC
4NAUgExm1ELGFc3+JZhiCrA+PHx8wWPiZETN462hctqZWdpOg1OOxzdiVkEpCRiD
uhgh+JDC6DV1NsjrOEzMjnxoAqXdS8R8HRSr7ATV/28rXCpzBevtsQFWsHFQ069H
uL9JY4AnBdvnu749ClFYuhux/C0zSAsZIu47WUmmyZdIXY/B32vkhbDfKPpCp7Il
5sE9reNGf22jkqjC4wLpCT0L8wuUnJil0Sj1JUQjtZc4vn7Fc1RWZWRZKC4IlI/+
OUSdXhUCgYEA6krEs7B1W3mO8sCXEUpFZv8KfLaa4BeDncGS++qGrz4qSI8JiuUI
M4uLBec+9FHAAMVd5F/JxhcA5J1BA2e9mITEf582ur/lTU2cSYBIY64IPBMR0ask
Q9UdAdu0r/xQ91cdwKiaSrC3bPgCX/Xe6MzaEWMrmdnse3Kl+E49n0cCgYEAwpvB
gtCk/L6lOsQDgLH3zO27qlYUGSPqhy8iIF+4HMMIIvdIOrSMHspEHhbnypQe9cYO
GRcimlb4U0FbHsOmpkfcNHhvBTegmYEYjuWJR0AN8cgaV2b6XytqYB7Gv2kXhjnF
9dvamhy9+4SngywbqZshUlazVW/RfO4+OqXsKnMCgYBwJJOcUqUJwNhsV0S30O4B
S6gwY5MkGf00oHgDPpFzBfVlP5nYsqHHUk6b58DZXtvhQpcbfcHtoAscYizBPYGh
pEMNtx6SKtHNu41IHTAJDj8AyjvoONul4Db/MbN93O7ARSGHmuwnPgi+DsPMPLqS
gaMLWYWAIbAwsoLApGqYdwKBgAZALHoAK5x2nyYBD7+9d6Ecba+t7h1Umv7Wk7kI
eghqd0NwP+Cq1elTQ9bXk4BdO5VXVDKYHKNqcbVy3vNhA2RJ4JfK2n4HaGAl1l0Y
oE0qkIgYjkgKZbZS1arasjWJsZi9GE+qTR4wGCYQ/7Rl4UmUUwCrCj2PRuJFYLhP
hgNjAoGBAKwqiQpwNzbKOq3+pxau6Y32BqUaTV5ut9FEUz0/qzuNoc2S5bCf4wq+
cc/nvPBnXrP+rsubJXjFDfcIjcZ7x41bRMENvP50xD/J94IpK88TGTVa04VHKExx
iUK/veLmZ6XoouiWLCdU1VJz/1Fcwe/IEamg6ETfofvsqOCgcNYJ
-----END RSA PRIVATE KEY-----
`
pubKey := `ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCyGzVvW7vgsK1P2Rtg55DTjL4We0WjSYYdzXJnVbyTxqrEYDOkhSnw4tZTS9KgALb698g0vrqy5bSJXB90d8uLdTmCmPngPbYpSN+p3P2SbIdkB5cRIMspB22qSkfHUARQlYM4PrMYIznWwQRFBvrRNOVdTdbMywlQGMUb0jdxK7JFBx1LC76qfHJhrD7jZS+MtygFIqhAJS9CQXW314p3FmL9s1cPV5lQfY527np8580qMKPkdeowPd/hVGcPA/C+ZxLcN9LqnuTZEFoDvYtwjfofOGUpANwtENBNZbNTxHDk7shYCRN9aZJ50zdFq3rMNdzFlEyJwm2ca+7aRDLl
`
err = os.WriteFile(fmt.Sprintf("%s/%s", tmpHomePath, "keys/127.0.0.1/alice@example.com"), []byte(privKey), 0666)
require.NoError(t, err)
err = os.WriteFile(fmt.Sprintf("%s/%s", tmpHomePath, "keys/127.0.0.1/alice@example.com.pub"), []byte(pubKey), 0666)
require.NoError(t, err)
err = os.WriteFile(fmt.Sprintf("%s/%s", tmpHomePath, "keys/127.0.0.1/alice@example.com-ssh/localhost-cert.pub"), []byte(expiredSSHCert), 0666)
require.NoError(t, err)

errChan := make(chan error)
runCreds := func() {
credErr := Run(context.Background(), []string{
"kube",
"credentials",
"--insecure",
"--proxy", proxyAddr.String(),
"--auth", connector.GetName(),
"--teleport-cluster", teleportClusterName.GetClusterName(),
"--kube-cluster", kubeClusterName,
}, setHomePath(tmpHomePath), func(cf *CLIConf) error {
cf.mockSSOLogin = ssoFunc
return nil
})
errChan <- credErr
}

// Run kube credentials calls in parallel; only one should actually call SSO login.
runsCount := 3
for i := 0; i < runsCount; i++ {
go runCreds()
}
for i := 0; i < runsCount; i++ {
select {
case err := <-errChan:
if err != nil {
require.ErrorIs(t, err, errKubeCredLockfileFound)
}

case <-time.After(time.Second * 5):
require.Fail(t, "Running kube credentials timed out")
}
}
require.Equal(t, 1, int(ssoCalls.Load()), "SSO login should have been called exactly once")
})
}

func TestSSHHeadlessCLIFlags(t *testing.T) {
t.Parallel()

