From 9831f99953ee96a1e66540a14942cdedb2b7b553 Mon Sep 17 00:00:00 2001 From: Anton Miniailo Date: Mon, 5 Jun 2023 13:52:11 +0200 Subject: [PATCH] Add kube credentials lockfile to prevent possibility of excessive login attempts --- api/utils/keypaths/keypaths.go | 10 ++ lib/client/keystore.go | 15 ++- lib/utils/fs_windows.go | 8 ++ tool/tsh/kube.go | 70 ++++++++++++++ tool/tsh/tsh_test.go | 166 +++++++++++++++++++++++++++++++++ 5 files changed, 268 insertions(+), 1 deletion(-) diff --git a/api/utils/keypaths/keypaths.go b/api/utils/keypaths/keypaths.go index 0bca633b098e0..dc8edc63e69e9 100644 --- a/api/utils/keypaths/keypaths.go +++ b/api/utils/keypaths/keypaths.go @@ -52,6 +52,8 @@ const ( kubeDirSuffix = "-kube" // kubeConfigSuffix is the suffix of a kubeconfig file stored under the keys directory. kubeConfigSuffix = "-kubeconfig" + // fileNameKubeCredLock is the file name of the lockfile used to prevent excessive login attempts. + fileNameKubeCredLock = "kube_credentials.lock" // casDir is the directory name for where clusters certs are stored. casDir = "cas" // fileExtPem is the extension of a file where a public certificate is stored. @@ -76,6 +78,7 @@ const ( // │ ├── foo --> Private Key for user "foo" // │ ├── foo.pub --> Public Key // │ ├── foo.ppk --> PuTTY PPK-formatted keypair for user "foo" +// │ ├── kube_credentials.lock --> Kube credential lockfile, used to prevent excessive relogin attempts // │ ├── foo-x509.pem --> TLS client certificate for Auth Server // │ ├── foo-ssh --> SSH certs for user "foo" // │ │ ├── root-cert.pub --> SSH cert for Teleport cluster "root" @@ -311,6 +314,13 @@ func KubeConfigPath(baseDir, proxy, username, cluster, kubename string) string { return filepath.Join(KubeCertDir(baseDir, proxy, username, cluster), kubename+kubeConfigSuffix) } +// KubeCredLockfilePath returns the kube credentials lockfile path for the given proxy. +// +// <baseDir>/keys/<proxy>/kube_credentials.lock +func KubeCredLockfilePath(baseDir, proxy string) string { + return filepath.Join(ProxyKeyDir(baseDir, proxy), fileNameKubeCredLock) +} + // IsProfileKubeConfigPath makes a best effort attempt to check if the given // path is a profile specific kubeconfig path generated by this package. func IsProfileKubeConfigPath(path string) (bool, error) { diff --git a/lib/client/keystore.go b/lib/client/keystore.go index 739516a2b2601..f4c58e14cc952 100644 --- a/lib/client/keystore.go +++ b/lib/client/keystore.go @@ -17,6 +17,8 @@ limitations under the License. package client import ( + "errors" + iofs "io/fs" "os" "path/filepath" "runtime" @@ -122,6 +124,11 @@ func (fs *FSKeyStore) ppkFilePath(idx KeyIndex) string { return keypaths.PPKFilePath(fs.KeyDir, idx.ProxyHost, idx.Username) } +// kubeCredLockfilePath returns the kube credentials lockfile path for the given KeyIndex. +func (fs *FSKeyStore) kubeCredLockfilePath(idx KeyIndex) string { + return keypaths.KubeCredLockfilePath(fs.KeyDir, idx.ProxyHost) +} + // publicKeyPath returns the public key path for the given KeyIndex. func (fs *FSKeyStore) publicKeyPath(idx KeyIndex) string { return keypaths.PublicKeyPath(fs.KeyDir, idx.ProxyHost, idx.Username) @@ -236,7 +243,13 @@ func (fs *FSKeyStore) DeleteKey(idx KeyIndex) error { // but it may not exist when upgrading from v9 -> v10 and logging into an existing cluster. // as such, deletion should be best-effort and not generate an error if it fails.
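+ // The kube credentials lockfile removal below is likewise best-effort: the lockfile normally exists only after a failed 'tsh kube credentials' run, so a missing file is not an error.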
if runtime.GOOS == constants.WindowsOS { - os.Remove(fs.ppkFilePath(idx)) + _ = os.Remove(fs.ppkFilePath(idx)) + } + + // And try to delete kube credentials lockfile in case it exists + err := os.Remove(fs.kubeCredLockfilePath(idx)) + if err != nil && !errors.Is(err, iofs.ErrNotExist) { + log.Debugf("Could not remove kube credentials file: %v", err) } // Clear ClusterName to delete the user certs stored for all clusters. diff --git a/lib/utils/fs_windows.go b/lib/utils/fs_windows.go index 8961a13eec088..f758b47f6bbde 100644 --- a/lib/utils/fs_windows.go +++ b/lib/utils/fs_windows.go @@ -26,8 +26,16 @@ limitations under the License. // locking. Repeatedly removing them on unlock when acquiring dozens of locks in a short timespan // was causing flock.Flock.TryRLock to return either "access denied" or "The process cannot access // the file because it is being used by another process". + +import "strings" + const lockPostfix = ".lock.tmp" func getPlatformLockFilePath(path string) string { + // If target file is itself dedicated lockfile, we don't create another lockfile, since + // we don't intend to read/write the target file itself. + if strings.HasSuffix(path, ".lock") { + return path + } return path + lockPostfix } diff --git a/tool/tsh/kube.go b/tool/tsh/kube.go index 2dc584ebc42ff..8a0b4244af548 100644 --- a/tool/tsh/kube.go +++ b/tool/tsh/kube.go @@ -18,8 +18,10 @@ package main import ( "context" + "errors" "fmt" "io" + "net" "net/url" "os" "sort" @@ -594,6 +596,51 @@ func newKubeCredentialsCommand(parent *kingpin.CmdClause) *kubeCredentialsComman return c } +func getKubeCredLockfilePath(homePath, proxy string) (string, error) { + profilePath := profile.FullProfilePath(homePath) + // tsh stores the profiles using the proxy host as the profile name. + profileName, err := utils.Host(proxy) + if err != nil { + return "", trace.Wrap(err) + } + + return keypaths.KubeCredLockfilePath(profilePath, profileName), nil +} + +// errKubeCredLockfileFound is returned when kube credentials lockfile is found and user should resolve login problems manually. +var errKubeCredLockfileFound = trace.AlreadyExists("Having problems with relogin, please use 'tsh login/tsh kube login' manually") + +func takeKubeCredLock(ctx context.Context, homePath, proxy string) (func(bool), error) { + kubeCredLockfilePath, err := getKubeCredLockfilePath(homePath, proxy) + if err != nil { + return nil, trace.Wrap(err) + } + + // If kube credentials lockfile already exists, it means last time kube credentials was called + // we had an error while trying to issue certificate, return an error asking user to login manually. 
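+ // The lockfile is removed by the caller after a successful certificate issuance or a transient network error, so finding it here means either another credentials call currently holds it or a previous attempt failed hard; in both cases we bail out instead of triggering another relogin.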
+ if _, err := os.Stat(kubeCredLockfilePath); err == nil { + log.Debugf("Kube credentials lockfile was found at %q, aborting.", kubeCredLockfilePath) + return nil, trace.Wrap(errKubeCredLockfileFound) + } + + if _, err := utils.EnsureLocalPath(kubeCredLockfilePath, "", ""); err != nil { + return nil, trace.Wrap(err) + } + // Take a lock while we're trying to issue certificate and possibly relogin + unlock, err := utils.FSTryWriteLockTimeout(ctx, kubeCredLockfilePath, 5*time.Second) + if err != nil { + log.Debugf("could not take kube credentials lock: %v", err.Error()) + return nil, trace.Wrap(errKubeCredLockfileFound) + } + + return func(removeFile bool) { + if removeFile { + os.Remove(kubeCredLockfilePath) + } + unlock() + }, nil +} + func (c *kubeCredentialsCommand) run(cf *CLIConf) error { profile, err := cf.GetProfile() if err != nil { @@ -660,6 +707,7 @@ func (c *kubeCredentialsCommand) issueCert(cf *CLIConf) error { } if crt != nil && time.Until(crt.NotAfter) > time.Minute { log.Debugf("Re-using existing TLS cert for Kubernetes cluster %q", c.kubeCluster) + return c.writeKeyResponse(cf.Stdout(), k, c.kubeCluster) } // Otherwise, cert for this k8s cluster is missing or expired. Request @@ -668,6 +716,15 @@ func (c *kubeCredentialsCommand) issueCert(cf *CLIConf) error { log.Debugf("Requesting TLS cert for Kubernetes cluster %q", c.kubeCluster) + unlockKubeCred, err := takeKubeCredLock(cf.Context, cf.HomePath, cf.Proxy) + if err != nil { + return trace.Wrap(err) + } + deleteKubeCredsLock := false + defer func() { + unlockKubeCred(deleteKubeCredsLock) // by default (in case of an error) we don't delete lockfile. + }() + ctx, span := tc.Tracer.Start(cf.Context, "tsh.kubeCredentials/RetryWithRelogin") err = client.RetryWithRelogin(ctx, tc, func() error { // The requirement may change after a new login so check again just in @@ -685,6 +742,11 @@ func (c *kubeCredentialsCommand) issueCert(cf *CLIConf) error { }) span.End() if err != nil { + // If we've got network error we remove the lockfile, so we could restore from temporary connection + // problems without requiring user intervention. + if isNetworkError(err) { + deleteKubeCredsLock = true + } return trace.Wrap(err) } // Make sure the cert is allowed to access the cluster. @@ -700,9 +762,17 @@ func (c *kubeCredentialsCommand) issueCert(cf *CLIConf) error { return trace.Wrap(err) } + // Remove the lockfile so subsequent tsh kube credentials calls don't exit early + deleteKubeCredsLock = true + return c.writeKeyResponse(cf.Stdout(), k, c.kubeCluster) } +func isNetworkError(err error) bool { + var opErr *net.OpError + return errors.As(err, &opErr) || trace.IsConnectionProblem(err) +} + func (c *kubeCredentialsCommand) checkLocalProxyRequirement(connUpgradeRequired bool) error { if connUpgradeRequired { return trace.BadParameter("Cannot connect Kubernetes clients to Teleport Proxy directly. 
Please use `tsh proxy kube` or `tsh kubectl` instead.") diff --git a/tool/tsh/tsh_test.go b/tool/tsh/tsh_test.go index 6fa872017af5f..eabfab06c43f5 100644 --- a/tool/tsh/tsh_test.go +++ b/tool/tsh/tsh_test.go @@ -53,7 +53,10 @@ import ( "github.com/gravitational/teleport/api/types" "github.com/gravitational/teleport/api/types/events" "github.com/gravitational/teleport/api/types/wrappers" + apiutils "github.com/gravitational/teleport/api/utils" + "github.com/gravitational/teleport/api/utils/keypaths" "github.com/gravitational/teleport/api/utils/keys" + "github.com/gravitational/teleport/integration/kube" "github.com/gravitational/teleport/lib" "github.com/gravitational/teleport/lib/auth" "github.com/gravitational/teleport/lib/auth/mocku2f" @@ -1925,6 +1928,169 @@ func tryCreateTrustedCluster(t *testing.T, authServer *auth.Server, trustedClust require.FailNow(t, "Timeout creating trusted cluster") } +func TestKubeCredentialsLock(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + const kubeClusterName = "kube-cluster" + + t.Run("failed client creation doesn't create lockfile", func(t *testing.T) { + tmpHomePath := t.TempDir() + + firstErr := Run(ctx, []string{ + "kube", + "credentials", + "--proxy", "fake-proxy", + "--teleport-cluster", "teleport", + "--kube-cluster", kubeClusterName, + }, setHomePath(tmpHomePath)) + require.Error(t, firstErr) // Fails because fake proxy doesn't exist + require.NoFileExists(t, keypaths.KubeCredLockfilePath(tmpHomePath, "fake-proxy")) + }) + + t.Run("kube credentials called multiple times, SSO login called only once", func(t *testing.T) { + tmpHomePath := t.TempDir() + connector := mockConnector(t) + alice, err := types.NewUser("alice@example.com") + require.NoError(t, err) + + kubeRole, err := types.NewRole("kube-access", types.RoleSpecV6{ + Allow: types.RoleConditions{ + KubernetesLabels: types.Labels{types.Wildcard: apiutils.Strings{types.Wildcard}}, + KubeGroups: []string{kube.TestImpersonationGroup}, + KubeUsers: []string{alice.GetName()}, + KubernetesResources: []types.KubernetesResource{ + { + Kind: types.KindKubePod, Name: types.Wildcard, Namespace: types.Wildcard, + }, + }, + }, + }) + require.NoError(t, err) + alice.SetRoles([]string{"access", kubeRole.GetName()}) + + require.NoError(t, err) + authProcess, proxyProcess := makeTestServers(t, withBootstrap(connector, alice, kubeRole)) + authServer := authProcess.GetAuthServer() + require.NotNil(t, authServer) + proxyAddr, err := proxyProcess.ProxyWebAddr() + require.NoError(t, err) + + teleportClusterName, err := authServer.GetClusterName() + require.NoError(t, err) + + kubeCluster, err := types.NewKubernetesClusterV3(types.Metadata{ + Name: kubeClusterName, + Labels: map[string]string{}, + }, + types.KubernetesClusterSpecV3{}, + ) + require.NoError(t, err) + kubeServer, err := types.NewKubernetesServerV3FromCluster(kubeCluster, kubeClusterName, kubeClusterName) + require.NoError(t, err) + _, err = authServer.UpsertKubernetesServer(context.Background(), kubeServer) + require.NoError(t, err) + + var ssoCalls atomic.Int32 + mockSSO := mockSSOLogin(t, authServer, alice) + ssoFunc := func(ctx context.Context, connectorID string, priv *keys.PrivateKey, protocol string) (*auth.SSHLoginResponse, error) { + ssoCalls.Add(1) + return mockSSO(ctx, connectorID, priv, protocol) + } + + err = Run(context.Background(), []string{ + "login", + "--insecure", + "--debug", + "--auth", connector.GetName(), + "--proxy", proxyAddr.String(), + }, setHomePath(tmpHomePath), func(cf 
*CLIConf) error { + cf.mockSSOLogin = ssoFunc + return nil + }) + require.NoError(t, err) + _, err = profile.FromDir(tmpHomePath, "") + require.NoError(t, err) + ssoCalls.Store(0) // Reset number of calls after setup login + + // Overwrite profile data to simulate expired user certificate + expiredSSHCert := ` + ssh-rsa-cert-v01@openssh.com AAAAHHNzaC1yc2EtY2VydC12MDFAb3BlbnNzaC5jb20AAAAgefu/ZQ70TbBMZfGUFHluE7PCu6PiWN0SsA5xrKbkzCkAAAADAQABAAABAQCyGzVvW7vgsK1P2Rtg55DTjL4We0WjSYYdzXJnVbyTxqrEYDOkhSnw4tZTS9KgALb698g0vrqy5bSJXB90d8uLdTmCmPngPbYpSN+p3P2SbIdkB5cRIMspB22qSkfHUARQlYM4PrMYIznWwQRFBvrRNOVdTdbMywlQGMUb0jdxK7JFBx1LC76qfHJhrD7jZS+MtygFIqhAJS9CQXW314p3FmL9s1cPV5lQfY527np8580qMKPkdeowPd/hVGcPA/C+ZxLcN9LqnuTZEFoDvYtwjfofOGUpANwtENBNZbNTxHDk7shYCRN9aZJ50zdFq3rMNdzFlEyJwm2ca+7aRDLlAAAAAAAAAAAAAAABAAAAEWFsaWNlQGV4YW1wbGUuY29tAAAAVQAAADYtdGVsZXBvcnQtbm9sb2dpbi1hZDVhYTViMi00MDBlLTQ2ZmUtOThjOS02ZjRhNDA2YzdlZGMAAAAXLXRlbGVwb3J0LWludGVybmFsLWpvaW4AAAAAZG9PKAAAAABkb0+gAAAAAAAAAQkAAAAXcGVybWl0LWFnZW50LWZvcndhcmRpbmcAAAAAAAAAFnBlcm1pdC1wb3J0LWZvcndhcmRpbmcAAAAAAAAACnBlcm1pdC1wdHkAAAAAAAAAEnByaXZhdGUta2V5LXBvbGljeQAAAAgAAAAEbm9uZQAAAA50ZWxlcG9ydC1yb2xlcwAAADUAAAAxeyJ2ZXJzaW9uIjoidjEiLCJyb2xlcyI6WyJhY2Nlc3MiLCJrdWJlLWFjY2VzcyJdfQAAABl0ZWxlcG9ydC1yb3V0ZS10by1jbHVzdGVyAAAADQAAAAlsb2NhbGhvc3QAAAAPdGVsZXBvcnQtdHJhaXRzAAAACAAAAARudWxsAAAAAAAAARcAAAAHc3NoLXJzYQAAAAMBAAEAAAEBAK/vBVOnf+QLSF0aKsEpQuof1o/5EJJ25C07tljSWvF2wNixHOyHZj8kAwO3f2XmWQd/XBddvZtLETvTbdBum8T37oOLepnDR32TzTV7cR7XVvo0pSqwrg0jWuAxt67b2n2BnWOCULdV9mPM8X9q4wRhqQHFGB3+7dD24x5YmVIBFUFJKYfFYh516giKAcNPFSK6eD381+cNXYx3yDO6i/iyrsuhbYVcTlWSV2Zhc0Gytf83QRmpM6hXW8b8hGCui36ffXSYu/9nWHcK7OeaHZzePT7jHptyqcrYSs52VuzikO74jpw8htU6maUeEcR5TBbeBlB+hmHQfwl8bEUeszMAAAEUAAAADHJzYS1zaGEyLTUxMgAAAQAwb7OpVkJP8pz9M4VIoG0DzXe5W2GN2pH0eWq+n/+YgshzcWPyHsPbckCu9IleHhrp6IK8ZyCt2qLi77o9XJxJUCiJxmsnfJYTs5DtWoCqiIRWKtYvSNpML1PH/badQsS/Stg7VUs48Yftg4eJOo4PYfJqDoHRfGimMwTNQ+aIWAYek3QwydlDdEtJ6X8kkHhNnZb0fzUbUyQLzFDXPu++di+AHNOpMOWDBtNZ1Lm3WWja/t5zIv7j6L67ZTzS5JtV7TlcD+lJ7RcBfWow8OtEW5RyyI8918A2q/zbe3OhGD4D2dZxPUhyGPHLLgy9NJqJoueR8qLPQQyMdDl4JKUq + ` + privKey := ` +-----BEGIN RSA PRIVATE KEY----- +MIIEowIBAAKCAQEAshs1b1u74LCtT9kbYOeQ04y+FntFo0mGHc1yZ1W8k8aqxGAz +pIUp8OLWU0vSoAC2+vfINL66suW0iVwfdHfLi3U5gpj54D22KUjfqdz9kmyHZAeX +ESDLKQdtqkpHx1AEUJWDOD6zGCM51sEERQb60TTlXU3WzMsJUBjFG9I3cSuyRQcd +Swu+qnxyYaw+42UvjLcoBSKoQCUvQkF1t9eKdxZi/bNXD1eZUH2Odu56fOfNKjCj +5HXqMD3f4VRnDwPwvmcS3DfS6p7k2RBaA72LcI36HzhlKQDcLRDQTWWzU8Rw5O7I +WAkTfWmSedM3Rat6zDXcxZRMicJtnGvu2kQy5QIDAQABAoIBAFBPIn4PAB1lrRBX +FhhQ8iXhzYi3lwP00Cu6Cr77kueTakbYFhE2Fl5O+lNe2h9ZkyiA996IrgiiuRBC +4NAUgExm1ELGFc3+JZhiCrA+PHx8wWPiZETN462hctqZWdpOg1OOxzdiVkEpCRiD +uhgh+JDC6DV1NsjrOEzMjnxoAqXdS8R8HRSr7ATV/28rXCpzBevtsQFWsHFQ069H +uL9JY4AnBdvnu749ClFYuhux/C0zSAsZIu47WUmmyZdIXY/B32vkhbDfKPpCp7Il +5sE9reNGf22jkqjC4wLpCT0L8wuUnJil0Sj1JUQjtZc4vn7Fc1RWZWRZKC4IlI/+ +OUSdXhUCgYEA6krEs7B1W3mO8sCXEUpFZv8KfLaa4BeDncGS++qGrz4qSI8JiuUI +M4uLBec+9FHAAMVd5F/JxhcA5J1BA2e9mITEf582ur/lTU2cSYBIY64IPBMR0ask +Q9UdAdu0r/xQ91cdwKiaSrC3bPgCX/Xe6MzaEWMrmdnse3Kl+E49n0cCgYEAwpvB +gtCk/L6lOsQDgLH3zO27qlYUGSPqhy8iIF+4HMMIIvdIOrSMHspEHhbnypQe9cYO +GRcimlb4U0FbHsOmpkfcNHhvBTegmYEYjuWJR0AN8cgaV2b6XytqYB7Gv2kXhjnF +9dvamhy9+4SngywbqZshUlazVW/RfO4+OqXsKnMCgYBwJJOcUqUJwNhsV0S30O4B +S6gwY5MkGf00oHgDPpFzBfVlP5nYsqHHUk6b58DZXtvhQpcbfcHtoAscYizBPYGh +pEMNtx6SKtHNu41IHTAJDj8AyjvoONul4Db/MbN93O7ARSGHmuwnPgi+DsPMPLqS +gaMLWYWAIbAwsoLApGqYdwKBgAZALHoAK5x2nyYBD7+9d6Ecba+t7h1Umv7Wk7kI +eghqd0NwP+Cq1elTQ9bXk4BdO5VXVDKYHKNqcbVy3vNhA2RJ4JfK2n4HaGAl1l0Y 
+oE0qkIgYjkgKZbZS1arasjWJsZi9GE+qTR4wGCYQ/7Rl4UmUUwCrCj2PRuJFYLhP +hgNjAoGBAKwqiQpwNzbKOq3+pxau6Y32BqUaTV5ut9FEUz0/qzuNoc2S5bCf4wq+ +cc/nvPBnXrP+rsubJXjFDfcIjcZ7x41bRMENvP50xD/J94IpK88TGTVa04VHKExx +iUK/veLmZ6XoouiWLCdU1VJz/1Fcwe/IEamg6ETfofvsqOCgcNYJ +-----END RSA PRIVATE KEY----- +` + pubKey := `ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCyGzVvW7vgsK1P2Rtg55DTjL4We0WjSYYdzXJnVbyTxqrEYDOkhSnw4tZTS9KgALb698g0vrqy5bSJXB90d8uLdTmCmPngPbYpSN+p3P2SbIdkB5cRIMspB22qSkfHUARQlYM4PrMYIznWwQRFBvrRNOVdTdbMywlQGMUb0jdxK7JFBx1LC76qfHJhrD7jZS+MtygFIqhAJS9CQXW314p3FmL9s1cPV5lQfY527np8580qMKPkdeowPd/hVGcPA/C+ZxLcN9LqnuTZEFoDvYtwjfofOGUpANwtENBNZbNTxHDk7shYCRN9aZJ50zdFq3rMNdzFlEyJwm2ca+7aRDLl +` + err = os.WriteFile(fmt.Sprintf("%s/%s", tmpHomePath, "keys/127.0.0.1/alice@example.com"), []byte(privKey), 0666) + require.NoError(t, err) + err = os.WriteFile(fmt.Sprintf("%s/%s", tmpHomePath, "keys/127.0.0.1/alice@example.com.pub"), []byte(pubKey), 0666) + require.NoError(t, err) + err = os.WriteFile(fmt.Sprintf("%s/%s", tmpHomePath, "keys/127.0.0.1/alice@example.com-ssh/localhost-cert.pub"), []byte(expiredSSHCert), 0666) + require.NoError(t, err) + + errChan := make(chan error) + runCreds := func() { + credErr := Run(context.Background(), []string{ + "kube", + "credentials", + "--insecure", + "--proxy", proxyAddr.String(), + "--auth", connector.GetName(), + "--teleport-cluster", teleportClusterName.GetClusterName(), + "--kube-cluster", kubeClusterName, + }, setHomePath(tmpHomePath), func(cf *CLIConf) error { + cf.mockSSOLogin = ssoFunc + return nil + }) + errChan <- credErr + } + + // Run kube credentials calls in parallel, only one should actually call SSO login + runsCount := 3 + for i := 0; i < runsCount; i++ { + go runCreds() + } + for i := 0; i < runsCount; i++ { + select { + case err := <-errChan: + if err != nil { + require.ErrorIs(t, err, errKubeCredLockfileFound) + } + + case <-time.After(time.Second * 5): + require.Fail(t, "Running kube credentials timed out") + } + } + require.Equal(t, 1, int(ssoCalls.Load()), "SSO login should have been called exactly once") + }) +} + func TestSSHHeadlessCLIFlags(t *testing.T) { t.Parallel()