[v13] Add kube credentials lockfile to prevent possibility of excessive login attempts #27366

Merged (1 commit) on Jun 5, 2023
10 changes: 10 additions & 0 deletions api/utils/keypaths/keypaths.go
@@ -52,6 +52,8 @@ const (
kubeDirSuffix = "-kube"
// kubeConfigSuffix is the suffix of a kubeconfig file stored under the keys directory.
kubeConfigSuffix = "-kubeconfig"
// fileNameKubeCredLock is the file name of the lockfile used to prevent excessive login attempts.
fileNameKubeCredLock = "kube_credentials.lock"
// casDir is the directory name where cluster certs are stored.
casDir = "cas"
// fileExtPem is the extension of a file where a public certificate is stored.
@@ -76,6 +78,7 @@ const (
// │ ├── foo --> Private Key for user "foo"
// │ ├── foo.pub --> Public Key
// │ ├── foo.ppk --> PuTTY PPK-formatted keypair for user "foo"
// │ ├── kube_credentials.lock --> Kube credential lockfile, used to prevent excessive relogin attempts
// │ ├── foo-x509.pem --> TLS client certificate for Auth Server
// │ ├── foo-ssh --> SSH certs for user "foo"
// │ │ ├── root-cert.pub --> SSH cert for Teleport cluster "root"
@@ -311,6 +314,13 @@ func KubeConfigPath(baseDir, proxy, username, cluster, kubename string) string {
return filepath.Join(KubeCertDir(baseDir, proxy, username, cluster), kubename+kubeConfigSuffix)
}

// KubeCredLockfilePath returns the kube credentials lockfile path for the given proxy
//
// <baseDir>/keys/<proxy>/kube_credentials.lock
func KubeCredLockfilePath(baseDir, proxy string) string {
return filepath.Join(ProxyKeyDir(baseDir, proxy), fileNameKubeCredLock)
}

// IsProfileKubeConfigPath makes a best effort attempt to check if the given
// path is a profile specific kubeconfig path generated by this package.
func IsProfileKubeConfigPath(path string) (bool, error) {
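For reference, a minimal sketch of what the new KubeCredLockfilePath helper resolves to (the base directory and proxy name below are hypothetical, matching the layout documented above):

lock := keypaths.KubeCredLockfilePath("/home/alice/.tsh", "teleport.example.com")
// lock == "/home/alice/.tsh/keys/teleport.example.com/kube_credentials.lock"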
15 changes: 14 additions & 1 deletion lib/client/keystore.go
@@ -17,6 +17,8 @@ limitations under the License.
package client

import (
"errors"
iofs "io/fs"
"os"
"path/filepath"
"runtime"
@@ -122,6 +124,11 @@ func (fs *FSKeyStore) ppkFilePath(idx KeyIndex) string {
return keypaths.PPKFilePath(fs.KeyDir, idx.ProxyHost, idx.Username)
}

// kubeCredLockfilePath returns the kube credentials lockfile path for the given KeyIndex.
func (fs *FSKeyStore) kubeCredLockfilePath(idx KeyIndex) string {
return keypaths.KubeCredLockfilePath(fs.KeyDir, idx.ProxyHost)
}

// publicKeyPath returns the public key path for the given KeyIndex.
func (fs *FSKeyStore) publicKeyPath(idx KeyIndex) string {
return keypaths.PublicKeyPath(fs.KeyDir, idx.ProxyHost, idx.Username)
@@ -236,7 +243,13 @@ func (fs *FSKeyStore) DeleteKey(idx KeyIndex) error {
// but it may not exist when upgrading from v9 -> v10 and logging into an existing cluster.
// as such, deletion should be best-effort and not generate an error if it fails.
if runtime.GOOS == constants.WindowsOS {
os.Remove(fs.ppkFilePath(idx))
_ = os.Remove(fs.ppkFilePath(idx))
}

// Also try to delete the kube credentials lockfile, in case it exists
err := os.Remove(fs.kubeCredLockfilePath(idx))
if err != nil && !errors.Is(err, iofs.ErrNotExist) {
log.Debugf("Could not remove kube credentials lockfile: %v", err)
}

// Clear ClusterName to delete the user certs stored for all clusters.
8 changes: 8 additions & 0 deletions lib/utils/fs_windows.go
@@ -26,8 +26,16 @@ limitations under the License.
// locking. Repeatedly removing them on unlock when acquiring dozens of locks in a short timespan
// was causing flock.Flock.TryRLock to return either "access denied" or "The process cannot access
// the file because it is being used by another process".

import "strings"

const lockPostfix = ".lock.tmp"

func getPlatformLockFilePath(path string) string {
// If the target file is itself a dedicated lockfile, we don't create another lockfile, since
// we don't intend to read/write the target file itself.
if strings.HasSuffix(path, ".lock") {
return path
}
return path + lockPostfix
}
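Concretely, with this change the Windows lock path resolves as follows (a sketch; both example paths are hypothetical):

getPlatformLockFilePath(`C:\Users\alice\.tsh\keys\proxy\kube_credentials.lock`)
// -> unchanged: a dedicated lockfile is locked directly, no extra ".lock.tmp" companion is created
getPlatformLockFilePath(`C:\Users\alice\.tsh\current-profile`)
// -> `C:\Users\alice\.tsh\current-profile.lock.tmp`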
70 changes: 70 additions & 0 deletions tool/tsh/kube.go
@@ -18,8 +18,10 @@ package main

import (
"context"
"errors"
"fmt"
"io"
"net"
"net/url"
"os"
"sort"
@@ -594,6 +596,51 @@ func newKubeCredentialsCommand(parent *kingpin.CmdClause) *kubeCredentialsCommand
return c
}

func getKubeCredLockfilePath(homePath, proxy string) (string, error) {
profilePath := profile.FullProfilePath(homePath)
// tsh stores the profiles using the proxy host as the profile name.
profileName, err := utils.Host(proxy)
if err != nil {
return "", trace.Wrap(err)
}

return keypaths.KubeCredLockfilePath(profilePath, profileName), nil
}
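A sketch of the proxy-to-profile-name step above (assuming an empty homePath falls back to the default ~/.tsh and that utils.Host strips the port from the proxy address):

lockPath, err := getKubeCredLockfilePath("", "teleport.example.com:3080")
// err == nil; lockPath is roughly "~/.tsh/keys/teleport.example.com/kube_credentials.lock"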

// errKubeCredLockfileFound is returned when the kube credentials lockfile is found and the user should resolve login problems manually.
var errKubeCredLockfileFound = trace.AlreadyExists("Having problems with relogin, please use 'tsh login' or 'tsh kube login' manually")

func takeKubeCredLock(ctx context.Context, homePath, proxy string) (func(bool), error) {
kubeCredLockfilePath, err := getKubeCredLockfilePath(homePath, proxy)
if err != nil {
return nil, trace.Wrap(err)
}

// If the kube credentials lockfile already exists, the last `tsh kube credentials` call failed
// while trying to issue a certificate, so return an error asking the user to log in manually.
if _, err := os.Stat(kubeCredLockfilePath); err == nil {
log.Debugf("Kube credentials lockfile was found at %q, aborting.", kubeCredLockfilePath)
return nil, trace.Wrap(errKubeCredLockfileFound)
}

if _, err := utils.EnsureLocalPath(kubeCredLockfilePath, "", ""); err != nil {
return nil, trace.Wrap(err)
}
// Take a lock while we're trying to issue a certificate and possibly relogin
unlock, err := utils.FSTryWriteLockTimeout(ctx, kubeCredLockfilePath, 5*time.Second)
if err != nil {
log.Debugf("could not take kube credentials lock: %v", err.Error())
return nil, trace.Wrap(errKubeCredLockfileFound)
}

return func(removeFile bool) {
if removeFile {
os.Remove(kubeCredLockfilePath)
}
unlock()
}, nil
}
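To make the contract of the returned unlock function explicit, here is a minimal sketch of the intended call pattern (it mirrors issueCert further below; the names come from this diff):

unlock, err := takeKubeCredLock(ctx, cf.HomePath, cf.Proxy)
if err != nil {
	return trace.Wrap(err) // lockfile already present, or lock not acquired within 5 seconds
}
removeLockfile := false // on unexpected errors the lockfile is kept, blocking further automatic relogin attempts
defer func() { unlock(removeLockfile) }()
// ... try to issue the Kubernetes certificate, possibly triggering a relogin ...
removeLockfile = true // success (or a transient network error): allow future attempts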

func (c *kubeCredentialsCommand) run(cf *CLIConf) error {
profile, err := cf.GetProfile()
if err != nil {
@@ -660,6 +707,7 @@ func (c *kubeCredentialsCommand) issueCert(cf *CLIConf) error {
}
if crt != nil && time.Until(crt.NotAfter) > time.Minute {
log.Debugf("Re-using existing TLS cert for Kubernetes cluster %q", c.kubeCluster)

return c.writeKeyResponse(cf.Stdout(), k, c.kubeCluster)
}
// Otherwise, cert for this k8s cluster is missing or expired. Request
@@ -668,6 +716,15 @@

log.Debugf("Requesting TLS cert for Kubernetes cluster %q", c.kubeCluster)

unlockKubeCred, err := takeKubeCredLock(cf.Context, cf.HomePath, cf.Proxy)
if err != nil {
return trace.Wrap(err)
}
deleteKubeCredsLock := false
defer func() {
unlockKubeCred(deleteKubeCredsLock) // by default (i.e. when an error occurred) we don't delete the lockfile.
}()

ctx, span := tc.Tracer.Start(cf.Context, "tsh.kubeCredentials/RetryWithRelogin")
err = client.RetryWithRelogin(ctx, tc, func() error {
// The requirement may change after a new login so check again just in
@@ -685,6 +742,11 @@ func (c *kubeCredentialsCommand) issueCert(cf *CLIConf) error {
})
span.End()
if err != nil {
// If we got a network error, remove the lockfile so we can recover from temporary connection
// problems without requiring user intervention.
if isNetworkError(err) {
deleteKubeCredsLock = true
}
return trace.Wrap(err)
}
// Make sure the cert is allowed to access the cluster.
@@ -700,9 +762,17 @@ func (c *kubeCredentialsCommand) issueCert(cf *CLIConf) error {
return trace.Wrap(err)
}

// Remove the lockfile so subsequent tsh kube credentials calls don't exit early
deleteKubeCredsLock = true

return c.writeKeyResponse(cf.Stdout(), k, c.kubeCluster)
}

func isNetworkError(err error) bool {
var opErr *net.OpError
return errors.As(err, &opErr) || trace.IsConnectionProblem(err)
}
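For illustration, the classification isNetworkError performs (the example error values are hypothetical):

opErr := &net.OpError{Op: "dial", Net: "tcp", Err: errors.New("connection refused")}
isNetworkError(opErr)                                   // true: a *net.OpError is in the chain
isNetworkError(trace.ConnectionProblem(nil, "timeout")) // true: trace connection problem
isNetworkError(trace.AccessDenied("access denied"))     // false: the lockfile stays in place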

func (c *kubeCredentialsCommand) checkLocalProxyRequirement(connUpgradeRequired bool) error {
if connUpgradeRequired {
return trace.BadParameter("Cannot connect Kubernetes clients to Teleport Proxy directly. Please use `tsh proxy kube` or `tsh kubectl` instead.")
166 changes: 166 additions & 0 deletions tool/tsh/tsh_test.go
@@ -53,7 +53,10 @@ import (
"github.com/gravitational/teleport/api/types"
"github.com/gravitational/teleport/api/types/events"
"github.com/gravitational/teleport/api/types/wrappers"
apiutils "github.com/gravitational/teleport/api/utils"
"github.com/gravitational/teleport/api/utils/keypaths"
"github.com/gravitational/teleport/api/utils/keys"
"github.com/gravitational/teleport/integration/kube"
"github.com/gravitational/teleport/lib"
"github.com/gravitational/teleport/lib/auth"
"github.com/gravitational/teleport/lib/auth/mocku2f"
@@ -1925,6 +1928,169 @@ func tryCreateTrustedCluster(t *testing.T, authServer *auth.Server, trustedClust
require.FailNow(t, "Timeout creating trusted cluster")
}

func TestKubeCredentialsLock(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
const kubeClusterName = "kube-cluster"

t.Run("failed client creation doesn't create lockfile", func(t *testing.T) {
tmpHomePath := t.TempDir()

firstErr := Run(ctx, []string{
"kube",
"credentials",
"--proxy", "fake-proxy",
"--teleport-cluster", "teleport",
"--kube-cluster", kubeClusterName,
}, setHomePath(tmpHomePath))
require.Error(t, firstErr) // Fails because fake proxy doesn't exist
require.NoFileExists(t, keypaths.KubeCredLockfilePath(tmpHomePath, "fake-proxy"))
})

t.Run("kube credentials called multiple times, SSO login called only once", func(t *testing.T) {
tmpHomePath := t.TempDir()
connector := mockConnector(t)
alice, err := types.NewUser("alice@example.com")
require.NoError(t, err)

kubeRole, err := types.NewRole("kube-access", types.RoleSpecV6{
Allow: types.RoleConditions{
KubernetesLabels: types.Labels{types.Wildcard: apiutils.Strings{types.Wildcard}},
KubeGroups: []string{kube.TestImpersonationGroup},
KubeUsers: []string{alice.GetName()},
KubernetesResources: []types.KubernetesResource{
{
Kind: types.KindKubePod, Name: types.Wildcard, Namespace: types.Wildcard,
},
},
},
})
require.NoError(t, err)
alice.SetRoles([]string{"access", kubeRole.GetName()})

require.NoError(t, err)
authProcess, proxyProcess := makeTestServers(t, withBootstrap(connector, alice, kubeRole))
authServer := authProcess.GetAuthServer()
require.NotNil(t, authServer)
proxyAddr, err := proxyProcess.ProxyWebAddr()
require.NoError(t, err)

teleportClusterName, err := authServer.GetClusterName()
require.NoError(t, err)

kubeCluster, err := types.NewKubernetesClusterV3(types.Metadata{
Name: kubeClusterName,
Labels: map[string]string{},
},
types.KubernetesClusterSpecV3{},
)
require.NoError(t, err)
kubeServer, err := types.NewKubernetesServerV3FromCluster(kubeCluster, kubeClusterName, kubeClusterName)
require.NoError(t, err)
_, err = authServer.UpsertKubernetesServer(context.Background(), kubeServer)
require.NoError(t, err)

var ssoCalls atomic.Int32
mockSSO := mockSSOLogin(t, authServer, alice)
ssoFunc := func(ctx context.Context, connectorID string, priv *keys.PrivateKey, protocol string) (*auth.SSHLoginResponse, error) {
ssoCalls.Add(1)
return mockSSO(ctx, connectorID, priv, protocol)
}

err = Run(context.Background(), []string{
"login",
"--insecure",
"--debug",
"--auth", connector.GetName(),
"--proxy", proxyAddr.String(),
}, setHomePath(tmpHomePath), func(cf *CLIConf) error {
cf.mockSSOLogin = ssoFunc
return nil
})
require.NoError(t, err)
_, err = profile.FromDir(tmpHomePath, "")
require.NoError(t, err)
ssoCalls.Store(0) // Reset number of calls after setup login

// Overwrite profile data to simulate expired user certificate
expiredSSHCert := `
ssh-rsa-cert-v01@openssh.com AAAAHHNzaC1yc2EtY2VydC12MDFAb3BlbnNzaC5jb20AAAAgefu/ZQ70TbBMZfGUFHluE7PCu6PiWN0SsA5xrKbkzCkAAAADAQABAAABAQCyGzVvW7vgsK1P2Rtg55DTjL4We0WjSYYdzXJnVbyTxqrEYDOkhSnw4tZTS9KgALb698g0vrqy5bSJXB90d8uLdTmCmPngPbYpSN+p3P2SbIdkB5cRIMspB22qSkfHUARQlYM4PrMYIznWwQRFBvrRNOVdTdbMywlQGMUb0jdxK7JFBx1LC76qfHJhrD7jZS+MtygFIqhAJS9CQXW314p3FmL9s1cPV5lQfY527np8580qMKPkdeowPd/hVGcPA/C+ZxLcN9LqnuTZEFoDvYtwjfofOGUpANwtENBNZbNTxHDk7shYCRN9aZJ50zdFq3rMNdzFlEyJwm2ca+7aRDLlAAAAAAAAAAAAAAABAAAAEWFsaWNlQGV4YW1wbGUuY29tAAAAVQAAADYtdGVsZXBvcnQtbm9sb2dpbi1hZDVhYTViMi00MDBlLTQ2ZmUtOThjOS02ZjRhNDA2YzdlZGMAAAAXLXRlbGVwb3J0LWludGVybmFsLWpvaW4AAAAAZG9PKAAAAABkb0+gAAAAAAAAAQkAAAAXcGVybWl0LWFnZW50LWZvcndhcmRpbmcAAAAAAAAAFnBlcm1pdC1wb3J0LWZvcndhcmRpbmcAAAAAAAAACnBlcm1pdC1wdHkAAAAAAAAAEnByaXZhdGUta2V5LXBvbGljeQAAAAgAAAAEbm9uZQAAAA50ZWxlcG9ydC1yb2xlcwAAADUAAAAxeyJ2ZXJzaW9uIjoidjEiLCJyb2xlcyI6WyJhY2Nlc3MiLCJrdWJlLWFjY2VzcyJdfQAAABl0ZWxlcG9ydC1yb3V0ZS10by1jbHVzdGVyAAAADQAAAAlsb2NhbGhvc3QAAAAPdGVsZXBvcnQtdHJhaXRzAAAACAAAAARudWxsAAAAAAAAARcAAAAHc3NoLXJzYQAAAAMBAAEAAAEBAK/vBVOnf+QLSF0aKsEpQuof1o/5EJJ25C07tljSWvF2wNixHOyHZj8kAwO3f2XmWQd/XBddvZtLETvTbdBum8T37oOLepnDR32TzTV7cR7XVvo0pSqwrg0jWuAxt67b2n2BnWOCULdV9mPM8X9q4wRhqQHFGB3+7dD24x5YmVIBFUFJKYfFYh516giKAcNPFSK6eD381+cNXYx3yDO6i/iyrsuhbYVcTlWSV2Zhc0Gytf83QRmpM6hXW8b8hGCui36ffXSYu/9nWHcK7OeaHZzePT7jHptyqcrYSs52VuzikO74jpw8htU6maUeEcR5TBbeBlB+hmHQfwl8bEUeszMAAAEUAAAADHJzYS1zaGEyLTUxMgAAAQAwb7OpVkJP8pz9M4VIoG0DzXe5W2GN2pH0eWq+n/+YgshzcWPyHsPbckCu9IleHhrp6IK8ZyCt2qLi77o9XJxJUCiJxmsnfJYTs5DtWoCqiIRWKtYvSNpML1PH/badQsS/Stg7VUs48Yftg4eJOo4PYfJqDoHRfGimMwTNQ+aIWAYek3QwydlDdEtJ6X8kkHhNnZb0fzUbUyQLzFDXPu++di+AHNOpMOWDBtNZ1Lm3WWja/t5zIv7j6L67ZTzS5JtV7TlcD+lJ7RcBfWow8OtEW5RyyI8918A2q/zbe3OhGD4D2dZxPUhyGPHLLgy9NJqJoueR8qLPQQyMdDl4JKUq
`
privKey := `
-----BEGIN RSA PRIVATE KEY-----
MIIEowIBAAKCAQEAshs1b1u74LCtT9kbYOeQ04y+FntFo0mGHc1yZ1W8k8aqxGAz
pIUp8OLWU0vSoAC2+vfINL66suW0iVwfdHfLi3U5gpj54D22KUjfqdz9kmyHZAeX
ESDLKQdtqkpHx1AEUJWDOD6zGCM51sEERQb60TTlXU3WzMsJUBjFG9I3cSuyRQcd
Swu+qnxyYaw+42UvjLcoBSKoQCUvQkF1t9eKdxZi/bNXD1eZUH2Odu56fOfNKjCj
5HXqMD3f4VRnDwPwvmcS3DfS6p7k2RBaA72LcI36HzhlKQDcLRDQTWWzU8Rw5O7I
WAkTfWmSedM3Rat6zDXcxZRMicJtnGvu2kQy5QIDAQABAoIBAFBPIn4PAB1lrRBX
FhhQ8iXhzYi3lwP00Cu6Cr77kueTakbYFhE2Fl5O+lNe2h9ZkyiA996IrgiiuRBC
4NAUgExm1ELGFc3+JZhiCrA+PHx8wWPiZETN462hctqZWdpOg1OOxzdiVkEpCRiD
uhgh+JDC6DV1NsjrOEzMjnxoAqXdS8R8HRSr7ATV/28rXCpzBevtsQFWsHFQ069H
uL9JY4AnBdvnu749ClFYuhux/C0zSAsZIu47WUmmyZdIXY/B32vkhbDfKPpCp7Il
5sE9reNGf22jkqjC4wLpCT0L8wuUnJil0Sj1JUQjtZc4vn7Fc1RWZWRZKC4IlI/+
OUSdXhUCgYEA6krEs7B1W3mO8sCXEUpFZv8KfLaa4BeDncGS++qGrz4qSI8JiuUI
M4uLBec+9FHAAMVd5F/JxhcA5J1BA2e9mITEf582ur/lTU2cSYBIY64IPBMR0ask
Q9UdAdu0r/xQ91cdwKiaSrC3bPgCX/Xe6MzaEWMrmdnse3Kl+E49n0cCgYEAwpvB
gtCk/L6lOsQDgLH3zO27qlYUGSPqhy8iIF+4HMMIIvdIOrSMHspEHhbnypQe9cYO
GRcimlb4U0FbHsOmpkfcNHhvBTegmYEYjuWJR0AN8cgaV2b6XytqYB7Gv2kXhjnF
9dvamhy9+4SngywbqZshUlazVW/RfO4+OqXsKnMCgYBwJJOcUqUJwNhsV0S30O4B
S6gwY5MkGf00oHgDPpFzBfVlP5nYsqHHUk6b58DZXtvhQpcbfcHtoAscYizBPYGh
pEMNtx6SKtHNu41IHTAJDj8AyjvoONul4Db/MbN93O7ARSGHmuwnPgi+DsPMPLqS
gaMLWYWAIbAwsoLApGqYdwKBgAZALHoAK5x2nyYBD7+9d6Ecba+t7h1Umv7Wk7kI
eghqd0NwP+Cq1elTQ9bXk4BdO5VXVDKYHKNqcbVy3vNhA2RJ4JfK2n4HaGAl1l0Y
oE0qkIgYjkgKZbZS1arasjWJsZi9GE+qTR4wGCYQ/7Rl4UmUUwCrCj2PRuJFYLhP
hgNjAoGBAKwqiQpwNzbKOq3+pxau6Y32BqUaTV5ut9FEUz0/qzuNoc2S5bCf4wq+
cc/nvPBnXrP+rsubJXjFDfcIjcZ7x41bRMENvP50xD/J94IpK88TGTVa04VHKExx
iUK/veLmZ6XoouiWLCdU1VJz/1Fcwe/IEamg6ETfofvsqOCgcNYJ
-----END RSA PRIVATE KEY-----
`
pubKey := `ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCyGzVvW7vgsK1P2Rtg55DTjL4We0WjSYYdzXJnVbyTxqrEYDOkhSnw4tZTS9KgALb698g0vrqy5bSJXB90d8uLdTmCmPngPbYpSN+p3P2SbIdkB5cRIMspB22qSkfHUARQlYM4PrMYIznWwQRFBvrRNOVdTdbMywlQGMUb0jdxK7JFBx1LC76qfHJhrD7jZS+MtygFIqhAJS9CQXW314p3FmL9s1cPV5lQfY527np8580qMKPkdeowPd/hVGcPA/C+ZxLcN9LqnuTZEFoDvYtwjfofOGUpANwtENBNZbNTxHDk7shYCRN9aZJ50zdFq3rMNdzFlEyJwm2ca+7aRDLl
`
err = os.WriteFile(fmt.Sprintf("%s/%s", tmpHomePath, "keys/127.0.0.1/alice@example.com"), []byte(privKey), 0666)
require.NoError(t, err)
err = os.WriteFile(fmt.Sprintf("%s/%s", tmpHomePath, "keys/127.0.0.1/alice@example.com.pub"), []byte(pubKey), 0666)
require.NoError(t, err)
err = os.WriteFile(fmt.Sprintf("%s/%s", tmpHomePath, "keys/127.0.0.1/alice@example.com-ssh/localhost-cert.pub"), []byte(expiredSSHCert), 0666)
require.NoError(t, err)

errChan := make(chan error)
runCreds := func() {
credErr := Run(context.Background(), []string{
"kube",
"credentials",
"--insecure",
"--proxy", proxyAddr.String(),
"--auth", connector.GetName(),
"--teleport-cluster", teleportClusterName.GetClusterName(),
"--kube-cluster", kubeClusterName,
}, setHomePath(tmpHomePath), func(cf *CLIConf) error {
cf.mockSSOLogin = ssoFunc
return nil
})
errChan <- credErr
}

// Run kube credentials calls in parallel; only one of them should actually trigger SSO login
runsCount := 3
for i := 0; i < runsCount; i++ {
go runCreds()
}
for i := 0; i < runsCount; i++ {
select {
case err := <-errChan:
if err != nil {
require.ErrorIs(t, err, errKubeCredLockfileFound)
}

case <-time.After(time.Second * 5):
require.Fail(t, "Running kube credentials timed out")
}
}
require.Equal(t, 1, int(ssoCalls.Load()), "SSO login should have been called exactly once")
})
}

func TestSSHHeadlessCLIFlags(t *testing.T) {
t.Parallel()
