diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index fbde700a8..809e814fa 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -158,7 +158,7 @@ jobs: run: | sudo add-apt-repository ppa:dqlite/dev -y --no-update sudo apt-get update - sudo apt-get install --no-install-recommends -y libdqlite-dev pkg-config + sudo apt-get install --no-install-recommends -y libdqlite-dev pkg-config openvswitch-switch - name: Build run: | diff --git a/Makefile b/Makefile index 162127985..a1d7c7921 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -GOMIN=1.22.5 +GOMIN=1.22.6 .PHONY: default default: build diff --git a/api/services.go b/api/services.go index 0d9072224..8feee10e9 100644 --- a/api/services.go +++ b/api/services.go @@ -23,7 +23,7 @@ var ServicesCmd = func(sh *service.Handler) rest.Endpoint { Name: "services", Path: "services", - Put: rest.EndpointAction{Handler: authHandler(sh, servicesPut), AllowUntrusted: true, ProxyTarget: true}, + Put: rest.EndpointAction{Handler: authHandlerMTLS(sh, servicesPut), ProxyTarget: true}, } } diff --git a/api/services_auth.go b/api/services_auth.go index 21b8c75ec..c967d5d96 100644 --- a/api/services_auth.go +++ b/api/services_auth.go @@ -1,12 +1,14 @@ package api import ( + "errors" "fmt" "net/http" "github.com/canonical/lxd/lxd/response" "github.com/canonical/lxd/lxd/util" "github.com/canonical/lxd/shared/logger" + "github.com/canonical/lxd/shared/trust" "github.com/canonical/microcluster/v2/state" "github.com/canonical/microcloud/microcloud/service" @@ -15,8 +17,8 @@ import ( // endpointHandler is just a convenience for writing clean return types. type endpointHandler func(state.State, *http.Request) response.Response -// authHandler ensures a request has been authenticated with the mDNS broadcast secret. -func authHandler(sh *service.Handler, f endpointHandler) endpointHandler { +// authHandlerMTLS ensures a request has been authenticated using mTLS. 
+func authHandlerMTLS(sh *service.Handler, f endpointHandler) endpointHandler { return func(s state.State, r *http.Request) response.Response { if r.RemoteAddr == "@" { logger.Debug("Allowing unauthenticated request through unix socket") @@ -25,27 +27,64 @@ func authHandler(sh *service.Handler, f endpointHandler) endpointHandler { } // Use certificate based authentication between cluster members. - if r.TLS != nil && r.Host == s.Address().URL.Host { + if r.TLS != nil { trustedCerts := s.Remotes().CertificatesNative() for _, cert := range r.TLS.PeerCertificates { + // First evaluate the permanent turst store. trusted, _ := util.CheckMutualTLS(*cert, trustedCerts) if trusted { return f(s, r) } + + // Second evaluate the temporary trust store. + // This is the fallback during the forming of the cluster. + trusted, _ = util.CheckMutualTLS(*cert, sh.TemporaryTrustStore()) + if trusted { + return f(s, r) + } } } - secret := r.Header.Get("X-MicroCloud-Auth") - if secret == "" { - return response.BadRequest(fmt.Errorf("No auth secret in response")) - } + return response.Forbidden(fmt.Errorf("Failed to authenticate using mTLS")) + } +} + +// authHandlerHMAC ensures a request has been authenticated using the HMAC in the Authorization header. +func authHandlerHMAC(sh *service.Handler, f endpointHandler) endpointHandler { + return func(s state.State, r *http.Request) response.Response { + sessionFunc := func(session *service.Session) error { + h, err := trust.NewHMACArgon2([]byte(session.Passphrase()), nil, trust.NewDefaultHMACConf(HMACMicroCloud10)) + if err != nil { + return err + } + + err = trust.HMACEqual(h, r) + if err != nil { + attemptErr := session.RegisterFailedAttempt() + if attemptErr != nil { + errorCause := errors.New("Stopping session after too many failed attempts") + + // Immediately stop the session to not allow further join attempts. 
+ stopErr := session.Stop(errorCause) + if stopErr != nil { + return fmt.Errorf("Cannot stop session after too many failed attempts: %w", stopErr) + } + + // Log the error and return it to the caller + logger.Warn(errorCause.Error()) + return errorCause + } + + return err + } - if sh.AuthSecret == "" { - return response.BadRequest(fmt.Errorf("No generated auth secret")) + return nil } - if sh.AuthSecret != secret { - return response.SmartError(fmt.Errorf("Request secret does not match, ignoring request")) + // Run a r/w transaction against the session as we might stop it due to too many failed attempts. + err := sh.SessionTransaction(false, sessionFunc) + if err != nil { + return response.SmartError(err) } return f(s, r) diff --git a/api/services_cluster.go b/api/services_cluster.go index 68f20be0e..5fd8c3e53 100644 --- a/api/services_cluster.go +++ b/api/services_cluster.go @@ -24,7 +24,7 @@ var ServicesClusterCmd = func(sh *service.Handler) rest.Endpoint { Name: "services/cluster/{name}", Path: "services/cluster/{name}", - Delete: rest.EndpointAction{Handler: authHandler(sh, removeClusterMember), AllowUntrusted: true}, + Delete: rest.EndpointAction{Handler: authHandlerMTLS(sh, removeClusterMember)}, } } diff --git a/api/services_proxy.go b/api/services_proxy.go index e8cfbf4b4..7bdd780df 100644 --- a/api/services_proxy.go +++ b/api/services_proxy.go @@ -47,11 +47,11 @@ func proxy(sh *service.Handler, name, path string, handler endpointHandler) rest Name: name, Path: path, - Get: rest.EndpointAction{Handler: authHandler(sh, handler), AllowUntrusted: true, ProxyTarget: true}, - Put: rest.EndpointAction{Handler: authHandler(sh, handler), AllowUntrusted: true, ProxyTarget: true}, - Post: rest.EndpointAction{Handler: authHandler(sh, handler), AllowUntrusted: true, ProxyTarget: true}, - Patch: rest.EndpointAction{Handler: authHandler(sh, handler), AllowUntrusted: true, ProxyTarget: true}, - Delete: rest.EndpointAction{Handler: authHandler(sh, handler), AllowUntrusted: 
true, ProxyTarget: true}, + Get: rest.EndpointAction{Handler: authHandlerMTLS(sh, handler), ProxyTarget: true}, + Put: rest.EndpointAction{Handler: authHandlerMTLS(sh, handler), ProxyTarget: true}, + Post: rest.EndpointAction{Handler: authHandlerMTLS(sh, handler), ProxyTarget: true}, + Patch: rest.EndpointAction{Handler: authHandlerMTLS(sh, handler), ProxyTarget: true}, + Delete: rest.EndpointAction{Handler: authHandlerMTLS(sh, handler), ProxyTarget: true}, } } diff --git a/api/services_tokens.go b/api/services_tokens.go index dfffd0785..f8c1aca89 100644 --- a/api/services_tokens.go +++ b/api/services_tokens.go @@ -22,7 +22,7 @@ var ServiceTokensCmd = func(sh *service.Handler) rest.Endpoint { Name: "services/{serviceType}/tokens", Path: "services/{serviceType}/tokens", - Post: rest.EndpointAction{Handler: authHandler(sh, serviceTokensPost), AllowUntrusted: true, ProxyTarget: true}, + Post: rest.EndpointAction{Handler: authHandlerMTLS(sh, serviceTokensPost), ProxyTarget: true}, } } diff --git a/api/session.go b/api/session.go new file mode 100644 index 000000000..d8ed5beb6 --- /dev/null +++ b/api/session.go @@ -0,0 +1,400 @@ +package api + +import ( + "context" + "crypto/x509" + "encoding/json" + "encoding/pem" + "errors" + "fmt" + "net" + "net/http" + "time" + + "github.com/canonical/lxd/lxd/response" + "github.com/canonical/lxd/shared" + "github.com/canonical/lxd/shared/logger" + "github.com/canonical/lxd/shared/trust" + "github.com/canonical/lxd/shared/ws" + "github.com/canonical/microcluster/v2/rest" + "github.com/canonical/microcluster/v2/state" + "golang.org/x/sync/errgroup" + + "github.com/canonical/microcloud/microcloud/api/types" + cloudClient "github.com/canonical/microcloud/microcloud/client" + cloudMDNS "github.com/canonical/microcloud/microcloud/mdns" + "github.com/canonical/microcloud/microcloud/service" +) + +// HMACMicroCloud10 is the HMAC format version used during trust establishment. 
+const HMACMicroCloud10 trust.HMACVersion = "MicroCloud-1.0" + +// SessionInitiatingCmd represents the /1.0/session/initiating API on MicroCloud. +var SessionInitiatingCmd = func(sh *service.Handler) rest.Endpoint { + return rest.Endpoint{ + AllowedBeforeInit: true, + Name: "session/initiating", + Path: "session/initiating", + + Get: rest.EndpointAction{Handler: authHandlerMTLS(sh, sessionGet(sh, types.SessionInitiating))}, + } +} + +// SessionJoiningCmd represents the /1.0/session/joining API on MicroCloud. +var SessionJoiningCmd = func(sh *service.Handler) rest.Endpoint { + return rest.Endpoint{ + AllowedBeforeInit: true, + Name: "session/joining", + Path: "session/joining", + + Get: rest.EndpointAction{Handler: authHandlerMTLS(sh, sessionGet(sh, types.SessionJoining))}, + } +} + +// sessionGet returns a MicroCloud join session. +func sessionGet(sh *service.Handler, sessionRole types.SessionRole) func(state state.State, r *http.Request) response.Response { + return func(state state.State, r *http.Request) response.Response { + if sh.ActiveSession() { + return response.BadRequest(errors.New("There already is an active session")) + } + + sessionTimeoutStr := r.URL.Query().Get("timeout") + if sessionTimeoutStr == "" { + sessionTimeoutStr = "10m" + } + + sessionTimeout, err := time.ParseDuration(sessionTimeoutStr) + if err != nil { + return response.BadRequest(fmt.Errorf("Failed to parse timeout: %w", err)) + } + + if time.Now().Add(sessionTimeout).After(time.Now().Add(1 * time.Hour)) { + return response.BadRequest(errors.New("Session timeout cannot exceed 60 minutes")) + } + + return response.ManualResponse(func(w http.ResponseWriter) error { + conn, err := ws.Upgrader.Upgrade(w, r, nil) + if err != nil { + return err + } + + defer func() { + err := conn.Close() + if err != nil { + logger.Error("Failed to close the websocket connection", logger.Ctx{"err": err}) + } + }() + + sessionCtx, cancel := context.WithTimeoutCause(r.Context(), sessionTimeout, 
errors.New("Session timeout exceeded")) + defer cancel() + + gw := cloudClient.NewWebsocketGateway(sessionCtx, conn) + + if sessionRole == types.SessionInitiating { + err = handleInitiatingSession(state, sh, gw) + } else if sessionRole == types.SessionJoining { + err = handleJoiningSession(state, sh, gw) + } + + // Any errors occurring after the connection got upgraded have to be handled + // within the websocket. + // When writing a response to the original HTTP connection the server will + // complain with "http: connection has been hijacked". + if err != nil { + controlErr := gw.WriteClose(err) + if controlErr != nil { + logger.Error("Failed to write close control message", logger.Ctx{"err": controlErr, "controlErr": err}) + } + } + + return nil + }) + } +} + +func confirmedIntents(sh *service.Handler, gw *cloudClient.WebsocketGateway) ([]types.SessionJoinPost, error) { + for { + select { + case intent, ok := <-sh.Session.IntentCh(): + // Session got closed, try to receive the cause from the other channels. 
+ if !ok { + continue + } + + err := gw.Write(types.Session{ + Intent: intent, + }) + if err != nil { + return nil, fmt.Errorf("Failed to forward join intent: %w", err) + } + + case bytes := <-gw.Receive(): + var session types.Session + err := json.Unmarshal(bytes, &session) + if err != nil { + return nil, fmt.Errorf("Failed to read confirmed intents: %w", err) + } + + return session.ConfirmedIntents, nil + case <-gw.Context().Done(): + return nil, fmt.Errorf("Exit waiting for intents: %w", context.Cause(gw.Context())) + } + } +} + +func handleInitiatingSession(state state.State, sh *service.Handler, gw *cloudClient.WebsocketGateway) error { + session := types.Session{} + err := gw.ReceiveWithContext(gw.Context(), &session) + if err != nil { + return fmt.Errorf("Failed to read session start message: %w", err) + } + + err = sh.StartSession(types.SessionInitiating, session.Passphrase, gw) + if err != nil { + return fmt.Errorf("Failed to start session: %w", err) + } + + defer func() { + err := sh.StopSession(nil) + if err != nil { + logger.Error("Failed to stop session", logger.Ctx{"err": err}) + } + }() + + sessionPassphrase := sh.Session.Passphrase() + err = gw.Write(types.Session{ + Passphrase: sessionPassphrase, + }) + if err != nil { + return fmt.Errorf("Failed to send session details: %w", err) + } + + err = sh.Session.Broadcast(state.Name(), session.Address, session.Interface) + if err != nil { + return fmt.Errorf("Failed to start broadcast: %w", err) + } + + confirmedIntents, err := confirmedIntents(sh, gw) + if err != nil { + return fmt.Errorf("Failed waiting for the confirmed intents: %w", err) + } + + g, ctx := errgroup.WithContext(context.Background()) + + // Add systems to temporary truststore. + for _, intent := range confirmedIntents { + remoteCert, err := shared.ParseCert([]byte(intent.Certificate)) + if err != nil { + return fmt.Errorf("Failed to parse certificate of confirmed intent: %w", err) + } + + // Add system to temporary truststore. 
+ sh.Session.Allow(intent.Name, *remoteCert) + + cloud := sh.Services[types.MicroCloud].(*service.CloudService) + cert, err := cloud.ServerCert() + if err != nil { + return fmt.Errorf("Failed to get certificate of %q: %w", types.MicroCloud, err) + } + + joinIntent := types.SessionJoinPost{ + Version: cloudMDNS.Version, + Name: state.Name(), + Address: session.Address, + Certificate: string(cert.PublicKey()), + Services: session.Services, + } + + h, err := trust.NewHMACArgon2([]byte(sessionPassphrase), nil, trust.NewDefaultHMACConf(HMACMicroCloud10)) + if err != nil { + return fmt.Errorf("Failed to create a new HMAC instance using argon2: %w", err) + } + + header, err := trust.HMACAuthorizationHeader(h, joinIntent) + if err != nil { + return fmt.Errorf("Failed to create HMAC for join intent: %w", err) + } + + // Confirm join intent. + // This request uses polling to wait for confirmation from the other side. + g.Go(func() error { + conf := cloudClient.AuthConfig{ + HMAC: header, + // We already know the certificate of the joiner for TLS verification. 
+ TLSServerCertificate: remoteCert, + } + + _, err := cloud.RequestJoinIntent(ctx, intent.Address, conf, joinIntent) + if err != nil { + return fmt.Errorf("Failed to confirm join intent of %q: %w", intent.Address, err) + } + + return nil + }) + } + + err = g.Wait() + if err != nil { + return fmt.Errorf("Failed to confirm join intents: %w", err) + } + + err = gw.Write(types.Session{ + Accepted: true, + }) + if err != nil { + return fmt.Errorf("Failed to send confirmation: %w", err) + } + + return nil +} + +func handleJoiningSession(state state.State, sh *service.Handler, gw *cloudClient.WebsocketGateway) error { + session := types.Session{} + err := gw.ReceiveWithContext(gw.Context(), &session) + if err != nil { + return fmt.Errorf("Failed to read session start message: %w", err) + } + + err = sh.StartSession(types.SessionJoining, session.Passphrase, gw) + if err != nil { + return fmt.Errorf("Failed to start session: %w", err) + } + + defer func() { + err := sh.StopSession(nil) + if err != nil { + logger.Error("Failed to stop session", logger.Ctx{"err": err}) + } + }() + + iface, err := net.InterfaceByName(session.Interface) + if err != nil { + return fmt.Errorf("Failed to lookup interface by name: %w", err) + } + + // No address selected, try to lookup system. + if session.InitiatorAddress == "" { + lookupCtx, cancel := context.WithTimeout(gw.Context(), session.LookupTimeout) + defer cancel() + + peer, err := cloudMDNS.LookupPeer(lookupCtx, iface, cloudMDNS.Version) + if err != nil { + return fmt.Errorf("Failed to lookup eligible system: %w", err) + } + + session.InitiatorAddress = peer.Address + } + + // Get the remotes name. 
+ cloud := sh.Services[types.MicroCloud].(*service.CloudService) + cert, err := cloud.ServerCert() + if err != nil { + return fmt.Errorf("Failed to get certificate of %q: %w", types.MicroCloud, err) + } + + joinIntent := types.SessionJoinPost{ + Version: cloudMDNS.Version, + Name: state.Name(), + Address: session.Address, + Certificate: string(cert.PublicKey()), + Services: session.Services, + } + + h, err := trust.NewHMACArgon2([]byte(session.Passphrase), nil, trust.NewDefaultHMACConf(HMACMicroCloud10)) + if err != nil { + return fmt.Errorf("Failed to create a new HMAC instance using argon2: %w", err) + } + + header, err := trust.HMACAuthorizationHeader(h, joinIntent) + if err != nil { + return fmt.Errorf("Failed to create HMAC for join intent: %w", err) + } + + conf := cloudClient.AuthConfig{ + HMAC: header, + // The certificate of the initiater isn't yet known so we have to skip any TLS verification. + InsecureSkipVerify: true, + } + + peerCert, err := cloud.RequestJoinIntent(context.Background(), session.InitiatorAddress, conf, joinIntent) + if err != nil { + // If the HMAC of the request is invalid, a generic error is returned by the API. + // It's likely that the user provided the wrong passphrase. + // Indicate this in the error by rewriting it. + if err.Error() == "Invalid HMAC" { + err = errors.New("Wrong passphrase") + } + + return fmt.Errorf("Failed to send our intent to join %q: %w", session.InitiatorAddress, err) + } + + session.InitiatorFingerprint = shared.CertFingerprint(peerCert) + + peerStatus, err := cloud.RemoteStatus(gw.Context(), peerCert, session.InitiatorAddress) + if err != nil { + return fmt.Errorf("Failed to retrieve cluster status from %q: %w", session.InitiatorAddress, err) + } + + session.InitiatorName = peerStatus.Name + + // Notify the client we have found an eligible system. 
+ err = gw.Write(types.Session{ + InitiatorName: session.InitiatorName, + InitiatorAddress: session.InitiatorAddress, + InitiatorFingerprint: session.InitiatorFingerprint, + }) + if err != nil { + return fmt.Errorf("Failed to confirm the eligible system %q at %q: %w", session.InitiatorName, session.InitiatorAddress, err) + } + + var ok bool + var confirmedIntent types.SessionJoinPost + + select { + case confirmedIntent, ok = <-sh.Session.IntentCh(): + // Session got closed. + if !ok { + return errors.New("Exit waiting for join confirmation") + } + + err = gw.Write(types.Session{ + Intent: confirmedIntent, + }) + if err != nil { + return fmt.Errorf("Failed to forward join confirmation: %w", err) + } + + case <-gw.Context().Done(): + return fmt.Errorf("Exit waiting for join confirmation: %w", context.Cause(gw.Context())) + } + + certBlock, _ := pem.Decode([]byte(confirmedIntent.Certificate)) + if certBlock == nil { + return fmt.Errorf("Invalid certificate file") + } + + remoteCert, err := x509.ParseCertificate(certBlock.Bytes) + if err != nil { + return fmt.Errorf("Failed to parse certificate: %w", err) + } + + // Add system to temporary truststore. 
+ sh.Session.Allow(confirmedIntent.Name, *remoteCert) + + var errStr string + select { + case <-sh.Session.ExitCh(): + errStr = "" + case <-gw.Context().Done(): + errStr = fmt.Errorf("Exit waiting for session to end: %w", context.Cause(gw.Context())).Error() + } + + err = gw.Write(types.Session{ + Error: errStr, + }) + if err != nil { + return fmt.Errorf("Failed to signal final message: %w", err) + } + + return nil +} diff --git a/api/session_join.go b/api/session_join.go new file mode 100644 index 000000000..ea8c0042a --- /dev/null +++ b/api/session_join.go @@ -0,0 +1,95 @@ +package api + +import ( + "encoding/json" + "errors" + "fmt" + "net/http" + "time" + + "github.com/canonical/lxd/lxd/response" + "github.com/canonical/lxd/shared" + "github.com/canonical/lxd/shared/api" + "github.com/canonical/microcluster/v2/rest" + "github.com/canonical/microcluster/v2/state" + + "github.com/canonical/microcloud/microcloud/api/types" + "github.com/canonical/microcloud/microcloud/service" +) + +// SessionJoinCmd represents the /1.0/session/join API on MicroCloud. +var SessionJoinCmd = func(sh *service.Handler) rest.Endpoint { + return rest.Endpoint{ + AllowedBeforeInit: true, + Name: "session/join", + Path: "session/join", + + Post: rest.EndpointAction{Handler: authHandlerHMAC(sh, sessionJoinPost(sh)), AllowUntrusted: true}, + } +} + +// sessionJoinPost receives join intent requests from new potential members. +func sessionJoinPost(sh *service.Handler) func(state state.State, r *http.Request) response.Response { + return func(state state.State, r *http.Request) response.Response { + // Apply delay right at the beginning before doing any validation. + // This limits the number of join attempts that can be made by an attacker. + select { + case <-time.After(100 * time.Millisecond): + case <-r.Context().Done(): + return response.InternalError(errors.New("Request cancelled")) + } + + // Parse the request. 
+ req := types.SessionJoinPost{} + + err := json.NewDecoder(r.Body).Decode(&req) + if err != nil { + return response.BadRequest(err) + } + + err = sh.SessionTransaction(true, func(session *service.Session) error { + // Only validate the intent (services) on the initiator. + // The joiner has to accept the services from the initiator. + if session.Role() == types.SessionInitiating { + err = validateIntent(sh, req) + if err != nil { + return api.NewStatusError(http.StatusBadRequest, err.Error()) + } + } + + fingerprint, err := shared.CertFingerprintStr(req.Certificate) + if err != nil { + return api.StatusErrorf(http.StatusBadRequest, "Failed to get fingerprint: %w", err) + } + + err = session.RegisterIntent(fingerprint) + if err != nil { + return api.StatusErrorf(http.StatusBadRequest, "Failed to register join intent: %w", err) + } + + // Prevent locking in case there isn't anymore an active consumer reading on the channel. + // This can happen if the initiator's websocket connection isn't anymore active. + select { + case session.IntentCh() <- req: + return nil + default: + return fmt.Errorf("No active consumer for join intent") + } + }) + + return response.SmartError(err) + } +} + +// validateIntent validates the given join intent. +// It checks whether or not the peer is missing any of our services and returns an error if one is missing. +func validateIntent(sh *service.Handler, intent types.SessionJoinPost) error { + // Reject any peers that are missing our services. 
+ for service := range sh.Services { + if !shared.ValueInSlice(service, intent.Services) { + return fmt.Errorf("Rejecting peer %q due to missing services (%s)", intent.Name, string(service)) + } + } + + return nil +} diff --git a/api/types/session.go b/api/types/session.go new file mode 100644 index 000000000..31622b1fd --- /dev/null +++ b/api/types/session.go @@ -0,0 +1,42 @@ +package types + +import ( + "time" +) + +// SessionRole indicates the role when participating in a trust establishment session. +type SessionRole string + +const ( + // SessionInitiating represents the session of the initiator. + SessionInitiating SessionRole = "initiating" + + // SessionJoining represents the session of the joiner. + SessionJoining SessionRole = "joining" +) + +// Session represents the websocket protocol used during trust establishment between the client and server. +// Empty fields are omitted to require sending only the necessary information. +type Session struct { + Address string `json:"address,omitempty"` + InitiatorAddress string `json:"initiator_address,omitempty"` + InitiatorName string `json:"initiator_name,omitempty"` + InitiatorFingerprint string `json:"initiator_fingerprint,omitempty"` + Interface string `json:"interface,omitempty"` + Passphrase string `json:"passphrase,omitempty"` + Services []ServiceType `json:"services,omitempty"` + Intent SessionJoinPost `json:"intent,omitempty"` + ConfirmedIntents []SessionJoinPost `json:"confirmed_intents,omitempty"` + Accepted bool `json:"accepted,omitempty"` + LookupTimeout time.Duration `json:"lookup_timeout,omitempty"` + Error string `json:"error,omitempty"` +} + +// SessionJoinPost represents a request made to join an active session. 
+type SessionJoinPost struct { + Name string `json:"name" yaml:"name"` + Version string `json:"version" yaml:"version"` + Address string `json:"address" yaml:"address"` + Certificate string `json:"certificate" yaml:"certificate"` + Services []ServiceType `json:"services" yaml:"services"` +} diff --git a/client/client.go b/client/client.go index b3c74b540..e113df236 100644 --- a/client/client.go +++ b/client/client.go @@ -1,16 +1,35 @@ package client import ( + "bytes" "context" + "crypto/x509" + "encoding/json" "fmt" "time" "github.com/canonical/lxd/shared/api" "github.com/canonical/microcluster/v2/client" + "github.com/canonical/microcluster/v2/rest/response" + "github.com/gorilla/websocket" "github.com/canonical/microcloud/microcloud/api/types" ) +// StartSession starts a new session and returns the underlying websocket connection. +func StartSession(ctx context.Context, c *client.Client, role string, sessionTimeout time.Duration) (*websocket.Conn, error) { + queryCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) + defer cancel() + + url := api.NewURL().Path("session", role).WithQuery("timeout", sessionTimeout.String()) + conn, err := c.Websocket(queryCtx, types.APIVersion, url) + if err != nil { + return nil, fmt.Errorf("Failed to start session websocket: %w", err) + } + + return conn, nil +} + // JoinServices sends join information to initiate the cluster join process. func JoinServices(ctx context.Context, c *client.Client, data types.ServicesPut) error { queryCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) @@ -24,7 +43,45 @@ func JoinServices(ctx context.Context, c *client.Client, data types.ServicesPut) return nil } -// RemoteIssueToken issues a token on the remote MicroCloud, trusted by the mDNS auth secret. +// JoinIntent sends the join intent to a potential cluster. 
+func JoinIntent(ctx context.Context, c *client.Client, data types.SessionJoinPost) (*x509.Certificate, error) { + queryCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) + defer cancel() + + // The join intent request is using HMAC authorization. + // Therefore we have to marshal the data ourselves as the JSON encoder used + // by the query functions is appending a newline at the end. + // See https://pkg.go.dev/encoding/json#Encoder.Encode. + // This newline will cause the HMAC verification to fail on the server side + // as the server will recreate the HMAC based on the request body. + // The JSON marshaller doesn't add a newline. + dataBytes, err := json.Marshal(data) + if err != nil { + return nil, fmt.Errorf("Failed to marshal join intent: %w", err) + } + + path := api.NewURL().Path("session", "join") + + // We can pass a reader to indicate to the query functions the body is already marshalled. + resp, err := c.QueryRaw(queryCtx, "POST", types.APIVersion, path, bytes.NewBuffer(dataBytes)) + if err != nil { + return nil, fmt.Errorf("Failed to send join intent: %w", err) + } + + // Parse the response to check for errors. + _, err = response.ParseResponse(resp) + if err != nil { + return nil, err + } + + if len(resp.TLS.PeerCertificates) == 0 { + return nil, fmt.Errorf("Peer's certificate is missing") + } + + return resp.TLS.PeerCertificates[0], nil +} + +// RemoteIssueToken issues a token on the remote MicroCloud. 
func RemoteIssueToken(ctx context.Context, c *client.Client, serviceType types.ServiceType, data types.ServiceTokensPost) (string, error) { queryCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) defer cancel() diff --git a/client/proxy.go b/client/proxy.go index 95d9891cd..6d551c33b 100644 --- a/client/proxy.go +++ b/client/proxy.go @@ -2,6 +2,7 @@ package client import ( "crypto/tls" + "crypto/x509" "fmt" "net/http" "net/url" @@ -13,9 +14,20 @@ import ( "github.com/canonical/microcloud/microcloud/api/types" ) -// UseAuthProxy takes the given microcluster client and secret and proxies requests to other services through the MicroCloud API. -// The secret will be set in the authentication header in lieu of TLS authentication, if present. -func UseAuthProxy(c *client.Client, secret string, serviceType types.ServiceType) (*client.Client, error) { +// AuthConfig is used to configure the various authentication settings during trust establishment. +// In case of unverified mTLS, InsecureSkipVerify has to be set to true. +// In case of partially verified mTLS, the remote servers certificate can be set using TLSServerCertificate. +// Request authentication can be made by setting a valid HMAC. +type AuthConfig struct { + HMAC string + TLSServerCertificate *x509.Certificate + InsecureSkipVerify bool +} + +// UseAuthProxy takes the given microcluster client and HMAC and proxies requests to other services through the MicroCloud API. +// The HMAC will be set in the Authorization header in lieu of mTLS authentication, if present. +// If no HMAC is present mTLS is assumed. 
+func UseAuthProxy(c *client.Client, serviceType types.ServiceType, conf AuthConfig) (*client.Client, error) { tp, ok := c.Transport.(*http.Transport) if !ok { return nil, fmt.Errorf("Invalid client transport type") @@ -26,12 +38,8 @@ func UseAuthProxy(c *client.Client, secret string, serviceType types.ServiceType tp.TLSClientConfig = &tls.Config{} } - // Only set InsecureSkipVerify if the secret is non-empty, so we will fallback to regular TLS authentication. - if secret != "" { - tp.TLSClientConfig.InsecureSkipVerify = true - } - - tp.Proxy = AuthProxy(secret, serviceType) + tp.TLSClientConfig.InsecureSkipVerify = conf.InsecureSkipVerify + tp.Proxy = AuthProxy(conf.HMAC, serviceType) c.Transport = tp @@ -40,12 +48,14 @@ func UseAuthProxy(c *client.Client, secret string, serviceType types.ServiceType // AuthProxy takes a request to a service and sends it to MicroCloud instead, // to be then forwarded to the unix socket of the corresponding service. -// The secret is set in the request header to use in lieu of TLS authentication. -func AuthProxy(secret string, serviceType types.ServiceType) func(r *http.Request) (*url.URL, error) { +// The HMAC is set in the request header to be used partially in lieu of mTLS authentication. +func AuthProxy(hmac string, serviceType types.ServiceType) func(r *http.Request) (*url.URL, error) { return func(r *http.Request) (*url.URL, error) { - r.Header.Set("X-MicroCloud-Auth", secret) + if hmac != "" { + r.Header.Set("Authorization", hmac) + } - // MicroCloud itself doesn't need to use the proxy other than to set the auth secret. + // MicroCloud itself doesn't need to use the proxy. 
if serviceType != types.MicroCloud { path := fmt.Sprintf("/1.0/services/%s", strings.ToLower(string(serviceType))) if !strings.HasPrefix(r.URL.Path, path) { diff --git a/client/websocket.go b/client/websocket.go new file mode 100644 index 000000000..6f84ee97b --- /dev/null +++ b/client/websocket.go @@ -0,0 +1,158 @@ +package client + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "sync" + + "github.com/gorilla/websocket" +) + +// ControlClose represents a control close message to indicate an error +// and to ultimately close the underlying websocket connection. +// It reimplements the actual control close message available in the websocket +// protocol to overcome the limitation of 125 bytes. +// See https://www.rfc-editor.org/rfc/rfc6455#section-5.5. +type ControlClose struct { + ControlMessage string `json:"control_message"` +} + +// WebsocketGateway represents a utility wrapper for websocket connections. +type WebsocketGateway struct { + reader chan []byte + ctx context.Context + + conn *websocket.Conn + // There can only be one writer on the connection at a time. + // In case the outer context gets cancelled there can be a situation + // in which writing the contexts error cause can collide + // with a normal write to the websocket. + writeLock sync.Mutex +} + +// NewWebsocketGateway returns a new websocket wrapper allowing to easily write and consume +// messages to/from the underlying websocket connection. +// It allows providing a context which is cancelled as soon as the underlying websocket connection +// is closed by either side of the connection. +func NewWebsocketGateway(ctx context.Context, conn *websocket.Conn) *WebsocketGateway { + gw := &WebsocketGateway{ + reader: make(chan []byte), + conn: conn, + } + + gwCtx, gwCancel := context.WithCancelCause(ctx) + gw.ctx = gwCtx + + go func() { + <-gwCtx.Done() + + // Send close control message. + // Try to send the cause from the outer context if present. 
+ _ = gw.WriteClose(context.Cause(gwCtx))
+
+ // Shutdown the read loop.
+ _ = gw.conn.Close()
+ }()
+
+ go func() {
+ defer close(gw.reader)
+
+ for {
+ _, reader, err := conn.ReadMessage()
+ if err != nil {
+ // If the connection got closed due to the outer context, return this error instead.
+ if ctx.Err() != nil {
+ // Try to use the cause from the outer context if present.
+ err = context.Cause(ctx)
+ }
+
+ // Cancel the inner context too with the respective error.
+ defer gwCancel(err)
+ return
+ }
+
+ // Cancel in case we have received our own control close message.
+ // To not get confused with other JSON payloads, we identify our
+ // control close message by requiring the "control_message" field.
+ controlClose := ControlClose{}
+ decoder := json.NewDecoder(bytes.NewReader(reader))
+ decoder.DisallowUnknownFields()
+ err = decoder.Decode(&controlClose)
+ if err == nil {
+ // Cancel the inner context with the respective error.
+ defer gwCancel(errors.New(controlClose.ControlMessage))
+ return
+ }
+
+ gw.reader <- reader
+ }
+ }()
+
+ return gw
+}
+
+// Receive returns the inner channel which allows reading from the websocket connection.
+// If used together with other channels ensure to also consume the gateway's context
+// in order to get informed about a potentially closed connection.
+// If there aren't other channels that need to be consumed in parallel use ReceiveWithContext instead.
+func (w *WebsocketGateway) Receive() <-chan []byte {
+ return w.reader
+}
+
+// ReceiveWithContext tries to read from the websocket connection and unmarshals
+// the received data into v.
+// It waits on both the websocket connection and either of the contexts and returns
+// whichever returns/gets cancelled first. 
+func (w *WebsocketGateway) ReceiveWithContext(ctx context.Context, v any) error {
+ var err error
+ select {
+ case bytes := <-w.Receive():
+ err = json.Unmarshal(bytes, v)
+ case <-w.ctx.Done():
+ err = context.Cause(w.ctx)
+ case <-ctx.Done():
+ err = context.Cause(ctx)
+ }
+
+ return err
+}
+
+// Context returns the inner gateway's context.
+// It is cancelled if the outer context is cancelled or the websocket connection is closed.
+func (w *WebsocketGateway) Context() context.Context {
+ return w.ctx
+}
+
+// Write writes the given data onto the websocket connection.
+func (w *WebsocketGateway) Write(v any) error {
+ w.writeLock.Lock()
+ defer w.writeLock.Unlock()
+
+ if w.ctx.Err() != nil {
+ return context.Cause(w.ctx)
+ }
+
+ return w.conn.WriteJSON(v)
+}
+
+// WriteClose sends our websocket control close message.
+// Unlike the actual websocket control close message this supports messages longer than 125 bytes
+// as well as special characters.
+// It waits for the other side to hang up or the gateway's context being cancelled.
+func (w *WebsocketGateway) WriteClose(err error) error {
+ writeErr := w.Write(ControlClose{
+ ControlMessage: err.Error(),
+ })
+ if writeErr != nil {
+ return fmt.Errorf("Failed to write control message: %w", writeErr)
+ }
+
+ // Wait on the other end to hang up.
+ // Our inner context gets cancelled if the websocket connection is closed. 
+ <-w.Context().Done() + + return nil +} diff --git a/cmd/microcloud/add.go b/cmd/microcloud/add.go index 99cc885fc..681517a39 100644 --- a/cmd/microcloud/add.go +++ b/cmd/microcloud/add.go @@ -10,6 +10,7 @@ import ( "github.com/canonical/microcloud/microcloud/api" "github.com/canonical/microcloud/microcloud/api/types" + cloudClient "github.com/canonical/microcloud/microcloud/client" "github.com/canonical/microcloud/microcloud/mdns" "github.com/canonical/microcloud/microcloud/service" ) @@ -17,21 +18,19 @@ import ( type cmdAdd struct { common *CmdControl - flagWipe bool - flagPreseed bool - flagLookupTimeout int64 + flagWipe bool + flagSessionTimeout int64 } func (c *cmdAdd) Command() *cobra.Command { cmd := &cobra.Command{ Use: "add", - Short: "Scan for new cluster members to add", + Short: "Add new systems to an existing MicroCloud cluster", RunE: c.Run, } cmd.Flags().BoolVar(&c.flagWipe, "wipe", false, "Wipe disks to add to MicroCeph") - cmd.Flags().BoolVar(&c.flagPreseed, "preseed", false, "Expect Preseed YAML for configuring MicroCloud in stdin") - cmd.Flags().Int64Var(&c.flagLookupTimeout, "lookup-timeout", 0, "Amount of seconds to wait for systems to show up. Defaults: 60s for interactive, 5s for automatic and preseed") + cmd.Flags().Int64Var(&c.flagSessionTimeout, "session-timeout", 0, "Amount of seconds to wait for the trust establishment session. 
Defaults: 60m") return cmd } @@ -51,15 +50,9 @@ func (c *cmdAdd) Run(cmd *cobra.Command, args []string) error { state: map[string]service.SystemInformation{}, } - cfg.lookupTimeout = DefaultLookupTimeout - if c.flagLookupTimeout > 0 { - cfg.lookupTimeout = time.Duration(c.flagLookupTimeout) * time.Second - } else if c.flagPreseed { - cfg.lookupTimeout = DefaultAutoLookupTimeout - } - - if c.flagPreseed { - return cfg.RunPreseed(cmd) + cfg.sessionTimeout = DefaultSessionTimeout + if c.flagSessionTimeout > 0 { + cfg.sessionTimeout = time.Duration(c.flagSessionTimeout) * time.Second } cloudApp, err := microcluster.App(microcluster.Args{StateDir: c.common.FlagMicroCloudDir}) @@ -78,7 +71,7 @@ func (c *cmdAdd) Run(cmd *cobra.Command, args []string) error { cfg.name = status.Name cfg.address = status.Address.Addr().String() - err = cfg.askAddress() + err = cfg.askAddress("") if err != nil { return err } @@ -99,7 +92,9 @@ func (c *cmdAdd) Run(cmd *cobra.Command, args []string) error { return err } - err = cfg.lookupPeers(s, nil) + err = cfg.runSession(context.Background(), s, types.SessionInitiating, cfg.sessionTimeout, func(gw *cloudClient.WebsocketGateway) error { + return cfg.initiatingSession(gw, s, services, "", nil) + }) if err != nil { return err } diff --git a/cmd/microcloud/ask.go b/cmd/microcloud/ask.go index 2aa92cd66..c90a76c08 100644 --- a/cmd/microcloud/ask.go +++ b/cmd/microcloud/ask.go @@ -2,10 +2,14 @@ package main import ( "context" + "crypto/x509" + "encoding/json" "fmt" "net" + "slices" "sort" "strings" + "time" "github.com/canonical/lxd/client" "github.com/canonical/lxd/shared" @@ -17,6 +21,7 @@ import ( cephTypes "github.com/canonical/microceph/microceph/api/types" "github.com/canonical/microcloud/microcloud/api/types" + cloudClient "github.com/canonical/microcloud/microcloud/client" "github.com/canonical/microcloud/microcloud/mdns" "github.com/canonical/microcloud/microcloud/service" ) @@ -128,7 +133,7 @@ func (c *initConfig) 
askMissingServices(services []types.ServiceType, stateDirs return services, nil } -func (c *initConfig) askAddress() error { +func (c *initConfig) askAddress(filterAddress string) error { info, err := mdns.GetNetworkInfo() if err != nil { return fmt.Errorf("Failed to find network interfaces: %w", err) @@ -140,11 +145,21 @@ func (c *initConfig) askAddress() error { return fmt.Errorf("Found no valid network interfaces") } + filterIp := net.ParseIP(filterAddress) + if filterAddress != "" && filterIp == nil { + return fmt.Errorf("Invalid filter address %q", filterAddress) + } + listenAddr = info[0].Address if !c.autoSetup && len(info) > 1 { data := make([][]string, 0, len(info)) - for _, net := range info { - data = append(data, []string{net.Address, net.Interface.Name}) + for _, network := range info { + // Filter out addresses which are not in the same network as the filter address. + if filterAddress != "" && !network.Subnet.Contains(filterIp) { + continue + } + + data = append(data, []string{network.Address, network.Interface.Name}) } table := NewSelectableTable([]string{"ADDRESS", "IFACE"}, data) @@ -192,17 +207,6 @@ func (c *initConfig) askAddress() error { return fmt.Errorf("Cloud not find valid subnet for address %q", listenAddr) } - if !c.autoSetup && c.setupMany { - filter, err := c.asker.AskBool(fmt.Sprintf("Limit search for other MicroCloud servers to %s? 
(yes/no) [default=yes]: ", subnet.String()), "yes") - if err != nil { - return err - } - - if !filter { - subnet = nil - } - } - c.address = listenAddr c.lookupIface = iface c.lookupSubnet = subnet @@ -304,7 +308,7 @@ func (c *initConfig) askLocalPool(sh *service.Handler) error { lxd := sh.Services[types.LXD].(*service.LXDService) toWipe := map[string]string{} - wipeable, err := lxd.HasExtension(context.Background(), lxd.Name(), lxd.Address(), "", "storage_pool_source_wipe") + wipeable, err := lxd.HasExtension(context.Background(), lxd.Name(), lxd.Address(), nil, "storage_pool_source_wipe") if err != nil { return fmt.Errorf("Failed to check for source.wipe extension: %w", err) } @@ -493,13 +497,13 @@ func getTargetCephNetworks(sh *service.Handler, s *InitSystem) (internalCephNetw } var cephAddr string - var cephAuthSecret string + var cephCert *x509.Certificate if s != nil && s.ServerInfo.Name != sh.Name { cephAddr = s.ServerInfo.Address - cephAuthSecret = s.ServerInfo.AuthSecret + cephCert = s.ServerInfo.Certificate } - remoteCephConfigs, err := microCephService.ClusterConfig(context.Background(), cephAddr, cephAuthSecret) + remoteCephConfigs, err := microCephService.ClusterConfig(context.Background(), cephAddr, cephCert) if err != nil { return nil, err } @@ -732,7 +736,7 @@ func (c *initConfig) askRemotePool(sh *service.Handler) error { if !useJoinConfigRemoteFS { lxd := sh.Services[types.LXD].(*service.LXDService) ext := "storage_cephfs_create_missing" - hasCephFS, err := lxd.HasExtension(context.Background(), lxd.Name(), lxd.Address(), "", ext) + hasCephFS, err := lxd.HasExtension(context.Background(), lxd.Name(), lxd.Address(), nil, ext) if err != nil { return fmt.Errorf("Failed to check for the %q LXD API extension: %w", ext, err) } @@ -1375,3 +1379,242 @@ func (c *initConfig) askClustered(s *service.Handler, expectedServices []types.S return nil } + +func (c *initConfig) shortFingerprint(fingerprint string) (string, error) { + if len(fingerprint) < 12 { + 
return "", fmt.Errorf("Fingerprint is not long enough") + } + + return fingerprint[0:12], nil +} + +func (c *initConfig) askPassphrase(s *service.Handler) (string, error) { + validator := func(password string) error { + if password == "" { + return fmt.Errorf("Passphrase cannot be empty") + } + + passwordSplit := strings.Split(password, " ") + if len(passwordSplit) != 4 { + return fmt.Errorf("Passphrase has to contain exactly four elements") + } + + return nil + } + + cloud := s.Services[types.MicroCloud].(*service.CloudService) + cert, err := cloud.ServerCert() + if err != nil { + return "", err + } + + fingerprint, err := c.shortFingerprint(cert.Fingerprint()) + if err != nil { + return "", fmt.Errorf("Failed to shorten fingerprint: %w", err) + } + + fmt.Printf("Verify the fingerprint %q is displayed on the other system.\n", fingerprint) + + msg := "Specify the passphrase for joining the system: " + password, err := c.asker.AskString(msg, "", validator) + if err != nil { + return "", err + } + + return password, nil +} + +func (c *initConfig) askJoinIntents(gw *cloudClient.WebsocketGateway, expectedSystems []string) ([]types.SessionJoinPost, error) { + header := []string{"NAME", "ADDRESS", "FINGERPRINT"} + var table *SelectableTable + + rendered := make(chan error) + joinIntents := make(map[string]types.SessionJoinPost) + + renderCtx, renderCancel := context.WithCancel(gw.Context()) + defer renderCancel() + + renderIntentsInteractive := func() { + for { + select { + case bytes := <-gw.Receive(): + session := types.Session{} + err := json.Unmarshal(bytes, &session) + if err != nil { + logger.Error("Failed to read join intent", logger.Ctx{"err": err}) + break + } + + joinIntents[session.Intent.Name] = session.Intent + + remoteCert, err := shared.ParseCert([]byte(session.Intent.Certificate)) + if err != nil { + logger.Error("Failed to parse certificate", logger.Ctx{"err": err}) + } + + fingerprint, err := c.shortFingerprint(shared.CertFingerprint(remoteCert)) + if 
err != nil {
+ logger.Error("Failed to shorten fingerprint", logger.Ctx{"err": err})
+ }
+
+ if table == nil {
+ table = NewSelectableTable(header, [][]string{{session.Intent.Name, session.Intent.Address, fingerprint}})
+ err := table.Render(table.rows)
+ if err != nil {
+ logger.Error("Failed to render table", logger.Ctx{"err": err})
+ }
+
+ rendered <- nil
+ } else {
+ table.Update([]string{session.Intent.Name, session.Intent.Address, fingerprint})
+ }
+
+ case <-renderCtx.Done():
+ return
+ }
+ }
+ }
+
+ renderIntents := func() {
+ for {
+ select {
+ case bytes := <-gw.Receive():
+ session := types.Session{}
+ err := json.Unmarshal(bytes, &session)
+ if err != nil {
+ logger.Error("Failed to read join intent", logger.Ctx{"err": err})
+ break
+ }
+
+ // Skip systems which aren't listed in the preseed.
+ if !shared.ValueInSlice(session.Intent.Name, expectedSystems) {
+ continue
+ }
+
+ joinIntents[session.Intent.Name] = session.Intent
+ if len(joinIntents) == len(expectedSystems) {
+ renderCancel()
+ }
+
+ case <-renderCtx.Done():
+ return
+ }
+ }
+ }
+
+ var systems []types.SessionJoinPost
+ if !c.autoSetup {
+ go renderIntentsInteractive()
+
+ // Wait until the table got rendered.
+ // This is important, otherwise the table might not be selectable
+ // as it's being built in a goroutine. 
+ select { + case <-rendered: + case <-gw.Context().Done(): + return nil, fmt.Errorf("Failed to render join intents: %w", context.Cause(gw.Context())) + } + + var answers []string + retry := false + err := c.askRetry("Retry selecting systems?", func() error { + defer func() { + retry = true + }() + + fmt.Println("Select which systems you want to join:") + + if retry { + err := table.Render(table.rows) + if err != nil { + return fmt.Errorf("Failed to render table: %w", err) + } + } + + var err error + answers, err = table.GetSelections() + if err != nil { + return fmt.Errorf("Failed to get join intent selections: %w", err) + } + + if len(answers) == 0 { + return fmt.Errorf("No system selected") + } + + return nil + }) + if err != nil { + return nil, err + } + + for _, answer := range answers { + name := table.SelectionValue(answer, "NAME") + for intentName, intent := range joinIntents { + if intentName == name { + systems = append(systems, intent) + } + } + } + } else { + go renderIntents() + + select { + case <-time.After(c.lookupTimeout): + case <-renderCtx.Done(): + } + + for _, name := range expectedSystems { + _, ok := joinIntents[name] + if !ok { + return nil, fmt.Errorf("System %q hasn't reached out", name) + } + } + + for _, intent := range joinIntents { + systems = append(systems, intent) + } + } + + return systems, nil +} + +func (c *initConfig) askJoinConfirmation(gw *cloudClient.WebsocketGateway, services []types.ServiceType) error { + session := types.Session{} + err := gw.ReceiveWithContext(gw.Context(), &session) + if err != nil { + return fmt.Errorf("Failed to read join confirmation: %w", err) + } + + if !c.autoSetup { + fmt.Printf("\n Received confirmation from system %q\n\n", session.Intent.Name) + fmt.Println("Do not exit out to keep the session alive.") + fmt.Printf("Complete the remaining configuration on %q ...\n", session.Intent.Name) + } + + err = gw.ReceiveWithContext(gw.Context(), &session) + if err != nil { + return fmt.Errorf("Failed 
waiting during join: %w", err) + } + + if session.Error != "" { + return fmt.Errorf("Failed to join system: %s", session.Error) + } + + fmt.Println("Successfully joined the MicroCloud cluster and closing the session.") + + // Filter out MicroCloud. + services = slices.DeleteFunc(services, func(t types.ServiceType) bool { + return t == types.MicroCloud + }) + + if len(services) > 0 { + var servicesStr []string + for _, service := range services { + servicesStr = append(servicesStr, string(service)) + } + + fmt.Printf("Commencing cluster join of the remaining services (%s)\n", strings.Join(servicesStr, ", ")) + } + + return nil +} diff --git a/cmd/microcloud/join.go b/cmd/microcloud/join.go new file mode 100644 index 000000000..2a91b6c2b --- /dev/null +++ b/cmd/microcloud/join.go @@ -0,0 +1,121 @@ +package main + +import ( + "context" + "fmt" + "os" + "time" + + "github.com/canonical/microcluster/v2/microcluster" + "github.com/spf13/cobra" + + "github.com/canonical/microcloud/microcloud/api" + "github.com/canonical/microcloud/microcloud/api/types" + cloudClient "github.com/canonical/microcloud/microcloud/client" + "github.com/canonical/microcloud/microcloud/service" +) + +type cmdJoin struct { + common *CmdControl + + flagAutoSetup bool + flagWipe bool + flagLookupTimeout int64 + flagSessionTimeout int64 + flagInitiatorAddress string +} + +func (c *cmdJoin) Command() *cobra.Command { + cmd := &cobra.Command{ + Use: "join", + Short: "Join an existing MicroCloud cluster", + RunE: c.Run, + } + + cmd.Flags().BoolVar(&c.flagAutoSetup, "auto", false, "Automatic setup with default configuration") + cmd.Flags().BoolVar(&c.flagWipe, "wipe", false, "Wipe disks to add to MicroCeph") + cmd.Flags().Int64Var(&c.flagLookupTimeout, "lookup-timeout", 0, "Amount of seconds to wait when finding systems on the network. Defaults: 60s") + cmd.Flags().Int64Var(&c.flagSessionTimeout, "session-timeout", 0, "Amount of seconds to wait for the trust establishment session. 
Defaults: 10m") + cmd.Flags().StringVar(&c.flagInitiatorAddress, "initiator-address", "", "Address of the trust establishment session's initiator") + + return cmd +} + +func (c *cmdJoin) Run(cmd *cobra.Command, args []string) error { + if len(args) != 0 { + return cmd.Help() + } + + cfg := initConfig{ + bootstrap: false, + autoSetup: c.flagAutoSetup, + wipeAllDisks: c.flagWipe, + common: c.common, + asker: &c.common.asker, + systems: map[string]InitSystem{}, + state: map[string]service.SystemInformation{}, + } + + cfg.lookupTimeout = DefaultLookupTimeout + if c.flagLookupTimeout > 0 { + cfg.lookupTimeout = time.Duration(c.flagLookupTimeout) * time.Second + } + + cfg.sessionTimeout = DefaultSessionTimeout + if c.flagSessionTimeout > 0 { + cfg.sessionTimeout = time.Duration(c.flagSessionTimeout) * time.Second + } + + cloudApp, err := microcluster.App(microcluster.Args{StateDir: c.common.FlagMicroCloudDir}) + if err != nil { + return err + } + + status, err := cloudApp.Status(context.Background()) + if err != nil { + return fmt.Errorf("Failed to get MicroCloud status: %w", err) + } + + if status.Ready { + return fmt.Errorf("MicroCloud is already initialized, run 'microcloud add' instead") + } + + err = cfg.askAddress(c.flagInitiatorAddress) + if err != nil { + return err + } + + cfg.name, err = os.Hostname() + if err != nil { + return fmt.Errorf("Failed to retrieve system hostname: %w", err) + } + + services := []types.ServiceType{types.MicroCloud, types.LXD} + optionalServices := map[types.ServiceType]string{ + types.MicroCeph: api.MicroCephDir, + types.MicroOVN: api.MicroOVNDir, + } + + // Enable auto setup to skip service related questions. + cfg.autoSetup = true + services, err = cfg.askMissingServices(services, optionalServices) + if err != nil { + return err + } + + cfg.autoSetup = false + + s, err := service.NewHandler(cfg.name, cfg.address, c.common.FlagMicroCloudDir, services...) 
+ if err != nil { + return err + } + + passphrase, err := cfg.askPassphrase(s) + if err != nil { + return err + } + + return cfg.runSession(context.Background(), s, types.SessionJoining, cfg.sessionTimeout, func(gw *cloudClient.WebsocketGateway) error { + return cfg.joiningSession(gw, s, services, c.flagInitiatorAddress, passphrase) + }) +} diff --git a/cmd/microcloud/main.go b/cmd/microcloud/main.go index ee67def39..4fb27981c 100644 --- a/cmd/microcloud/main.go +++ b/cmd/microcloud/main.go @@ -78,6 +78,12 @@ EOF`) var cmdAdd = cmdAdd{common: &commonCmd} app.AddCommand(cmdAdd.Command()) + var cmdJoin = cmdJoin{common: &commonCmd} + app.AddCommand(cmdJoin.Command()) + + var cmdPreseed = cmdPreseed{common: &commonCmd} + app.AddCommand(cmdPreseed.Command()) + var cmdRemove = cmdRemove{common: &commonCmd} app.AddCommand(cmdRemove.Command()) diff --git a/cmd/microcloud/main_init.go b/cmd/microcloud/main_init.go index af062e2c4..89acad18a 100644 --- a/cmd/microcloud/main_init.go +++ b/cmd/microcloud/main_init.go @@ -2,6 +2,7 @@ package main import ( "context" + "crypto/x509" "fmt" "net" "os" @@ -24,6 +25,7 @@ import ( "github.com/canonical/microcloud/microcloud/api" "github.com/canonical/microcloud/microcloud/api/types" + cloudClient "github.com/canonical/microcloud/microcloud/client" "github.com/canonical/microcloud/microcloud/mdns" "github.com/canonical/microcloud/microcloud/service" ) @@ -37,6 +39,12 @@ const DefaultLookupTimeout time.Duration = time.Minute // RecommendedOSDHosts is the minimum number of OSD hosts recommended for a new cluster for fault-tolerance. const RecommendedOSDHosts = 3 +// DefaultAutoSessionTimeout is the default time limit for an automatic trust establishment session. +const DefaultAutoSessionTimeout time.Duration = 10 * time.Minute + +// DefaultSessionTimeout is the default time limit for the trust establishment session. 
+const DefaultSessionTimeout time.Duration = 60 * time.Minute + // InitSystem represents the configuration passed to individual systems that join via the Handler. type InitSystem struct { // ServerInfo contains the data reported by mDNS about this system. @@ -90,6 +98,9 @@ type initConfig struct { // lookupTimeout is the duration to wait for mDNS records to appear during system lookup. lookupTimeout time.Duration + // sessionTimeout is the duration to wait for the trust establishment session to complete. + sessionTimeout time.Duration + // wipeAllDisks indicates whether all disks should be wiped, or if the user should be prompted. wipeAllDisks bool @@ -112,26 +123,24 @@ type initConfig struct { type cmdInit struct { common *CmdControl - flagLookupTimeout int64 + flagSessionTimeout int64 flagWipeAllDisks bool flagEncryptAllDisks bool flagAddress string - flagPreseed bool } func (c *cmdInit) Command() *cobra.Command { cmd := &cobra.Command{ Use: "init", Aliases: []string{"bootstrap"}, - Short: "Initialize the network endpoint and create a new cluster", + Short: "Initialize MicroCloud and create a new cluster", RunE: c.Run, } cmd.Flags().BoolVar(&c.flagWipeAllDisks, "wipe", false, "Wipe disks to add to MicroCeph") cmd.Flags().BoolVar(&c.flagEncryptAllDisks, "encrypt", false, "Encrypt disks to add to MicroCeph") cmd.Flags().StringVar(&c.flagAddress, "address", "", "Address to use for MicroCloud") - cmd.Flags().BoolVar(&c.flagPreseed, "preseed", false, "Expect Preseed YAML for configuring MicroCloud in stdin") - cmd.Flags().Int64Var(&c.flagLookupTimeout, "lookup-timeout", 0, "Amount of seconds to wait for systems to show up. Defaults: 60s for interactive, 5s for automatic and preseed") + cmd.Flags().Int64Var(&c.flagSessionTimeout, "session-timeout", 0, "Amount of seconds to wait for the trust establishment session. 
Defaults: 60m") return cmd } @@ -153,15 +162,9 @@ func (c *cmdInit) Run(cmd *cobra.Command, args []string) error { state: map[string]service.SystemInformation{}, } - cfg.lookupTimeout = DefaultLookupTimeout - if c.flagLookupTimeout > 0 { - cfg.lookupTimeout = time.Duration(c.flagLookupTimeout) * time.Second - } else if c.flagPreseed { - cfg.lookupTimeout = DefaultAutoLookupTimeout - } - - if c.flagPreseed { - return cfg.RunPreseed(cmd) + cfg.sessionTimeout = DefaultSessionTimeout + if c.flagSessionTimeout > 0 { + cfg.sessionTimeout = time.Duration(c.flagSessionTimeout) * time.Second } return cfg.RunInteractive(cmd, args) @@ -185,7 +188,7 @@ func (c *initConfig) RunInteractive(cmd *cobra.Command, args []string) error { return err } - err = c.askAddress() + err = c.askAddress("") if err != nil { return err } @@ -218,9 +221,13 @@ func (c *initConfig) RunInteractive(cmd *cobra.Command, args []string) error { return err } - err = c.lookupPeers(s, nil) - if err != nil { - return err + if c.setupMany { + err = c.runSession(context.Background(), s, types.SessionInitiating, c.sessionTimeout, func(gw *cloudClient.WebsocketGateway) error { + return c.initiatingSession(gw, s, services, "", nil) + }) + if err != nil { + return err + } } state, err := s.CollectSystemInformation(context.Background(), mdns.ServerInfo{Name: c.name, Address: c.address, Services: services}) @@ -285,179 +292,12 @@ func (c *initConfig) RunInteractive(cmd *cobra.Command, args []string) error { return nil } -// lookupPeers attempts to find eligible systems over mDNS, optionally limiting lookup to the given subnet if not nil. -// Found systems will be progressively added to a table, and the user selection is added to the `systems` map. -// -// - If `autoSetup` is true, all systems found in the first 5s will be recorded, and no other input is required. -// - `expectedSystems` is a list of expected hostnames. 
If given, the behaviour is similar to `autoSetup`, -// except it will wait up to a minute for exclusively these systems to be recorded. -func (c *initConfig) lookupPeers(s *service.Handler, expectedSystems []string) error { - if !c.setupMany { - return nil - } - - header := []string{"NAME", "IFACE", "ADDR"} - var table *SelectableTable - var answers []string - - autoSetup := c.autoSetup - if len(expectedSystems) > 0 { - autoSetup = true - } - - tableCh := make(chan error) - selectionCh := make(chan error) - if !autoSetup { - go func() { - err := <-tableCh - if err != nil { - selectionCh <- err - return - } - - answers, err = table.GetSelections() - selectionCh <- err - }() - } - - ctx, cancel := context.WithTimeout(context.Background(), c.lookupTimeout) - defer cancel() - - expectedSystemsMap := make(map[string]bool, len(expectedSystems)) - for _, system := range expectedSystems { - expectedSystemsMap[system] = true - } - - fmt.Println("Scanning for eligible servers ...") - totalPeers := map[string]mdns.ServerInfo{} - done := false - for !done { - select { - case <-ctx.Done(): - done = true - case err := <-selectionCh: - if err != nil { - return err - } - - done = true - default: - // If we have found all expected systems, the map will be empty and we can return right away. - if len(expectedSystemsMap) == 0 && len(expectedSystems) > 0 { - done = true - - break - } - - peers, err := mdns.LookupPeers(ctx, c.lookupIface, mdns.Version, s.Name) - if err != nil { - return err - } - - skipPeers := map[string]bool{} - for key, info := range peers { - _, ok := totalPeers[key] - if !ok { - serviceMap := make(map[types.ServiceType]bool, len(info.Services)) - for _, service := range info.Services { - serviceMap[service] = true - } - - // Skip any peers that are missing our services. 
- for service := range s.Services { - if !serviceMap[service] { - skipPeers[info.Name] = true - logger.Infof("Skipping peer %q due to missing services (%s)", info.Name, string(service)) - break - } - } - - // If given a subnet, skip any peers that are broadcasting from a different subnet. - if c.lookupSubnet != nil && !c.lookupSubnet.Contains(net.ParseIP(info.Address)) { - continue - } - - if !skipPeers[info.Name] { - totalPeers[key] = info - - if len(expectedSystems) > 0 { - if expectedSystemsMap[info.Name] { - delete(expectedSystemsMap, info.Name) - } else { - delete(totalPeers, key) - } - } - - if autoSetup { - continue - } - - if len(totalPeers) == 1 { - table = NewSelectableTable(header, [][]string{{info.Name, info.Interface, info.Address}}) - err := table.Render(table.rows) - if err != nil { - return err - } - - time.Sleep(100 * time.Millisecond) - tableCh <- nil - } else { - table.Update([]string{info.Name, info.Interface, info.Address}) - } - } - } - } - } - } - - if len(totalPeers) == 0 { - return fmt.Errorf("Found no available systems") - } - - for _, answer := range answers { - peer := table.SelectionValue(answer, "NAME") - addr := table.SelectionValue(answer, "ADDR") - iface := table.SelectionValue(answer, "IFACE") - for _, info := range totalPeers { - if info.Name == peer && info.Address == addr && info.Interface == iface { - c.systems[peer] = InitSystem{ - ServerInfo: info, - } - } - } - } - - if autoSetup { - for _, info := range totalPeers { - c.systems[info.Name] = InitSystem{ - ServerInfo: info, - } - } - - if len(expectedSystems) > 0 { - return nil - } - - // Add a space between the CLI and the response. - fmt.Println("") - } - - for _, info := range c.systems { - fmt.Printf(" Selected %q at %q\n", info.ServerInfo.Name, info.ServerInfo.Address) - } - - // Add a space between the CLI and the response. 
- fmt.Println("") - - return nil -} - // waitForJoin requests a system to join each service's respective cluster, // and then waits for the request to either complete or time out. // If the request was successful, it additionally waits until the cluster appears in the database. -func waitForJoin(sh *service.Handler, clusterSizes map[types.ServiceType]int, secret string, peer string, cfg types.ServicesPut) error { +func waitForJoin(sh *service.Handler, clusterSizes map[types.ServiceType]int, peer string, cert *x509.Certificate, cfg types.ServicesPut) error { cloud := sh.Services[types.MicroCloud].(*service.CloudService) - err := cloud.RequestJoin(context.Background(), secret, peer, cfg) + err := cloud.RequestJoin(context.Background(), peer, cert, cfg) if err != nil { return fmt.Errorf("System %q failed to join the cluster: %w", peer, err) } @@ -491,8 +331,6 @@ func waitForJoin(sh *service.Handler, clusterSizes map[types.ServiceType]int, se } } - fmt.Printf(" Peer %q has joined the cluster\n", peer) - return nil } @@ -535,10 +373,36 @@ func (c *initConfig) addPeers(sh *service.Handler) (revert.Hook, error) { clusterSize[serviceType] = len(clusterMembers) } + // First let each joiner join the MicroCloud cluster. + // This ensures that the tokens for existing services can be issued on the remote system already using mTLS. + for peer := range c.systems { + // Only join other peers which aren't yet part of MicroCloud. 
+ if peer != sh.Name && existingSystems[types.MicroCloud][peer] == "" { + token, err := sh.Services[types.MicroCloud].IssueToken(context.Background(), peer) + if err != nil { + return nil, fmt.Errorf("Failed to issue MicroCloud token for peer %q: %w", peer, err) + } + + cfg := joinConfig[peer] + cfg.Tokens = append(cfg.Tokens, types.ServiceToken{Service: types.MicroCloud, JoinToken: token}) + + cert := c.systems[peer].ServerInfo.Certificate + err = waitForJoin(sh, clusterSize, peer, cert, cfg) + if err != nil { + return nil, err + } + } + } + // Concurrently issue a token for each joiner. for peer := range c.systems { mut := sync.Mutex{} err := sh.RunConcurrent("", "", func(s service.Service) error { + // Skip MicroCloud as the cluster is already formed. + if s.Type() == types.MicroCloud { + return nil + } + // Only issue a token if the system isn't already part of that cluster. if existingSystems[s.Type()][peer] == "" { clusteredSystem := c.systems[initializedServices[s.Type()]] @@ -555,7 +419,7 @@ func (c *initConfig) addPeers(sh *service.Handler) (revert.Hook, error) { } } else { cloud := sh.Services[types.MicroCloud].(*service.CloudService) - token, err = cloud.RemoteIssueToken(context.Background(), clusteredSystem.ServerInfo.Address, clusteredSystem.ServerInfo.AuthSecret, peer, s.Type()) + token, err = cloud.RemoteIssueToken(context.Background(), clusteredSystem.ServerInfo.Address, peer, s.Type()) if err != nil { return err } @@ -563,7 +427,7 @@ func (c *initConfig) addPeers(sh *service.Handler) (revert.Hook, error) { mut.Lock() reverter.Add(func() { - err = s.DeleteToken(context.Background(), peer, clusteredSystem.ServerInfo.Address, clusteredSystem.ServerInfo.AuthSecret) + err = s.DeleteToken(context.Background(), peer, clusteredSystem.ServerInfo.Address) if err != nil { logger.Error("Failed to clean up join token", logger.Ctx{"service": s.Type(), "error": err}) } @@ -587,10 +451,12 @@ func (c *initConfig) addPeers(sh *service.Handler) (revert.Hook, error) { 
// If the local node needs to join an existing cluster, do it first so we can proceed as normal. if len(joinConfig[sh.Name].Tokens) > 0 { cfg := joinConfig[sh.Name] - err := waitForJoin(sh, clusterSize, "", sh.Name, cfg) + err := waitForJoin(sh, clusterSize, sh.Name, nil, cfg) if err != nil { return nil, err } + + fmt.Printf(" Peer %q has joined the cluster\n", sh.Name) } for peer, cfg := range joinConfig { @@ -599,10 +465,12 @@ func (c *initConfig) addPeers(sh *service.Handler) (revert.Hook, error) { } logger.Debug("Initiating sequential request for cluster join", logger.Ctx{"peer": peer}) - err := waitForJoin(sh, clusterSize, c.systems[peer].ServerInfo.AuthSecret, peer, cfg) + err := waitForJoin(sh, clusterSize, peer, nil, cfg) if err != nil { return nil, err } + + fmt.Printf(" Peer %q has joined the cluster\n", peer) } cleanup := reverter.Clone().Fail @@ -752,7 +620,7 @@ func (c *initConfig) setupCluster(s *service.Handler) error { defer reverter.Fail() lxd := s.Services[types.LXD].(*service.LXDService) - lxdClient, err := lxd.Client(context.Background(), "") + lxdClient, err := lxd.Client(context.Background()) if err != nil { return err } @@ -870,7 +738,7 @@ func (c *initConfig) setupCluster(s *service.Handler) error { var client *client.Client for _, disk := range c.systems[name].MicroCephDisks { if client == nil { - client, err = s.Services[types.MicroCeph].(*service.CephService).Client(name, c.systems[name].ServerInfo.AuthSecret) + client, err = s.Services[types.MicroCeph].(*service.CephService).Client(name) if err != nil { return err } @@ -884,7 +752,7 @@ func (c *initConfig) setupCluster(s *service.Handler) error { } } - c, err := s.Services[types.MicroCeph].(*service.CephService).Client(s.Name, "") + c, err := s.Services[types.MicroCeph].(*service.CephService).Client(s.Name) if err != nil { return err } @@ -972,7 +840,7 @@ func (c *initConfig) setupCluster(s *service.Handler) error { } system := c.systems[s.Name] - lxdClient, err := 
lxd.Client(context.Background(), system.ServerInfo.AuthSecret) + lxdClient, err := lxd.Client(context.Background()) if err != nil { logger.Error("Failed to get LXD client for cleanup", logger.Ctx{"error": err}) @@ -990,7 +858,7 @@ func (c *initConfig) setupCluster(s *service.Handler) error { // Create preliminary networks & storage pools on each target. for name, system := range c.systems { - lxdClient, err := lxd.Client(context.Background(), system.ServerInfo.AuthSecret) + lxdClient, err := lxd.Client(context.Background()) if err != nil { return err } @@ -1058,7 +926,7 @@ func (c *initConfig) setupCluster(s *service.Handler) error { // With storage pools set up, add some volumes for images & backups. for name, system := range c.systems { - lxdClient, err := lxd.Client(context.Background(), system.ServerInfo.AuthSecret) + lxdClient, err := lxd.Client(context.Background()) if err != nil { return err } diff --git a/cmd/microcloud/main_init_preseed.go b/cmd/microcloud/preseed.go similarity index 79% rename from cmd/microcloud/main_init_preseed.go rename to cmd/microcloud/preseed.go index 27413a658..11605dd70 100644 --- a/cmd/microcloud/main_init_preseed.go +++ b/cmd/microcloud/preseed.go @@ -8,17 +8,21 @@ import ( "os" "strconv" "strings" + "time" + "github.com/canonical/lxd/shared" lxdAPI "github.com/canonical/lxd/shared/api" "github.com/canonical/lxd/shared/filter" "github.com/canonical/lxd/shared/units" "github.com/canonical/lxd/shared/validate" cephTypes "github.com/canonical/microceph/microceph/api/types" + "github.com/canonical/microcluster/v2/microcluster" "github.com/spf13/cobra" "gopkg.in/yaml.v3" "github.com/canonical/microcloud/microcloud/api" "github.com/canonical/microcloud/microcloud/api/types" + cloudClient "github.com/canonical/microcloud/microcloud/client" "github.com/canonical/microcloud/microcloud/mdns" "github.com/canonical/microcloud/microcloud/service" ) @@ -26,8 +30,12 @@ import ( // Preseed represents the structure of the supported preseed 
yaml. type Preseed struct { LookupSubnet string `yaml:"lookup_subnet"` - LookupInterface string `yaml:"lookup_interface"` + LookupTimeout int64 `yaml:"lookup_timeout"` + SessionPassphrase string `yaml:"session_passphrase"` + SessionTimeout int64 `yaml:"session_timeout"` ReuseExistingClusters bool `yaml:"reuse_existing_clusters"` + Initiator string `yaml:"initiator"` + InitiatorAddress string `yaml:"initiator_address"` Systems []System `yaml:"systems"` OVN InitNetwork `yaml:"ovn"` Ceph CephOptions `yaml:"ceph"` @@ -37,6 +45,7 @@ type Preseed struct { // System represents the structure of the systems we expect to find in the preseed yaml. type System struct { Name string `yaml:"name"` + Address string `yaml:"address"` UplinkInterface string `yaml:"ovn_uplink_interface"` UnderlayIP string `yaml:"ovn_underlay_ip"` Storage InitStorage `yaml:"storage"` @@ -101,6 +110,34 @@ func DiskOperatorSet() filter.OperatorSet { } } +type cmdPreseed struct { + common *CmdControl +} + +func (c *cmdPreseed) Command() *cobra.Command { + cmd := &cobra.Command{ + Use: "preseed", + Short: "Initialize and extend a MicroCloud cluster unattended", + RunE: c.Run, + } + + return cmd +} + +func (c *cmdPreseed) Run(cmd *cobra.Command, args []string) error { + if len(args) != 0 { + return cmd.Help() + } + + cfg := initConfig{ + common: c.common, + systems: map[string]InitSystem{}, + state: map[string]service.SystemInformation{}, + } + + return cfg.RunPreseed(cmd) +} + // RunPreseed initializes MicroCloud from a preseed yaml filepath input. 
func (c *initConfig) RunPreseed(cmd *cobra.Command) error { c.autoSetup = true @@ -116,29 +153,57 @@ func (c *initConfig) RunPreseed(cmd *cobra.Command) error { return fmt.Errorf("Failed to parse the preseed yaml: %w", err) } - hostname, err := os.Hostname() + c.bootstrap = config.isBootstrap() + + c.lookupTimeout = DefaultLookupTimeout + if config.LookupTimeout > 0 { + c.lookupTimeout = time.Duration(config.LookupTimeout) * time.Second + } + + c.sessionTimeout = DefaultSessionTimeout + if config.SessionTimeout > 0 { + c.sessionTimeout = time.Duration(config.SessionTimeout) * time.Second + } + + cloudApp, err := microcluster.App(microcluster.Args{StateDir: c.common.FlagMicroCloudDir}) if err != nil { return err } - err = config.validate(hostname, c.bootstrap) + status, err := cloudApp.Status(context.Background()) if err != nil { - return err + return fmt.Errorf("Failed to get MicroCloud status: %w", err) } - _, lookupSubnet, err := net.ParseCIDR(config.LookupSubnet) + if status.Ready && c.bootstrap { + return fmt.Errorf("MicroCloud is already initialized") + } + + hostname, err := os.Hostname() if err != nil { return err } - lookupIface, err := net.InterfaceByName(config.LookupInterface) + err = config.validate(hostname, c.bootstrap) if err != nil { return err } - listenIP, err := addrInSubnet(lookupIface, *lookupSubnet) - if err != nil { - return fmt.Errorf("Failed to determine MicroCloud listen address: %w", err) + var listenAddr string + if status.Ready { + // If the cluster is already bootstrapped use its address. + listenAddr = status.Address.Addr().String() + } else { + // In case of bootstrap use the address from the preseed file. + listenAddr, err = config.address(hostname) + if err != nil { + return err + } + } + + listenIP := net.ParseIP(listenAddr) + if listenIP == nil { + return fmt.Errorf("Invalid MicroCloud listen address %q", listenAddr) } // Build the service handler. 
@@ -160,11 +225,27 @@ func (c *initConfig) RunPreseed(cmd *cobra.Command) error { return err } + initiator := config.isInitiator(c.name, c.address) + + if !status.Ready && !c.bootstrap && initiator { + return fmt.Errorf("MicroCloud isn't yet initialized and cannot be the initiator") + } + + if status.Ready && !initiator { + return fmt.Errorf("MicroCloud is already initialized and can only be the initiator") + } + systems, err := config.Parse(s, c) if err != nil { return err } + // Exit in case of join. + // Only the initiator has to continue. + if systems == nil { + return nil + } + if !c.bootstrap { peers, err := s.Services[types.MicroCloud].ClusterMembers(context.Background()) if err != nil { @@ -230,11 +311,40 @@ func (p *Preseed) validate(name string, bootstrap bool) error { return fmt.Errorf("No systems given") } + if p.Initiator == "" && p.InitiatorAddress == "" { + return fmt.Errorf("Missing initiator's name or address") + } + + if p.Initiator != "" && p.InitiatorAddress != "" { + return fmt.Errorf("Cannot provide both the initiator's name and address") + } + + if p.InitiatorAddress != "" && p.LookupSubnet != "" { + return fmt.Errorf("Cannot provide both the initiator's address and lookup subnet") + } + + if len(p.Systems) > 1 && p.SessionPassphrase == "" { + return fmt.Errorf("Missing session passphrase") + } + + systemNames := make([]string, 0, len(p.Systems)) for _, system := range p.Systems { if system.Name == "" { return fmt.Errorf("Missing system name") } + if system.Address != "" && p.LookupSubnet != "" { + return fmt.Errorf("Cannot provide both the address for system %q and the lookup subnet", system.Name) + } + + if system.Address == "" && p.InitiatorAddress != "" { + return fmt.Errorf("Missing address for system %q when the initiator's address is set", system.Name) + } + + if system.Address != "" && p.InitiatorAddress == "" { + return fmt.Errorf("Missing the initiator's address as system %q has an address", system.Name) + } + if system.Name == name 
{ localInit = true } @@ -259,6 +369,12 @@ func (p *Preseed) validate(name string, bootstrap bool) error { if system.Storage.Local.Path != "" { directLocalCount++ } + + if shared.ValueInSlice(system.Name, systemNames) { + return fmt.Errorf("Duplicate system name %q", system.Name) + } + + systemNames = append(systemNames, system.Name) } if !bootstrap && p.ReuseExistingClusters { @@ -269,10 +385,6 @@ func (p *Preseed) validate(name string, bootstrap bool) error { return fmt.Errorf("Local MicroCloud must be included in the list of systems when initializing") } - if !bootstrap && localInit { - return fmt.Errorf("Local MicroCloud must not be included in the list of systems when adding new members") - } - containsUplinks := false containsLocalStorage := false containsCephStorage := false @@ -291,15 +403,6 @@ func (p *Preseed) validate(name string, bootstrap bool) error { return fmt.Errorf("Some systems are missing local storage disks") } - _, _, err := net.ParseCIDR(p.LookupSubnet) - if err != nil { - return err - } - - if p.LookupInterface == "" { - return fmt.Errorf("Missing interface name for machine lookup") - } - containsCephStorage = directCephCount > 0 || len(p.Storage.Ceph) > 0 usingCephInternalNetwork := p.Ceph.InternalNetwork != "" if !containsCephStorage && usingCephInternalNetwork { @@ -307,7 +410,7 @@ func (p *Preseed) validate(name string, bootstrap bool) error { } if usingCephInternalNetwork { - err = validate.IsNetwork(p.Ceph.InternalNetwork) + err := validate.IsNetwork(p.Ceph.InternalNetwork) if err != nil { return fmt.Errorf("Invalid Ceph internal network subnet: %v", err) } @@ -380,6 +483,71 @@ func (p *Preseed) validate(name string, bootstrap bool) error { return nil } +// isInitiator returns true if the current host is marked as being the initiator. 
+func (p *Preseed) isInitiator(name string, address string) bool { + if name == p.Initiator && p.Initiator != "" { + return true + } + + if address == p.InitiatorAddress && p.InitiatorAddress != "" { + return true + } + + return false +} + +// isBootstrap returns true if MicroCloud is in bootstrap mode. +// This is the case if either no initiator address is set +// or the initiator address is set to an address of a system +// in the current list of systems in the preseed file. +func (p *Preseed) isBootstrap() bool { + for _, system := range p.Systems { + if system.Name == p.Initiator { + return true + } + + if system.Address != "" && system.Address == p.InitiatorAddress { + return true + } + } + + return false +} + +// address either returns the address specified for the respective system +// or the first address found on the system within the provided lookup subnet. +func (p *Preseed) address(name string) (string, error) { + for _, system := range p.Systems { + if system.Name == name && system.Address != "" { + return system.Address, nil + } + } + + _, lookupSubnet, err := net.ParseCIDR(p.LookupSubnet) + if err != nil { + return "", err + } + + ifaces, err := net.Interfaces() + if err != nil { + return "", err + } + + for _, iface := range ifaces { + addrs, err := iface.Addrs() + if err != nil { + return "", err + } + + ip := addrInSubnet(addrs, *lookupSubnet) + if ip != nil { + return ip.String(), nil + } + } + + return "", fmt.Errorf("Failed to determine MicroCloud address within subnet %q", p.LookupSubnet) +} + // Match matches the devices to the given filter, and returns the result. func (d *DiskFilter) Match(disks []lxdAPI.ResourcesStorageDisk) ([]lxdAPI.ResourcesStorageDisk, error) { if d.Find == "" { @@ -435,41 +603,60 @@ func (p *Preseed) Parse(s *service.Handler, c *initConfig) (map[string]InitSyste expectedSystems = append(expectedSystems, system.Name) } - // Lookup peers until expected systems are found. 
- var err error - _, c.lookupSubnet, err = net.ParseCIDR(p.LookupSubnet) - if err != nil { - return nil, err - } - ifaces, err := net.Interfaces() if err != nil { return nil, fmt.Errorf("Failed to get network interfaces: %w", err) } for _, iface := range ifaces { - if iface.Name == p.LookupInterface { + addresses, err := iface.Addrs() + if err != nil { + return nil, fmt.Errorf("Failed to get addresses of interface %q: %w", iface.Name, err) + } + + addressStrings := make([]string, 0, len(addresses)) + for _, address := range addresses { + ipNet, ok := address.(*net.IPNet) + if !ok { + continue + } + + addressStrings = append(addressStrings, ipNet.IP.String()) + } + + if shared.ValueInSlice(c.address, addressStrings) { c.lookupIface = &iface break } } if c.lookupIface == nil { - return nil, fmt.Errorf("Failed to find lookup interface %q", p.LookupInterface) + return nil, fmt.Errorf("Failed to find lookup interface for address %q", c.address) + } + + initiator := p.isInitiator(c.name, c.address) + + expectedServices := make([]types.ServiceType, 0, len(s.Services)) + for _, v := range s.Services { + expectedServices = append(expectedServices, v.Type()) + } + + if !initiator { + err = c.runSession(context.Background(), s, types.SessionJoining, c.sessionTimeout, func(gw *cloudClient.WebsocketGateway) error { + return c.joiningSession(gw, s, expectedServices, p.InitiatorAddress, p.SessionPassphrase) + }) + return nil, err } if len(expectedSystems) > 0 { - err = c.lookupPeers(s, expectedSystems) + err = c.runSession(context.Background(), s, types.SessionInitiating, c.sessionTimeout, func(gw *cloudClient.WebsocketGateway) error { + return c.initiatingSession(gw, s, expectedServices, p.SessionPassphrase, expectedSystems) + }) if err != nil { return nil, err } } - expectedServices := make(map[types.ServiceType]service.Service, len(s.Services)) - for k, v := range s.Services { - expectedServices[k] = v - } - for peer, system := range c.systems { existingClusters, err := 
s.GetExistingClusters(context.Background(), system.ServerInfo) if err != nil { @@ -521,7 +708,8 @@ func (p *Preseed) Parse(s *service.Handler, c *initConfig) (map[string]InitSyste addressedInterfaces := map[string]map[string]service.DedicatedInterface{} for _, system := range c.systems { - uplinkIfaces, dedicatedIfaces, _, err := lxd.GetNetworkInterfaces(context.Background(), system.ServerInfo.Name, system.ServerInfo.Address, system.ServerInfo.AuthSecret) + cert := system.ServerInfo.Certificate + uplinkIfaces, dedicatedIfaces, _, err := lxd.GetNetworkInterfaces(context.Background(), system.ServerInfo.Name, system.ServerInfo.Address, cert) if err != nil { return nil, err } @@ -707,8 +895,10 @@ func (p *Preseed) Parse(s *service.Handler, c *initConfig) (map[string]InitSyste continue } + cert := system.ServerInfo.Certificate + // Fetch system resources from LXD to find disks if we haven't directly set up disks. - allResources[peer], err = s.Services[types.LXD].(*service.LXDService).GetResources(context.Background(), peer, system.ServerInfo.Address, system.ServerInfo.AuthSecret) + allResources[peer], err = s.Services[types.LXD].(*service.LXDService).GetResources(context.Background(), peer, system.ServerInfo.Address, cert) if err != nil { return nil, fmt.Errorf("Failed to get system resources of peer %q: %w", peer, err) } @@ -926,12 +1116,7 @@ func (p *Preseed) Parse(s *service.Handler, c *initConfig) (map[string]InitSyste } // Returns the first IP address assigned to iface that falls within lookupSubnet. 
-func addrInSubnet(iface *net.Interface, lookupSubnet net.IPNet) (net.IP, error) { - addrs, err := iface.Addrs() - if err != nil { - return nil, err - } - +func addrInSubnet(addrs []net.Addr, lookupSubnet net.IPNet) net.IP { for _, addr := range addrs { ip, _, err := net.ParseCIDR(addr.String()) if err != nil { @@ -939,9 +1124,9 @@ func addrInSubnet(iface *net.Interface, lookupSubnet net.IPNet) (net.IP, error) } if lookupSubnet.Contains(ip) { - return ip, nil + return ip } } - return nil, fmt.Errorf("%q has no addresses in subnet %q", iface.Name, lookupSubnet) + return nil } diff --git a/cmd/microcloud/preseed_test.go b/cmd/microcloud/preseed_test.go index e23f8edbc..09f895dcb 100644 --- a/cmd/microcloud/preseed_test.go +++ b/cmd/microcloud/preseed_test.go @@ -20,231 +20,290 @@ func TestPreseedSuite(t *testing.T) { func (s *preseedSuite) Test_preseedValidateInvalid() { cases := []struct { desc string - subnet string - iface string - systems []System - ovn InitNetwork - storage StorageFilter + preseed Preseed addErr bool err error }{ { - desc: "No systems", - subnet: "10.0.0.1/24", - iface: "enp5s0", - systems: nil, - ovn: InitNetwork{IPv4Gateway: "10.0.0.1/24", IPv4Range: "10.0.0.100-10.0.0.254", IPv6Gateway: "cafe::1/64"}, - storage: StorageFilter{ - Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, - Ceph: []DiskFilter{{Find: "def", FindMin: 0, FindMax: 3, Wipe: false}}, + desc: "No systems", + preseed: Preseed{ + Systems: nil, + OVN: InitNetwork{IPv4Gateway: "10.0.0.1/24", IPv4Range: "10.0.0.100-10.0.0.254", IPv6Gateway: "cafe::1/64"}, + Storage: StorageFilter{ + Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, + Ceph: []DiskFilter{{Find: "def", FindMin: 0, FindMax: 3, Wipe: false}}, + }, }, - addErr: true, err: errors.New("No systems given"), }, { - desc: "Single node preseed", - subnet: "10.0.0.1/24", - iface: "enp5s0", - systems: []System{{Name: "n1", UplinkInterface: "eth0", Storage: InitStorage{}}}, - ovn: 
InitNetwork{IPv4Gateway: "10.0.0.1/24", IPv4Range: "10.0.0.100-10.0.0.254", IPv6Gateway: "cafe::1/64"}, - storage: StorageFilter{ - Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, - Ceph: []DiskFilter{{Find: "def", FindMin: 1, FindMax: 3, Wipe: false}}, + desc: "Duplicate systems", + preseed: Preseed{ + SessionPassphrase: "foo", + Initiator: "n1", + Systems: []System{{Name: "n1"}, {Name: "n1"}}, + }, + addErr: true, + err: errors.New(`Duplicate system name "n1"`), + }, + { + desc: "Single node preseed", + preseed: Preseed{ + Initiator: "n1", + Systems: []System{{Name: "n1", UplinkInterface: "eth0", Storage: InitStorage{}}}, + OVN: InitNetwork{IPv4Gateway: "10.0.0.1/24", IPv4Range: "10.0.0.100-10.0.0.254", IPv6Gateway: "cafe::1/64"}, + Storage: StorageFilter{ + Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, + Ceph: []DiskFilter{{Find: "def", FindMin: 1, FindMax: 3, Wipe: false}}, + }, }, - addErr: false, err: nil, }, { - desc: "Missing lookup subnet", - systems: []System{{Name: "n1"}, {Name: "n2"}}, - storage: StorageFilter{ - Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, + desc: "Missing session passphrase", + preseed: Preseed{ + Initiator: "n1", + Systems: []System{{Name: "n1"}, {Name: "n2"}}, }, - addErr: true, - err: errors.New("invalid CIDR address: "), + err: errors.New(`Missing session passphrase`), }, { - desc: "Missing lookup interface", - subnet: "10.0.0.1/24", - systems: []System{{Name: "n1"}, {Name: "n2"}}, - storage: StorageFilter{ - Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, + desc: "Missing initiator's name or address", + preseed: Preseed{ + Systems: []System{{Name: "n1"}}, }, - addErr: true, - err: errors.New("Missing interface name for machine lookup"), + err: errors.New(`Missing initiator's name or address`), }, { - desc: "Systems missing name", - subnet: "10.0.0.1/24", - iface: "enp5s0", - systems: []System{{Name: "", UplinkInterface: 
"eth0"}, {Name: "n2", UplinkInterface: "eth0"}}, - ovn: InitNetwork{IPv4Gateway: "10.0.0.1/24", IPv4Range: "10.0.0.100-10.0.0.254", IPv6Gateway: "cafe::1/64"}, - storage: StorageFilter{ - Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, - Ceph: []DiskFilter{{Find: "def", FindMin: 0, FindMax: 3, Wipe: false}}, + desc: "Cannot provide both the initiator's name and address", + preseed: Preseed{ + Initiator: "n1", + InitiatorAddress: "1.0.0.1", + Systems: []System{{Name: "n1"}}, + }, + addErr: true, + err: errors.New(`Cannot provide both the initiator's name and address`), + }, + { + desc: "Cannot provide both the initiator's address and lookup subnet", + preseed: Preseed{ + InitiatorAddress: "1.0.0.1", + LookupSubnet: "1.0.0.0/24", + Systems: []System{{Name: "n1"}}, + }, + addErr: true, + err: errors.New(`Cannot provide both the initiator's address and lookup subnet`), + }, + { + desc: "Cannot provide both system address and lookup subnet", + preseed: Preseed{ + Initiator: "n1", + LookupSubnet: "1.0.0.0/24", + Systems: []System{{Name: "n1", Address: "1.0.0.1"}}, + }, + addErr: true, + err: errors.New(`Cannot provide both the address for system "n1" and the lookup subnet`), + }, + { + desc: "Missing initiator address if one system has an address", + preseed: Preseed{ + Initiator: "n1", + Systems: []System{{Name: "n1", Address: "1.0.0.1"}}, + }, + addErr: true, + err: errors.New(`Missing the initiator's address as system "n1" has an address`), + }, + { + desc: "Missing listen address", + preseed: Preseed{ + SessionPassphrase: "foo", + InitiatorAddress: "1.0.0.1", + Systems: []System{{Name: "n1"}, {Name: "n2", Address: "1.0.0.2"}}, + Storage: StorageFilter{}, + }, + addErr: true, + err: errors.New(`Missing address for system "n1" when the initiator's address is set`), + }, + { + desc: "Systems missing name", + preseed: Preseed{ + SessionPassphrase: "foo", + InitiatorAddress: "1.0.0.1", + Systems: []System{{Name: "", UplinkInterface: "eth0", 
Address: "1.0.0.1"}, {Name: "n2", UplinkInterface: "eth0", Address: "1.0.0.2"}}, + OVN: InitNetwork{IPv4Gateway: "10.0.0.1/24", IPv4Range: "10.0.0.100-10.0.0.254", IPv6Gateway: "cafe::1/64"}, + Storage: StorageFilter{ + Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, + Ceph: []DiskFilter{{Find: "def", FindMin: 0, FindMax: 3, Wipe: false}}, + }, }, - addErr: true, err: errors.New("Missing system name"), }, { - desc: "FindMin too low for ceph filter", - subnet: "10.0.0.1/24", - iface: "enp5s0", - systems: []System{{Name: "n1"}, {Name: "n2"}}, - storage: StorageFilter{ - Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, - Ceph: []DiskFilter{{Find: "def", FindMin: 0, FindMax: 3, Wipe: false}}, + desc: "FindMin too low for ceph filter", + preseed: Preseed{ + SessionPassphrase: "foo", + InitiatorAddress: "1.0.0.1", + Systems: []System{{Name: "n1", Address: "1.0.0.1"}, {Name: "n2", Address: "1.0.0.2"}}, + Storage: StorageFilter{ + Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, + Ceph: []DiskFilter{{Find: "def", FindMin: 0, FindMax: 3, Wipe: false}}, + }, }, - addErr: true, err: errors.New("Remote storage filter cannot be defined with find_min less than 1"), }, { - desc: "Ceph direct selection (3) with more systems (4)", - subnet: "10.0.0.1/24", - iface: "enp5s0", - systems: []System{ - {Name: "n1", Storage: InitStorage{Ceph: []DirectStorage{{Path: "def"}}}}, - {Name: "n2", Storage: InitStorage{Ceph: []DirectStorage{{Path: "def"}}}}, - {Name: "n3", Storage: InitStorage{Ceph: []DirectStorage{{Path: "def"}}}}, - {Name: "n4"}}, + desc: "Ceph direct selection (3) with more systems (4)", + preseed: Preseed{ + SessionPassphrase: "foo", + InitiatorAddress: "1.0.0.1", + Systems: []System{ + {Name: "n1", Address: "1.0.0.1", Storage: InitStorage{Ceph: []DirectStorage{{Path: "def"}}}}, + {Name: "n2", Address: "1.0.0.2", Storage: InitStorage{Ceph: []DirectStorage{{Path: "def"}}}}, + {Name: "n3", Address: 
"1.0.0.3", Storage: InitStorage{Ceph: []DirectStorage{{Path: "def"}}}}, + {Name: "n4", Address: "1.0.0.4"}}, + }, addErr: false, err: nil, }, { - desc: "Minimum ceph direct selection (1) with more systems (4)", - subnet: "10.0.0.1/24", - iface: "enp5s0", - systems: []System{ - {Name: "n1", Storage: InitStorage{Ceph: []DirectStorage{{Path: "def"}}}}, - {Name: "n2"}, - {Name: "n3"}, - {Name: "n4"}}, + desc: "Minimum ceph direct selection (1) with more systems (4)", + preseed: Preseed{ + SessionPassphrase: "foo", + InitiatorAddress: "1.0.0.1", + Systems: []System{ + {Name: "n1", Address: "1.0.0.1", Storage: InitStorage{Ceph: []DirectStorage{{Path: "def"}}}}, + {Name: "n2", Address: "1.0.0.2"}, + {Name: "n3", Address: "1.0.0.3"}, + {Name: "n4", Address: "1.0.0.4"}}, + }, addErr: false, err: nil, }, { - desc: "Incomplete zfs direct selection", - subnet: "10.0.0.1/24", - iface: "enp5s0", - systems: []System{{Name: "n1", Storage: InitStorage{Local: DirectStorage{Path: "def"}}}, {Name: "n2", Storage: InitStorage{Local: DirectStorage{Path: "def"}}}, {Name: "n3"}}, - addErr: true, - err: errors.New("Some systems are missing local storage disks"), + desc: "Incomplete zfs direct selection", + preseed: Preseed{ + SessionPassphrase: "foo", + InitiatorAddress: "1.0.0.1", + Systems: []System{{Name: "n1", Address: "1.0.0.1", Storage: InitStorage{Local: DirectStorage{Path: "def"}}}, {Name: "n2", Address: "1.0.0.2", Storage: InitStorage{Local: DirectStorage{Path: "def"}}}, {Name: "n3", Address: "1.0.0.3"}}, + }, + addErr: true, + err: errors.New("Some systems are missing local storage disks"), }, { - desc: "Invalid zfs filter constraint", - subnet: "10.0.0.1/24", - iface: "enp5s0", - systems: []System{{Name: "n1"}, {Name: "n2"}}, - storage: StorageFilter{ - Local: []DiskFilter{{Find: "abc", FindMin: 3, FindMax: 2, Wipe: false}}, + desc: "Invalid zfs filter constraint", + preseed: Preseed{ + SessionPassphrase: "foo", + InitiatorAddress: "1.0.0.1", + Systems: []System{{Name: "n1", 
Address: "1.0.0.1"}, {Name: "n2", Address: "1.0.0.2"}}, + Storage: StorageFilter{ + Local: []DiskFilter{{Find: "abc", FindMin: 3, FindMax: 2, Wipe: false}}, + }, }, addErr: true, err: errors.New("Invalid local storage filter constraints find_max (2) larger than find_min (3)"), }, { - desc: "Invalid zfs filter value", - subnet: "10.0.0.1/24", - iface: "enp5s0", - systems: []System{{Name: "n1"}, {Name: "n2"}}, - storage: StorageFilter{ - Local: []DiskFilter{{Find: "", FindMin: 3, FindMax: 2, Wipe: false}}, + desc: "Invalid zfs filter value", + preseed: Preseed{ + SessionPassphrase: "foo", + InitiatorAddress: "1.0.0.1", + Systems: []System{{Name: "n1", Address: "1.0.0.1"}, {Name: "n2", Address: "1.0.0.2"}}, + Storage: StorageFilter{ + Local: []DiskFilter{{Find: "", FindMin: 3, FindMax: 2, Wipe: false}}, + }, }, addErr: true, err: errors.New("Received empty local disk filter"), }, { - desc: "Invalid ceph filter min > max", - subnet: "10.0.0.1/24", - iface: "enp5s0", - systems: []System{{Name: "n1"}, {Name: "n2"}, {Name: "n3"}}, - storage: StorageFilter{ - Ceph: []DiskFilter{{Find: "def", FindMin: 4, FindMax: 3, Wipe: false}}, + desc: "Invalid ceph filter min > max", + preseed: Preseed{ + SessionPassphrase: "foo", + InitiatorAddress: "1.0.0.1", + Systems: []System{{Name: "n1", Address: "1.0.0.1"}, {Name: "n2", Address: "1.0.0.2"}, {Name: "n3", Address: "1.0.0.3"}}, + Storage: StorageFilter{ + Ceph: []DiskFilter{{Find: "def", FindMin: 4, FindMax: 3, Wipe: false}}, + }, }, addErr: true, err: errors.New("Invalid remote storage filter constraints find_max (3) must be larger than find_min (4)"), }, { - desc: "Invalid ceph filter constraints", - subnet: "10.0.0.1/24", - iface: "enp5s0", - systems: []System{{Name: "n1"}, {Name: "n2"}, {Name: "n3"}}, - storage: StorageFilter{ - Ceph: []DiskFilter{{Find: "", FindMin: 4, FindMax: 3, Wipe: false}}, + desc: "Invalid ceph filter constraints", + preseed: Preseed{ + SessionPassphrase: "foo", + InitiatorAddress: "1.0.0.1", + Systems: 
[]System{{Name: "n1", Address: "1.0.0.1"}, {Name: "n2", Address: "1.0.0.2"}, {Name: "n3", Address: "1.0.0.3"}}, + Storage: StorageFilter{ + Ceph: []DiskFilter{{Find: "", FindMin: 4, FindMax: 3, Wipe: false}}, + }, }, addErr: true, err: errors.New("Received empty remote disk filter"), }, { - desc: "Systems missing interface", - subnet: "10.0.0.1/24", - iface: "enp5s0", - systems: []System{{Name: "n1", UplinkInterface: ""}, {Name: "n2", UplinkInterface: "eth0"}, {Name: "n3", UplinkInterface: "eth0"}}, - ovn: InitNetwork{IPv4Gateway: "10.0.0.1/24", IPv4Range: "10.0.0.100-10.0.0.254", IPv6Gateway: "cafe::1/64"}, - storage: StorageFilter{ - Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, - Ceph: []DiskFilter{{Find: "def", FindMin: 3, FindMax: 3, Wipe: false}}, + desc: "Systems missing interface", + preseed: Preseed{ + SessionPassphrase: "foo", + InitiatorAddress: "1.0.0.1", + Systems: []System{{Name: "n1", Address: "1.0.0.1", UplinkInterface: ""}, {Name: "n2", Address: "1.0.0.2", UplinkInterface: "eth0"}, {Name: "n3", Address: "1.0.0.3", UplinkInterface: "eth0"}}, + OVN: InitNetwork{IPv4Gateway: "10.0.0.1/24", IPv4Range: "10.0.0.100-10.0.0.254", IPv6Gateway: "cafe::1/64"}, + Storage: StorageFilter{ + Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, + Ceph: []DiskFilter{{Find: "def", FindMin: 3, FindMax: 3, Wipe: false}}, + }, }, - addErr: true, err: errors.New("Some systems are missing an uplink interface"), }, { - desc: "OVN IPv4 Ranges with no gateway", - subnet: "10.0.0.1/24", - iface: "enp5s0", - systems: []System{{Name: "n1", UplinkInterface: "eth0"}, {Name: "n2", UplinkInterface: "eth0"}, {Name: "n3", UplinkInterface: "eth0"}}, - ovn: InitNetwork{IPv4Range: "10.0.0.100-10.0.0.254", IPv6Gateway: "cafe::1/64"}, - storage: StorageFilter{ - Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, - Ceph: []DiskFilter{{Find: "def", FindMin: 3, FindMax: 3, Wipe: false}}, + desc: "OVN IPv4 Ranges with no 
gateway", + preseed: Preseed{ + SessionPassphrase: "foo", + InitiatorAddress: "1.0.0.1", + Systems: []System{{Name: "n1", Address: "1.0.0.1", UplinkInterface: "eth0"}, {Name: "n2", Address: "1.0.0.2", UplinkInterface: "eth0"}, {Name: "n3", Address: "1.0.0.3", UplinkInterface: "eth0"}}, + OVN: InitNetwork{IPv4Range: "10.0.0.100-10.0.0.254", IPv6Gateway: "cafe::1/64"}, + Storage: StorageFilter{ + Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, + Ceph: []DiskFilter{{Find: "def", FindMin: 3, FindMax: 3, Wipe: false}}, + }, }, - addErr: true, err: errors.New("Cannot specify IPv4 range without IPv4 gateway"), }, { - desc: "Invalid OVN IPv4 Ranges", - subnet: "10.0.0.1/24", - iface: "enp5s0", - systems: []System{{Name: "n1", UplinkInterface: "eth0"}, {Name: "n2", UplinkInterface: "eth0"}, {Name: "n3", UplinkInterface: "eth0"}}, - ovn: InitNetwork{IPv4Gateway: "10.0.0.1/24", IPv4Range: "10.0.0.100,10.0.0.254", IPv6Gateway: "cafe::1/64"}, - storage: StorageFilter{ - Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, - Ceph: []DiskFilter{{Find: "def", FindMin: 3, FindMax: 3, Wipe: false}}, + desc: "Invalid OVN IPv4 Ranges", + preseed: Preseed{ + SessionPassphrase: "foo", + InitiatorAddress: "1.0.0.1", + Systems: []System{{Name: "n1", Address: "1.0.0.1", UplinkInterface: "eth0"}, {Name: "n2", Address: "1.0.0.2", UplinkInterface: "eth0"}, {Name: "n3", Address: "1.0.0.3", UplinkInterface: "eth0"}}, + OVN: InitNetwork{IPv4Gateway: "10.0.0.1/24", IPv4Range: "10.0.0.100,10.0.0.254", IPv6Gateway: "cafe::1/64"}, + Storage: StorageFilter{ + Local: []DiskFilter{{Find: "abc", FindMin: 0, FindMax: 3, Wipe: false}}, + Ceph: []DiskFilter{{Find: "def", FindMin: 3, FindMax: 3, Wipe: false}}, + }, }, - addErr: true, err: errors.New("Invalid IPv4 range (must be of the form -)"), }, } s.T().Log("Preseed init missing local system") - p := Preseed{LookupSubnet: "10.0.0.1/24", Systems: []System{{Name: "B"}, {Name: "C"}}} + p := 
Preseed{SessionPassphrase: "foo", InitiatorAddress: "1.0.0.1", Systems: []System{{Name: "B", Address: "1.0.0.1"}, {Name: "C", Address: "1.0.0.2"}}} err := p.validate("A", true) s.EqualError(err, "Local MicroCloud must be included in the list of systems when initializing") - s.T().Log("Preseed add includes local system") - p = Preseed{LookupSubnet: "10.0.0.1/24", Systems: []System{{Name: "A"}, {Name: "B"}}} - err = p.validate("A", false) - s.EqualError(err, "Local MicroCloud must not be included in the list of systems when adding new members") for _, c := range cases { s.T().Log(c.desc) - p := Preseed{ - LookupSubnet: c.subnet, - LookupInterface: c.iface, - Systems: c.systems, - OVN: c.ovn, - Storage: c.storage, - } - err := p.validate("n1", true) + err := c.preseed.validate("n1", true) if c.err == nil { s.NoError(err) } else { @@ -252,7 +311,7 @@ func (s *preseedSuite) Test_preseedValidateInvalid() { } s.T().Logf("%s in add mode", c.desc) - err = p.validate("n0", false) + err = c.preseed.validate("n0", false) if c.addErr { s.EqualError(err, c.err.Error()) } else { @@ -280,7 +339,7 @@ func (s *preseedSuite) Test_preseedMatchDisksMemory() { // Tests that ReuseExistingClusters only works when initializing, not when growing the cluster. 
func (s *preseedSuite) Test_restrictClusterReuse() { - p := Preseed{ReuseExistingClusters: true, LookupSubnet: "10.0.0.1/24", LookupInterface: "enp5s0", Systems: []System{{Name: "B"}, {Name: "C"}}} + p := Preseed{SessionPassphrase: "foo", Initiator: "B", ReuseExistingClusters: true, Systems: []System{{Name: "B"}, {Name: "C"}}} s.NoError(p.validate("B", true)) @@ -289,3 +348,132 @@ func (s *preseedSuite) Test_restrictClusterReuse() { p.ReuseExistingClusters = false s.NoError(p.validate("A", false)) } + +func (s *preseedSuite) Test_isInitiator() { + cases := []struct { + desc string + preseed Preseed + name string + address string + isInitiator bool + }{ + { + desc: "System name matches initiator", + preseed: Preseed{Initiator: "A"}, + name: "A", + isInitiator: true, + }, + { + desc: "System name doesn't match initiator", + preseed: Preseed{Initiator: "A"}, + name: "B", + isInitiator: false, + }, + { + desc: "System address does match initiator address", + preseed: Preseed{InitiatorAddress: "1.0.0.1"}, + address: "1.0.0.1", + isInitiator: true, + }, + { + desc: "System address doesn't match initiator address", + preseed: Preseed{InitiatorAddress: "1.0.0.1"}, + address: "1.0.0.2", + isInitiator: false, + }, + } + + for _, c := range cases { + s.T().Log(c.desc) + + s.Equal(c.isInitiator, c.preseed.isInitiator(c.name, c.address)) + } +} + +func (s *preseedSuite) Test_isBootstrap() { + cases := []struct { + desc string + preseed Preseed + isBootstrap bool + }{ + { + desc: "Initiator is in the list of systems", + preseed: Preseed{Initiator: "A", Systems: []System{{Name: "A"}}}, + isBootstrap: true, + }, + { + desc: "Initiator is not in the list of systems", + preseed: Preseed{Initiator: "B", Systems: []System{{Name: "A"}}}, + isBootstrap: false, + }, + { + desc: "Initiator address is in the list of systems", + preseed: Preseed{InitiatorAddress: "1.0.0.1", Systems: []System{{Name: "A", Address: "1.0.0.1"}}}, + isBootstrap: true, + }, + { + desc: "Initiator address is not 
in the list of systems", + preseed: Preseed{InitiatorAddress: "1.0.0.2", Systems: []System{{Name: "A", Address: "1.0.0.1"}}}, + isBootstrap: false, + }, + } + + for _, c := range cases { + s.T().Log(c.desc) + + s.Equal(c.isBootstrap, c.preseed.isBootstrap()) + } +} + +func (s *preseedSuite) Test_address() { + cases := []struct { + desc string + name string + preseed Preseed + address string + err error + }{ + { + desc: "Local address is the one specified in preseed", + name: "A", + preseed: Preseed{ + Systems: []System{{Name: "A", Address: "1.0.0.1"}}, + }, + address: "1.0.0.1", + }, + { + // Assumption that the test system has a `lo` with 127.0.0.0/8. + // This allows not specifying/creating a custom interface for testing. + desc: "Local address is the first one from lookup subnet", + preseed: Preseed{ + LookupSubnet: "127.0.0.0/8", + }, + address: "127.0.0.1", + }, + { + desc: "Failed to parse lookup subnet", + preseed: Preseed{ + LookupSubnet: "foo", + }, + err: errors.New("invalid CIDR address: foo"), + }, + { + desc: "Failed to find address in non-existing subnet", + preseed: Preseed{ + LookupSubnet: "1.2.3.0/24", + }, + err: errors.New(`Failed to determine MicroCloud address within subnet "1.2.3.0/24"`), + }, + } + + for _, c := range cases { + s.T().Log(c.desc) + + addr, err := c.preseed.address(c.name) + if c.err != nil { + s.Equal(c.err.Error(), err.Error()) + } else { + s.Equal(c.address, addr) + } + } +} diff --git a/cmd/microcloud/services.go b/cmd/microcloud/services.go index be2aadb60..34b3ab4c2 100644 --- a/cmd/microcloud/services.go +++ b/cmd/microcloud/services.go @@ -114,9 +114,9 @@ func (c *cmdServiceList) Run(cmd *cobra.Command, args []string) error { var lxd lxd.InstanceServer switch s.Type() { case types.LXD: - lxd, err = s.(*service.LXDService).Client(context.Background(), "") + lxd, err = s.(*service.LXDService).Client(context.Background()) case types.MicroCeph: - microClient, err = s.(*service.CephService).Client("", "") + microClient, err 
= s.(*service.CephService).Client("") case types.MicroOVN: microClient, err = s.(*service.OVNService).Client() case types.MicroCloud: @@ -236,7 +236,7 @@ func (c *cmdServiceAdd) Run(cmd *cobra.Command, args []string) error { cfg.address = status.Address.Addr().String() // enable auto setup to skip lookup related questions. cfg.autoSetup = true - err = cfg.askAddress() + err = cfg.askAddress("") if err != nil { return err } diff --git a/cmd/microcloud/session.go b/cmd/microcloud/session.go new file mode 100644 index 000000000..1bf34e7a2 --- /dev/null +++ b/cmd/microcloud/session.go @@ -0,0 +1,178 @@ +package main + +import ( + "context" + "crypto/x509" + "encoding/pem" + "fmt" + "time" + + "github.com/canonical/lxd/shared" + + "github.com/canonical/microcloud/microcloud/api/types" + cloudClient "github.com/canonical/microcloud/microcloud/client" + "github.com/canonical/microcloud/microcloud/mdns" + "github.com/canonical/microcloud/microcloud/service" +) + +type SessionFunc func(gw *cloudClient.WebsocketGateway) error + +func (c *initConfig) runSession(ctx context.Context, s *service.Handler, role types.SessionRole, timeout time.Duration, f SessionFunc) error { + cloud := s.Services[types.MicroCloud].(*service.CloudService) + conn, err := cloud.StartSession(ctx, string(role), timeout) + if err != nil { + return err + } + + defer conn.Close() + + return f(cloudClient.NewWebsocketGateway(ctx, conn)) +} + +func (c *initConfig) initiatingSession(gw *cloudClient.WebsocketGateway, sh *service.Handler, services []types.ServiceType, passphrase string, expectedSystems []string) error { + session := types.Session{ + Address: c.address, + Interface: c.lookupIface.Name, + Services: services, + Passphrase: passphrase, + } + + err := gw.Write(session) + if err != nil { + return fmt.Errorf("Failed to send session start: %w", err) + } + + err = gw.ReceiveWithContext(gw.Context(), &session) + if err != nil { + return fmt.Errorf("Failed to read session reply: %w", err) + } + + if 
!c.autoSetup { + cloud := sh.Services[types.MicroCloud].(*service.CloudService) + + // If the cluster is already bootstrapped the cluster certificate is used + // instead for the server. + // If a joiner sends its intent to join our existing cluster it extracts + // the certificate's fingerprint out of the underlying connection. + // When adding new systems the fingerprint displayed here has to be the + // one from the cluster certificate as this one is used as soon as the + // cluster is bootstrapped. + var cert *shared.CertInfo + if c.bootstrap { + cert, err = cloud.ServerCert() + if err != nil { + return err + } + } else { + cert, err = cloud.ClusterCert() + if err != nil { + return err + } + } + + fingerprint, err := c.shortFingerprint(cert.Fingerprint()) + if err != nil { + return fmt.Errorf("Failed to shorten fingerprint: %w", err) + } + + fmt.Printf("Use the following command on systems that you want to join the cluster:\n\n microcloud join\n\n") + fmt.Printf("When requested enter the passphrase:\n\n %s\n\n", session.Passphrase) + fmt.Printf("Verify the fingerprint %q is displayed on joining systems.\n", fingerprint) + fmt.Println("Waiting to detect systems ...") + } + + confirmedIntents, err := c.askJoinIntents(gw, expectedSystems) + if err != nil { + return err + } + + err = gw.Write(types.Session{ + ConfirmedIntents: confirmedIntents, + }) + if err != nil { + return fmt.Errorf("Failed to send join intents: %w", err) + } + + err = gw.ReceiveWithContext(gw.Context(), &session) + if err != nil { + return fmt.Errorf("Failed to read confirmation errors: %w", err) + } + + if !session.Accepted { + return fmt.Errorf("Join confirmations didn't get accepted on all systems") + } + + for _, joinIntent := range confirmedIntents { + certBlock, _ := pem.Decode([]byte(joinIntent.Certificate)) + if certBlock == nil { + return fmt.Errorf("Invalid certificate file") + } + + remoteCert, err := x509.ParseCertificate(certBlock.Bytes) + if err != nil { + return 
fmt.Errorf("Failed to parse certificate: %w", err) + } + + // Register init system + c.systems[joinIntent.Name] = InitSystem{ + ServerInfo: mdns.ServerInfo{ + Version: joinIntent.Version, + Name: joinIntent.Name, + Address: joinIntent.Address, + Services: joinIntent.Services, + // Store the peer's certificate to allow mTLS server validation + // for requests after the trust establishment. + Certificate: remoteCert, + }, + } + } + + if !c.autoSetup { + for _, info := range c.systems { + fmt.Printf(" Selected %q at %q\n", info.ServerInfo.Name, info.ServerInfo.Address) + } + + // Add a space between the CLI and the response. + fmt.Println("") + } + + return nil +} + +func (c *initConfig) joiningSession(gw *cloudClient.WebsocketGateway, sh *service.Handler, services []types.ServiceType, initiatorAddress string, passphrase string) error { + session := types.Session{ + Passphrase: passphrase, + Address: sh.Address, + InitiatorAddress: initiatorAddress, + Interface: c.lookupIface.Name, + Services: services, + LookupTimeout: c.lookupTimeout, + } + + err := gw.Write(session) + if err != nil { + return fmt.Errorf("Failed to send session start: %w", err) + } + + if !c.autoSetup && initiatorAddress == "" { + fmt.Println("Searching for an eligible system ...") + } + + // The server confirms the target regardless of whether one was provided. 
+ err = gw.ReceiveWithContext(gw.Context(), &session) + if err != nil { + return fmt.Errorf("Failed to find an eligible system: %w", err) + } + + if !c.autoSetup { + fingerprint, err := c.shortFingerprint(session.InitiatorFingerprint) + if err != nil { + return err + } + + fmt.Printf("\n Found system %q at %q using fingerprint %q\n\n", session.InitiatorName, session.InitiatorAddress, fingerprint) + fmt.Printf("Select %q on %q to let it join the cluster\n", sh.Name, session.InitiatorName) + } + + return c.askJoinConfirmation(gw, services) +} diff --git a/cmd/microcloudd/main.go b/cmd/microcloudd/main.go index 6cd0186e3..97c2a4346 100644 --- a/cmd/microcloudd/main.go +++ b/cmd/microcloudd/main.go @@ -85,14 +85,6 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error { // Periodically check if new services have been installed. go func() { for { - if s.AuthSecret == "" { - logger.Debug("Waiting for initial setup before checking for optional services") - time.Sleep(1 * time.Second) - - continue - } - - updated := false for serviceName, stateDir := range optionalServices { if service.Exists(serviceName, stateDir) { if s.Services[serviceName] != nil { @@ -105,24 +97,9 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error { break } - updated = true s.Services[serviceName] = newService.Services[serviceName] } else if s.Services[serviceName] != nil { delete(s.Services, serviceName) - updated = true - } - } - - if updated { - err = s.StopBroadcast() - if err != nil { - logger.Error("Failed to shutdown broadcast after detecting new services", logger.Ctx{"error": err}) - continue - } - - err = s.Broadcast() - if err != nil { - logger.Error("Failed to restart broadcast after detecting new services", logger.Ctx{"error": err}) } } @@ -134,6 +111,9 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error { api.ServicesCmd(s), api.ServiceTokensCmd(s), api.ServicesClusterCmd(s), + api.SessionJoinCmd(s), + api.SessionInitiatingCmd(s), + 
api.SessionJoiningCmd(s), + api.LXDProxy(s), + api.CephProxy(s), + api.OVNProxy(s), diff --git a/doc/explanation/initialisation.md b/doc/explanation/initialisation.md index b900009ab..95250227d 100644 --- a/doc/explanation/initialisation.md +++ b/doc/explanation/initialisation.md @@ -7,15 +7,25 @@ relatedlinks: https://en.wikipedia.org/wiki/Multicast_DNS See {ref}`howto-initialise` for instructions on how to set up MicroCloud. +(trust-establishment-session)= +## Trust establishment session + +To allow several instances of MicroCloud to join the final cluster, in both the interactive and non-interactive methods each instance +runs one half of the trust establishment session to trust the other side. + +Each trust establishment session has one initiator and one to many joiners. +In the interactive mode, the side that runs the `microcloud init` command becomes the initiator. +The other side becomes the joiner by running `microcloud join`. +In the non-interactive mode the initiator is defined using either the `initiator` or `initiator_address` configuration key. + (automatic-server-detection)= ## Automatic server detection -MicroCloud uses {abbr}`mDNS (multicast DNS)` to automatically detect other servers on the network. +If required, MicroCloud uses {abbr}`mDNS (multicast DNS)` to automatically detect a so-called initiator on the network. This method works in physical networks, but it is usually not supported in a cloud environment. +Instead, you can specify the address of the initiator to avoid requiring mDNS. -The scan can be limited to the default local subnet of the network interface you select. - -MicroCloud will display all servers that it detects and periodically update the list. You can select the servers you want to add to the MicroCloud cluster. +The scan is limited to the local subnet of the network interface you select when choosing an address for MicroCloud's internal traffic (see {ref}`microcloud-networking-intracluster`). 
(bootstrapping-process)= ## Bootstrapping process diff --git a/doc/how-to/add_machine.md b/doc/how-to/add_machine.md index 55f112fab..d7badbb88 100644 --- a/doc/how-to/add_machine.md +++ b/doc/how-to/add_machine.md @@ -1,9 +1,32 @@ (howto-add)= # How to add a machine +## Interactive configuration If you want to add a machine to the MicroCloud cluster after the initialisation, use the {command}`microcloud add` command: sudo microcloud add -Answer the prompts to add the machine. -You can add the `--wipe` flag to automatically wipe any disks you add to the cluster. +On the new machine use the {command}`microcloud join` command: + + sudo microcloud join + +Answer the prompts on both sides to add the machine. +You can also add the `--wipe` flag to automatically wipe any disks you add to the cluster. + +## Non-interactive configuration + +If you want to automatically add a machine, you can provide a preseed configuration in YAML format to the {command}`microcloud preseed` command: + + cat | microcloud preseed + +In the list of systems include only the new machine and set either `initiator` or `initiator_address` which can point to any machine +that is already part of the MicroCloud. + +Make sure to distribute and run the same preseed configuration on the new and existing system configured using either `initiator` or `initiator_address`. + +The preseed YAML file must use the following syntax: + +```{literalinclude} preseed.yaml +:language: YAML +:emphasize-lines: 1-3,6-8,11-12,19,29-33 +``` diff --git a/doc/how-to/initialise.md b/doc/how-to/initialise.md index 0e596d50a..822b37532 100644 --- a/doc/how-to/initialise.md +++ b/doc/how-to/initialise.md @@ -2,7 +2,7 @@ # How to initialise MicroCloud The {ref}`initialisation process ` bootstraps the MicroCloud cluster. -You run the initialisation on one of the machines, and it configures the required services on all machines. 
+You run the initialisation on one of the machines, and it configures the required services on all of the machines that have been joined. (howto-initialise-interactive)= ## Interactive configuration @@ -28,19 +28,26 @@ Complete the following steps to initialise MicroCloud: sudo microcloud init +1. Select whether you want to set up more than one machine. + + This allows you to create a MicroCloud using a single node. + It will skip the {ref}`trust-establishment-session` if no more machines should be part of the MicroCloud. + + Additional machines can always be added at a later point in time. + See {ref}`howto-add` for more information. 1. Select the IP address that you want to use for MicroCloud's internal traffic (see {ref}`microcloud-networking-intracluster`). MicroCloud automatically detects the available addresses (IPv4 and IPv6) on the existing network interfaces and displays them in a table. You must select exactly one address. -1. Decide if you want to limit the search for other machines. +1. On all the other machines, enter the following command and repeat the address selection: - If you accept the default (`yes`), MicroCloud will automatically detect machines in the local subnet. - Otherwise, it will detect all available machines, which might include duplicates (if machines are available both on IPv4 and on IPv6). + sudo microcloud join - See {ref}`automatic-server-detection` for more information. + It will automatically detect the machine acting as the initiator. + See {ref}`trust-establishment-session` for more information and {ref}`automatic-server-detection` in case the network doesn't support mDNS. 1. Select the machines that you want to add to the MicroCloud cluster. - MicroCloud displays all machines that it detects. This list will periodically update as new machines are detected. + MicroCloud displays all machines that have reached out during the trust establishment session. 
Make sure that all machines that you select have the required snaps installed. 1. Select whether you want to set up local storage. @@ -135,13 +142,15 @@ If more than one MicroCeph or MicroOVN cluster exists among the systems, the Mic (howto-initialise-preseed)= ## Non-interactive configuration -If you want to automate the initialisation process, you can provide a preseed configuration in YAML format to the {command}`microcloud init` command: +If you want to automate the initialisation process, you can provide a preseed configuration in YAML format to the {command}`microcloud preseed` command: + + cat | microcloud preseed - cat | microcloud init --preseed +Make sure to distribute and run the same preseed configuration on all systems that should be part of the MicroCloud. The preseed YAML file must use the following syntax: ```{literalinclude} preseed.yaml :language: YAML -:emphasize-lines: 1,4-7,27,33-41 +:emphasize-lines: 1-3,6-8,11-12,19,29-33 ``` diff --git a/doc/how-to/preseed.yaml b/doc/how-to/preseed.yaml index 97cfc80b3..98d0a45be 100644 --- a/doc/how-to/preseed.yaml +++ b/doc/how-to/preseed.yaml @@ -1,18 +1,46 @@ +# `initiator` defines which system takes over the role of the initiator during the trust establishment with mDNS. +# Make sure to also set `lookup_subnet`. +# The field cannot be set together with `initiator_address`. +initiator: micro01 + +# `initiator_address` defines which system takes over the role of the initiator during the trust establishment. +# It also allows joining systems to learn about the address they have to connect to. +# The field cannot be set together with `initiator`. +initiator_address: 10.0.0.1 + # `lookup_subnet` limits the subnet when looking up systems with mDNS. -lookup_subnet: 10.0.0.1/24 -# `lookup_interface` limits the interface when looking up systems with mDNS. -lookup_interface: eth0 +# The first assigned address of this subnet is used for MicroCloud. 
+lookup_subnet: 10.0.0.0/24 + +# `lookup_timeout` is optional and configures how long the joining system will wait for a system to be discovered with mDNS. +# The value has to be provided in seconds. +# It defaults to 60 seconds. +lookup_timeout: 300 + +# `session_passphrase` configures the passphrase used during the trust establishment session. +session_passphrase: 83P27XWKbDczUyE7xaX3pgVfaEacfQ2qiQ0r6gPb + +# `session_timeout` is optional and configures how long the trust establishment session will last. +# The value has to be provided in seconds. +# It defaults to 60 minutes. +session_timeout: 300 + +# `reuse_existing_clusters` is optional and configures whether or not to reuse existing clusters. +reuse_existing_clusters: true # `systems` lists the systems we expect to find by their host name. -# `name` represents the host name +# `name` represents the host name. +# `address` sets the address used for MicroCloud and is required in case `initiator_address` is present. # `ovn_uplink_interface` is optional and represents the name of the interface reserved for use with OVN. # `ovn_underlay_ip` is optional and represents the Geneve Encap IP for each system. # `storage` is optional and represents explicit paths to disks for each system. 
systems: - name: micro01 + address: 10.0.0.1 ovn_uplink_interface: eth1 ovn_underlay_ip: 10.0.2.101 - name: micro02 + address: 10.0.0.2 ovn_uplink_interface: eth1 ovn_underlay_ip: 10.0.2.102 storage: @@ -26,9 +54,11 @@ systems: wipe: true encrypt: true - name: micro03 + address: 10.0.0.3 ovn_uplink_interface: eth1 ovn_underlay_ip: 10.0.2.103 - name: micro04 + address: 10.0.0.4 ovn_uplink_interface: eth1 # `ceph` is optional and represents the Ceph global configuration diff --git a/doc/tutorial/get_started.md b/doc/tutorial/get_started.md index 6711b8ec3..fc56fc7b3 100644 --- a/doc/tutorial/get_started.md +++ b/doc/tutorial/get_started.md @@ -248,18 +248,13 @@ Complete the following steps on each VM (`micro1`, `micro2`, `micro3`, and `micr ## 6. Initialise MicroCloud After installing all snaps on all VMs, you can initialise MicroCloud. -This initialisation is done on one of the machines only. We use `micro1`, but you can choose another machine. Complete the following steps: -1. Access the shell in `micro1`: +1. Access the shell in `micro1` and start the initialisation process: - lxc exec micro1 -- bash - -1. Start the initialisation process: - - microcloud init + lxc exec micro1 microcloud init ```{tip} In this tutorial, we initialise MicroCloud interactively. @@ -268,8 +263,21 @@ Complete the following steps: 1. Answer the questions: + 1. Select `yes` to select more than one cluster member. 1. As the address for MicroCloud's internal traffic, select the listed IPv4 address. - 1. Select `yes` to limit the search for other MicroCloud servers to the local subnet. + 1. Copy the session passphrase. + 1. Head to the other servers (`micro02`, `micro03`, and `micro04`) and start the join process: + + lxc exec micro02 microcloud join + + ```{tip} + + Open up three additional terminals to run the commands concurrently. + ``` + + In each terminal select an address for MicroCloud's internal traffic. + When prompted enter the passphrase in each terminal and return to `micro01`. 
+ 1. Select all listed servers (these should be `micro2`, `micro3`, and `micro4`). 1. Select `yes` to set up local storage. 1. Select the listed local disks (`local1`, `local2`, `local3`, and `local4`). @@ -303,7 +311,7 @@ Complete the following steps: MicroCloud will now initialise the cluster. See {ref}`explanation-initialisation` for more information. -See the full initialisation process here: +See the full process here for the initiating side: (initialisation-process)= @@ -313,6 +321,7 @@ See the full initialisation process here: :host: micro1 :scroll: +Do you want to set up more than one cluster member? (yes/no) [default=yes]: yes Select an address for MicroCloud's internal traffic: Space to select; enter to confirm; type to filter results. Up/down to move; right to select all; left to select none. @@ -325,18 +334,27 @@ Up/down to move; right to select all; left to select none. Using address "203.0.113.169" for MicroCloud -Limit search for other MicroCloud servers to 203.0.113.169/24? (yes/no) [default=yes]: yes -Scanning for eligible servers ... +Use the following command on systems that you want to join the cluster: + + microcloud join + +When requested enter the passphrase: + + koala absorbing update dorsal + +Verify the fingerprint "5d0808de679d" is displayed on joining systems. +Waiting to detect systems ... Space to select; enter to confirm; type to filter results. Up/down to move; right to select all; left to select none. 
- +---------+--------+---------------+ - | NAME | IFACE | ADDR | - +---------+--------+---------------+ -> [x] | micro3 | enp5s0 | 203.0.113.171 | - [x] | micro2 | enp5s0 | 203.0.113.170 | - [x] | micro4 | enp5s0 | 203.0.113.172 | - +---------+--------+---------------+ - + +---------+---------------+--------------+ + | NAME | ADDRESS | FINGERPRINT | + +---------+---------------+--------------+ +> [x] | micro3 | 203.0.113.171 | 4e80954d6a64 | + [x] | micro2 | 203.0.113.170 | 84e0b50e13b3 | + [x] | micro4 | 203.0.113.172 | 98667a808a99 | + +---------+---------------+--------------+ + + Selected "micro1" at "203.0.113.169" Selected "micro3" at "203.0.113.171" Selected "micro2" at "203.0.113.170" Selected "micro4" at "203.0.113.172" @@ -442,6 +460,33 @@ Cluster initialization is complete MicroCloud is ready ``` +See the full process here for one of the joining sides (`micro02`): + +```{terminal} +:input: microcloud join +:user: root +:host: micro2 +:scroll: + +Select an address for MicroCloud's internal traffic: + + Using address "203.0.113.170" for MicroCloud + +Verify the fingerprint "84e0b50e13b3" is displayed on the other system. +Specify the passphrase for joining the system: koala absorbing update dorsal +Searching for an eligible system ... + + Found system "micro01" at "203.0.113.169" using fingerprint "5d0808de679d" + +Select "micro02" on "micro01" to let it join the cluster + + Received confirmation from system "micro01" + +Do not exit out to keep the session alive. +Complete the remaining configuration on "micro01" ... +Successfully joined the cluster +``` + ## 7. Inspect your MicroCloud setup You can now inspect your cluster setup. 
diff --git a/go.mod b/go.mod index 4759bffc4..2d6f955be 100644 --- a/go.mod +++ b/go.mod @@ -1,24 +1,28 @@ module github.com/canonical/microcloud/microcloud -go 1.22.5 +go 1.22.6 + +toolchain go1.23.1 replace github.com/canonical/microcluster/v2 => github.com/canonical/microcluster/v2 v2.0.0-20240911074836-85e676b8f4bc require ( github.com/AlecAivazis/survey/v2 v2.3.7 github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2 - github.com/canonical/lxd v0.0.0-20240906102712-be65fe046f98 + github.com/canonical/lxd v0.0.0-20241001102405-5bba53b1ecd4 github.com/canonical/microceph/microceph v0.0.0-20240912190827-ef42f096671e github.com/canonical/microcluster/v2 v2.0.2 github.com/canonical/microovn/microovn v0.0.0-20240912142147-31ce8c71de4f github.com/creack/pty v1.1.21 github.com/gorilla/mux v1.8.1 + github.com/gorilla/websocket v1.5.3 github.com/hashicorp/mdns v1.0.5 github.com/hinshun/vt10x v0.0.0-20220301184237-5011da428d02 github.com/olekukonko/tablewriter v0.0.5 github.com/spf13/cobra v1.8.1 github.com/stretchr/testify v1.9.0 - golang.org/x/mod v0.20.0 + golang.org/x/mod v0.21.0 + golang.org/x/sync v0.8.0 golang.org/x/sys v0.25.0 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 @@ -38,7 +42,6 @@ require ( github.com/google/renameio v1.0.1 // indirect github.com/google/uuid v1.6.0 // indirect github.com/gorilla/securecookie v1.1.2 // indirect - github.com/gorilla/websocket v1.5.3 // indirect github.com/gosexy/gettext v0.0.0-20160830220431-74466a0a0c4a // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect @@ -59,16 +62,15 @@ require ( github.com/sirupsen/logrus v1.9.3 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/zitadel/logging v0.6.0 // indirect - github.com/zitadel/oidc/v3 v3.28.2 // indirect + github.com/zitadel/oidc/v3 v3.30.0 // indirect github.com/zitadel/schema v1.3.0 // indirect - go.opentelemetry.io/otel v1.28.0 // indirect - 
go.opentelemetry.io/otel/metric v1.28.0 // indirect - go.opentelemetry.io/otel/trace v1.28.0 // indirect - golang.org/x/crypto v0.26.0 // indirect - golang.org/x/net v0.28.0 // indirect - golang.org/x/oauth2 v0.22.0 // indirect - golang.org/x/sync v0.8.0 // indirect - golang.org/x/term v0.23.0 // indirect - golang.org/x/text v0.17.0 // indirect - golang.org/x/tools v0.24.0 // indirect + go.opentelemetry.io/otel v1.30.0 // indirect + go.opentelemetry.io/otel/metric v1.30.0 // indirect + go.opentelemetry.io/otel/trace v1.30.0 // indirect + golang.org/x/crypto v0.27.0 // indirect + golang.org/x/net v0.29.0 // indirect + golang.org/x/oauth2 v0.23.0 // indirect + golang.org/x/term v0.24.0 // indirect + golang.org/x/text v0.18.0 // indirect + golang.org/x/tools v0.25.0 // indirect ) diff --git a/go.sum b/go.sum index fa4475b89..989e99bbe 100644 --- a/go.sum +++ b/go.sum @@ -58,8 +58,8 @@ github.com/bmatcuk/doublestar/v4 v4.6.1 h1:FH9SifrbvJhnlQpztAx++wlkk70QBf0iBWDwN github.com/bmatcuk/doublestar/v4 v4.6.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= github.com/canonical/go-dqlite v1.22.0 h1:DuJmfcREl4gkQJyvZzjl2GHFZROhbPyfdjDRQXpkOyw= github.com/canonical/go-dqlite v1.22.0/go.mod h1:Uvy943N8R4CFUAs59A1NVaziWY9nJ686lScY7ywurfg= -github.com/canonical/lxd v0.0.0-20240906102712-be65fe046f98 h1:qIAlzHSoZ5lqAZrY06mEFhq2YOIOPRSqapWktFJna94= -github.com/canonical/lxd v0.0.0-20240906102712-be65fe046f98/go.mod h1:ebmIFCKHkOrzL0ahCy2maJTmPtk+oadjYFwGJyGVUMc= +github.com/canonical/lxd v0.0.0-20241001102405-5bba53b1ecd4 h1:dVHeJJSpW7jFqC96slCya8zh1YeVOOSCi0WgSmlr6s4= +github.com/canonical/lxd v0.0.0-20241001102405-5bba53b1ecd4/go.mod h1:UyjqYS/HZYiXb+kjpnYUphDy5wewkdVs7+ZnXUGpzq0= github.com/canonical/microceph/microceph v0.0.0-20240912190827-ef42f096671e h1:enQgR0bgyQ1SfYIvYx5rU4n0OLjtpbIraGiwzqBtpYk= github.com/canonical/microceph/microceph v0.0.0-20240912190827-ef42f096671e/go.mod h1:l/yzvjl6FsuNClbYx2VffcL4QZsYff5qElaKWFqktjc= github.com/canonical/microcluster/v2 
v2.0.0-20240911074836-85e676b8f4bc h1:nFhj6x5C7syjO1BoxBCWlRexxRa18H1Ema6RsMJePr8= @@ -311,8 +311,8 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/rs/cors v1.11.0 h1:0B9GE/r9Bc2UxRMMtymBkHTenPkHDv0CW4Y98GBY+po= -github.com/rs/cors v1.11.0/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU= +github.com/rs/cors v1.11.1 h1:eU3gRzXLRK57F5rKMGMZURNdIG4EoAmX8k94r9wXWHA= +github.com/rs/cors v1.11.1/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= @@ -352,8 +352,8 @@ github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1 github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/zitadel/logging v0.6.0 h1:t5Nnt//r+m2ZhhoTmoPX+c96pbMarqJvW1Vq6xFTank= github.com/zitadel/logging v0.6.0/go.mod h1:Y4CyAXHpl3Mig6JOszcV5Rqqsojj+3n7y2F591Mp/ow= -github.com/zitadel/oidc/v3 v3.28.2 h1:poJmUjjJhgSNgfzyVtArnnAlXhXCpefsvEV36rUwM9s= -github.com/zitadel/oidc/v3 v3.28.2/go.mod h1:WmDFu3dZ9YNKrIoZkmxjGG8QyUR4PbbhsVVSY+rpojM= +github.com/zitadel/oidc/v3 v3.30.0 h1:1IuZlK+X+JLExEA2PYgRlVvWHBhz/cMwT7VL/YrQabw= +github.com/zitadel/oidc/v3 v3.30.0/go.mod h1:+I5BgvGO5C2ZJrQRjV34EjkyA7P3GXyYGZgXI8Sdw18= github.com/zitadel/schema v1.3.0 h1:kQ9W9tvIwZICCKWcMvCEweXET1OcOyGEuFbHs4o5kg0= github.com/zitadel/schema v1.3.0/go.mod h1:NptN6mkBDFvERUCvZHlvWmmME+gmZ44xzwRXwhzsbtc= go.etcd.io/etcd/api/v3 
v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= @@ -366,12 +366,12 @@ go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= -go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= -go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= -go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= -go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s= -go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= -go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= +go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts= +go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc= +go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4QIZs7+w= +go.opentelemetry.io/otel/metric v1.30.0/go.mod h1:aXTfST94tswhWEb+5QjlSqG+cZlmyXy/u8jFpor3WqQ= +go.opentelemetry.io/otel/trace v1.30.0 h1:7UBkkYzeg3C7kQX8VAidWh2biiQbtAKjyIML8dQ9wmc= +go.opentelemetry.io/otel/trace v1.30.0/go.mod h1:5EyKqTzzmyqB9bwtCCq6pDLktPK6fmGf/Dph+8VI02o= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= @@ -384,8 +384,8 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod 
h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw= -golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= -golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= +golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= +golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -422,8 +422,8 @@ golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= -golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= +golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -463,8 +463,8 @@ golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96b golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8= golang.org/x/net 
v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= -golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= -golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= +golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -477,8 +477,8 @@ golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210402161424-2e8d93401602/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= -golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= +golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -550,8 +550,8 @@ golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= -golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= +golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= +golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -562,8 +562,8 @@ golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= -golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= +golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -619,8 +619,8 @@ golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4f 
golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= -golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= +golang.org/x/tools v0.25.0 h1:oFU9pkj/iJgs+0DT+VMHrx+oBKs/LJMV+Uvg78sl+fE= +golang.org/x/tools v0.25.0/go.mod h1:/vtpO8WL1N9cQC3FN5zPqb//fRXskFHbLKk4OW1Q7rg= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/mdns/lookup.go b/mdns/lookup.go index 964f77922..1238722db 100644 --- a/mdns/lookup.go +++ b/mdns/lookup.go @@ -2,6 +2,7 @@ package mdns import ( "context" + "crypto/x509" "encoding/json" "fmt" "log" @@ -18,12 +19,11 @@ import ( // ServerInfo is information about the server that is broadcast over mDNS. type ServerInfo struct { - Version string - Name string - Address string - Interface string - Services []types.ServiceType - AuthSecret string + Version string + Name string + Address string + Services []types.ServiceType + Certificate *x509.Certificate } // NetworkInfo represents information about a network interface broadcast by a MicroCloud peer. @@ -33,11 +33,6 @@ type NetworkInfo struct { Subnet *net.IPNet } -// LookupKey returns a unique key representing a lookup entry. -func (s ServerInfo) LookupKey() string { - return fmt.Sprintf("%s_%s_%s", s.Name, s.Interface, s.Address) -} - // forwardingWriter forwards the mdns log message to LXD's logger package. 
type forwardingWriter struct{} @@ -57,92 +52,57 @@ func (f forwardingWriter) Write(p []byte) (int, error) { return len(logMsg), nil } -// LookupPeers finds any broadcasting peers and returns a list of their names. -func LookupPeers(ctx context.Context, iface *net.Interface, version string, localPeer string) (map[string]ServerInfo, error) { - entries := []*mdns.ServiceEntry{} - for i := 0; i < ServiceSize; i++ { - nextEntries, err := Lookup(ctx, iface, fmt.Sprintf("%s_%d", ClusterService, i), clusterSize) - if err != nil { - return nil, err - } - - entries = append(entries, nextEntries...) +// LookupPeer finds any broadcasting peer and returns its info. +func LookupPeer(ctx context.Context, iface *net.Interface, version string) (*ServerInfo, error) { + entry, err := Lookup(ctx, iface, ClusterService) + if err != nil { + return nil, err } - peers := map[string]ServerInfo{} - for _, entry := range entries { - if entry == nil { - return nil, fmt.Errorf("Received empty record") - } - - if !strings.HasSuffix(entry.Name, ".local.") { - continue - } - - serviceStr := strings.TrimSuffix(entry.Name, ".local.") - parts := strings.SplitN(serviceStr, fmt.Sprintf(".%s_", ClusterService), 2) - if len(parts) != 2 { - continue - } - - // Skip a response from ourselves. 
- peerName := parts[0] - if localPeer == peerName { - continue - } + if entry == nil { + return nil, fmt.Errorf("Received empty record") + } - if len(entry.InfoFields) == 0 { - logger.Infof("Received incomplete record from peer %q", peerName) - continue - } + if !strings.HasSuffix(entry.Name, ".local.") { + return nil, fmt.Errorf("Invalid service name %q", entry.Name) + } - unquoted, err := strconv.Unquote("\"" + strings.Join(entry.InfoFields, "") + "\"") - if err != nil { - return nil, fmt.Errorf("Failed to format DNS TXT record: %w", err) - } + serviceStr := strings.TrimSuffix(entry.Name, ".local.") + parts := strings.SplitN(serviceStr, ClusterService, 2) + if len(parts) != 2 { + return nil, fmt.Errorf("Invalid service name %q", entry.Name) + } - info := ServerInfo{} - err = json.Unmarshal([]byte(unquoted), &info) - if err != nil { - return nil, fmt.Errorf("Failed to parse server info: %w", err) - } + peerName := parts[0] + if len(entry.InfoFields) == 0 { + return nil, fmt.Errorf("Received incomplete record from peer %q", peerName) + } - // Skip a response from ourselves. - if localPeer == info.Name { - continue - } + unquoted, err := strconv.Unquote("\"" + strings.Join(entry.InfoFields, "") + "\"") + if err != nil { + return nil, fmt.Errorf("Failed to format DNS TXT record: %w", err) + } - // Skip any responses from mismatched versions. - if info.Version != version { - logger.Infof("System %q (version %q) has a version mismatch. Expected %q", peerName, info.Version, version) - continue - } + info := ServerInfo{} + err = json.Unmarshal([]byte(unquoted), &info) + if err != nil { + return nil, fmt.Errorf("Failed to parse server info: %w", err) + } - peers[info.LookupKey()] = info + // Skip any responses from mismatched versions. + if info.Version != version { + return nil, fmt.Errorf("System %q (version %q) has a version mismatch. 
Expected %q", peerName, info.Version, version) } - return peers, nil + return &info, nil } // Lookup searches for the given service name over mdns. -func Lookup(ctx context.Context, iface *net.Interface, service string, size int) ([]*mdns.ServiceEntry, error) { +func Lookup(ctx context.Context, iface *net.Interface, service string) (*mdns.ServiceEntry, error) { log.SetOutput(forwardingWriter{}) - ctx, cancel := context.WithCancel(ctx) - defer cancel() - entriesCh := make(chan *mdns.ServiceEntry, size) - entries := []*mdns.ServiceEntry{} - go func() { - for { - select { - case <-ctx.Done(): - return - default: - for entry := range entriesCh { - entries = append(entries, entry) - } - } - } - }() + + entriesCh := make(chan *mdns.ServiceEntry, 1) + defer close(entriesCh) params := mdns.DefaultParams(service) params.Interface = iface @@ -167,14 +127,20 @@ func Lookup(ctx context.Context, iface *net.Interface, service string, size int) return nil, fmt.Errorf("No supported IP versions on the network interface %q", iface.Name) } - err = mdns.Query(params) - if err != nil { - return nil, fmt.Errorf("Failed lookup: %w", err) + // Return the first peer that gets found. + for { + select { + case <-ctx.Done(): + return nil, fmt.Errorf("Failed lookup: %w", ctx.Err()) + case entry := <-entriesCh: + return entry, nil + default: + err = mdns.Query(params) + if err != nil { + return nil, fmt.Errorf("Failed lookup: %w", err) + } + } } - - close(entriesCh) - - return entries, nil } // GetNetworkInfo returns a slice of NetworkInfo to be included in the mDNS broadcast. diff --git a/mdns/mdns.go b/mdns/mdns.go index 23387d44b..f248acbec 100644 --- a/mdns/mdns.go +++ b/mdns/mdns.go @@ -10,12 +10,6 @@ import ( // ClusterService is the service name used for broadcasting willingness to join a cluster. const ClusterService = "_microcloud" -// ServiceSize is the maximum number of simultaneous broadcasts of the same mDNS service. 
-const ServiceSize = 10 - -// clusterSize is the maximum number of cluster members we can find. -const clusterSize = 1000 - // NewBroadcast returns a running mdns.Server which broadcasts the service at the given name and address. func NewBroadcast(name string, iface *net.Interface, addr string, port int, service string, txt []byte) (*mdns.Server, error) { var sendTXT []string diff --git a/mdns/version.go b/mdns/version.go index 5f74f26cb..3ee65c2cd 100644 --- a/mdns/version.go +++ b/mdns/version.go @@ -1,4 +1,4 @@ package mdns // Version is the current version of the mDNS broadcast/lookup format. -const Version = "1.0" +const Version = "2.0" diff --git a/service/interface.go b/service/interface.go index aa90d585a..b9f117de3 100644 --- a/service/interface.go +++ b/service/interface.go @@ -2,6 +2,7 @@ package service import ( "context" + "crypto/x509" "github.com/canonical/microcloud/microcloud/api/types" ) @@ -12,11 +13,12 @@ type Service interface { Join(ctx context.Context, config JoinConfig) error IssueToken(ctx context.Context, peer string) (string, error) - DeleteToken(ctx context.Context, tokenName string, address string, secret string) error + DeleteToken(ctx context.Context, tokenName string, address string) error ClusterMembers(ctx context.Context) (map[string]string, error) - RemoteClusterMembers(ctx context.Context, secret string, address string) (map[string]string, error) - + // RemoteClusterMembers is called during the pre-init phase of microcluster. + // It allows providing the certificate of the remote microcluster member for mTLS verification. 
+ RemoteClusterMembers(ctx context.Context, cert *x509.Certificate, address string) (map[string]string, error) DeleteClusterMember(ctx context.Context, name string, force bool) error Type() types.ServiceType diff --git a/service/lxd.go b/service/lxd.go index e2a47f8c6..934cd812d 100644 --- a/service/lxd.go +++ b/service/lxd.go @@ -2,6 +2,7 @@ package service import ( "context" + "crypto/x509" "fmt" "net" "net/http" @@ -14,6 +15,7 @@ import ( "github.com/canonical/lxd/shared/api" "github.com/canonical/lxd/shared/logger" "github.com/canonical/microcluster/v2/microcluster" + microTypes "github.com/canonical/microcluster/v2/rest/types" "golang.org/x/mod/semver" "github.com/canonical/microcloud/microcloud/api/types" @@ -47,8 +49,7 @@ func NewLXDService(name string, addr string, cloudDir string) (*LXDService, erro } // Client returns a client to the LXD unix socket. -// The secret should be specified when the request is going to be forwarded to a remote address, such as with UseTarget. -func (s LXDService) Client(ctx context.Context, secret string) (lxd.InstanceServer, error) { +func (s LXDService) Client(ctx context.Context) (lxd.InstanceServer, error) { c, err := s.m.LocalClient() if err != nil { return nil, err @@ -57,12 +58,13 @@ func (s LXDService) Client(ctx context.Context, secret string) (lxd.InstanceServ return lxd.ConnectLXDUnixWithContext(ctx, s.m.FileSystem.ControlSocket().URL.Host, &lxd.ConnectionArgs{ HTTPClient: c.Client.Client, SkipGetServer: true, - Proxy: cloudClient.AuthProxy(secret, types.LXD), + Proxy: cloudClient.AuthProxy("", types.LXD), }) } // remoteClient returns an https client for the given address:port. -func (s LXDService) remoteClient(secret string, address string, port int64) (lxd.InstanceServer, error) { +// It picks the cluster certificate if none is provided to verify the remote. 
+func (s LXDService) remoteClient(cert *x509.Certificate, address string, port int64) (lxd.InstanceServer, error) { c, err := s.m.RemoteClient(util.CanonicalNetworkAddress(address, port)) if err != nil { return nil, err @@ -73,14 +75,27 @@ func (s LXDService) remoteClient(secret string, address string, port int64) (lxd return nil, err } + // Use the cluster certificate if none is provided. + if cert == nil { + clusterCert, err := s.m.FileSystem.ClusterCert() + if err != nil { + return nil, err + } + + cert, err = clusterCert.PublicKeyX509() + if err != nil { + return nil, err + } + } + remoteURL := c.URL() client, err := lxd.ConnectLXD(remoteURL.String(), &lxd.ConnectionArgs{ - HTTPClient: c.Client.Client, - TLSClientCert: string(serverCert.PublicKey()), - TLSClientKey: string(serverCert.PrivateKey()), - InsecureSkipVerify: true, - SkipGetServer: true, - Proxy: cloudClient.AuthProxy(secret, types.LXD), + HTTPClient: c.Client.Client, + TLSClientCert: string(serverCert.PublicKey()), + TLSClientKey: string(serverCert.PrivateKey()), + TLSServerCert: microTypes.X509Certificate{Certificate: cert}.String(), + SkipGetServer: true, + Proxy: cloudClient.AuthProxy("", types.LXD), }) if err != nil { return nil, err @@ -91,7 +106,7 @@ func (s LXDService) remoteClient(secret string, address string, port int64) (lxd // Bootstrap bootstraps the LXD daemon on the default port. func (s LXDService) Bootstrap(ctx context.Context) error { - client, err := s.Client(ctx, "") + client, err := s.Client(ctx) if err != nil { return err } @@ -178,7 +193,7 @@ func (s LXDService) Join(ctx context.Context, joinConfig JoinConfig) error { } config.Cluster.MemberConfig = joinConfig.LXDConfig - client, err := s.Client(ctx, "") + client, err := s.Client(ctx) if err != nil { return err } @@ -213,7 +228,7 @@ func (s LXDService) Join(ctx context.Context, joinConfig JoinConfig) error { // IssueToken issues a token for the given peer. 
func (s LXDService) IssueToken(ctx context.Context, peer string) (string, error) { - client, err := s.Client(ctx, "") + client, err := s.Client(ctx) if err != nil { return "", err } @@ -233,13 +248,13 @@ func (s LXDService) IssueToken(ctx context.Context, peer string) (string, error) } // DeleteToken deletes a token by its name. -func (s LXDService) DeleteToken(ctx context.Context, tokenName string, address string, secret string) error { +func (s LXDService) DeleteToken(ctx context.Context, tokenName string, address string) error { var c lxd.InstanceServer var err error - if address != "" && secret != "" { - c, err = s.remoteClient(secret, address, CloudPort) + if address != "" { + c, err = s.remoteClient(nil, address, CloudPort) } else { - c, err = s.Client(ctx, secret) + c, err = s.Client(ctx) } if err != nil { @@ -280,9 +295,10 @@ func (s LXDService) DeleteToken(ctx context.Context, tokenName string, address s return fmt.Errorf("No corresponding join token operation found for %q", tokenName) } -// RemoteClusterMembers returns a map of cluster member names and addresses from the MicroCloud at the given address, authenticated with the given secret. -func (s LXDService) RemoteClusterMembers(ctx context.Context, secret string, address string) (map[string]string, error) { - client, err := s.remoteClient(secret, address, CloudPort) +// RemoteClusterMembers returns a map of cluster member names and addresses from the MicroCloud at the given address. +// Provide the certificate of the remote server for mTLS. +func (s LXDService) RemoteClusterMembers(ctx context.Context, cert *x509.Certificate, address string) (map[string]string, error) { + client, err := s.remoteClient(cert, address, CloudPort) if err != nil { return nil, err } @@ -292,7 +308,7 @@ func (s LXDService) RemoteClusterMembers(ctx context.Context, secret string, add // ClusterMembers returns a map of cluster member names. 
func (s LXDService) ClusterMembers(ctx context.Context) (map[string]string, error) { - client, err := s.Client(ctx, "") + client, err := s.Client(ctx) if err != nil { return nil, err } @@ -332,7 +348,7 @@ func (s LXDService) clusterMembers(client lxd.InstanceServer) (map[string]string // DeleteClusterMember removes the given cluster member from the service. func (s LXDService) DeleteClusterMember(ctx context.Context, name string, force bool) error { - c, err := s.Client(ctx, "") + c, err := s.Client(ctx) if err != nil { return err } @@ -372,16 +388,16 @@ func (s *LXDService) SetConfig(config map[string]string) { } // HasExtension checks if the server supports the API extension. -func (s *LXDService) HasExtension(ctx context.Context, target string, address string, secret string, apiExtension string) (bool, error) { +func (s *LXDService) HasExtension(ctx context.Context, target string, address string, cert *x509.Certificate, apiExtension string) (bool, error) { var err error var client lxd.InstanceServer if s.Name() == target { - client, err = s.Client(ctx, secret) + client, err = s.Client(ctx) if err != nil { return false, err } } else { - client, err = s.remoteClient(secret, address, CloudPort) + client, err = s.remoteClient(cert, address, CloudPort) if err != nil { return false, err } @@ -401,16 +417,16 @@ func (s *LXDService) HasExtension(ctx context.Context, target string, address st // GetResources returns the system resources for the LXD target. // As we cannot guarantee that LXD is available on this machine, the request is // forwarded through MicroCloud on via the ListenPort argument. 
-func (s *LXDService) GetResources(ctx context.Context, target string, address string, secret string) (*api.Resources, error) { +func (s *LXDService) GetResources(ctx context.Context, target string, address string, cert *x509.Certificate) (*api.Resources, error) { var err error var client lxd.InstanceServer if s.Name() == target { - client, err = s.Client(ctx, secret) + client, err = s.Client(ctx) if err != nil { return nil, err } } else { - client, err = s.remoteClient(secret, address, CloudPort) + client, err = s.remoteClient(cert, address, CloudPort) if err != nil { return nil, err } @@ -420,13 +436,13 @@ func (s *LXDService) GetResources(ctx context.Context, target string, address st } // GetStoragePools fetches the list of all storage pools from LXD, keyed by pool name. -func (s LXDService) GetStoragePools(ctx context.Context, name string, address string, secret string) (map[string]api.StoragePool, error) { +func (s LXDService) GetStoragePools(ctx context.Context, name string, address string, cert *x509.Certificate) (map[string]api.StoragePool, error) { var err error var client lxd.InstanceServer if name == s.Name() { - client, err = s.Client(ctx, "") + client, err = s.Client(ctx) } else { - client, err = s.remoteClient(secret, address, CloudPort) + client, err = s.remoteClient(cert, address, CloudPort) } if err != nil { @@ -448,12 +464,12 @@ func (s LXDService) GetStoragePools(ctx context.Context, name string, address st // GetConfig returns the member-specific and cluster-wide configurations of LXD. // If LXD is not clustered, it just returns the member-specific configuration. 
-func (s LXDService) GetConfig(ctx context.Context, clustered bool, name string, address string, secret string) (localConfig map[string]any, globalConfig map[string]any, err error) { +func (s LXDService) GetConfig(ctx context.Context, clustered bool, name string, address string, cert *x509.Certificate) (localConfig map[string]any, globalConfig map[string]any, err error) { var client lxd.InstanceServer if name == s.Name() { - client, err = s.Client(ctx, "") + client, err = s.Client(ctx) } else { - client, err = s.remoteClient(secret, address, CloudPort) + client, err = s.remoteClient(cert, address, CloudPort) } if err != nil { @@ -537,13 +553,13 @@ type DedicatedInterface struct { // - A map of ceph compatible networks keyed by interface name. // - A map of ovn compatible networks keyed by interface name. // - The list of all networks. -func (s LXDService) GetNetworkInterfaces(ctx context.Context, name string, address string, secret string) (map[string]api.Network, map[string]DedicatedInterface, []api.Network, error) { +func (s LXDService) GetNetworkInterfaces(ctx context.Context, name string, address string, cert *x509.Certificate) (map[string]api.Network, map[string]DedicatedInterface, []api.Network, error) { var err error var client lxd.InstanceServer if name == s.Name() { - client, err = s.Client(ctx, "") + client, err = s.Client(ctx) } else { - client, err = s.remoteClient(secret, address, CloudPort) + client, err = s.remoteClient(cert, address, CloudPort) } if err != nil { @@ -635,7 +651,7 @@ func (s *LXDService) isInitialized(c lxd.InstanceServer) (bool, error) { // Restart requests LXD to shutdown, then waits until it is ready. 
func (s *LXDService) Restart(ctx context.Context, timeoutSeconds int) error { - c, err := s.Client(ctx, "") + c, err := s.Client(ctx) if err != nil { return err } @@ -786,7 +802,7 @@ func (s LXDService) defaultGatewaySubnetV4() (*net.IPNet, string, error) { // SupportsFeature checks if the specified API feature of this Service instance if supported. func (s LXDService) SupportsFeature(ctx context.Context, feature string) (bool, error) { - c, err := s.Client(ctx, "") + c, err := s.Client(ctx) if err != nil { return false, err } diff --git a/service/lxd_join.go b/service/lxd_join.go index e9e169239..32a797974 100644 --- a/service/lxd_join.go +++ b/service/lxd_join.go @@ -23,7 +23,7 @@ func (s *LXDService) configFromToken(token string) (*api.ClusterPut, error) { ServerAddress: util.CanonicalNetworkAddress(s.address, s.port), } - ok, err := s.HasExtension(context.Background(), s.Name(), s.Address(), "", "explicit_trust_token") + ok, err := s.HasExtension(context.Background(), s.Name(), s.Address(), nil, "explicit_trust_token") if err != nil { return nil, err } diff --git a/service/microceph.go b/service/microceph.go index 8467d64a9..507d773d7 100644 --- a/service/microceph.go +++ b/service/microceph.go @@ -2,6 +2,7 @@ package service import ( "context" + "crypto/x509" "fmt" "net/http" "net/url" @@ -55,8 +56,7 @@ func NewCephService(name string, addr string, cloudDir string) (*CephService, er } // Client returns a client to the Ceph unix socket. If target is specified, it will be added to the query params. -// If secret is specified, it will be added to the request header. 
-func (s CephService) Client(target string, secret string) (*client.Client, error) { +func (s CephService) Client(target string) (*client.Client, error) { c, err := s.m.LocalClient() if err != nil { return nil, err @@ -66,7 +66,7 @@ func (s CephService) Client(target string, secret string) (*client.Client, error c = c.UseTarget(target) } - c, err = cloudClient.UseAuthProxy(c, secret, types.MicroCeph) + c, err = cloudClient.UseAuthProxy(c, types.MicroCeph, cloudClient.AuthConfig{}) if err != nil { return nil, err } @@ -108,16 +108,16 @@ func (s CephService) IssueToken(ctx context.Context, peer string) (string, error } // DeleteToken deletes a token by its name. -func (s CephService) DeleteToken(ctx context.Context, tokenName string, address string, secret string) error { +func (s CephService) DeleteToken(ctx context.Context, tokenName string, address string) error { var c *client.Client var err error - if address != "" && secret != "" { + if address != "" { c, err = s.m.RemoteClient(util.CanonicalNetworkAddress(address, CloudPort)) if err != nil { return err } - c, err = cloudClient.UseAuthProxy(c, secret, types.MicroCeph) + c, err = cloudClient.UseAuthProxy(c, types.MicroCeph, cloudClient.AuthConfig{}) } else { c, err = s.m.LocalClient() } @@ -136,7 +136,7 @@ func (s CephService) Join(ctx context.Context, joinConfig JoinConfig) error { return err } - c, err := s.Client("", "") + c, err := s.Client("") if err != nil { return err } @@ -151,14 +151,35 @@ func (s CephService) Join(ctx context.Context, joinConfig JoinConfig) error { return nil } -// RemoteClusterMembers returns a map of cluster member names and addresses from the MicroCloud at the given address, authenticated with the given secret. -func (s CephService) RemoteClusterMembers(ctx context.Context, secret string, address string) (map[string]string, error) { - client, err := s.m.RemoteClient(util.CanonicalNetworkAddress(address, CloudPort)) +// remoteClient returns an https client for the given address:port. 
+// It picks the cluster certificate if none is provided to verify the remote. +func (s CephService) remoteClient(cert *x509.Certificate, address string) (*client.Client, error) { + var err error + var client *client.Client + + canonicalAddress := util.CanonicalNetworkAddress(address, CloudPort) + if cert != nil { + client, err = s.m.RemoteClientWithCert(canonicalAddress, cert) + } else { + client, err = s.m.RemoteClient(canonicalAddress) + } + + if err != nil { + return nil, err + } + + return client, nil +} + +// RemoteClusterMembers returns a map of cluster member names and addresses from the MicroCloud at the given address. +// Provide the certificate of the remote server for mTLS. +func (s CephService) RemoteClusterMembers(ctx context.Context, cert *x509.Certificate, address string) (map[string]string, error) { + client, err := s.remoteClient(cert, address) if err != nil { return nil, err } - client, err = cloudClient.UseAuthProxy(client, secret, types.MicroCeph) + client, err = cloudClient.UseAuthProxy(client, types.MicroCeph, cloudClient.AuthConfig{}) if err != nil { return nil, err } @@ -168,7 +189,7 @@ func (s CephService) RemoteClusterMembers(ctx context.Context, secret string, ad // ClusterMembers returns a map of cluster member names and addresses. func (s CephService) ClusterMembers(ctx context.Context) (map[string]string, error) { - client, err := s.Client("", "") + client, err := s.Client("") if err != nil { return nil, err } @@ -187,21 +208,21 @@ func (s CephService) DeleteClusterMember(ctx context.Context, name string, force } // ClusterConfig returns the Ceph cluster configuration. 
-func (s CephService) ClusterConfig(ctx context.Context, targetAddress string, targetSecret string) (map[string]string, error) { +func (s CephService) ClusterConfig(ctx context.Context, targetAddress string, cert *x509.Certificate) (map[string]string, error) { var c *client.Client var err error - if targetAddress == "" && targetSecret == "" { - c, err = s.Client("", "") + if targetAddress == "" { + c, err = s.Client("") if err != nil { return nil, err } } else { - c, err = s.m.RemoteClient(util.CanonicalNetworkAddress(targetAddress, CloudPort)) + c, err = s.remoteClient(cert, targetAddress) if err != nil { return nil, err } - c, err = cloudClient.UseAuthProxy(c, targetSecret, types.MicroCeph) + c, err = cloudClient.UseAuthProxy(c, types.MicroCeph, cloudClient.AuthConfig{}) if err != nil { return nil, err } diff --git a/service/microcloud.go b/service/microcloud.go index eefd3d17a..783ae1fe8 100644 --- a/service/microcloud.go +++ b/service/microcloud.go @@ -2,11 +2,13 @@ package service import ( "context" + "crypto/x509" "fmt" "strconv" "time" "github.com/canonical/lxd/lxd/util" + "github.com/canonical/lxd/shared" "github.com/canonical/lxd/shared/api" "github.com/canonical/lxd/shared/logger" cephTypes "github.com/canonical/microceph/microceph/api/types" @@ -14,6 +16,7 @@ import ( "github.com/canonical/microcluster/v2/microcluster" "github.com/canonical/microcluster/v2/rest" "github.com/canonical/microcluster/v2/state" + "github.com/gorilla/websocket" "github.com/canonical/microcloud/microcloud/api/types" "github.com/canonical/microcloud/microcloud/client" @@ -39,6 +42,12 @@ type JoinConfig struct { OVNConfig map[string]string } +// Status represents information about a cluster member. +// It represents microcluster's internal Server type and implements a subset of it. +type Status struct { + Name string `json:"name" yaml:"name"` +} + // NewCloudService creates a new MicroCloud service with a client attached. 
func NewCloudService(name string, addr string, dir string) (*CloudService, error) { client, err := microcluster.App(microcluster.Args{StateDir: dir}) @@ -63,9 +72,19 @@ func (s *CloudService) StartCloud(ctx context.Context, service *Handler, endpoin Version: version.Version, PreInitListenAddress: "[::]:" + strconv.FormatInt(CloudPort, 10), Hooks: &state.Hooks{ - PostBootstrap: func(ctx context.Context, s state.State, cfg map[string]string) error { return service.StopBroadcast() }, - PostJoin: func(ctx context.Context, s state.State, cfg map[string]string) error { return service.StopBroadcast() }, - OnStart: service.Start, + PostJoin: func(ctx context.Context, s state.State, cfg map[string]string) error { + // If the node has joined close the session. + // This will signal to the client to exit out gracefully + // and ultimately lead to the closing of the websocket connection. + // Prevent blocking of the hook by also watching the outer context. + select { + case service.Session.ExitCh() <- true: + case <-ctx.Done(): + } + + return nil + }, + OnStart: service.Start, }, ExtensionServers: map[string]rest.Server{ "microcloud": { @@ -124,16 +143,16 @@ func (s CloudService) IssueToken(ctx context.Context, peer string) (string, erro } // DeleteToken deletes a token by its name. 
-func (s CloudService) DeleteToken(ctx context.Context, tokenName string, address string, secret string) error { +func (s CloudService) DeleteToken(ctx context.Context, tokenName string, address string) error { var c *microClient.Client var err error - if address != "" && secret != "" { + if address != "" { c, err = s.client.RemoteClient(util.CanonicalNetworkAddress(address, CloudPort)) if err != nil { return err } - c, err = cloudClient.UseAuthProxy(c, secret, types.MicroCloud) + c, err = cloudClient.UseAuthProxy(c, types.MicroCloud, cloudClient.AuthConfig{}) } else { c, err = s.client.LocalClient() } @@ -146,13 +165,13 @@ func (s CloudService) DeleteToken(ctx context.Context, tokenName string, address } // RemoteIssueToken issues a token for the given peer on a remote MicroCloud where we are authorized by mDNS. -func (s CloudService) RemoteIssueToken(ctx context.Context, clusterAddress string, secret string, peer string, serviceType types.ServiceType) (string, error) { +func (s CloudService) RemoteIssueToken(ctx context.Context, clusterAddress string, peer string, serviceType types.ServiceType) (string, error) { c, err := s.client.RemoteClient(util.CanonicalNetworkAddress(clusterAddress, CloudPort)) if err != nil { return "", err } - c, err = cloudClient.UseAuthProxy(c, secret, types.MicroCloud) + c, err = cloudClient.UseAuthProxy(c, types.MicroCloud, cloudClient.AuthConfig{}) if err != nil { return "", err } @@ -165,8 +184,28 @@ func (s CloudService) Join(ctx context.Context, joinConfig JoinConfig) error { return s.client.JoinCluster(ctx, s.name, util.CanonicalNetworkAddress(s.address, s.port), joinConfig.Token, nil) } +// remoteClient returns an https client for the given address:port. +// It picks the cluster certificate if none is provided to verify the remote. 
+func (s CloudService) remoteClient(cert *x509.Certificate, address string) (*microClient.Client, error) { + var err error + var client *microClient.Client + + canonicalAddress := util.CanonicalNetworkAddress(address, CloudPort) + if cert != nil { + client, err = s.client.RemoteClientWithCert(canonicalAddress, cert) + } else { + client, err = s.client.RemoteClient(canonicalAddress) + } + + if err != nil { + return nil, err + } + + return client, nil +} + // RequestJoin sends the signal to initiate a join to the remote system, or timeout after a maximum of 5 min. -func (s CloudService) RequestJoin(ctx context.Context, secret string, name string, joinConfig types.ServicesPut) error { +func (s CloudService) RequestJoin(ctx context.Context, name string, cert *x509.Certificate, joinConfig types.ServicesPut) error { ctx, cancel := context.WithTimeout(ctx, time.Minute*5) defer cancel() @@ -178,12 +217,12 @@ func (s CloudService) RequestJoin(ctx context.Context, secret string, name strin return err } } else { - c, err = s.client.RemoteClient(util.CanonicalNetworkAddress(joinConfig.Address, CloudPort)) + c, err = s.remoteClient(cert, joinConfig.Address) if err != nil { return err } - c, err = cloudClient.UseAuthProxy(c, secret, types.MicroCloud) + c, err = cloudClient.UseAuthProxy(c, types.MicroCloud, cloudClient.AuthConfig{}) if err != nil { return err } @@ -192,14 +231,30 @@ func (s CloudService) RequestJoin(ctx context.Context, secret string, name strin return client.JoinServices(ctx, c, joinConfig) } -// RemoteClusterMembers returns a map of cluster member names and addresses from the MicroCloud at the given address, authenticated with the given secret. -func (s CloudService) RemoteClusterMembers(ctx context.Context, secret string, address string) (map[string]string, error) { - client, err := s.client.RemoteClient(util.CanonicalNetworkAddress(address, CloudPort)) +// RequestJoinIntent sends the intent to join the remote cluster. 
+func (s CloudService) RequestJoinIntent(ctx context.Context, clusterAddress string, conf cloudClient.AuthConfig, intent types.SessionJoinPost) (*x509.Certificate, error) { + c, err := s.client.RemoteClientWithCert(util.CanonicalNetworkAddress(clusterAddress, CloudPort), conf.TLSServerCertificate) + if err != nil { + return nil, err + } + + c, err = cloudClient.UseAuthProxy(c, types.MicroCloud, conf) + if err != nil { + return nil, err + } + + return client.JoinIntent(ctx, c, intent) +} + +// RemoteClusterMembers returns a map of cluster member names and addresses from the MicroCloud at the given address. +// Provide the certificate of the remote server for mTLS. +func (s CloudService) RemoteClusterMembers(ctx context.Context, cert *x509.Certificate, address string) (map[string]string, error) { + client, err := s.remoteClient(cert, address) if err != nil { return nil, err } - client, err = cloudClient.UseAuthProxy(client, secret, types.MicroCloud) + client, err = cloudClient.UseAuthProxy(client, types.MicroCloud, cloudClient.AuthConfig{}) if err != nil { return nil, err } @@ -207,6 +262,27 @@ func (s CloudService) RemoteClusterMembers(ctx context.Context, secret string, a return clusterMembers(ctx, client) } +// RemoteStatus returns the status of a remote member which doesn't have to be part of any cluster. +func (s CloudService) RemoteStatus(ctx context.Context, cert *x509.Certificate, address string) (*Status, error) { + client, err := s.remoteClient(cert, address) + if err != nil { + return nil, err + } + + client, err = cloudClient.UseAuthProxy(client, types.MicroCloud, cloudClient.AuthConfig{}) + if err != nil { + return nil, err + } + + status := Status{} + err = client.Query(ctx, "GET", "core/1.0", nil, nil, &status) + if err != nil { + return nil, fmt.Errorf("Failed to get status: %w", err) + } + + return &status, nil +} + // ClusterMembers returns a map of cluster member names and addresses. 
func (s CloudService) ClusterMembers(ctx context.Context) (map[string]string, error) { client, err := s.client.LocalClient() @@ -287,3 +363,23 @@ func (s *CloudService) SupportsFeature(ctx context.Context, feature string) (boo return server.Extensions.HasExtension(feature), nil } + +// ServerCert returns the local clusters server certificate. +func (s *CloudService) ServerCert() (*shared.CertInfo, error) { + return s.client.FileSystem.ServerCert() +} + +// ClusterCert returns the local clusters certificate. +func (s *CloudService) ClusterCert() (*shared.CertInfo, error) { + return s.client.FileSystem.ClusterCert() +} + +// StartSession starts a trust establishment session via the unix socket. +func (s *CloudService) StartSession(ctx context.Context, role string, sessionTimeout time.Duration) (*websocket.Conn, error) { + c, err := s.client.LocalClient() + if err != nil { + return nil, err + } + + return client.StartSession(ctx, c, role, sessionTimeout) +} diff --git a/service/microovn.go b/service/microovn.go index fb949b489..a906eae1f 100644 --- a/service/microovn.go +++ b/service/microovn.go @@ -2,6 +2,7 @@ package service import ( "context" + "crypto/x509" "fmt" "net/http" "net/url" @@ -91,16 +92,16 @@ func (s OVNService) IssueToken(ctx context.Context, peer string) (string, error) } // DeleteToken deletes a token by its name. 
-func (s OVNService) DeleteToken(ctx context.Context, tokenName string, address string, secret string) error { +func (s OVNService) DeleteToken(ctx context.Context, tokenName string, address string) error { var c *client.Client var err error - if address != "" && secret != "" { + if address != "" { c, err = s.m.RemoteClient(util.CanonicalNetworkAddress(address, CloudPort)) if err != nil { return err } - c, err = cloudClient.UseAuthProxy(c, secret, types.MicroOVN) + c, err = cloudClient.UseAuthProxy(c, types.MicroOVN, cloudClient.AuthConfig{}) } else { c, err = s.m.LocalClient() } @@ -117,14 +118,26 @@ func (s OVNService) Join(ctx context.Context, joinConfig JoinConfig) error { return s.m.JoinCluster(ctx, s.name, util.CanonicalNetworkAddress(s.address, s.port), joinConfig.Token, joinConfig.OVNConfig) } -// RemoteClusterMembers returns a map of cluster member names and addresses from the MicroCloud at the given address, authenticated with the given secret. -func (s OVNService) RemoteClusterMembers(ctx context.Context, secret string, address string) (map[string]string, error) { - client, err := s.m.RemoteClient(util.CanonicalNetworkAddress(address, CloudPort)) - if err != nil { - return nil, err +// RemoteClusterMembers returns a map of cluster member names and addresses from the MicroCloud at the given address. +// Provide the certificate of the remote server for mTLS. 
+func (s OVNService) RemoteClusterMembers(ctx context.Context, cert *x509.Certificate, address string) (map[string]string, error) { + var err error + var client *client.Client + + canonicalAddress := util.CanonicalNetworkAddress(address, CloudPort) + if cert != nil { + client, err = s.m.RemoteClientWithCert(canonicalAddress, cert) + if err != nil { + return nil, err + } + } else { + client, err = s.m.RemoteClient(canonicalAddress) + if err != nil { + return nil, err + } } - client, err = cloudClient.UseAuthProxy(client, secret, types.MicroOVN) + client, err = cloudClient.UseAuthProxy(client, types.MicroOVN, cloudClient.AuthConfig{}) if err != nil { return nil, err } diff --git a/service/service_handler.go b/service/service_handler.go index 254c033cd..89fb95d33 100644 --- a/service/service_handler.go +++ b/service/service_handler.go @@ -2,21 +2,19 @@ package service import ( "context" - "encoding/json" + "crypto/x509" "fmt" - "math" - "net" + "net/http" "os" "path/filepath" "sync" - "github.com/canonical/lxd/shared" + "github.com/canonical/lxd/shared/api" "github.com/canonical/lxd/shared/logger" "github.com/canonical/microcluster/v2/state" - "github.com/hashicorp/mdns" "github.com/canonical/microcloud/microcloud/api/types" - cloudMDNS "github.com/canonical/microcloud/microcloud/mdns" + cloudClient "github.com/canonical/microcloud/microcloud/client" ) const ( @@ -35,14 +33,13 @@ const ( // Handler holds a set of services and an mdns server for communication between them. type Handler struct { - servers []*mdns.Server - Services map[types.ServiceType]Service Name string Address string Port int64 - AuthSecret string + sessionLock sync.RWMutex + Session *Session } // NewHandler creates a new Handler with a client for each of the given services. 
@@ -70,7 +67,6 @@ func NewHandler(name string, addr string, stateDir string, services ...types.Ser } return &Handler{ - servers: []*mdns.Server{}, Services: servicesMap, Name: name, Address: addr, @@ -92,90 +88,6 @@ func (s *Handler) Start(ctx context.Context, state state.State) error { logger.Error("Failed to restart LXD", logger.Ctx{"error": err}) } - s.AuthSecret, err = shared.RandomCryptoString() - if err != nil { - return err - } - - return s.Broadcast() -} - -// Broadcast broadcasts service information over mDNS. -func (s *Handler) Broadcast() error { - services := make([]types.ServiceType, 0, len(s.Services)) - for service := range s.Services { - services = append(services, service) - } - - networks, err := cloudMDNS.GetNetworkInfo() - if err != nil { - return err - } - - info := cloudMDNS.ServerInfo{ - Version: cloudMDNS.Version, - Name: s.Name, - Services: services, - AuthSecret: s.AuthSecret, - } - - // Prepare up to `ServiceSize` variations of the broadcast for each network interface. - broadcasts := make([][]cloudMDNS.ServerInfo, cloudMDNS.ServiceSize) - for i, net := range networks { - info.Address = net.Address - info.Interface = net.Interface.Name - - services := broadcasts[i%cloudMDNS.ServiceSize] - if services == nil { - services = []cloudMDNS.ServerInfo{} - } - - services = append(services, info) - broadcasts[i%cloudMDNS.ServiceSize] = services - } - - // Broadcast up to `ServiceSize` times with different service names before overlapping. - // The lookup won't know how many records there are, so this will reduce the amount of - // overlapping records preventing us from finding new ones. 
- for i, payloads := range broadcasts { - service := fmt.Sprintf("%s_%d", cloudMDNS.ClusterService, i) - for _, info := range payloads { - bytes, err := json.Marshal(info) - if err != nil { - return fmt.Errorf("Failed to marshal server info: %w", err) - } - - iface, err := net.InterfaceByName(info.Interface) - if err != nil { - return err - } - - if s.Port < 1 || s.Port > math.MaxUint16 { - return fmt.Errorf("Port number for service %q (%q) is out of range", s.Name, s.Port) - } - - server, err := cloudMDNS.NewBroadcast(info.LookupKey(), iface, info.Address, int(s.Port), service, bytes) - if err != nil { - return err - } - - s.servers = append(s.servers, server) - } - } - - return nil -} - -// StopBroadcast stops the mDNS broadcast and token lookup, as we are initiating a new cluster. -func (s *Handler) StopBroadcast() error { - for i, server := range s.servers { - service := fmt.Sprintf("%s_%d", cloudMDNS.ClusterService, i) - err := server.Shutdown() - if err != nil { - return fmt.Errorf("Failed to shut down %q server: %w", service, err) - } - } - return nil } @@ -231,6 +143,80 @@ func (s *Handler) RunConcurrent(firstService types.ServiceType, lastService type return nil } +// StartSession starts a new local trust establishment session. +func (s *Handler) StartSession(role types.SessionRole, passphrase string, gw *cloudClient.WebsocketGateway) error { + session, err := NewSession(role, passphrase, gw) + if err != nil { + return err + } + + s.sessionLock.Lock() + s.Session = session + s.sessionLock.Unlock() + + return nil +} + +// StopSession stops the current session started on this handler. +// If there isn't an active session it's a no-op. 
+func (s *Handler) StopSession(cause error) error { + s.sessionLock.Lock() + defer s.sessionLock.Unlock() + + if s.Session != nil { + err := s.Session.Stop(cause) + if err != nil { + return fmt.Errorf("Failed to stop session: %w", err) + } + } + + return nil +} + +// ActiveSession returns true if there is an active trust establishment session. +func (s *Handler) ActiveSession() bool { + // Try to open a transaction in the current session. + // If it succeeds there is an active session. + err := s.SessionTransaction(true, func(session *Session) error { + return nil + }) + return err == nil +} + +// SessionTransaction allows running f within the current handler's session. +// It allows running multiple operations on the handler's session struct without always +// checking if the session is still alive. +// Set readOnly to true if you don't modify the session. +// Set it to false if you intend to perform any modifications on the session. +func (s *Handler) SessionTransaction(readOnly bool, f func(session *Session) error) error { + if readOnly { + s.sessionLock.RLock() + defer s.sessionLock.RUnlock() + } else { + s.sessionLock.Lock() + defer s.sessionLock.Unlock() + } + + if s.Session == nil || s.Session != nil && s.Session.Passphrase() == "" { + return api.NewStatusError(http.StatusBadRequest, "No active session") + } + + return f(s.Session) +} + +// TemporaryTrustStore returns a copy of the trust establishment's session truststore. +func (s *Handler) TemporaryTrustStore() map[string]x509.Certificate { + var trustStore = make(map[string]x509.Certificate, 0) + + // Ignore the error from the session and return the empty trust store instead. + _ = s.SessionTransaction(true, func(session *Session) error { + trustStore = session.TemporaryTrustStore() + return nil + }) + + return trustStore +} + // Exists returns true if we can stat the unix socket in the state directory of the given service. 
func Exists(service types.ServiceType, stateDir string) bool { socketPath := filepath.Join(stateDir, "control.socket") diff --git a/service/session.go b/service/session.go new file mode 100644 index 000000000..361d24913 --- /dev/null +++ b/service/session.go @@ -0,0 +1,233 @@ +package service + +import ( + "crypto/rand" + "crypto/x509" + "encoding/json" + "errors" + "fmt" + "math/big" + "net" + "strings" + "sync" + + "github.com/canonical/lxd/shared" + "github.com/hashicorp/mdns" + + "github.com/canonical/microcloud/microcloud/api/types" + cloudClient "github.com/canonical/microcloud/microcloud/client" + cloudMDNS "github.com/canonical/microcloud/microcloud/mdns" +) + +// AllowedFailedJoinAttempts contains the number of allowed failed session join attempts. +const AllowedFailedJoinAttempts uint8 = 50 + +// Session represents a local trust establishment session. +type Session struct { + lock sync.RWMutex + passphrase string + server *mdns.Server + trustStore map[string]x509.Certificate + failedAttempts uint8 + gw *cloudClient.WebsocketGateway + role types.SessionRole + + joinIntentFingerprints []string + joinIntents chan types.SessionJoinPost + exit chan bool +} + +// generatePassphrase returns four random words chosen from wordlist. +// The words are separated by space. 
+func generatePassphrase() (string, error) { + splitWordlist := strings.Split(wordlist, "\n") + wordlistLength := int64(len(splitWordlist)) + + var randomWords = make([]string, 4) + for i := 0; i < 4; i++ { + randomNumber, err := rand.Int(rand.Reader, big.NewInt(wordlistLength)) + if err != nil { + return "", fmt.Errorf("Failed to get random number: %w", err) + } + + splitLine := strings.Split(splitWordlist[randomNumber.Int64()], "\t") + splitLineLength := len(splitLine) + if splitLineLength != 2 { + return "", fmt.Errorf("Invalid wordlist line: %q", splitWordlist[randomNumber.Int64()]) + } + + randomWords[i] = splitLine[1] + } + + return strings.Join(randomWords, " "), nil +} + +// NewSession returns a new local trust establishment session. +func NewSession(role types.SessionRole, passphrase string, gw *cloudClient.WebsocketGateway) (*Session, error) { + var err error + + if passphrase == "" { + passphrase, err = generatePassphrase() + if err != nil { + return nil, err + } + } + + return &Session{ + passphrase: passphrase, + trustStore: make(map[string]x509.Certificate), + gw: gw, + role: role, + + joinIntents: make(chan types.SessionJoinPost), + exit: make(chan bool), + }, nil +} + +// Passphrase returns the passphrase of the current trust establishment session. +func (s *Session) Passphrase() string { + s.lock.RLock() + defer s.lock.RUnlock() + + return s.passphrase +} + +// Role returns the role of the current trust establishment session. +func (s *Session) Role() types.SessionRole { + s.lock.RLock() + defer s.lock.RUnlock() + + return s.role +} + +// Broadcast starts a new mDNS listener in the current trust establishment session. 
+func (s *Session) Broadcast(name string, address string, ifaceName string) error { + info := cloudMDNS.ServerInfo{ + Version: cloudMDNS.Version, + Name: name, + Address: address, + } + + bytes, err := json.Marshal(info) + if err != nil { + return fmt.Errorf("Failed to marshal server info: %w", err) + } + + iface, err := net.InterfaceByName(ifaceName) + if err != nil { + return fmt.Errorf("Failed to get interface %q by name: %w", ifaceName, err) + } + + server, err := cloudMDNS.NewBroadcast(name, iface, address, int(CloudPort), cloudMDNS.ClusterService, bytes) + if err != nil { + return err + } + + s.lock.Lock() + s.server = server + s.lock.Unlock() + + return nil +} + +// Allow grants access via the temporary trust store to the given certificate. +func (s *Session) Allow(name string, cert x509.Certificate) { + s.lock.Lock() + defer s.lock.Unlock() + + s.trustStore[name] = cert +} + +// TemporaryTrustStore returns the temporary truststore of the current trust establishment session. +func (s *Session) TemporaryTrustStore() map[string]x509.Certificate { + s.lock.RLock() + defer s.lock.RUnlock() + + // Create a copy of the trust store. + trustStoreCopy := make(map[string]x509.Certificate) + for name, cert := range s.trustStore { + trustStoreCopy[name] = cert + } + + return trustStoreCopy +} + +// RegisterIntent registers the intention to join during the current trust establishment session +// for the given fingerprint. +func (s *Session) RegisterIntent(fingerprint string) error { + s.lock.Lock() + defer s.lock.Unlock() + + if shared.ValueInSlice(fingerprint, s.joinIntentFingerprints) { + return errors.New("Fingerprint already exists") + } + + s.joinIntentFingerprints = append(s.joinIntentFingerprints, fingerprint) + return nil +} + +// RegisterFailedAttempt registers a failed attempt trying to join the current trust establishment session. 
+func (s *Session) RegisterFailedAttempt() error { + s.lock.Lock() + defer s.lock.Unlock() + + if s.failedAttempts == AllowedFailedJoinAttempts { + return errors.New("Exceeded the number of failed session join attempts") + } + + s.failedAttempts++ + return nil +} + +// IntentCh returns a channel which allows publishing and consuming join intents. +func (s *Session) IntentCh() chan types.SessionJoinPost { + return s.joinIntents +} + +// ExitCh returns a channel which allows waiting on the current trust establishment session. +func (s *Session) ExitCh() chan bool { + return s.exit +} + +// Stop stops the current trust establishment session. +func (s *Session) Stop(cause error) error { + s.lock.Lock() + defer s.lock.Unlock() + + if s.server != nil { + err := s.server.Shutdown() + if err != nil { + return fmt.Errorf("Failed to shutdown mDNS server: %w", err) + } + } + + // If a cause is provided also write it onto the session's websocket + // to notify the client. + if cause != nil { + err := s.gw.WriteClose(cause) + if err != nil { + return fmt.Errorf("Failed to write session stop cause to websocket: %w", err) + } + } + + s.server = nil + s.passphrase = "" + s.trustStore = make(map[string]x509.Certificate, 0) + s.joinIntentFingerprints = []string{} + s.failedAttempts = 0 + + // For idempotency don't try to close the channels twice. + select { + case <-s.joinIntents: + default: + close(s.joinIntents) + } + + select { + case <-s.exit: + default: + close(s.exit) + } + + return nil +} diff --git a/service/system_information.go b/service/system_information.go index 7c57dee8c..50e2e14af 100644 --- a/service/system_information.go +++ b/service/system_information.go @@ -23,9 +23,6 @@ type SystemInformation struct { // ClusterAddress is the default cluster address used for MicroCloud. ClusterAddress string - // AuthSecret authenticates remote connections from the MicroCloud initiator. - AuthSecret string - // AvailableDisks is the list of disks available for use on the system. 
AvailableDisks map[string]api.ResourcesStorageDisk @@ -78,7 +75,6 @@ func (sh *Handler) CollectSystemInformation(ctx context.Context, connectInfo mdn ExistingServices: map[types.ServiceType]map[string]string{}, ClusterName: connectInfo.Name, ClusterAddress: connectInfo.Address, - AuthSecret: connectInfo.AuthSecret, AvailableDisks: map[string]api.ResourcesStorageDisk{}, AvailableUplinkInterfaces: map[string]api.Network{}, AvailableCephInterfaces: map[string]DedicatedInterface{}, @@ -94,9 +90,9 @@ func (sh *Handler) CollectSystemInformation(ctx context.Context, connectInfo mdn var allResources *api.Resources lxd := sh.Services[types.LXD].(*LXDService) if localSystem { - allResources, err = lxd.GetResources(ctx, s.ClusterName, "", "") + allResources, err = lxd.GetResources(ctx, s.ClusterName, "", nil) } else { - allResources, err = lxd.GetResources(ctx, s.ClusterName, s.ClusterAddress, s.AuthSecret) + allResources, err = lxd.GetResources(ctx, s.ClusterName, s.ClusterAddress, connectInfo.Certificate) } if err != nil { @@ -112,7 +108,7 @@ func (sh *Handler) CollectSystemInformation(ctx context.Context, connectInfo mdn } var allNets []api.Network - uplinkInterfaces, dedicatedInterfaces, allNets, err := lxd.GetNetworkInterfaces(ctx, s.ClusterName, s.ClusterAddress, s.AuthSecret) + uplinkInterfaces, dedicatedInterfaces, allNets, err := lxd.GetNetworkInterfaces(ctx, s.ClusterName, s.ClusterAddress, connectInfo.Certificate) if err != nil { return nil, fmt.Errorf("Failed to get network interfaces on %q: %w", s.ClusterName, err) } @@ -138,7 +134,7 @@ func (sh *Handler) CollectSystemInformation(ctx context.Context, connectInfo mdn } } - pools, err := lxd.GetStoragePools(ctx, s.ClusterName, s.ClusterAddress, s.AuthSecret) + pools, err := lxd.GetStoragePools(ctx, s.ClusterName, s.ClusterAddress, connectInfo.Certificate) if err != nil { return nil, fmt.Errorf("Failed to get storage pools on %q: %w", s.ClusterName, err) } @@ -165,9 +161,9 @@ func (sh *Handler) 
CollectSystemInformation(ctx context.Context, connectInfo mdn microceph := sh.Services[types.MicroCeph].(*CephService) if localSystem { - s.CephConfig, err = microceph.ClusterConfig(ctx, "", "") + s.CephConfig, err = microceph.ClusterConfig(ctx, "", nil) } else { - s.CephConfig, err = microceph.ClusterConfig(ctx, s.ClusterAddress, s.AuthSecret) + s.CephConfig, err = microceph.ClusterConfig(ctx, s.ClusterAddress, connectInfo.Certificate) } if err != nil && !api.StatusErrorCheck(err, http.StatusServiceUnavailable) { @@ -176,9 +172,9 @@ func (sh *Handler) CollectSystemInformation(ctx context.Context, connectInfo mdn } if localSystem { - s.LXDLocalConfig, s.LXDConfig, err = lxd.GetConfig(ctx, s.ServiceClustered(types.LXD), s.ClusterName, "", "") + s.LXDLocalConfig, s.LXDConfig, err = lxd.GetConfig(ctx, s.ServiceClustered(types.LXD), s.ClusterName, "", nil) } else { - s.LXDLocalConfig, s.LXDConfig, err = lxd.GetConfig(ctx, s.ServiceClustered(types.LXD), s.ClusterName, s.ClusterAddress, s.AuthSecret) + s.LXDLocalConfig, s.LXDConfig, err = lxd.GetConfig(ctx, s.ServiceClustered(types.LXD), s.ClusterName, s.ClusterAddress, connectInfo.Certificate) } if err != nil { @@ -200,7 +196,7 @@ func (sh *Handler) GetExistingClusters(ctx context.Context, connectInfo mdns.Ser if localSystem { existingCluster, err = sh.Services[service].ClusterMembers(ctx) } else { - existingCluster, err = sh.Services[service].RemoteClusterMembers(ctx, connectInfo.AuthSecret, connectInfo.Address) + existingCluster, err = sh.Services[service].RemoteClusterMembers(ctx, connectInfo.Certificate, connectInfo.Address) } if err != nil && !api.StatusErrorCheck(err, http.StatusServiceUnavailable) { diff --git a/service/wordlist.go b/service/wordlist.go new file mode 100644 index 000000000..f60ca0648 --- /dev/null +++ b/service/wordlist.go @@ -0,0 +1,1301 @@ +package service + +// Provided by the EFF under the Creative Commons Attribution License. 
+// License: http://creativecommons.org/licenses/by/3.0/us/ +// Source: https://www.eff.org/files/2016/09/08/eff_short_wordlist_2_0.txt +var wordlist = `1111 aardvark +1112 abandoned +1113 abbreviate +1114 abdomen +1115 abhorrence +1116 abiding +1121 abnormal +1122 abrasion +1123 absorbing +1124 abundant +1125 abyss +1126 academy +1131 accountant +1132 acetone +1133 achiness +1134 acid +1135 acoustics +1136 acquire +1141 acrobat +1142 actress +1143 acuteness +1144 aerosol +1145 aesthetic +1146 affidavit +1151 afloat +1152 afraid +1153 aftershave +1154 again +1155 agency +1156 aggressor +1161 aghast +1162 agitate +1163 agnostic +1164 agonizing +1165 agreeing +1166 aidless +1211 aimlessly +1212 ajar +1213 alarmclock +1214 albatross +1215 alchemy +1216 alfalfa +1221 algae +1222 aliens +1223 alkaline +1224 almanac +1225 alongside +1226 alphabet +1231 already +1232 also +1233 altitude +1234 aluminum +1235 always +1236 amazingly +1241 ambulance +1242 amendment +1243 amiable +1244 ammunition +1245 amnesty +1246 amoeba +1251 amplifier +1252 amuser +1253 anagram +1254 anchor +1255 android +1256 anesthesia +1261 angelfish +1262 animal +1263 anklet +1264 announcer +1265 anonymous +1266 answer +1311 antelope +1312 anxiety +1313 anyplace +1314 aorta +1315 apartment +1316 apnea +1321 apostrophe +1322 apple +1323 apricot +1324 aquamarine +1325 arachnid +1326 arbitrate +1331 ardently +1332 arena +1333 argument +1334 aristocrat +1335 armchair +1336 aromatic +1341 arrowhead +1342 arsonist +1343 artichoke +1344 asbestos +1345 ascend +1346 aseptic +1351 ashamed +1352 asinine +1353 asleep +1354 asocial +1355 asparagus +1356 astronaut +1361 asymmetric +1362 atlas +1363 atmosphere +1364 atom +1365 atrocious +1366 attic +1411 atypical +1412 auctioneer +1413 auditorium +1414 augmented +1415 auspicious +1416 automobile +1421 auxiliary +1422 avalanche +1423 avenue +1424 aviator +1425 avocado +1426 awareness +1431 awhile +1432 awkward +1433 awning +1434 awoke +1435 axially +1436 azalea +1441 
babbling +1442 backpack +1443 badass +1444 bagpipe +1445 bakery +1446 balancing +1451 bamboo +1452 banana +1453 barracuda +1454 basket +1455 bathrobe +1456 bazooka +1461 blade +1462 blender +1463 blimp +1464 blouse +1465 blurred +1466 boatyard +1511 bobcat +1512 body +1513 bogusness +1514 bohemian +1515 boiler +1516 bonnet +1521 boots +1522 borough +1523 bossiness +1524 bottle +1525 bouquet +1526 boxlike +1531 breath +1532 briefcase +1533 broom +1534 brushes +1535 bubblegum +1536 buckle +1541 buddhist +1542 buffalo +1543 bullfrog +1544 bunny +1545 busboy +1546 buzzard +1551 cabin +1552 cactus +1553 cadillac +1554 cafeteria +1555 cage +1556 cahoots +1561 cajoling +1562 cakewalk +1563 calculator +1564 camera +1565 canister +1566 capsule +1611 carrot +1612 cashew +1613 cathedral +1614 caucasian +1615 caviar +1616 ceasefire +1621 cedar +1622 celery +1623 cement +1624 census +1625 ceramics +1626 cesspool +1631 chalkboard +1632 cheesecake +1633 chimney +1634 chlorine +1635 chopsticks +1636 chrome +1641 chute +1642 cilantro +1643 cinnamon +1644 circle +1645 cityscape +1646 civilian +1651 clay +1652 clergyman +1653 clipboard +1654 clock +1655 clubhouse +1656 coathanger +1661 cobweb +1662 coconut +1663 codeword +1664 coexistent +1665 coffeecake +1666 cognitive +2111 cohabitate +2112 collarbone +2113 computer +2114 confetti +2115 copier +2116 cornea +2121 cosmetics +2122 cotton +2123 couch +2124 coverless +2125 coyote +2126 coziness +2131 crawfish +2132 crewmember +2133 crib +2134 croissant +2135 crumble +2136 crystal +2141 cubical +2142 cucumber +2143 cuddly +2144 cufflink +2145 cuisine +2146 culprit +2151 cup +2152 curry +2153 cushion +2154 cuticle +2155 cybernetic +2156 cyclist +2161 cylinder +2162 cymbal +2163 cynicism +2164 cypress +2165 cytoplasm +2166 dachshund +2211 daffodil +2212 dagger +2213 dairy +2214 dalmatian +2215 dandelion +2216 dartboard +2221 dastardly +2222 datebook +2223 daughter +2224 dawn +2225 daytime +2226 dazzler +2231 dealer +2232 debris +2233 decal 
+2234 dedicate +2235 deepness +2236 defrost +2241 degree +2242 dehydrator +2243 deliverer +2244 democrat +2245 dentist +2246 deodorant +2251 depot +2252 deranged +2253 desktop +2254 detergent +2255 device +2256 dexterity +2261 diamond +2262 dibs +2263 dictionary +2264 diffuser +2265 digit +2266 dilated +2311 dimple +2312 dinnerware +2313 dioxide +2314 diploma +2315 directory +2316 dishcloth +2321 ditto +2322 dividers +2323 dizziness +2324 doctor +2325 dodge +2326 doll +2331 dominoes +2332 donut +2333 doorstep +2334 dorsal +2335 double +2336 downstairs +2341 dozed +2342 drainpipe +2343 dresser +2344 driftwood +2345 droppings +2346 drum +2351 dryer +2352 dubiously +2353 duckling +2354 duffel +2355 dugout +2356 dumpster +2361 duplex +2362 durable +2363 dustpan +2364 dutiful +2365 duvet +2366 dwarfism +2411 dwelling +2412 dwindling +2413 dynamite +2414 dyslexia +2415 eagerness +2416 earlobe +2421 easel +2422 eavesdrop +2423 ebook +2424 eccentric +2425 echoless +2426 eclipse +2431 ecosystem +2432 ecstasy +2433 edged +2434 editor +2435 educator +2436 eelworm +2441 eerie +2442 effects +2443 eggnog +2444 egomaniac +2445 ejection +2446 elastic +2451 elbow +2452 elderly +2453 elephant +2454 elfishly +2455 eliminator +2456 elk +2461 elliptical +2462 elongated +2463 elsewhere +2464 elusive +2465 elves +2466 emancipate +2511 embroidery +2512 emcee +2513 emerald +2514 emission +2515 emoticon +2516 emperor +2521 emulate +2522 enactment +2523 enchilada +2524 endorphin +2525 energy +2526 enforcer +2531 engine +2532 enhance +2533 enigmatic +2534 enjoyably +2535 enlarged +2536 enormous +2541 enquirer +2542 enrollment +2543 ensemble +2544 entryway +2545 enunciate +2546 envoy +2551 enzyme +2552 epidemic +2553 equipment +2554 erasable +2555 ergonomic +2556 erratic +2561 eruption +2562 escalator +2563 eskimo +2564 esophagus +2565 espresso +2566 essay +2611 estrogen +2612 etching +2613 eternal +2614 ethics +2615 etiquette +2616 eucalyptus +2621 eulogy +2622 euphemism +2623 euthanize +2624 
evacuation +2625 evergreen +2626 evidence +2631 evolution +2632 exam +2633 excerpt +2634 exerciser +2635 exfoliate +2636 exhale +2641 exist +2642 exorcist +2643 explode +2644 exquisite +2645 exterior +2646 exuberant +2651 fabric +2652 factory +2653 faded +2654 failsafe +2655 falcon +2656 family +2661 fanfare +2662 fasten +2663 faucet +2664 favorite +2665 feasibly +2666 february +3111 federal +3112 feedback +3113 feigned +3114 feline +3115 femur +3116 fence +3121 ferret +3122 festival +3123 fettuccine +3124 feudalist +3125 feverish +3126 fiberglass +3131 fictitious +3132 fiddle +3133 figurine +3134 fillet +3135 finalist +3136 fiscally +3141 fixture +3142 flashlight +3143 fleshiness +3144 flight +3145 florist +3146 flypaper +3151 foamless +3152 focus +3153 foggy +3154 folksong +3155 fondue +3156 footpath +3161 fossil +3162 fountain +3163 fox +3164 fragment +3165 freeway +3166 fridge +3211 frosting +3212 fruit +3213 fryingpan +3214 gadget +3215 gainfully +3216 gallstone +3221 gamekeeper +3222 gangway +3223 garlic +3224 gaslight +3225 gathering +3226 gauntlet +3231 gearbox +3232 gecko +3233 gem +3234 generator +3235 geographer +3236 gerbil +3241 gesture +3242 getaway +3243 geyser +3244 ghoulishly +3245 gibberish +3246 giddiness +3251 giftshop +3252 gigabyte +3253 gimmick +3254 giraffe +3255 giveaway +3256 gizmo +3261 glasses +3262 gleeful +3263 glisten +3264 glove +3265 glucose +3266 glycerin +3311 gnarly +3312 gnomish +3313 goatskin +3314 goggles +3315 goldfish +3316 gong +3321 gooey +3322 gorgeous +3323 gosling +3324 gothic +3325 gourmet +3326 governor +3331 grape +3332 greyhound +3333 grill +3334 groundhog +3335 grumbling +3336 guacamole +3341 guerrilla +3342 guitar +3343 gullible +3344 gumdrop +3345 gurgling +3346 gusto +3351 gutless +3352 gymnast +3353 gynecology +3354 gyration +3355 habitat +3356 hacking +3361 haggard +3362 haiku +3363 halogen +3364 hamburger +3365 handgun +3366 happiness +3411 hardhat +3412 hastily +3413 hatchling +3414 haughty +3415 hazelnut 
+3416 headband +3421 hedgehog +3422 hefty +3423 heinously +3424 helmet +3425 hemoglobin +3426 henceforth +3431 herbs +3432 hesitation +3433 hexagon +3434 hubcap +3435 huddling +3436 huff +3441 hugeness +3442 hullabaloo +3443 human +3444 hunter +3445 hurricane +3446 hushing +3451 hyacinth +3452 hybrid +3453 hydrant +3454 hygienist +3455 hypnotist +3456 ibuprofen +3461 icepack +3462 icing +3463 iconic +3464 identical +3465 idiocy +3466 idly +3511 igloo +3512 ignition +3513 iguana +3514 illuminate +3515 imaging +3516 imbecile +3521 imitator +3522 immigrant +3523 imprint +3524 iodine +3525 ionosphere +3526 ipad +3531 iphone +3532 iridescent +3533 irksome +3534 iron +3535 irrigation +3536 island +3541 isotope +3542 issueless +3543 italicize +3544 itemizer +3545 itinerary +3546 itunes +3551 ivory +3552 jabbering +3553 jackrabbit +3554 jaguar +3555 jailhouse +3556 jalapeno +3561 jamboree +3562 janitor +3563 jarring +3564 jasmine +3565 jaundice +3566 jawbreaker +3611 jaywalker +3612 jazz +3613 jealous +3614 jeep +3615 jelly +3616 jeopardize +3621 jersey +3622 jetski +3623 jezebel +3624 jiffy +3625 jigsaw +3626 jingling +3631 jobholder +3632 jockstrap +3633 jogging +3634 john +3635 joinable +3636 jokingly +3641 journal +3642 jovial +3643 joystick +3644 jubilant +3645 judiciary +3646 juggle +3651 juice +3652 jujitsu +3653 jukebox +3654 jumpiness +3655 junkyard +3656 juror +3661 justifying +3662 juvenile +3663 kabob +3664 kamikaze +3665 kangaroo +3666 karate +4111 kayak +4112 keepsake +4113 kennel +4114 kerosene +4115 ketchup +4116 khaki +4121 kickstand +4122 kilogram +4123 kimono +4124 kingdom +4125 kiosk +4126 kissing +4131 kite +4132 kleenex +4133 knapsack +4134 kneecap +4135 knickers +4136 koala +4141 krypton +4142 laboratory +4143 ladder +4144 lakefront +4145 lantern +4146 laptop +4151 laryngitis +4152 lasagna +4153 latch +4154 laundry +4155 lavender +4156 laxative +4161 lazybones +4162 lecturer +4163 leftover +4164 leggings +4165 leisure +4166 lemon +4211 length +4212 
leopard +4213 leprechaun +4214 lettuce +4215 leukemia +4216 levers +4221 lewdness +4222 liability +4223 library +4224 licorice +4225 lifeboat +4226 lightbulb +4231 likewise +4232 lilac +4233 limousine +4234 lint +4235 lioness +4236 lipstick +4241 liquid +4242 listless +4243 litter +4244 liverwurst +4245 lizard +4246 llama +4251 luau +4252 lubricant +4253 lucidity +4254 ludicrous +4255 luggage +4256 lukewarm +4261 lullaby +4262 lumberjack +4263 lunchbox +4264 luridness +4265 luscious +4266 luxurious +4311 lyrics +4312 macaroni +4313 maestro +4314 magazine +4315 mahogany +4316 maimed +4321 majority +4322 makeover +4323 malformed +4324 mammal +4325 mango +4326 mapmaker +4331 marbles +4332 massager +4333 matchstick +4334 maverick +4335 maximum +4336 mayonnaise +4341 moaning +4342 mobilize +4343 moccasin +4344 modify +4345 moisture +4346 molecule +4351 momentum +4352 monastery +4353 moonshine +4354 mortuary +4355 mosquito +4356 motorcycle +4361 mousetrap +4362 movie +4363 mower +4364 mozzarella +4365 muckiness +4366 mudflow +4411 mugshot +4412 mule +4413 mummy +4414 mundane +4415 muppet +4416 mural +4421 mustard +4422 mutation +4423 myriad +4424 myspace +4425 myth +4426 nail +4431 namesake +4432 nanosecond +4433 napkin +4434 narrator +4435 nastiness +4436 natives +4441 nautically +4442 navigate +4443 nearest +4444 nebula +4445 nectar +4446 nefarious +4451 negotiator +4452 neither +4453 nemesis +4454 neoliberal +4455 nephew +4456 nervously +4461 nest +4462 netting +4463 neuron +4464 nevermore +4465 nextdoor +4466 nicotine +4511 niece +4512 nimbleness +4513 nintendo +4514 nirvana +4515 nuclear +4516 nugget +4521 nuisance +4522 nullify +4523 numbing +4524 nuptials +4525 nursery +4526 nutcracker +4531 nylon +4532 oasis +4533 oat +4534 obediently +4535 obituary +4536 object +4541 obliterate +4542 obnoxious +4543 observer +4544 obtain +4545 obvious +4546 occupation +4551 oceanic +4552 octopus +4553 ocular +4554 office +4555 oftentimes +4556 oiliness +4561 ointment +4562 older 
+4563 olympics +4564 omissible +4565 omnivorous +4566 oncoming +4611 onion +4612 onlooker +4613 onstage +4614 onward +4615 onyx +4616 oomph +4621 opaquely +4622 opera +4623 opium +4624 opossum +4625 opponent +4626 optical +4631 opulently +4632 oscillator +4633 osmosis +4634 ostrich +4635 otherwise +4636 ought +4641 outhouse +4642 ovation +4643 oven +4644 owlish +4645 oxford +4646 oxidize +4651 oxygen +4652 oyster +4653 ozone +4654 pacemaker +4655 padlock +4656 pageant +4661 pajamas +4662 palm +4663 pamphlet +4664 pantyhose +4665 paprika +4666 parakeet +5111 passport +5112 patio +5113 pauper +5114 pavement +5115 payphone +5116 pebble +5121 peculiarly +5122 pedometer +5123 pegboard +5124 pelican +5125 penguin +5126 peony +5131 pepperoni +5132 peroxide +5133 pesticide +5134 petroleum +5135 pewter +5136 pharmacy +5141 pheasant +5142 phonebook +5143 phrasing +5144 physician +5145 plank +5146 pledge +5151 plotted +5152 plug +5153 plywood +5154 pneumonia +5155 podiatrist +5156 poetic +5161 pogo +5162 poison +5163 poking +5164 policeman +5165 poncho +5166 popcorn +5211 porcupine +5212 postcard +5213 poultry +5214 powerboat +5215 prairie +5216 pretzel +5221 princess +5222 propeller +5223 prune +5224 pry +5225 pseudo +5226 psychopath +5231 publisher +5232 pucker +5233 pueblo +5234 pulley +5235 pumpkin +5236 punchbowl +5241 puppy +5242 purse +5243 pushup +5244 putt +5245 puzzle +5246 pyramid +5251 python +5252 quarters +5253 quesadilla +5254 quilt +5255 quote +5256 racoon +5261 radish +5262 ragweed +5263 railroad +5264 rampantly +5265 rancidity +5266 rarity +5311 raspberry +5312 ravishing +5313 rearrange +5314 rebuilt +5315 receipt +5316 reentry +5321 refinery +5322 register +5323 rehydrate +5324 reimburse +5325 rejoicing +5326 rekindle +5331 relic +5332 remote +5333 renovator +5334 reopen +5335 reporter +5336 request +5341 rerun +5342 reservoir +5343 retriever +5344 reunion +5345 revolver +5346 rewrite +5351 rhapsody +5352 rhetoric +5353 rhino +5354 rhubarb +5355 rhyme +5356 
ribbon +5361 riches +5362 ridden +5363 rigidness +5364 rimmed +5365 riptide +5366 riskily +5411 ritzy +5412 riverboat +5413 roamer +5414 robe +5415 rocket +5416 romancer +5421 ropelike +5422 rotisserie +5423 roundtable +5424 royal +5425 rubber +5426 rudderless +5431 rugby +5432 ruined +5433 rulebook +5434 rummage +5435 running +5436 rupture +5441 rustproof +5442 sabotage +5443 sacrifice +5444 saddlebag +5445 saffron +5446 sainthood +5451 saltshaker +5452 samurai +5453 sandworm +5454 sapphire +5455 sardine +5456 sassy +5461 satchel +5462 sauna +5463 savage +5464 saxophone +5465 scarf +5466 scenario +5511 schoolbook +5512 scientist +5513 scooter +5514 scrapbook +5515 sculpture +5516 scythe +5521 secretary +5522 sedative +5523 segregator +5524 seismology +5525 selected +5526 semicolon +5531 senator +5532 septum +5533 sequence +5534 serpent +5535 sesame +5536 settler +5541 severely +5542 shack +5543 shelf +5544 shirt +5545 shovel +5546 shrimp +5551 shuttle +5552 shyness +5553 siamese +5554 sibling +5555 siesta +5556 silicon +5561 simmering +5562 singles +5563 sisterhood +5564 sitcom +5565 sixfold +5566 sizable +5611 skateboard +5612 skeleton +5613 skies +5614 skulk +5615 skylight +5616 slapping +5621 sled +5622 slingshot +5623 sloth +5624 slumbering +5625 smartphone +5626 smelliness +5631 smitten +5632 smokestack +5633 smudge +5634 snapshot +5635 sneezing +5636 sniff +5641 snowsuit +5642 snugness +5643 speakers +5644 sphinx +5645 spider +5646 splashing +5651 sponge +5652 sprout +5653 spur +5654 spyglass +5655 squirrel +5656 statue +5661 steamboat +5662 stingray +5663 stopwatch +5664 strawberry +5665 student +5666 stylus +6111 suave +6112 subway +6113 suction +6114 suds +6115 suffocate +6116 sugar +6121 suitcase +6122 sulphur +6123 superstore +6124 surfer +6125 sushi +6126 swan +6131 sweatshirt +6132 swimwear +6133 sword +6134 sycamore +6135 syllable +6136 symphony +6141 synagogue +6142 syringes +6143 systemize +6144 tablespoon +6145 taco +6146 tadpole +6151 taekwondo 
+6152 tagalong +6153 takeout +6154 tallness +6155 tamale +6156 tanned +6161 tapestry +6162 tarantula +6163 tastebud +6164 tattoo +6165 tavern +6166 thaw +6211 theater +6212 thimble +6213 thorn +6214 throat +6215 thumb +6216 thwarting +6221 tiara +6222 tidbit +6223 tiebreaker +6224 tiger +6225 timid +6226 tinsel +6231 tiptoeing +6232 tirade +6233 tissue +6234 tractor +6235 tree +6236 tripod +6241 trousers +6242 trucks +6243 tryout +6244 tubeless +6245 tuesday +6246 tugboat +6251 tulip +6252 tumbleweed +6253 tupperware +6254 turtle +6255 tusk +6256 tutorial +6261 tuxedo +6262 tweezers +6263 twins +6264 tyrannical +6265 ultrasound +6266 umbrella +6311 umpire +6312 unarmored +6313 unbuttoned +6314 uncle +6315 underwear +6316 unevenness +6321 unflavored +6322 ungloved +6323 unhinge +6324 unicycle +6325 unjustly +6326 unknown +6331 unlocking +6332 unmarked +6333 unnoticed +6334 unopened +6335 unpaved +6336 unquenched +6341 unroll +6342 unscrewing +6343 untied +6344 unusual +6345 unveiled +6346 unwrinkled +6351 unyielding +6352 unzip +6353 upbeat +6354 upcountry +6355 update +6356 upfront +6361 upgrade +6362 upholstery +6363 upkeep +6364 upload +6365 uppercut +6366 upright +6411 upstairs +6412 uptown +6413 upwind +6414 uranium +6415 urban +6416 urchin +6421 urethane +6422 urgent +6423 urologist +6424 username +6425 usher +6426 utensil +6431 utility +6432 utmost +6433 utopia +6434 utterance +6435 vacuum +6436 vagrancy +6441 valuables +6442 vanquished +6443 vaporizer +6444 varied +6445 vaseline +6446 vegetable +6451 vehicle +6452 velcro +6453 vendor +6454 vertebrae +6455 vestibule +6456 veteran +6461 vexingly +6462 vicinity +6463 videogame +6464 viewfinder +6465 vigilante +6466 village +6511 vinegar +6512 violin +6513 viperfish +6514 virus +6515 visor +6516 vitamins +6521 vivacious +6522 vixen +6523 vocalist +6524 vogue +6525 voicemail +6526 volleyball +6531 voucher +6532 voyage +6533 vulnerable +6534 waffle +6535 wagon +6536 wakeup +6541 walrus +6542 wanderer +6543 wasp 
+6544 water +6545 waving +6546 wheat +6551 whisper +6552 wholesaler +6553 wick +6554 widow +6555 wielder +6556 wifeless +6561 wikipedia +6562 wildcat +6563 windmill +6564 wipeout +6565 wired +6566 wishbone +6611 wizardry +6612 wobbliness +6613 wolverine +6614 womb +6615 woolworker +6616 workbasket +6621 wound +6622 wrangle +6623 wreckage +6624 wristwatch +6625 wrongdoing +6626 xerox +6631 xylophone +6632 yacht +6633 yahoo +6634 yard +6635 yearbook +6636 yesterday +6641 yiddish +6642 yield +6643 yo-yo +6644 yodel +6645 yogurt +6646 yuppie +6651 zealot +6652 zebra +6653 zeppelin +6654 zestfully +6655 zigzagged +6656 zillion +6661 zipping +6662 zirconium +6663 zodiac +6664 zombie +6665 zookeeper +6666 zucchini` diff --git a/test/includes/microcloud.sh b/test/includes/microcloud.sh index ce4daa047..db28805a0 100644 --- a/test/includes/microcloud.sh +++ b/test/includes/microcloud.sh @@ -2,14 +2,14 @@ # unset_interactive_vars: Unsets all variables related to the test console. unset_interactive_vars() { - unset SKIP_LOOKUP LOOKUP_IFACE LIMIT_SUBNET SKIP_SERVICE EXPECT_PEERS PEERS_FILTER REUSE_EXISTING REUSE_EXISTING_COUNT \ + unset SKIP_LOOKUP LOOKUP_IFACE SKIP_SERVICE EXPECT_PEERS PEERS_FILTER REUSE_EXISTING REUSE_EXISTING_COUNT \ SETUP_ZFS ZFS_FILTER ZFS_WIPE \ SETUP_CEPH CEPH_MISSING_DISKS CEPH_FILTER CEPH_WIPE CEPH_ENCRYPT SETUP_CEPHFS CEPH_CLUSTER_NETWORK \ PROCEED_WITH_NO_OVERLAY_NETWORKING SETUP_OVN OVN_UNDERLAY_NETWORK OVN_UNDERLAY_FILTER OVN_WARNING OVN_FILTER IPV4_SUBNET IPV4_START IPV4_END DNS_ADDRESSES IPV6_SUBNET \ REPLACE_PROFILE CEPH_RETRY_HA MULTI_NODE } -# microcloud_interactive: outputs text that can be passed to `TEST_CONSOLE=1 microcloud init` +# microcloud_interactive: generates text that is being passed to `TEST_CONSOLE=1 microcloud *` # to simulate terminal input to the interactive CLI. # The lines that are output are based on the values passed to the listed environment variables. # Any unset variables will be omitted. 
@@ -24,7 +24,6 @@ microcloud_interactive() { MULTI_NODE=${MULTI_NODE:-} # (yes/no) whether to set up multiple nodes SKIP_LOOKUP=${SKIP_LOOKUP:-} # whether or not to skip the whole lookup block in the interactive command list. LOOKUP_IFACE=${LOOKUP_IFACE:-} # filter string for the lookup interface table. - LIMIT_SUBNET=${LIMIT_SUBNET:-} # (yes/no) input for limiting lookup of systems to the above subnet. SKIP_SERVICE=${SKIP_SERVICE:-} # (yes/no) input to skip any missing services. Should be unset if all services are installed. EXPECT_PEERS=${EXPECT_PEERS:-} # wait for this number of systems to be available to join the cluster. PEERS_FILTER=${PEERS_FILTER:-} # filter string for the particular peer to init/add @@ -67,7 +66,6 @@ $(true) if ! [ "${SKIP_LOOKUP}" = 1 ]; then setup="${setup} -${LIMIT_SUBNET} # limit lookup subnet (yes/no) $([ "${SKIP_SERVICE}" = "yes" ] && printf "%s" "${SKIP_SERVICE}") # skip MicroOVN/MicroCeph (yes/no) expect ${EXPECT_PEERS} # wait until the systems show up ${PEERS_FILTER} # filter discovered peers @@ -161,10 +159,83 @@ $(true) # workaround for set -e fi # clear comments and empty lines. - echo "${setup}" | sed '/^\s*#/d; s/\s*#.*//; /^$/d' | tee /dev/stderr + setup="$(echo "${setup}" | sed '/^\s*#/d; s/\s*#.*//; /^$/d' | tee /dev/stderr)" if [ ${enable_xtrace} = 1 ]; then set -x fi + + # append the session timeout if applicable. + args="" + if [ "${1}" = "init" ] || [ "${1}" = "add" ]; then + args="--session-timeout=60" + fi + + echo "${setup}" | lxc exec "${2}" -- sh -c "tee in | microcloud ${1} ${args} 2>&1 | tee out" +} + +# capture_and_join: extracts the passphrase from stdin and outputs text that is being passed to `TEST_CONSOLE=1 microcloud join` +# to simulate terminal input to the interactive CLI. +# Set the first argument to either true or false if you want to skip missing services. +# All the remaining arguments are systems you want to join. 
+capture_and_join() {
+  enable_xtrace=0
+
+  if set -o | grep -q "xtrace.*on" ; then
+    enable_xtrace=1
+    set +x
+  fi
+
+  next_line=0
+  passphrase=""
+  while IFS= read -r line; do
+    # Skip the empty placeholder line.
+    # Indicate that the next line will be the one.
+    if [ "$next_line" = 1 ]; then
+      next_line=2
+      continue
+    # Passphrase found.
+    elif [ "$next_line" = 2 ]; then
+      # Trim the leading whitespace.
+      passphrase="${line## }"
+      break
+    fi
+
+    # The line after the next one contains the passphrase.
+    if [ "$line" = "When requested enter the passphrase:" ]; then
+      next_line=1
+    fi
+  done
+
+  LOOKUP_IFACE=${LOOKUP_IFACE:-} # filter string for the lookup interface table.
+
+  # Select the first usable address and enter the passphrase.
+  setup="${LOOKUP_IFACE} # filter the lookup interface
+$([ -n "${LOOKUP_IFACE}" ] && printf "select") # select the interface
+$([ -n "${LOOKUP_IFACE}" ] && printf -- "---")
+${passphrase} # the captured passphrase
+$(true) # workaround for set -e
+"
+
+  # clear comments and empty lines.
+  setup="$(echo "${setup}" | sed '/^\s*#/d; s/\s*#.*//; /^$/d' | tee /dev/stderr)"
+  if [ ${enable_xtrace} = 1 ]; then
+    set -x
+  fi
+
+  for member in "$@"; do
+    lxc exec "${member}" -- sh -c "tee in | microcloud join 2>&1 | tee out" <<< "${setup}" &
+  done
+
+  # wait for the initiator by draining the remaining stdin.
+  cat
+
+  # kill the child processes if they are still running.
+  child_processes="$(jobs -pr)"
+  if [ -n "${child_processes}" ]; then
+    for p in ${child_processes}; do
+      kill -9 "${p}"
+    done
+  fi
+}

 # set_debug_binaries: Adds {app}.debug binaries if the corresponding {APP}_DEBUG_PATH environment variable is set.
diff --git a/test/suites/add.sh b/test/suites/add.sh index 6524e8cf8..d92754f2b 100644 --- a/test/suites/add.sh +++ b/test/suites/add.sh @@ -21,7 +21,6 @@ test_add_interactive() { unset_interactive_vars export MULTI_NODE="yes" export LOOKUP_IFACE="enp5s0" - export LIMIT_SUBNET="yes" export EXPECT_PEERS=2 export SETUP_ZFS="yes" export ZFS_FILTER="lxd_disk1" @@ -39,8 +38,10 @@ test_add_interactive() { export IPV6_SUBNET="fd42:1:1234:1234::1/64" export DNS_ADDRESSES="10.1.123.1,fd42:1:1234:1234::1" export OVN_UNDERLAY_NETWORK="no" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro03 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q # Re-enable the nodes. 
# shellcheck disable=SC2043 @@ -50,7 +51,6 @@ test_add_interactive() { done unset_interactive_vars - export LIMIT_SUBNET="yes" export EXPECT_PEERS=1 export SETUP_ZFS="yes" export ZFS_FILTER="lxd_disk1" @@ -61,8 +61,10 @@ test_add_interactive() { export SETUP_OVN="yes" export OVN_FILTER="enp6s0" export OVN_UNDERLAY_NETWORK="no" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud add > out" + microcloud_interactive add micro01 | + LOOKUP_IFACE="enp5s0" capture_and_join micro04 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro04 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q for m in micro01 micro02 micro03 micro04 ; do validate_system_lxd "${m}" 4 disk1 1 1 enp6s0 10.1.123.1/24 10.1.123.100-10.1.123.254 fd42:1:1234:1234::1/64 10.1.123.1,fd42:1:1234:1234::1 @@ -70,13 +72,11 @@ test_add_interactive() { validate_system_microovn "${m}" done - reset_systems 4 2 1 echo "Test growing a MicroCloud with missing services" unset_interactive_vars export MULTI_NODE="yes" export LOOKUP_IFACE="enp5s0" - export LIMIT_SUBNET="yes" export SKIP_SERVICE="yes" export EXPECT_PEERS=2 export SETUP_ZFS="no" @@ -91,20 +91,24 @@ test_add_interactive() { lxc exec micro04 -- snap disable microcloud - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro03 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro04 -- snap enable microcloud lxc exec micro04 -- snap start microcloud unset_interactive_vars - export LIMIT_SUBNET="yes" export SKIP_SERVICE=yes export EXPECT_PEERS=1 export SETUP_ZFS="yes" export ZFS_FILTER="lxd_disk1" export 
ZFS_WIPE="yes" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud add > out" + microcloud_interactive add micro01 | + LOOKUP_IFACE="enp5s0" capture_and_join micro04 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro04 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q for m in micro01 micro02 micro03 micro04 ; do validate_system_lxd "${m}" 4 disk1 @@ -115,14 +119,13 @@ test_add_interactive() { unset_interactive_vars export MULTI_NODE="yes" export LOOKUP_IFACE="enp5s0" - export LIMIT_SUBNET="yes" export EXPECT_PEERS=2 export SETUP_ZFS="no" export SETUP_CEPH="no" export SETUP_OVN="no" lxc exec micro04 -- snap disable microcloud - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q for m in micro01 micro02 micro03; do @@ -132,7 +135,6 @@ test_add_interactive() { done unset_interactive_vars - export LIMIT_SUBNET="yes" export EXPECT_PEERS=1 export SETUP_ZFS="yes" export ZFS_FILTER="lxd_disk1" @@ -153,9 +155,10 @@ test_add_interactive() { lxc exec micro04 -- snap enable microcloud lxc exec micro04 -- snap start microcloud - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud add > out" - + microcloud_interactive add micro01 | + LOOKUP_IFACE="enp5s0" capture_and_join micro04 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro04 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q default_cluster_subnet="$(lxc exec micro01 -- ip -4 -br a show enp5s0 | awk '{print $3}')" for m in micro01 micro02 micro03 micro04 ; do diff --git a/test/suites/basic.sh b/test/suites/basic.sh index 204e0bf9e..52c77b7f8 100644 --- a/test/suites/basic.sh +++ b/test/suites/basic.sh @@ -8,15 +8,16 @@ test_interactive() { echo "Creating a MicroCloud with all 
services but no devices" export MULTI_NODE="yes" export LOOKUP_IFACE="enp5s0" - export LIMIT_SUBNET="yes" export EXPECT_PEERS=2 export SETUP_ZFS="no" export SETUP_CEPH="no" export SETUP_OVN="no" export CEPH_CLUSTER_NETWORK="${microcloud_internal_net_addr}" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro03 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q for m in micro01 micro02 micro03 ; do validate_system_lxd "${m}" 3 validate_system_microceph "${m}" @@ -41,55 +42,74 @@ test_interactive() { export ZFS_WIPE="yes" export CEPH_CLUSTER_NETWORK="${microcloud_internal_net_addr}" unset SETUP_CEPH SETUP_OVN - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro03 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q for m in micro01 micro02 micro03 ; do validate_system_lxd "${m}" 3 disk1 done # Reset the systems with just LXD and no IPv6 support. + # First replace the bridge driver to use openvswitch instead of native. + # This is a workaround to allow the multicast traffic flowing through seamlessly. 
+ lxc network set lxdbr0 bridge.driver=openvswitch reset_systems 3 3 1 for m in micro01 micro02 micro03 ; do - lxc exec "${m}" -- echo 1 > /proc/sys/net/ipv6/conf/all/disable_ipv6 + lxc exec "${m}" -- sh -c "echo 1 > /proc/sys/net/ipv6/conf/all/disable_ipv6" lxc exec "${m}" -- snap disable microceph || true lxc exec "${m}" -- snap disable microovn || true lxc exec "${m}" -- snap restart microcloud done + # Unset the lookup interface because we don't have multiple addresses to select from anymore. + unset LOOKUP_IFACE + echo "Creating a MicroCloud with ZFS storage and no IPv6 support" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro03 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q for m in micro01 micro02 micro03 ; do validate_system_lxd "${m}" 3 disk1 done - # Reset the systems with just LXD and no IPv4 support. + # Reset the bridge driver back to native. + lxc network unset lxdbr0 bridge.driver + + # Reset the systems with no IPv4 support. gw_net_addr=$(lxc network get lxdbr0 ipv4.address) lxc network set lxdbr0 ipv4.address none reset_systems 3 3 1 for m in micro01 micro02 micro03 ; do - lxc exec "${m}" -- snap disable microceph || true - lxc exec "${m}" -- snap disable microovn || true lxc exec "${m}" -- snap restart microcloud done - # Unset the lookup interface because we don't have multiple addresses to select from anymore. - unset LOOKUP_IFACE - export PROCEED_WITH_NO_OVERLAY_NETWORKING="no" # This will avoid to setup the cluster if no overlay networking is available. 
+ # As there is no remote networking, deploy a single node local MicroCloud + export MULTI_NODE="no" + export SKIP_LOOKUP=1 + + # This will avoid to setup the cluster if no IPv4 overlay networking is available. + export SETUP_CEPH="no" + export SETUP_OVN="no" + export PROCEED_WITH_NO_OVERLAY_NETWORKING="no" + echo "Creating a MicroCloud with ZFS storage and no IPv4 support" - ! microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init 2> err" || false + microcloud_interactive init micro01 # Ensure we error out due to a lack of usable overlay networking. - lxc exec micro01 -- cat err | grep "Cluster bootstrapping aborted due to lack of usable networking" -q + lxc exec micro01 -- cat out | grep "Cluster bootstrapping aborted due to lack of usable networking" -q # Set the IPv4 address back to the original value. lxc network set lxdbr0 ipv4.address "${gw_net_addr}" - unset PROCEED_WITH_NO_OVERLAY_NETWORKING - export LOOKUP_IFACE=enp5s0 + unset PROCEED_WITH_NO_OVERLAY_NETWORKING SKIP_LOOKUP SETUP_CEPH SETUP_OVN + + export MULTI_NODE="yes" + export LOOKUP_IFACE="enp5s0" # Reset the systems and install microceph. 
reset_systems 3 3 1 @@ -108,7 +128,7 @@ test_interactive() { export CEPH_WIPE="yes" export CEPH_CLUSTER_NETWORK="${microcloud_internal_net_addr}" export CEPH_ENCRYPT="no" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q for m in micro01 micro02 micro03 ; do @@ -138,7 +158,7 @@ test_interactive() { export DNS_ADDRESSES="10.1.123.1,8.8.8.8" export CEPH_CLUSTER_NETWORK="${microcloud_internal_net_addr}" export OVN_UNDERLAY_NETWORK="no" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q for m in micro01 micro02 micro03 ; do @@ -159,7 +179,7 @@ test_interactive() { export CEPH_WIPE="yes" export CEPH_CLUSTER_NETWORK="${microcloud_internal_net_addr}" export CEPH_ENCRYPT="no" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q for m in micro01 micro02 micro03 ; do @@ -173,7 +193,7 @@ test_interactive() { echo "Creating a MicroCloud with ZFS and Ceph storage, and OVN network with Ceph encryption" export CEPH_ENCRYPT="yes" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q for m in micro01 micro02 micro03 ; do @@ -209,7 +229,7 @@ test_interactive() { export IPV4_START="10.1.123.100" export IPV4_END="10.1.123.254" export OVN_UNDERLAY_NETWORK="no" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- tail -1 out | grep 
"MicroCloud is ready" -q for m in micro01 micro02 micro03 ; do @@ -240,7 +260,7 @@ test_interactive() { export CEPH_CLUSTER_NETWORK="${ceph_cluster_subnet_prefix}.0/24" export OVN_UNDERLAY_NETWORK="yes" export OVN_UNDERLAY_FILTER="${ovn_underlay_subnet_prefix}" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q for m in micro01 micro02 micro03 ; do @@ -269,7 +289,6 @@ test_interactive() { microcloud_internal_net_addr="$(ip_config_to_netaddr lxdbr0)" export MULTI_NODE="yes" export LOOKUP_IFACE="enp5s0" - export LIMIT_SUBNET="yes" export EXPECT_PEERS=1 export SKIP_SERVICE="yes" export SETUP_ZFS="yes" @@ -279,7 +298,7 @@ test_interactive() { export SETUP_OVN="no" # Run a 2 nodes MicroCloud without MicroOVN first. - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q for m in micro01 micro02 ; do @@ -291,8 +310,7 @@ test_interactive() { lxc exec micro03 -- snap start microcloud unset_interactive_vars - export LIMIT_SUBNET="yes" - export EXPECT_PEERS=2 + export EXPECT_PEERS=1 export PEERS_FILTER="micro03" export REUSE_EXISTING_COUNT=1 export REUSE_EXISTING="add" @@ -310,7 +328,8 @@ test_interactive() { export DNS_ADDRESSES="10.1.123.1,8.8.8.8" export IPV6_SUBNET="fd42:1:1234:1234::1/64" export REPLACE_PROFILE="yes" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud add > out" + microcloud_interactive add micro01 | + LOOKUP_IFACE="enp5s0" capture_and_join micro03 for m in micro01 micro02 micro03 ; do validate_system_lxd "${m}" 3 disk1 0 0 "${OVN_FILTER}" "${IPV4_SUBNET}" "${IPV4_START}"-"${IPV4_END}" "${IPV6_SUBNET}" @@ -326,9 +345,10 @@ test_instances_config() { # Setup a MicroCloud with 3 systems, ZFS storage, and a FAN network. 
addr=$(lxc ls micro01 -f csv -c4 | grep enp5s0 | cut -d' ' -f1) - lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud init --preseed --lookup-timeout 10 << EOF + preseed="$(cat << EOF lookup_subnet: ${addr}/24 -lookup_interface: enp5s0 +initiator: micro01 +session_passphrase: foo systems: - name: micro01 storage: @@ -346,6 +366,15 @@ systems: path: /dev/disk/by-id/scsi-0QEMU_QEMU_HARDDISK_lxd_disk1 wipe: true EOF + )" + + lxc exec micro02 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro03 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro01 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" + + lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro03 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q # Init a container and VM with ZFS storage & FAN network. lxc exec micro01 -- lxc init --empty v1 --vm @@ -361,9 +390,10 @@ EOF # Create a MicroCloud with ceph and ovn setup. 
addr=$(lxc ls micro01 -f csv -c4 | grep enp5s0 | cut -d' ' -f1) - lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud init --preseed --lookup-timeout 10 << EOF + preseed="$(cat << EOF lookup_subnet: ${addr}/24 -lookup_interface: enp5s0 +initiator: micro01 +session_passphrase: foo systems: - name: micro01 storage: @@ -393,6 +423,15 @@ ovn: ceph: cephfs: true EOF + )" + + lxc exec micro02 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro03 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro01 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" + + lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro03 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q # Delete any instances left behind. lxc exec micro01 -- sh -c " @@ -417,9 +456,10 @@ test_instances_launch() { # Setup a MicroCloud with 3 systems, ZFS storage, and a FAN network. 
addr=$(lxc ls micro01 -f csv -c4 | grep enp5s0 | cut -d' ' -f1) - lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud init --preseed --lookup-timeout 10 << EOF + preseed="$(cat << EOF lookup_subnet: ${addr}/24 -lookup_interface: enp5s0 +initiator: micro01 +session_passphrase: foo systems: - name: micro01 ovn_uplink_interface: enp6s0 @@ -440,6 +480,15 @@ systems: path: /dev/disk/by-id/scsi-0QEMU_QEMU_HARDDISK_lxd_disk1 wipe: true EOF + )" + + lxc exec micro02 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro03 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro01 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" + + lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro03 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q # Delete any instances left behind. lxc exec micro01 -- sh -c " @@ -496,9 +545,10 @@ EOF # Create a MicroCloud with ceph and ovn setup. 
addr=$(lxc ls micro01 -f csv -c4 | grep enp5s0 | cut -d' ' -f1) - lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud init --preseed --lookup-timeout 10 << EOF + preseed="$(cat << EOF lookup_subnet: ${addr}/24 -lookup_interface: enp5s0 +initiator: micro01 +session_passphrase: foo systems: - name: micro01 storage: @@ -528,6 +578,15 @@ ovn: ceph: cephfs: true EOF + )" + + lxc exec micro02 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro03 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro01 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" + + lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro03 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q # Delete any instances left behind. lxc exec micro01 -- sh -c " @@ -630,9 +689,10 @@ EOF lxc exec "micro0$((n-1))" -- ip addr add "${dedicated_ip}" dev "${ceph_dedicated_subnet_iface}" done - lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud init --preseed --lookup-timeout 10 < out' <<< "$preseed" & + lxc exec micro03 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro01 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" + + lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro03 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q # Add cloud-init entry for checking ready state on launched instances. 
lxc exec micro01 -- lxc profile edit default << EOF @@ -767,7 +836,6 @@ _test_case() { export MULTI_NODE="yes" export LOOKUP_IFACE="enp5s0" # filter string for the lookup interface table. - export LIMIT_SUBNET="yes" # (yes/no) input for limiting lookup of systems to the above subnet. export CEPH_CLUSTER_NETWORK="${microcloud_internal_net_addr}" export OVN_UNDERLAY_NETWORK="no" @@ -828,7 +896,13 @@ _test_case() { fi fi - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + join_systems="" + for i in $(seq -f "%02g" 2 "${num_systems}") ; do + join_systems+=" micro${i}" + done + + # shellcheck disable=SC2086 + microcloud_interactive init micro01 | capture_and_join $join_systems lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q for i in $(seq -f "%02g" 1 "${num_systems}") ; do name="micro${i}" @@ -855,7 +929,7 @@ _test_case() { validate_system_microceph "${name}" "${expected_cephfs}" "${ceph_disks}" validate_system_microovn "${name}" done - } +} test_interactive_combinations() { @@ -916,7 +990,6 @@ test_service_mismatch() { # Selects all available systems, adds 1 local disk per system, skips ceph and ovn setup. export MULTI_NODE="yes" export LOOKUP_IFACE="enp5s0" - export LIMIT_SUBNET="yes" export EXPECT_PEERS=2 export SETUP_ZFS="yes" export ZFS_FILTER="lxd_disk1" @@ -940,12 +1013,14 @@ test_service_mismatch() { done # Init should fail to find the other systems as they don't have the same services. - # 30s should be enough time to find the other systems. - echo "Peers with missing services won't be found after 30s" - ! microcloud_interactive | lxc exec micro01 -- sh -c "timeout -k 5 30 microcloud init > out" || false + # The error is reported on the joining side. + echo "Peers with missing services cannot join" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 - # Ensure we exited while still looking for servers, and found none. 
- lxc exec micro01 -- tail -1 out | grep "Scanning for eligible servers" -q + # Ensure the joiners exited due to missing services. + # The initiator exits automatically after the session timeout. + lxc exec micro02 -- tail -1 out | grep "Rejecting peer \"micro02\" due to missing services" -q + lxc exec micro03 -- tail -1 out | grep "Rejecting peer \"micro03\" due to missing services" -q # Install the remaining services on the other systems. lxc exec micro02 -- snap enable microceph @@ -955,7 +1030,7 @@ test_service_mismatch() { # Init should now work. echo "Creating a MicroCloud with MicroCeph and MicroOVN, but without their LXD devices" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q for m in micro01 micro02 micro03 ; do @@ -972,12 +1047,11 @@ test_service_mismatch() { lxc exec micro01 -- snap disable microovn || true lxc exec micro01 -- snap restart microcloud - SKIP_SERVICE="yes" unset SETUP_CEPH SETUP_OVN # Init from the minimal system should work, but not set up any services it doesn't have. 
echo "Creating a MicroCloud without setting up MicroOVN and MicroCeph on peers" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q for m in micro01 micro02 micro03 ; do @@ -1002,7 +1076,6 @@ test_disk_mismatch() { unset_interactive_vars export MULTI_NODE="yes" export LOOKUP_IFACE="enp5s0" - export LIMIT_SUBNET="yes" export EXPECT_PEERS=3 export SETUP_ZFS="yes" export ZFS_FILTER="lxd_disk1" @@ -1014,7 +1087,7 @@ test_disk_mismatch() { export CEPH_ENCRYPT="no" export SETUP_OVN="no" export CEPH_CLUSTER_NETWORK="${microcloud_internal_net_addr}" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 micro04 lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q for m in micro01 micro02 micro03 micro04 ; do validate_system_lxd "${m}" 4 disk1 6 1 @@ -1043,7 +1116,6 @@ test_reuse_cluster() { # Set the default config for interactive setup. 
export MULTI_NODE="yes" export LOOKUP_IFACE="enp5s0" - export LIMIT_SUBNET="yes" export EXPECT_PEERS=2 export SETUP_ZFS="yes" export ZFS_FILTER="lxd_disk1" @@ -1067,13 +1139,13 @@ test_reuse_cluster() { export REUSE_EXISTING_COUNT=1 export REUSE_EXISTING="add" lxc exec micro02 -- microceph cluster bootstrap - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 services_validator reset_systems 3 3 3 echo "Create a MicroCloud that re-uses an existing service on the local node" lxc exec micro01 -- microceph cluster bootstrap - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 services_validator reset_systems 3 3 3 @@ -1082,14 +1154,14 @@ test_reuse_cluster() { export REUSE_EXISTING="add" lxc exec micro02 -- microceph cluster bootstrap lxc exec micro02 -- microovn cluster bootstrap - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 services_validator reset_systems 3 3 3 echo "Create a MicroCloud that re-uses an existing MicroCeph and MicroOVN on different nodes" lxc exec micro02 -- microceph cluster bootstrap lxc exec micro03 -- microovn cluster bootstrap - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 services_validator reset_systems 3 3 3 @@ -1099,7 +1171,7 @@ test_reuse_cluster() { lxc exec micro02 -- microceph cluster bootstrap token="$(lxc exec micro02 -- microceph cluster add micro01)" lxc exec micro01 -- microceph cluster join "${token}" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 services_validator reset_systems 3 3 3 @@ -1109,7 +1181,7 @@ test_reuse_cluster() { lxc 
exec micro01 -- microceph cluster join "${token}" token="$(lxc exec micro02 -- microceph cluster add micro03)" lxc exec micro03 -- microceph cluster join "${token}" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 services_validator reset_systems 4 3 3 @@ -1118,21 +1190,24 @@ test_reuse_cluster() { lxc exec micro02 -- microceph cluster bootstrap token="$(lxc exec micro02 -- microceph cluster add micro04)" lxc exec micro04 -- microceph cluster join "${token}" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 micro04 services_validator validate_system_microceph micro04 1 reset_systems 3 3 3 echo "Fail to create a MicroCloud due to conflicting existing services" + lxc exec micro02 -- microceph cluster bootstrap lxc exec micro03 -- microceph cluster bootstrap - ! microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" || true + microcloud_interactive init micro01 | capture_and_join micro02 micro03 + lxc exec micro01 -- tail -1 out | grep "Some systems are already part of different MicroCeph clusters. 
Aborting initialization" -q reset_systems 3 3 3 echo "Create a MicroCloud that re-uses an existing service with preseed" addr=$(lxc ls micro01 -f csv -c4 | grep enp5s0 | cut -d' ' -f1) - lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud init --preseed --lookup-timeout 10 << EOF + preseed="$(cat << EOF lookup_subnet: ${addr}/24 -lookup_interface: enp5s0 +initiator: micro01 +session_passphrase: foo reuse_existing_clusters: true systems: - name: micro01 @@ -1154,6 +1229,15 @@ storage: find_min: 3 wipe: true EOF + )" + + lxc exec micro02 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro03 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro01 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" + + lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro03 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q services_validator } @@ -1166,7 +1250,6 @@ test_remove_cluster_member() { # Set the default config for interactive setup. export MULTI_NODE="yes" export LOOKUP_IFACE="enp5s0" - export LIMIT_SUBNET="yes" export EXPECT_PEERS=2 export SETUP_ZFS="yes" export ZFS_FILTER="lxd_disk1" @@ -1188,7 +1271,7 @@ test_remove_cluster_member() { reset_systems 3 3 3 echo "Fail to remove member from MicroCeph and LXD until OSDs are removed" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 # Wait for roles to refresh from the next heartbeat. 
for i in $(seq 1 40) ; do @@ -1234,7 +1317,7 @@ test_remove_cluster_member() { reset_systems 3 3 3 lxc exec micro01 -- snap disable microceph echo "Create a MicroCloud and remove a node from all services" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 # Wait for roles to refresh from the next heartbeat. for i in $(seq 1 40) ; do @@ -1256,7 +1339,7 @@ test_remove_cluster_member() { reset_systems 3 3 3 lxc exec micro01 -- snap disable microceph echo "Create a MicroCloud and remove a node from all services, but manually remove it from the MicroCloud daemon first" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 # Wait for roles to refresh from the next heartbeat. for i in $(seq 1 40) ; do @@ -1287,7 +1370,7 @@ test_remove_cluster_member() { reset_systems 3 3 3 lxc exec micro01 -- snap disable microceph echo "Create a MicroCloud and fail to remove a non-existent member" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 for i in $(seq 1 40) ; do if lxc exec micro01 --env "TEST_CONSOLE=0" -- microcloud cluster list | grep -q PENDING ; then @@ -1306,7 +1389,6 @@ test_remove_cluster_member() { done } - test_add_services() { unset_interactive_vars # Set the default config for interactive setup. 
@@ -1315,7 +1397,6 @@ test_add_services() { ceph_cluster_subnet_iface="enp7s0" export MULTI_NODE="yes" export LOOKUP_IFACE="enp5s0" - export LIMIT_SUBNET="yes" export EXPECT_PEERS=2 export SETUP_ZFS="yes" export ZFS_FILTER="lxd_disk1" @@ -1341,7 +1422,7 @@ test_add_services() { lxc exec micro01 -- snap disable microceph unset SETUP_CEPH export SKIP_SERVICE="yes" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- snap enable microceph export SETUP_CEPH="yes" export SKIP_LOOKUP=1 @@ -1349,7 +1430,7 @@ test_add_services() { unset SETUP_ZFS unset SETUP_OVN export REPLACE_PROFILE="no" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud service add > out" + microcloud_interactive "service add" micro01 services_validator reset_systems 3 3 3 @@ -1363,7 +1444,7 @@ test_add_services() { export SKIP_SERVICE="yes" export SETUP_ZFS="yes" export SETUP_OVN="yes" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- snap enable microceph export SETUP_CEPH="yes" export SKIP_LOOKUP=1 @@ -1371,7 +1452,7 @@ test_add_services() { unset SETUP_ZFS unset SETUP_OVN export REPLACE_PROFILE="yes" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud service add > out" + microcloud_interactive "service add" micro01 services_validator reset_systems 3 3 3 @@ -1382,14 +1463,14 @@ test_add_services() { export MULTI_NODE="yes" export SETUP_ZFS="yes" unset SKIP_LOOKUP - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- snap enable microovn export SETUP_OVN="yes" export SKIP_LOOKUP=1 unset MULTI_NODE unset SETUP_ZFS unset SETUP_CEPH - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud service add > out" + 
microcloud_interactive "service add" micro01 services_validator reset_systems 3 3 3 @@ -1402,7 +1483,7 @@ test_add_services() { export SETUP_ZFS="yes" unset SKIP_LOOKUP unset SETUP_OVN - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- snap enable microovn lxc exec micro01 -- snap enable microceph export SETUP_OVN="yes" @@ -1410,7 +1491,7 @@ test_add_services() { export SKIP_LOOKUP=1 unset MULTI_NODE unset SETUP_ZFS - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud service add > out" + microcloud_interactive "service add" micro01 services_validator reset_systems 3 3 3 @@ -1423,7 +1504,7 @@ test_add_services() { export SETUP_ZFS="yes" unset SETUP_CEPH unset SKIP_LOOKUP - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 lxc exec micro01 -- snap enable microceph export REUSE_EXISTING_COUNT=1 export REUSE_EXISTING="add" @@ -1433,7 +1514,7 @@ test_add_services() { unset SETUP_ZFS unset SETUP_OVN unset CEPH_CLUSTER_NETWORK - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud service add > out" + microcloud_interactive "service add" micro01 services_validator reset_systems 3 3 3 @@ -1448,10 +1529,10 @@ test_add_services() { unset SKIP_LOOKUP unset SKIP_SERVICE export CEPH_CLUSTER_NETWORK="${ceph_cluster_subnet_prefix}.0/24" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 export SKIP_LOOKUP=1 unset MULTI_NODE - ! microcloud_interactive | lxc exec micro01 -- sh -c "microcloud service add > out" || true + ! 
microcloud_interactive "service add" micro01 || true } test_non_ha() { @@ -1460,7 +1541,6 @@ test_non_ha() { export MULTI_NODE="yes" export LOOKUP_IFACE="enp5s0" - export LIMIT_SUBNET="yes" export EXPECT_PEERS=1 export SETUP_ZFS="no" export SETUP_CEPH="yes" @@ -1481,7 +1561,7 @@ test_non_ha() { reset_systems 2 1 3 echo "Creating a MicroCloud with 2 systems and only Ceph storage" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 for m in micro01 micro02 ; do validate_system_lxd ${m} 2 "" 1 1 enp6s0 10.1.123.1/24 10.1.123.100-10.1.123.254 fd42:1:1234:1234::1/64 10.1.123.1,8.8.8.8 validate_system_microceph ${m} 1 disk1 @@ -1494,7 +1574,7 @@ test_non_ha() { unset SETUP_CEPH reset_systems 2 1 3 echo "Creating a MicroCloud with 2 systems and only ZFS storage" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 for m in micro01 micro02 ; do validate_system_lxd ${m} 2 "disk1" 0 0 enp6s0 10.1.123.1/24 10.1.123.100-10.1.123.254 fd42:1:1234:1234::1/64 10.1.123.1,8.8.8.8 validate_system_microceph ${m} @@ -1505,7 +1585,7 @@ test_non_ha() { export CEPH_FILTER="lxd_disk2" reset_systems 2 2 3 echo "Creating a MicroCloud with 2 systems and all storage & networks" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 for m in micro01 micro02 ; do validate_system_lxd ${m} 2 "disk1" 1 1 enp6s0 10.1.123.1/24 10.1.123.100-10.1.123.254 fd42:1:1234:1234::1/64 10.1.123.1,8.8.8.8 validate_system_microceph ${m} 1 "disk2" @@ -1520,7 +1600,7 @@ test_non_ha() { export MULTI_NODE="no" export SKIP_LOOKUP=1 echo "Creating a MicroCloud with 1 system, and grow it to 3 with all storage & networks" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 validate_system_lxd 
"micro01" 1 "disk1" 1 1 enp6s0 10.1.123.1/24 10.1.123.100-10.1.123.254 fd42:1:1234:1234::1/64 10.1.123.1,8.8.8.8 validate_system_microceph "micro01" 1 "disk2" validate_system_microovn "micro01" @@ -1537,7 +1617,7 @@ test_non_ha() { unset IPV4_SUBNET IPV4_START IPV4_END DNS_ADDRESSES IPV6_SUBNET unset SETUP_CEPHFS export EXPECT_PEERS=2 - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud add > out" + microcloud_interactive add micro01 | capture_and_join micro02 micro03 for m in micro1 micro2 micro3 ; do validate_system_lxd "micro01" 3 "disk1" 1 1 enp6s0 10.1.123.1/24 10.1.123.100-10.1.123.254 fd42:1:1234:1234::1/64 10.1.123.1,8.8.8.8 validate_system_microceph "micro01" 1 "disk2" @@ -1547,9 +1627,10 @@ test_non_ha() { reset_systems 2 3 3 echo "Creating a MicroCloud with 1 system and growing it to 3, using preseed" addr=$(lxc ls micro01 -f csv -c4 | grep enp5s0 | cut -d' ' -f1) - lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud init --preseed --lookup-timeout 10 << EOF + preseed="$(cat << EOF lookup_subnet: ${addr}/24 -lookup_interface: enp5s0 +initiator: micro01 +session_passphrase: foo systems: - name: micro01 storage: @@ -1566,15 +1647,20 @@ ovn: ceph: cephfs: true EOF + )" + + lxc exec micro01 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" + lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q validate_system_lxd "micro01" 1 "" 2 1 enp6s0 10.1.123.1/24 10.1.123.100-10.1.123.254 fd42:1:1234:1234::1/64 10.1.123.1,8.8.8.8 validate_system_microceph "micro01" 1 "disk2" "disk3" validate_system_microovn "micro01" addr=$(lxc ls micro01 -f csv -c4 | grep enp5s0 | cut -d' ' -f1) - lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud add --preseed --lookup-timeout 10 << EOF + preseed="$(cat << EOF lookup_subnet: ${addr}/24 -lookup_interface: enp5s0 +initiator: micro01 +session_passphrase: foo systems: - name: micro02 storage: @@ -1584,6 +1670,13 @@ systems: - path: /dev/disk/by-id/scsi-0QEMU_QEMU_HARDDISK_lxd_disk3 wipe: true 
EOF + )" + + lxc exec micro02 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro01 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" + + lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q for m in micro01 micro02 ; do validate_system_lxd ${m} 2 "" 2 1 enp6s0 10.1.123.1/24 10.1.123.100-10.1.123.254 fd42:1:1234:1234::1/64 10.1.123.1,8.8.8.8 @@ -1594,9 +1687,10 @@ EOF reset_systems 2 3 3 echo "Creating a MicroCloud with 2 systems with Ceph storage using preseed" addr=$(lxc ls micro01 -f csv -c4 | grep enp5s0 | cut -d' ' -f1) - lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud init --preseed --lookup-timeout 10 << EOF + preseed="$(cat << EOF lookup_subnet: ${addr}/24 -lookup_interface: enp5s0 +initiator: micro01 +session_passphrase: foo systems: - name: micro01 storage: @@ -1620,6 +1714,13 @@ ovn: ceph: cephfs: true EOF + )" + + lxc exec micro02 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro01 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" + + lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q for m in micro01 micro02 ; do validate_system_lxd ${m} 2 "" 2 1 enp6s0 10.1.123.1/24 10.1.123.100-10.1.123.254 fd42:1:1234:1234::1/64 10.1.123.1,8.8.8.8 diff --git a/test/suites/preseed.sh b/test/suites/preseed.sh index 87bbe7056..062281707 100644 --- a/test/suites/preseed.sh +++ b/test/suites/preseed.sh @@ -1,6 +1,21 @@ #!/bin/bash +cleanup_preseed() { + # Cleanup child processes sent to the background using &. 
+ child_processes="$(jobs -pr)" + if [ -n "${child_processes}" ]; then + for p in ${child_processes}; do + kill -9 "${p}" + done + fi + + cleanup +} + test_preseed() { + # Overwrite the regular trap to cleanup background processes. + trap cleanup_preseed EXIT HUP INT TERM + reset_systems 4 3 2 lookup_gateway=$(lxc network get lxdbr0 ipv4.address) @@ -21,9 +36,10 @@ test_preseed() { done # Create a MicroCloud with storage directly given by-path on one node, and by filter on other nodes. - lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud init --preseed --lookup-timeout 10 << EOF + preseed="$(cat << EOF lookup_subnet: ${lookup_gateway} -lookup_interface: enp5s0 +initiator: micro01 +session_passphrase: foo systems: - name: micro01 ovn_uplink_interface: enp6s0 @@ -74,6 +90,15 @@ storage: wipe: true encrypt: true EOF + )" + + lxc exec micro02 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro03 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro01 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" + + lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro03 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q for m in micro01 micro03 ; do validate_system_lxd ${m} 3 disk1 2 1 enp6s0 10.1.123.1/24 10.1.123.100-10.1.123.254 fd42:1:1234:1234::1/64 10.1.123.1,8.8.8.8,fd42:1:1234:1234::1 @@ -87,9 +112,10 @@ EOF validate_system_microovn micro02 "${ovn_underlay_subnet_prefix}" # Grow the MicroCloud with a new node, with filter-based storage selection. 
- lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud add --preseed --lookup-timeout 10 < out' <<< "$preseed" & + lxc exec micro01 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" + + lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro04 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q validate_system_lxd micro04 4 disk1 1 1 enp6s0 10.1.123.1/24 10.1.123.100-10.1.123.254 fd42:1:1234:1234::1/64 validate_system_microceph micro04 1 1 "${ceph_cluster_subnet_prefix}.0/24" disk2 disk2 @@ -117,14 +150,24 @@ EOF reset_systems 3 3 2 # Create a MicroCloud but don't set up storage or network (Should get a FAN setup). - lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud init --preseed --lookup-timeout 10 << EOF + preseed="$(cat << EOF lookup_subnet: ${lookup_gateway} -lookup_interface: enp5s0 +initiator: micro01 +session_passphrase: foo systems: - name: micro01 - name: micro02 - name: micro03 EOF + )" + + lxc exec micro02 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro03 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro01 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" + + lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro03 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q for m in micro01 micro02 micro03 ; do validate_system_lxd ${m} 3 @@ -139,14 +182,24 @@ EOF lxc exec micro01 -- snap disable microovn sleep 1 - lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud init --preseed --lookup-timeout 10 << EOF + preseed="$(cat << EOF lookup_subnet: ${lookup_gateway} -lookup_interface: enp5s0 +initiator: micro01 +session_passphrase: foo systems: - name: micro01 - 
name: micro02 - name: micro03 EOF + )" + + lxc exec micro02 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro03 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" & + lxc exec micro01 --env TEST_CONSOLE=0 -- sh -c 'microcloud preseed > out' <<< "$preseed" + + lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro03 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q for m in micro01 micro02 micro03 ; do validate_system_lxd ${m} 3 diff --git a/test/suites/recover.sh b/test/suites/recover.sh index 12299c36e..5ccda9ad7 100644 --- a/test/suites/recover.sh +++ b/test/suites/recover.sh @@ -8,13 +8,17 @@ test_recover() { unset_interactive_vars export MULTI_NODE="yes" export LOOKUP_IFACE="enp5s0" - export LIMIT_SUBNET="yes" export EXPECT_PEERS=3 export SETUP_ZFS="no" export SETUP_CEPH="no" export SETUP_OVN="no" - microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" + microcloud_interactive init micro01 | capture_and_join micro02 micro03 micro04 + lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q + lxc exec micro02 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro03 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + lxc exec micro04 -- tail -2 out | head -1 | grep "Successfully joined the MicroCloud cluster and closing the session" -q + for m in "${systems[@]}" ; do validate_system_lxd "${m}" 4 validate_system_microceph "${m}"