Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,12 @@ func supervise() error {
// run the child binary
cmd := exec.Command(binPath)

cmd.Env = append(os.Environ(), []string{envChildID + "=" + kvm.GetBuiltAppVersion()}...)
lastFilePath := filepath.Join(errorDumpDir, errorDumpLastFile)

cmd.Env = append(os.Environ(), []string{
fmt.Sprintf("%s=%s", envChildID, kvm.GetBuiltAppVersion()),
fmt.Sprintf("JETKVM_LAST_ERROR_PATH=%s", lastFilePath),
}...)
cmd.Args = os.Args

logFile, err := os.CreateTemp("", "jetkvm-stdout.log")
Expand Down
107 changes: 107 additions & 0 deletions failsafe.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package kvm

import (
"fmt"
"os"
"strings"
"sync"
)

// Failsafe configuration: environment variable names and filesystem paths
// consulted by checkFailsafeReason.
const (
	// failsafeEnv forces failsafe mode when its value is "1".
	failsafeEnv = "JETKVM_FORCE_FAILSAFE"
	// failsafeLastCrashEnv names the environment variable holding the path
	// of the last crash log.
	failsafeLastCrashEnv = "JETKVM_LAST_ERROR_PATH"

	// failsafeFile is the marker file whose existence activates failsafe mode.
	failsafeFile = "/userdata/jetkvm/.enablefailsafe"
	// failsafeDefaultLastCrashPath is the crash log path used when
	// failsafeLastCrashEnv is empty or unset.
	failsafeDefaultLastCrashPath = "/userdata/jetkvm/crashdump/last-crash.log"
)

// Failsafe state. The values are written inside checkFailsafeReason, whose
// body is guarded by failsafeOnce.
var (
	// failsafeOnce ensures the failsafe checks run a single time.
	failsafeOnce sync.Once

	// failsafeModeActive is true once a failsafe condition has been detected.
	failsafeModeActive bool
	// failsafeModeReason identifies which condition activated failsafe mode.
	failsafeModeReason string
	// failsafeCrashLog holds the contents of the last crash log, if one was read.
	failsafeCrashLog string
)

// FailsafeModeNotification is the payload of the "failsafeMode" JSON-RPC
// event sent by notifyFailsafeMode.
type FailsafeModeNotification struct {
// Active reports whether failsafe mode is active; notifyFailsafeMode
// always sends true.
Active bool `json:"active"`
// Reason carries failsafeModeReason, which checkFailsafeReason sets to
// "failsafe_env_set", "failsafe_file_exists" or "video".
Reason string `json:"reason"`
}

// checkFailsafeReason decides whether failsafe mode should be active and
// records the outcome in failsafeModeActive and failsafeModeReason.
//
// The checks run in this order and stop at the first match:
//  1. the failsafeEnv environment variable equals "1";
//  2. the failsafeFile marker exists (it is removed once seen);
//  3. the last crash log, which must be a symlink, contains "runtime.cgocall".
//
// The function has side effects (it deletes the marker file and the crash log
// symlink), so its body is guarded by failsafeOnce and runs at most once;
// later calls are no-ops.
func checkFailsafeReason() {
	failsafeOnce.Do(func() {
		// check if the failsafe environment variable is set
		if os.Getenv(failsafeEnv) == "1" {
			failsafeModeActive = true
			failsafeModeReason = "failsafe_env_set"
			return
		}

		// check if the failsafe file exists; remove it so that it is only
		// acted on once
		if _, err := os.Stat(failsafeFile); err == nil {
			failsafeModeActive = true
			failsafeModeReason = "failsafe_file_exists"
			if err := os.Remove(failsafeFile); err != nil {
				// A marker that cannot be removed will be found again by
				// the next check, so make the failure visible.
				failsafeLogger.Warn().Err(err).Str("path", failsafeFile).Msg("failed to remove failsafe file")
			}
			return
		}

		// get the last crash log path from the environment variable
		lastCrashPath := os.Getenv(failsafeLastCrashEnv)
		if lastCrashPath == "" {
			lastCrashPath = failsafeDefaultLastCrashPath
		}

		// Lstat (rather than Stat) inspects the link itself, which is what
		// the symlink check below needs.
		l := failsafeLogger.With().Str("path", lastCrashPath).Logger()
		fi, err := os.Lstat(lastCrashPath)
		if err != nil {
			// a missing crash log is the normal case and is not logged
			if !os.IsNotExist(err) {
				l.Warn().Err(err).Msg("failed to stat last crash log")
			}
			return
		}

		if fi.Mode()&os.ModeSymlink == 0 {
			l.Warn().Msg("last crash log is not a symlink, ignoring")
			return
		}

		// read the crash log (following the symlink) and keep its contents
		// for rpcGetFailsafeLogs
		content, err := os.ReadFile(lastCrashPath)
		if err != nil {
			l.Warn().Err(err).Msg("failed to read last crash log")
			return
		}
		failsafeCrashLog = string(content)

		// unlink the last crash log so the same crash is not evaluated again
		if err := os.Remove(lastCrashPath); err != nil {
			l.Warn().Err(err).Msg("failed to remove last crash log")
		}

		// TODO: read the goroutine stack trace and check which goroutine is panicking
		if strings.Contains(failsafeCrashLog, "runtime.cgocall") {
			failsafeModeActive = true
			failsafeModeReason = "video"
		}
	})
}

// notifyFailsafeMode sends a "failsafeMode" JSON-RPC event carrying the
// current failsafe reason to session. It does nothing when session is nil or
// failsafe mode is not active.
func notifyFailsafeMode(session *Session) {
	if session == nil {
		return
	}
	if !failsafeModeActive {
		return
	}

	reason := failsafeModeReason
	jsonRpcLogger.Info().
		Str("reason", reason).
		Msg("sending failsafe mode notification")

	notification := FailsafeModeNotification{Active: true, Reason: reason}
	writeJSONRPCEvent("failsafeMode", notification, session)
}

// rpcGetFailsafeLogs returns the stored crash log contents. It returns an
// error when failsafe mode is not active.
func rpcGetFailsafeLogs() (string, error) {
	if failsafeModeActive {
		return failsafeCrashLog, nil
	}

	return "", fmt.Errorf("failsafe mode is not active")
}
Loading