From e8ad201d03478e7c0251438b7a68313be57b748a Mon Sep 17 00:00:00 2001 From: Darren Shepherd Date: Tue, 5 Nov 2024 20:51:22 -0700 Subject: [PATCH] bug: fix dataset leaking workspace directories --- pkg/engine/http.go | 8 +++++--- pkg/loader/loader.go | 25 ++++++++++++++++++++++++- pkg/openai/client.go | 2 +- pkg/runner/input.go | 3 +++ pkg/runner/output.go | 3 +++ pkg/sdkserver/datasets.go | 11 +++++++++++ 6 files changed, 47 insertions(+), 5 deletions(-) diff --git a/pkg/engine/http.go b/pkg/engine/http.go index 87348ce9..a30c01e1 100644 --- a/pkg/engine/http.go +++ b/pkg/engine/http.go @@ -5,9 +5,11 @@ import ( "encoding/json" "fmt" "io" + "maps" "net/http" "net/url" "os" + "slices" "strings" "github.com/gptscript-ai/gptscript/pkg/types" @@ -75,9 +77,9 @@ func (e *Engine) runHTTP(ctx context.Context, prg *types.Program, tool types.Too return nil, err } - for _, env := range e.Env { - if strings.HasPrefix(env, "GPTSCRIPT_WORKSPACE_") { - req.Header.Add("X-GPTScript-Env", env) + for _, k := range slices.Sorted(maps.Keys(envMap)) { + if strings.HasPrefix(k, "GPTSCRIPT_WORKSPACE_") { + req.Header.Add("X-GPTScript-Env", k+"="+envMap[k]) } } diff --git a/pkg/loader/loader.go b/pkg/loader/loader.go index 80342f2b..f2679c6f 100644 --- a/pkg/loader/loader.go +++ b/pkg/loader/loader.go @@ -29,6 +29,18 @@ import ( const CacheTimeout = time.Hour +var Remap = map[string]string{} + +func init() { + remap := os.Getenv("GPTSCRIPT_TOOL_REMAP") + for _, pair := range strings.Split(remap, ",") { + k, v, ok := strings.Cut(pair, "=") + if ok { + Remap[k] = v + } + } +} + type source struct { // Content The content of the source Content []byte @@ -68,8 +80,19 @@ func openFile(path string) (io.ReadCloser, bool, error) { } func loadLocal(base *source, name string) (*source, bool, error) { + var remapped bool + if !strings.HasPrefix(name, ".") { + for k, v := range Remap { + if strings.HasPrefix(name, k) { + name = v + name[len(k):] + remapped = true + break + } + } + } + filePath := name - if !filepath.IsAbs(name) { + if !remapped && !filepath.IsAbs(name) { // We want to keep all strings in / format, and only convert to platform specific when reading // This is why we use path instead of filepath. filePath = path.Join(base.Path, name) diff --git a/pkg/openai/client.go b/pkg/openai/client.go index 6178c997..3dbbca44 100644 --- a/pkg/openai/client.go +++ b/pkg/openai/client.go @@ -617,7 +617,7 @@ func (c *Client) call(ctx context.Context, request openai.ChatCompletionRequest, } partialMessage = appendMessage(partialMessage, response) if partial != nil { - if time.Since(start) > 500*time.Millisecond { + if time.Since(start) > 100*time.Millisecond { last = last[:0] partial <- types.CompletionStatus{ CompletionID: transactionID, diff --git a/pkg/runner/input.go b/pkg/runner/input.go index 360e6274..04d17cc3 100644 --- a/pkg/runner/input.go +++ b/pkg/runner/input.go @@ -15,6 +15,9 @@ func (r *Runner) handleInput(callCtx engine.Context, monitor Monitor, env []stri } for _, inputToolRef := range inputToolRefs { + if callCtx.Program.ToolSet[inputToolRef.ToolID].IsNoop() { + continue + } data := map[string]any{} _ = json.Unmarshal([]byte(input), &data) data["input"] = input diff --git a/pkg/runner/output.go b/pkg/runner/output.go index 5f1d2818..87e9670f 100644 --- a/pkg/runner/output.go +++ b/pkg/runner/output.go @@ -76,6 +76,9 @@ func (r *Runner) handleOutput(callCtx engine.Context, monitor Monitor, env []str } for _, outputToolRef := range outputToolRefs { + if callCtx.Program.ToolSet[outputToolRef.ToolID].IsNoop() { + continue + } inputData, err := argsForFilters(callCtx.Program, outputToolRef, startState, map[string]any{ "output": output, "continuation": continuation, diff --git a/pkg/sdkserver/datasets.go b/pkg/sdkserver/datasets.go index e922cd97..c00308e7 100644 --- a/pkg/sdkserver/datasets.go +++ b/pkg/sdkserver/datasets.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "net/http" + "strings" gcontext "github.com/gptscript-ai/gptscript/pkg/context" "github.com/gptscript-ai/gptscript/pkg/gptscript" @@ -39,6 +40,12 @@ func (r datasetRequest) opts(o gptscript.Options) gptscript.Options { Monitor: o.Monitor, Runner: o.Runner, } + for _, e := range r.Env { + v, ok := strings.CutPrefix(e, "GPTSCRIPT_WORKSPACE_ID=") + if ok { + opts.Workspace = v + } + } return opts } @@ -61,6 +68,7 @@ func (s *server) listDatasets(w http.ResponseWriter, r *http.Request) { writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to initialize gptscript: %w", err)) return } + defer g.Close(false) prg, err := loader.Program(r.Context(), s.getDatasetTool(req), "List Datasets", loader.Options{ Cache: g.Cache, @@ -118,6 +126,7 @@ func (s *server) addDatasetElements(w http.ResponseWriter, r *http.Request) { writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to initialize gptscript: %w", err)) return } + defer g.Close(false) var args addDatasetElementsArgs if err := json.Unmarshal([]byte(req.Input), &args); err != nil { @@ -177,6 +186,7 @@ func (s *server) listDatasetElements(w http.ResponseWriter, r *http.Request) { writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to initialize gptscript: %w", err)) return } + defer g.Close(false) var args listDatasetElementsArgs if err := json.Unmarshal([]byte(req.Input), &args); err != nil { @@ -239,6 +249,7 @@ func (s *server) getDatasetElement(w http.ResponseWriter, r *http.Request) { writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to initialize gptscript: %w", err)) return } + defer g.Close(false) var args getDatasetElementArgs if err := json.Unmarshal([]byte(req.Input), &args); err != nil {