-
Notifications
You must be signed in to change notification settings - Fork 11
/
sandbox_create.go
200 lines (147 loc) · 6.24 KB
/
sandbox_create.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
package handlers
import (
"context"
"fmt"
"net/http"
"time"
"github.com/gin-gonic/gin"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"golang.org/x/sync/semaphore"
"github.com/e2b-dev/infra/packages/api/internal/api"
"github.com/e2b-dev/infra/packages/api/internal/auth"
"github.com/e2b-dev/infra/packages/api/internal/cache/instance"
"github.com/e2b-dev/infra/packages/api/internal/utils"
"github.com/e2b-dev/infra/packages/shared/pkg/models"
"github.com/e2b-dev/infra/packages/shared/pkg/telemetry"
)
const (
	// defaultRequestLimit is the capacity handed to the
	// postSandboxParallelLimit semaphore.
	defaultRequestLimit = 16

	// InstanceIDPrefix is prepended to the generated ID when PostSandboxes
	// builds the ID of a new sandbox.
	InstanceIDPrefix = "i"
)
// postSandboxParallelLimit is a package-level weighted semaphore created with
// a capacity of defaultRequestLimit. PostSandboxes acquires one unit from it
// before reserving and creating a sandbox and releases that unit when the
// handler returns.
var postSandboxParallelLimit = semaphore.NewWeighted(defaultRequestLimit)
// PostSandboxes handles a request to create a new sandbox.
//
// The handler runs these steps in order:
//  1. parses the request body and cleans the requested template ID,
//  2. reads the team from the request context and checks its access to the
//     template via CheckTeamAccessEnv,
//  3. waits for a unit of the package-level postSandboxParallelLimit semaphore,
//  4. reserves a sandbox slot for the team in the instance cache,
//  5. asks the orchestrator to create the sandbox,
//  6. adds the sandbox to the instance cache (deleting the sandbox again if
//     that fails),
//  7. sends analytics and updates the template's last-used marker in the
//     background.
//
// On success it responds with 201 Created and the sandbox as JSON. On failure
// it reports the error to telemetry and responds through sendAPIStoreError
// with 400 (bad body or template ID), 403 (sandbox reservation rejected) or
// 500 (everything else).
func (a *APIStore) PostSandboxes(c *gin.Context) {
	ctx := c.Request.Context()

	body, err := utils.ParseBody[api.PostSandboxesJSONRequestBody](ctx, c)
	if err != nil {
		a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err))

		errMsg := fmt.Errorf("error when parsing request: %w", err)
		telemetry.ReportCriticalError(ctx, errMsg)

		return
	}

	// Emitted only once parsing has succeeded, so the event never shows up on
	// traces of requests whose body could not be parsed.
	telemetry.ReportEvent(ctx, "Parsed body")

	span := trace.SpanFromContext(ctx)
	c.Set("traceID", span.SpanContext().TraceID().String())

	cleanedAliasOrEnvID, err := utils.CleanEnvID(body.TemplateID)
	if err != nil {
		a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Invalid environment ID: %s", err))

		errMsg := fmt.Errorf("error when cleaning env ID: %w", err)
		telemetry.ReportCriticalError(ctx, errMsg)

		return
	}

	telemetry.ReportEvent(ctx, "Cleaned sandbox ID")

	// Get team from context, use TeamContextKey. The assertion is checked so
	// that a missing or unexpected value yields an error response instead of
	// a panic inside the handler.
	team, ok := c.Value(auth.TeamContextKey).(models.Team)
	if !ok {
		errMsg := fmt.Errorf("team is missing from the request context (key %v)", auth.TeamContextKey)
		telemetry.ReportCriticalError(ctx, errMsg)

		a.sendAPIStoreError(c, http.StatusInternalServerError, "Error when getting team from request context")

		return
	}

	// Check if team has access to the environment
	env, build, checkErr := a.CheckTeamAccessEnv(ctx, cleanedAliasOrEnvID, team.ID, true)
	if checkErr != nil {
		errMsg := fmt.Errorf("error when checking team access: %w", checkErr)
		telemetry.ReportCriticalError(ctx, errMsg)

		a.sendAPIStoreError(c, http.StatusInternalServerError, fmt.Sprintf("Error when checking team access: %s", checkErr))

		return
	}

	telemetry.ReportEvent(ctx, "Checked team access")

	// Use the first alias of the template, if it has any; otherwise alias
	// stays empty.
	var alias string
	if env.Aliases != nil && len(*env.Aliases) > 0 {
		alias = (*env.Aliases)[0]
	}

	c.Set("envID", env.TemplateID)
	c.Set("teamID", team.ID.String())

	telemetry.SetAttributes(ctx,
		attribute.String("env.team.id", team.ID.String()),
		attribute.String("env.id", env.TemplateID),
		attribute.String("env.alias", alias),
		attribute.String("env.kernel.version", build.KernelVersion),
		attribute.String("env.firecracker.version", build.FirecrackerVersion),
	)

	telemetry.ReportEvent(ctx, "waiting for create sandbox parallel limit semaphore slot")

	// Acquire is given the request context, so a failure here means the
	// context ended before a unit became available.
	limitErr := postSandboxParallelLimit.Acquire(ctx, 1)
	if limitErr != nil {
		errMsg := fmt.Errorf("error when acquiring parallel lock: %w", limitErr)
		telemetry.ReportCriticalError(ctx, errMsg)

		a.sendAPIStoreError(c, http.StatusInternalServerError, "Request canceled or timed out.")

		return
	}

	defer postSandboxParallelLimit.Release(1)

	telemetry.ReportEvent(ctx, "create sandbox parallel limit semaphore slot acquired")

	// Check if team has reached max instances
	maxInstancesPerTeam := team.Edges.TeamTier.ConcurrentInstances
	sandboxID := InstanceIDPrefix + utils.GenerateID()

	err, releaseTeamSandboxReservation := a.instanceCache.Reserve(sandboxID, team.ID, maxInstancesPerTeam)
	if err != nil {
		errMsg := fmt.Errorf("team '%s' has reached the maximum number of instances (%d)", team.ID, team.Edges.TeamTier.ConcurrentInstances)
		telemetry.ReportCriticalError(ctx, fmt.Errorf("%w (error: %w)", errMsg, err))

		a.sendAPIStoreError(c, http.StatusForbidden, fmt.Sprintf(
			"You have reached the maximum number of concurrent E2B sandboxes (%d). If you need more, "+
				"please contact us at 'https://e2b.dev/docs/getting-help'", maxInstancesPerTeam))

		return
	}

	telemetry.ReportEvent(ctx, "Reserved team sandbox slot")

	defer releaseTeamSandboxReservation()

	var metadata map[string]string
	if body.Metadata != nil {
		metadata = *body.Metadata
	}

	sandbox, instanceErr := a.orchestrator.CreateSandbox(a.tracer, ctx, sandboxID, env.TemplateID, alias, team.ID.String(), build.ID.String(), team.Edges.TeamTier.MaxLengthHours, metadata, build.KernelVersion, build.FirecrackerVersion)
	if instanceErr != nil {
		errMsg := fmt.Errorf("error when creating instance: %w", instanceErr)
		telemetry.ReportCriticalError(ctx, errMsg)

		a.sendAPIStoreError(c, http.StatusInternalServerError, errMsg.Error())

		return
	}

	telemetry.ReportEvent(ctx, "Created sandbox")

	if cacheErr := a.instanceCache.Add(instance.InstanceInfo{
		StartTime:         nil,
		Instance:          sandbox,
		BuildID:           &build.ID,
		TeamID:            &team.ID,
		Metadata:          metadata,
		MaxInstanceLength: time.Duration(team.Edges.TeamTier.MaxLengthHours) * time.Hour,
	}); cacheErr != nil {
		errMsg := fmt.Errorf("error when adding instance to cache: %w", cacheErr)
		telemetry.ReportError(ctx, errMsg)

		// The sandbox was created but could not be added to the cache, so
		// try to delete it again before reporting the failure to the client.
		delErr := a.DeleteInstance(sandbox.SandboxID, true)
		if delErr != nil {
			delErrMsg := fmt.Errorf("couldn't delete instance that couldn't be added to cache: %w", delErr.Err)
			telemetry.ReportError(ctx, delErrMsg)
		} else {
			telemetry.ReportEvent(ctx, "deleted instance that couldn't be added to cache")
		}

		a.sendAPIStoreError(c, http.StatusInternalServerError, "Cannot create a sandbox right now")

		return
	}

	c.Set("instanceID", sandbox.SandboxID)

	telemetry.ReportEvent(ctx, "Added sandbox to cache")

	a.posthog.IdentifyAnalyticsTeam(team.ID.String(), team.Name)
	properties := a.posthog.GetPackageToPosthogProperties(&c.Request.Header)
	a.posthog.CreateAnalyticsTeamEvent(team.ID.String(), "created_instance",
		properties.
			Set("environment", env.TemplateID).
			Set("instance_id", sandbox.SandboxID).
			Set("alias", alias),
	)

	telemetry.ReportEvent(ctx, "Created analytics event")

	// The last-used update runs in the background with context.Background()
	// rather than the request context. The error is kept local to the
	// goroutine so it never writes to the handler's own err variable after
	// the handler may already have returned.
	go func() {
		if updateErr := a.db.UpdateEnvLastUsed(context.Background(), env.TemplateID); updateErr != nil {
			a.logger.Errorf("Error when updating last used for env: %s", updateErr)
		}
	}()

	telemetry.SetAttributes(ctx,
		attribute.String("instance.id", sandbox.SandboxID),
	)

	a.logger.Infof("Created sandbox '%s' for team '%s'", sandbox.SandboxID, team.ID)

	c.JSON(http.StatusCreated, &sandbox)
}