Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
223 changes: 27 additions & 196 deletions v1/providers/nebius/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ import (
)

const (
platformTypeCPU = "cpu"
platformTypeCPU = "cpu"
nebiusGPUImageFamily = "ubuntu24.04-cuda13.0"
nebiusCPUImageFamily = "ubuntu24.04-driverless"
)

//nolint:gocyclo,funlen // Complex instance creation with resource management
Expand Down Expand Up @@ -1151,10 +1153,7 @@ func (c *NebiusClient) createBootDisk(ctx context.Context, attrs v1.CreateInstan
diskName := fmt.Sprintf("%s-boot-disk", attrs.RefID)

// Try to use image family first, then fallback to specific image ID
createReq, err := c.buildDiskCreateRequest(ctx, diskName, attrs)
if err != nil {
return "", fmt.Errorf("failed to build disk create request: %w", err)
}
createReq := c.buildDiskCreateRequest(ctx, diskName, attrs)

operation, err := c.sdk.Services().Compute().V1().Disk().Create(ctx, createReq)
if err != nil {
Expand All @@ -1180,12 +1179,14 @@ func (c *NebiusClient) createBootDisk(ctx context.Context, attrs v1.CreateInstan
return diskID, nil
}

// buildDiskCreateRequest builds a disk creation request, trying image family first, then image ID
func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName string, attrs v1.CreateInstanceAttrs) (*compute.CreateDiskRequest, error) {
// buildDiskCreateRequest builds a disk creation request using the fixed Nebius image family for the instance type.
func (c *NebiusClient) buildDiskCreateRequest(_ context.Context, diskName string, attrs v1.CreateInstanceAttrs) *compute.CreateDiskRequest {
if attrs.DiskSize == 0 {
attrs.DiskSize = 1280 * units.Gibibyte // Defaulted by the Nebius Console
}

imageFamily := getNebiusBootImageFamily(attrs.InstanceType)

baseReq := &compute.CreateDiskRequest{
Metadata: &common.ResourceMetadata{
ParentId: c.projectID,
Expand All @@ -1194,148 +1195,45 @@ func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName stri
"created-by": "brev-cloud-sdk",
"brev-user": c.refID,
"environment-id": attrs.RefID,
"image-family": imageFamily,
},
},
Spec: &compute.DiskSpec{
Size: &compute.DiskSpec_SizeGibibytes{
SizeGibibytes: int64(attrs.DiskSize / units.Gibibyte),
},
Type: compute.DiskSpec_NETWORK_SSD,
},
}

// First, try to resolve and use image family
if imageFamily, err := c.resolveImageFamily(ctx, attrs.ImageID); err == nil {
publicImagesParent := c.getPublicImagesParent()

// Skip validation for known-good common families to speed up instance start
knownFamilies := []string{"ubuntu22.04-cuda12", "mk8s-worker-node-v-1-32-ubuntu24.04", "mk8s-worker-node-v-1-32-ubuntu24.04-cuda12.8"}
isKnownFamily := false
for _, known := range knownFamilies {
if imageFamily == known {
isKnownFamily = true
break
}
}

if isKnownFamily {
// Use known family without validation
baseReq.Spec.Source = &compute.DiskSpec_SourceImageFamily{
Source: &compute.DiskSpec_SourceImageFamily{
SourceImageFamily: &compute.SourceImageFamily{
ImageFamily: imageFamily,
ParentId: publicImagesParent,
ParentId: c.getPublicImagesParent(),
},
}
baseReq.Metadata.Labels["image-family"] = imageFamily
return baseReq, nil
}

// For unknown families, validate first
_, err := c.sdk.Services().Compute().V1().Image().GetLatestByFamily(ctx, &compute.GetImageLatestByFamilyRequest{
ParentId: publicImagesParent,
ImageFamily: imageFamily,
})
if err == nil {
// Family works, use it
baseReq.Spec.Source = &compute.DiskSpec_SourceImageFamily{
SourceImageFamily: &compute.SourceImageFamily{
ImageFamily: imageFamily,
ParentId: publicImagesParent,
},
}
baseReq.Metadata.Labels["image-family"] = imageFamily
return baseReq, nil
}
}

// Family approach failed, try to use a known working public image ID
publicImageID, err := c.getWorkingPublicImageID(ctx, attrs.ImageID)
if err == nil {
baseReq.Spec.Source = &compute.DiskSpec_SourceImageId{
SourceImageId: publicImageID,
}
baseReq.Metadata.Labels["source-image-id"] = publicImageID
return baseReq, nil
},
},
}

// Both approaches failed
return nil, fmt.Errorf("could not resolve image %s to either a working family or image ID: %w", attrs.ImageID, err)
return baseReq
}

// getWorkingPublicImageID gets a working public image ID based on the requested image type
//
//nolint:gocognit,gocyclo // Complex function trying multiple image resolution strategies
func (c *NebiusClient) getWorkingPublicImageID(ctx context.Context, requestedImage string) (string, error) {
// Get available public images from the correct region
publicImagesParent := c.getPublicImagesParent()
imagesResp, err := c.sdk.Services().Compute().V1().Image().List(ctx, &compute.ListImagesRequest{
ParentId: publicImagesParent,
})
if err != nil {
return "", fmt.Errorf("failed to list public images: %w", err)
}

if len(imagesResp.GetItems()) == 0 {
return "", fmt.Errorf("no public images available")
}

// Try to find the best match based on the requested image
requestedLower := strings.ToLower(requestedImage)

var bestMatch *compute.Image
var fallbackImage *compute.Image

for _, image := range imagesResp.GetItems() {
if image.Metadata == nil {
continue
}

imageName := strings.ToLower(image.Metadata.Name)

// Set fallback to first available image
if fallbackImage == nil {
fallbackImage = image
}

// Look for Ubuntu matches
if strings.Contains(requestedLower, "ubuntu") && strings.Contains(imageName, "ubuntu") {
// Prefer specific version matches
//nolint:gocritic // if-else chain is clearer than switch for version matching logic
if strings.Contains(requestedLower, "24.04") || strings.Contains(requestedLower, "24") {
if strings.Contains(imageName, "ubuntu24.04") {
bestMatch = image
break
}
} else if strings.Contains(requestedLower, "22.04") || strings.Contains(requestedLower, "22") {
if strings.Contains(imageName, "ubuntu22.04") {
bestMatch = image
break
}
} else if strings.Contains(requestedLower, "20.04") || strings.Contains(requestedLower, "20") {
if strings.Contains(imageName, "ubuntu20.04") {
bestMatch = image
break
}
}

// Any Ubuntu image is better than non-Ubuntu
if bestMatch == nil {
bestMatch = image
}
}
func getNebiusBootImageFamily(instanceType string) string {
if isNebiusGPUInstanceType(instanceType) {
return nebiusGPUImageFamily
}
return nebiusCPUImageFamily
}

// Use best match if found, otherwise fallback
selectedImage := bestMatch
if selectedImage == nil {
selectedImage = fallbackImage
func isNebiusGPUInstanceType(instanceType string) bool {
instanceTypeLower := strings.ToLower(instanceType)
if instanceTypeLower == "" {
return false
}

if selectedImage == nil {
return "", fmt.Errorf("no suitable public image found")
// GPU instance types start with "gpu-" (see getInstanceTypesForLocation)
if strings.HasPrefix(instanceTypeLower, "gpu-") {
return true
}

return selectedImage.Metadata.Id, nil
return false
}

// getPublicImagesParent determines the correct public images parent ID based on project routing code
Expand Down Expand Up @@ -1575,73 +1473,6 @@ func (c *NebiusClient) parseInstanceType(ctx context.Context, instanceTypeID str
return "", "", fmt.Errorf("could not parse instance type %s or find suitable platform/preset", instanceTypeID)
}

// resolveImageFamily resolves an ImageID to an image family name
// If ImageID is already a family name, use it directly
// Otherwise, try to get the image and extract its family
//
//nolint:gocyclo,unparam // Complex image family resolution with fallback logic
func (c *NebiusClient) resolveImageFamily(ctx context.Context, imageID string) (string, error) {
// Common Nebius image families - if ImageID matches one of these, use it directly
commonFamilies := []string{
"ubuntu22.04-cuda12",
"mk8s-worker-node-v-1-32-ubuntu24.04",
"mk8s-worker-node-v-1-32-ubuntu24.04-cuda12.8",
"mk8s-worker-node-v-1-31-ubuntu24.04-cuda12",
"ubuntu22.04",
"ubuntu20.04",
}

// Check if ImageID is already a known family name
for _, family := range commonFamilies {
if imageID == family {
return family, nil
}
}

// If ImageID looks like a family name pattern (contains dots, dashes, no UUIDs)
// and doesn't look like a UUID, assume it's a family name
if !strings.Contains(imageID, "-") || len(imageID) < 32 {
// Likely a family name, use it directly
return imageID, nil
}

// If it looks like a UUID/ID, try to get the image and extract its family
image, err := c.sdk.Services().Compute().V1().Image().Get(ctx, &compute.GetImageRequest{
Id: imageID,
})
if err != nil {
// If we can't get the image, try using the ID as a family name anyway
// This allows for custom family names that don't match our patterns
return imageID, nil
}

// Extract family from image metadata/labels if available
if image.Metadata != nil && image.Metadata.Labels != nil {
if family, exists := image.Metadata.Labels["family"]; exists && family != "" {
return family, nil
}
if family, exists := image.Metadata.Labels["image-family"]; exists && family != "" {
return family, nil
}
}

// Extract family from image name as fallback
if image.Metadata != nil && image.Metadata.Name != "" {
// Try to extract a reasonable family name from the image name
name := strings.ToLower(image.Metadata.Name)
if strings.Contains(name, "ubuntu22") || strings.Contains(name, "ubuntu-22") {
return "ubuntu22.04", nil
}
if strings.Contains(name, "ubuntu20") || strings.Contains(name, "ubuntu-20") {
return "ubuntu20.04", nil
}
}

// Default fallback - use the original ImageID as family
// This handles cases where users provide custom family names
return imageID, nil
}

// deleteBootDisk deletes a boot disk by ID
func (c *NebiusClient) deleteBootDisk(ctx context.Context, diskID string) error {
operation, err := c.sdk.Services().Compute().V1().Disk().Delete(ctx, &compute.DeleteDiskRequest{
Expand Down
30 changes: 30 additions & 0 deletions v1/providers/nebius/instance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,36 @@ func TestExtractGPUTypeAndName(t *testing.T) {
}
}

func TestGetNebiusBootImageFamily(t *testing.T) {
tests := []struct {
name string
instanceType string
expected string
}{
{
name: "gpu dot format uses cuda image",
instanceType: "gpu-h100-sxm.8gpu-128vcpu-1600gb",
expected: nebiusGPUImageFamily,
},
{
name: "cpu dot format uses driverless image",
instanceType: "cpu-e2.4vcpu-16gb",
expected: nebiusCPUImageFamily,
},
{
name: "empty instance type defaults to cpu image",
instanceType: "",
expected: nebiusCPUImageFamily,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.expected, getNebiusBootImageFamily(tt.instanceType))
})
}
}

func TestIsPlatformSupported(t *testing.T) {
client := createTestClient()

Expand Down
Loading