diff --git a/v1/providers/nebius/instance.go b/v1/providers/nebius/instance.go index b474773..6bc058e 100644 --- a/v1/providers/nebius/instance.go +++ b/v1/providers/nebius/instance.go @@ -15,7 +15,9 @@ import ( ) const ( - platformTypeCPU = "cpu" + platformTypeCPU = "cpu" + nebiusGPUImageFamily = "ubuntu24.04-cuda13.0" + nebiusCPUImageFamily = "ubuntu24.04-driverless" ) //nolint:gocyclo,funlen // Complex instance creation with resource management @@ -1151,10 +1153,7 @@ func (c *NebiusClient) createBootDisk(ctx context.Context, attrs v1.CreateInstan diskName := fmt.Sprintf("%s-boot-disk", attrs.RefID) // Try to use image family first, then fallback to specific image ID - createReq, err := c.buildDiskCreateRequest(ctx, diskName, attrs) - if err != nil { - return "", fmt.Errorf("failed to build disk create request: %w", err) - } + createReq := c.buildDiskCreateRequest(ctx, diskName, attrs) operation, err := c.sdk.Services().Compute().V1().Disk().Create(ctx, createReq) if err != nil { @@ -1180,12 +1179,14 @@ func (c *NebiusClient) createBootDisk(ctx context.Context, attrs v1.CreateInstan return diskID, nil } -// buildDiskCreateRequest builds a disk creation request, trying image family first, then image ID -func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName string, attrs v1.CreateInstanceAttrs) (*compute.CreateDiskRequest, error) { +// buildDiskCreateRequest builds a disk creation request using the fixed Nebius image family for the instance type. +func (c *NebiusClient) buildDiskCreateRequest(_ context.Context, diskName string, attrs v1.CreateInstanceAttrs) *compute.CreateDiskRequest { if attrs.DiskSize == 0 { attrs.DiskSize = 1280 * units.Gibibyte // Defaulted by the Nebius Console } + imageFamily := getNebiusBootImageFamily(attrs.InstanceType) + baseReq := &compute.CreateDiskRequest{ Metadata: &common.ResourceMetadata{ ParentId: c.projectID, @@ -1194,6 +1195,7 @@ func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName stri "created-by": "brev-cloud-sdk", "brev-user": c.refID, "environment-id": attrs.RefID, + "image-family": imageFamily, }, }, Spec: &compute.DiskSpec{ @@ -1201,141 +1203,37 @@ func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName stri SizeGibibytes: int64(attrs.DiskSize / units.Gibibyte), }, Type: compute.DiskSpec_NETWORK_SSD, - }, - } - - // First, try to resolve and use image family - if imageFamily, err := c.resolveImageFamily(ctx, attrs.ImageID); err == nil { - publicImagesParent := c.getPublicImagesParent() - - // Skip validation for known-good common families to speed up instance start - knownFamilies := []string{"ubuntu22.04-cuda12", "mk8s-worker-node-v-1-32-ubuntu24.04", "mk8s-worker-node-v-1-32-ubuntu24.04-cuda12.8"} - isKnownFamily := false - for _, known := range knownFamilies { - if imageFamily == known { - isKnownFamily = true - break - } - } - - if isKnownFamily { - // Use known family without validation - baseReq.Spec.Source = &compute.DiskSpec_SourceImageFamily{ + Source: &compute.DiskSpec_SourceImageFamily{ SourceImageFamily: &compute.SourceImageFamily{ ImageFamily: imageFamily, - ParentId: publicImagesParent, + ParentId: c.getPublicImagesParent(), }, - } - baseReq.Metadata.Labels["image-family"] = imageFamily - return baseReq, nil - } - - // For unknown families, validate first - _, err := c.sdk.Services().Compute().V1().Image().GetLatestByFamily(ctx, &compute.GetImageLatestByFamilyRequest{ - ParentId: publicImagesParent, - ImageFamily: imageFamily, - }) - if err == nil { - // Family works, use it - baseReq.Spec.Source = &compute.DiskSpec_SourceImageFamily{ - SourceImageFamily: &compute.SourceImageFamily{ - ImageFamily: imageFamily, - ParentId: publicImagesParent, - }, - } - baseReq.Metadata.Labels["image-family"] = imageFamily - return baseReq, nil - } - } - - // Family approach failed, try to use a known working public image ID - publicImageID, err := c.getWorkingPublicImageID(ctx, attrs.ImageID) - if err == nil { - baseReq.Spec.Source = &compute.DiskSpec_SourceImageId{ - SourceImageId: publicImageID, - } - baseReq.Metadata.Labels["source-image-id"] = publicImageID - return baseReq, nil + }, + }, } - // Both approaches failed - return nil, fmt.Errorf("could not resolve image %s to either a working family or image ID: %w", attrs.ImageID, err) + return baseReq } -// getWorkingPublicImageID gets a working public image ID based on the requested image type -// -//nolint:gocognit,gocyclo // Complex function trying multiple image resolution strategies -func (c *NebiusClient) getWorkingPublicImageID(ctx context.Context, requestedImage string) (string, error) { - // Get available public images from the correct region - publicImagesParent := c.getPublicImagesParent() - imagesResp, err := c.sdk.Services().Compute().V1().Image().List(ctx, &compute.ListImagesRequest{ - ParentId: publicImagesParent, - }) - if err != nil { - return "", fmt.Errorf("failed to list public images: %w", err) - } - - if len(imagesResp.GetItems()) == 0 { - return "", fmt.Errorf("no public images available") - } - - // Try to find the best match based on the requested image - requestedLower := strings.ToLower(requestedImage) - - var bestMatch *compute.Image - var fallbackImage *compute.Image - - for _, image := range imagesResp.GetItems() { - if image.Metadata == nil { - continue - } - - imageName := strings.ToLower(image.Metadata.Name) - - // Set fallback to first available image - if fallbackImage == nil { - fallbackImage = image - } - - // Look for Ubuntu matches - if strings.Contains(requestedLower, "ubuntu") && strings.Contains(imageName, "ubuntu") { - // Prefer specific version matches - //nolint:gocritic // if-else chain is clearer than switch for version matching logic - if strings.Contains(requestedLower, "24.04") || strings.Contains(requestedLower, "24") { - if strings.Contains(imageName, "ubuntu24.04") { - bestMatch = image - break - } - } else if strings.Contains(requestedLower, "22.04") || strings.Contains(requestedLower, "22") { - if strings.Contains(imageName, "ubuntu22.04") { - bestMatch = image - break - } - } else if strings.Contains(requestedLower, "20.04") || strings.Contains(requestedLower, "20") { - if strings.Contains(imageName, "ubuntu20.04") { - bestMatch = image - break - } - } - - // Any Ubuntu image is better than non-Ubuntu - if bestMatch == nil { - bestMatch = image - } - } +func getNebiusBootImageFamily(instanceType string) string { + if isNebiusGPUInstanceType(instanceType) { + return nebiusGPUImageFamily } + return nebiusCPUImageFamily +} - // Use best match if found, otherwise fallback - selectedImage := bestMatch - if selectedImage == nil { - selectedImage = fallbackImage +func isNebiusGPUInstanceType(instanceType string) bool { + instanceTypeLower := strings.ToLower(instanceType) + if instanceTypeLower == "" { + return false } - if selectedImage == nil { - return "", fmt.Errorf("no suitable public image found") + // GPU instance types start with "gpu-" (see getInstanceTypesForLocation) + if strings.HasPrefix(instanceTypeLower, "gpu-") { + return true } - return selectedImage.Metadata.Id, nil + return false } // getPublicImagesParent determines the correct public images parent ID based on project routing code @@ -1575,73 +1473,6 @@ func (c *NebiusClient) parseInstanceType(ctx context.Context, instanceTypeID str return "", "", fmt.Errorf("could not parse instance type %s or find suitable platform/preset", instanceTypeID) } -// resolveImageFamily resolves an ImageID to an image family name -// If ImageID is already a family name, use it directly -// Otherwise, try to get the image and extract its family -// -//nolint:gocyclo,unparam // Complex image family resolution with fallback logic -func (c *NebiusClient) resolveImageFamily(ctx context.Context, imageID string) (string, error) { - // Common Nebius image families - if ImageID matches one of these, use it directly - commonFamilies := []string{ - "ubuntu22.04-cuda12", - "mk8s-worker-node-v-1-32-ubuntu24.04", - "mk8s-worker-node-v-1-32-ubuntu24.04-cuda12.8", - "mk8s-worker-node-v-1-31-ubuntu24.04-cuda12", - "ubuntu22.04", - "ubuntu20.04", - } - - // Check if ImageID is already a known family name - for _, family := range commonFamilies { - if imageID == family { - return family, nil - } - } - - // If ImageID looks like a family name pattern (contains dots, dashes, no UUIDs) - // and doesn't look like a UUID, assume it's a family name - if !strings.Contains(imageID, "-") || len(imageID) < 32 { - // Likely a family name, use it directly - return imageID, nil - } - - // If it looks like a UUID/ID, try to get the image and extract its family - image, err := c.sdk.Services().Compute().V1().Image().Get(ctx, &compute.GetImageRequest{ - Id: imageID, - }) - if err != nil { - // If we can't get the image, try using the ID as a family name anyway - // This allows for custom family names that don't match our patterns - return imageID, nil - } - - // Extract family from image metadata/labels if available - if image.Metadata != nil && image.Metadata.Labels != nil { - if family, exists := image.Metadata.Labels["family"]; exists && family != "" { - return family, nil - } - if family, exists := image.Metadata.Labels["image-family"]; exists && family != "" { - return family, nil - } - } - - // Extract family from image name as fallback - if image.Metadata != nil && image.Metadata.Name != "" { - // Try to extract a reasonable family name from the image name - name := strings.ToLower(image.Metadata.Name) - if strings.Contains(name, "ubuntu22") || strings.Contains(name, "ubuntu-22") { - return "ubuntu22.04", nil - } - if strings.Contains(name, "ubuntu20") || strings.Contains(name, "ubuntu-20") { - return "ubuntu20.04", nil - } - } - - // Default fallback - use the original ImageID as family - // This handles cases where users provide custom family names - return imageID, nil -} - // deleteBootDisk deletes a boot disk by ID func (c *NebiusClient) deleteBootDisk(ctx context.Context, diskID string) error { operation, err := c.sdk.Services().Compute().V1().Disk().Delete(ctx, &compute.DeleteDiskRequest{ diff --git a/v1/providers/nebius/instance_test.go b/v1/providers/nebius/instance_test.go index 389dea2..6550484 100644 --- a/v1/providers/nebius/instance_test.go +++ b/v1/providers/nebius/instance_test.go @@ -270,6 +270,36 @@ func TestExtractGPUTypeAndName(t *testing.T) { } } +func TestGetNebiusBootImageFamily(t *testing.T) { + tests := []struct { + name string + instanceType string + expected string + }{ + { + name: "gpu dot format uses cuda image", + instanceType: "gpu-h100-sxm.8gpu-128vcpu-1600gb", + expected: nebiusGPUImageFamily, + }, + { + name: "cpu dot format uses driverless image", + instanceType: "cpu-e2.4vcpu-16gb", + expected: nebiusCPUImageFamily, + }, + { + name: "empty instance type defaults to cpu image", + instanceType: "", + expected: nebiusCPUImageFamily, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, getNebiusBootImageFamily(tt.instanceType)) + }) + } +} + func TestIsPlatformSupported(t *testing.T) { client := createTestClient()