Skip to content

Commit

Permalink
Xelink support with suggested changes
Browse files Browse the repository at this point in the history
  • Loading branch information
hsyrja committed Oct 30, 2023
1 parent 44562ac commit 8769a0f
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 1 deletion.
13 changes: 13 additions & 0 deletions cmd/gpu_fakedev/configs/2x4-PVC-xelink.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"Info": "2x 4 tile 4 GiB PVC [Ponte Vecchio] GPUs",
"DevCount": 2,
"TilesPerDev": 4,
"DevsPerNode": 1,
"DevMemSize": 4294967296,
"Capabilities": {
"platform": "fake_PVC",
"connections": "0.1-0.0_0.2-0.0_0.3-0.0_1.0-0.0_1.1-0.0_1.2-0.0_1.3-0.0_0.2-0.1_0.3-0.1_1.0-0.1_1.1-0.1_1.2-0.1_1.3-0.1_0.3-0.2_1.0-0.2_1.1-0.2_1.2-0.2_1.3-0.2_1.0-0.3_1.1-0.3_1.2-0.3_1.3-0.3_1.1-1.0_1.2-1.0_1.3-1.0_1.2-1.1_1.3-1.1_1.3-1.2",
"connection-topology": "RAW"
}
}

12 changes: 12 additions & 0 deletions cmd/gpu_fakedev/configs/8x2-PVC-xelink.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"Info": "8x 2 tile 4 GiB PVC [Ponte Vecchio] GPUs",
"DevCount": 8,
"TilesPerDev": 2,
"DevsPerNode": 2,
"DevMemSize": 4294967296,
"Capabilities": {
"platform": "fake_PVC",
"connections": "",
"connection-topology": "FULL"
}
}
81 changes: 80 additions & 1 deletion cmd/gpu_fakedev/gpu_fakedev.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2021-2022 Intel Corporation. All Rights Reserved.
// Copyright 2021-2023 Intel Corporation. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -44,6 +44,7 @@ import (
"os"
"path/filepath"
"strconv"
"strings"

"golang.org/x/sys/unix"
)
Expand All @@ -61,6 +62,9 @@ const (
devNullMajor = 1
devNullMinor = 3
devNullType = unix.S_IFCHR
// GPU connectivity
maxK8sLabelSize = 63
fullyConnected = "FULL"
)

var verbose bool
Expand Down Expand Up @@ -270,6 +274,8 @@ func generateDriFiles(opts genOptions) {
log.Printf("Generating fake DRI device(s) sysfs, debugfs and devfs content under '%s' & '%s'",
sysfsPath, devfsPath)

makeXelinkSideCar(opts.Capabilities["connection-topology"], opts.DevCount, opts.TilesPerDev, opts.Capabilities["connections"])

opts.dirs, opts.files = 0, 0
for i := 0; i < opts.DevCount; i++ {
if err := addSysfsDriTree(sysfsPath, &opts, i); err != nil {
Expand All @@ -291,6 +297,79 @@ func generateDriFiles(opts genOptions) {
log.Printf("Done, created %d dirs, %d devices and %d files.", opts.dirs, opts.devs, opts.files)
}

// makeXelinkSideCar generates the xelink sidecar label file.
//
// When topology equals fullyConnected, the connection list is generated
// from the GPU and tile counts with buildConnectionList. For any other
// topology value the given connections string is saved as-is.
func makeXelinkSideCar(topology string, gpus, tiles int, connections string) {
	if topology == fullyConnected {
		log.Printf("XELINK: generate xelink sidecar label file, using (GPUs: %d, Tiles: %d, Topology: %s)", gpus, tiles, topology)
		saveSideCarFile(buildConnectionList(gpus, tiles))

		return
	}

	// Any other topology: the caller-provided connection list is used verbatim
	log.Printf("XELINK: generate xelink sidecar label file, using (GPUs: %d, Tiles: %d)", gpus, tiles)
	saveSideCarFile(connections)
}

// buildConnectionList returns the connection list for a fully connected
// topology with the given number of GPUs and tiles per GPU.
//
// Each tile is named "<gpu>.<tile>". Every unordered pair of distinct tiles
// is listed exactly once as "<later>-<earlier>", where "earlier" and "later"
// refer to the (gpu, tile) enumeration order, and the links are joined with
// "_". For example, 2 GPUs with 1 tile each give "1.0-0.0".
//
// An empty string is returned when there are fewer than two tiles in total,
// or when either count is not positive.
func buildConnectionList(gpus, tiles int) string {
	if gpus <= 0 || tiles <= 0 {
		return ""
	}

	nodes := make([]string, 0, gpus*tiles)

	for gpu := 0; gpu < gpus; gpu++ {
		for tile := 0; tile < tiles; tile++ {
			nodes = append(nodes, fmt.Sprintf("%d.%d", gpu, tile))
		}
	}

	// n nodes give n*(n-1)/2 unordered pairs
	links := make([]string, 0, len(nodes)*(len(nodes)-1)/2)

	for i, from := range nodes {
		// Pairing each node only with the nodes after it rules out both
		// self links and reversed duplicates without extra bookkeeping.
		// TODO ignore in-gpu xelinks
		for _, to := range nodes[i+1:] {
			links = append(links, to+"-"+from)
		}
	}

	return strings.Join(links, "_")
}

// saveSideCarFile writes the given connection list as "label=value" lines to
// "xpum-sidecar-labels.txt" in the current working directory, and echoes each
// line to stdout.
//
// The list is split over several labels so that no value is longer than
// maxK8sLabelSize characters:
//   - "xpumanager.intel.com/xe-links" holds the first maxK8sLabelSize
//     characters of the list as-is (an empty value for an empty list),
//   - "xpumanager.intel.com/xe-links<N>", with N starting from 2, hold a "Z"
//     prefix followed by the next maxK8sLabelSize-1 characters each.
//
// Panics if the file cannot be created, written or closed.
func saveSideCarFile(connections string) {
	f, err := os.Create("xpum-sidecar-labels.txt")
	if err != nil {
		panic(err)
	}

	// writeLine echoes the line to stdout and appends it to the file
	writeLine := func(line string) {
		fmt.Println(line)

		if _, err := f.WriteString(line + "\n"); err != nil {
			// Best-effort close, the write error is the one worth reporting
			_ = f.Close()

			panic(err)
		}
	}

	// Write first line without Z prefix
	writeLine("xpumanager.intel.com/xe-links=" + connections[:min(len(connections), maxK8sLabelSize)])

	// Write next lines with Z prefix, which takes one character out of the
	// value size budget, hence the shorter step.
	// NOTE(review): presumably the prefix keeps the value valid when a chunk
	// would start with a separator character - confirm against the consumer.
	index := 2

	for i := maxK8sLabelSize; i < len(connections); i += maxK8sLabelSize - 1 {
		chunk := connections[i:min(len(connections), i+maxK8sLabelSize-1)]
		writeLine(fmt.Sprintf("xpumanager.intel.com/xe-links%d=Z%s", index, chunk))
		index++
	}

	// Close errors on a written file can mean lost data, so they are not
	// ignored.
	if err := f.Close(); err != nil {
		panic(err)
	}
}

// getOptions parses options from given JSON file, validates and returns them.
func getOptions(name string) genOptions {
if name == "" {
Expand Down

0 comments on commit 8769a0f

Please sign in to comment.