Skip to content

Commit

Permalink
try adding very basic of ci/tests (#1)
Browse files Browse the repository at this point in the history
* try adding very basic of ci/tests
* hyperqueue not working yet - next item of action

Signed-off-by: vsoch <vsoch@users.noreply.github.com>
  • Loading branch information
vsoch committed Jun 4, 2023
1 parent 50add74 commit 6f9e774
Show file tree
Hide file tree
Showing 16 changed files with 293 additions and 26 deletions.
38 changes: 38 additions & 0 deletions .github/workflows/build-deploy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
name: build hyperqueue-operator

# Build on every pull request. Pushes to main and manual dispatches
# additionally log in to ghcr.io and push the image.
on:
  pull_request: []
  push:
    branches:
      - main
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    # The GHCR login below authenticates with GITHUB_TOKEN, which needs
    # package write access for the push step to succeed.
    permissions:
      contents: read
      packages: write
    strategy:
      matrix:
        # catalog and bundle targets were removed because nobody was using them
        command: [docker]

    name: make and build ${{ matrix.command }}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3
      - uses: actions/setup-go@v3
        with:
          go-version: ^1.18.1
      # Registry credentials are only needed when the image will be pushed
      - name: GHCR Login
        if: (github.event_name != 'pull_request')
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Runs the Makefile target "<command>-build" (docker-build here)
      - name: Build Container
        run: make ${{ matrix.command }}-build

      # Runs the Makefile target "<command>-push"; skipped for pull requests
      - name: Deploy Container
        if: (github.event_name != 'pull_request')
        run: make ${{ matrix.command }}-push
90 changes: 90 additions & 0 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
name: test hyperqueue-operator

on:
  pull_request: []

jobs:
  formatting:
    name: Formatting
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      # NOTE(review): the environment is named "black" but installs pyflakes,
      # and no later step in this job uses it - confirm whether it is needed.
      - name: Setup black linter
        run: conda create --quiet --name black pyflakes

      - name: Check Spelling
        uses: crate-ci/typos@7ad296c72fa8265059cc03d1eda562fbdfcd6df2  # v1.9.0
        with:
          files: ./README.md ./config/samples

  unit-tests:
    name: Unit Tests
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - name: Setup Go
        uses: actions/setup-go@v3
        with:
          go-version: ^1.18
      - name: fmt check
        run: make fmt

      # These aren't written yet
      - name: Run Unit tests
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: make test

      # Ensure build-config is the same as the one we have: keep a copy of the
      # committed file, regenerate it, and fail (non-zero diff) if they differ.
      - name: Check Updated hyperqueue-operator.yaml
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          cp examples/dist/hyperqueue-operator.yaml /tmp/hyperqueue-operator.yaml
          make build-config
          diff examples/dist/hyperqueue-operator.yaml /tmp/hyperqueue-operator.yaml

  test-jobs:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # Each entry is [test name, container image, job time]
        test: [["hello-world", "ubuntu", 20]]

    steps:
      - name: Clone the code
        uses: actions/checkout@v3

      - name: Setup Go
        uses: actions/setup-go@v3
        with:
          go-version: ^1.18

      - name: Start minikube
        uses: medyagh/setup-minikube@697f2b7aaed5f70bf2a94ee21a4ec3dde7b12f92  # v0.0.9

      - name: Create the namespace
        run: kubectl create namespace hyperqueue-operator

      # Pre-pull the test container inside minikube, build the local operator
      # image and manifest, load the image into minikube, then apply the manifest.
      - name: Pull Docker Containers to MiniKube
        env:
          container: ${{ matrix.test[1] }}
          # The test name is the first element of the matrix entry; indexing
          # the matrix object itself (matrix[0]) evaluates to an empty value.
          test: ${{ matrix.test[0] }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          export SHELL=/bin/bash
          eval $(minikube -p minikube docker-env)
          minikube ssh docker pull ${container}
          make deploy-local
          minikube image load ghcr.io/converged-computing/hyperqueue-operator:test
          kubectl apply -f examples/dist/hyperqueue-operator-local.yaml

      # Placeholder: the real test script invocation is still commented out
      - name: Test ${{ matrix.test[0] }}
        env:
          name: ${{ matrix.test[0] }}
          jobtime: ${{ matrix.test[2] }}
        run: |
          echo "TODO test ${name}"
          # /bin/bash ./script/test.sh ${name} ${jobtime}
13 changes: 13 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,15 @@ docker-build: test ## Build docker image with the manager.
docker-push: ## Push docker image with the manager.
docker push ${IMG}

# Build a local test image (DEVIMG) and generate a matching local manifest at
# examples/dist/hyperqueue-operator-local.yaml. This target does not itself load
# the image into minikube or kind, nor apply the generated manifest; the caller
# does that afterwards.
.PHONY: deploy-local
deploy-local: manifests kustomize build
# Remove any previous local deployment; a failure here is deliberately ignored
kubectl delete -f examples/dist/hyperqueue-operator-local.yaml || true
docker build -t ${DEVIMG} .
# Point the kustomize "controller" image at the freshly built DEVIMG
cd config/manager && $(KUSTOMIZE) edit set image controller=${DEVIMG}
$(KUSTOMIZE) build config/default > examples/dist/hyperqueue-operator-local.yaml
# Rewrite the pull policy from Always to Never in the generated manifest
# (presumably so the cluster uses the locally loaded image - confirm)
sed -i 's/ imagePullPolicy: Always/ imagePullPolicy: Never/' examples/dist/hyperqueue-operator-local.yaml

# PLATFORMS defines the target platforms for which the manager image is built, to provide support for multiple
# architectures (e.g. make docker-buildx IMG=myregistry/myoperator:0.0.1). To use this option you need to:
# - able to use docker buildx . More info: https://docs.docker.com/build/buildx/
Expand Down Expand Up @@ -289,3 +298,7 @@ catalog-build: opm ## Build a catalog image.
.PHONY: catalog-push
catalog-push: ## Push a catalog image.
$(MAKE) docker-push IMG=$(CATALOG_IMG)

# Convenience target to run before pushing: runs the generate and build-config
# prerequisites, then prints the working tree status so any regenerated files
# that still need committing are visible.
.PHONY: pre-push
pre-push: generate build-config
git status
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ This isn't tested or working yet (and I'm new to the tool so please don't expect
mkdir hyperqueue-operator
cd hyperqueue-operator/
operator-sdk init --domain flux-framework.org --repo github.com/converged-computing/hyperqueue-operator
operator-sdk create api --group hyperqueues --version v1alpha1 --kind Hyperqueue --resource --controller
operator-sdk create api --version v1alpha1 --kind Hyperqueue --resource --controller
```

## Getting Started
Expand Down Expand Up @@ -41,6 +41,9 @@ Generate the custom resource definition
```bash
# Build and push the image, and generate the examples/dist/hyperqueue-operator-dev.yaml
$ make test-deploy DEVIMG=<some-registry>/hyperqueue-operator:tag

# As an example
$ make test-deploy DEVIMG=vanessa/hyperqueue-operator:test
```

Apply the new config!
Expand All @@ -62,8 +65,6 @@ When you are done, cleanup.
$ kind delete cluster
```



### How it works

This project aims to follow the Kubernetes [Operator pattern](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/).
Expand Down
4 changes: 4 additions & 0 deletions api/v1alpha1/hyperqueue_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ type HyperqueueSpec struct {
// Size of the hyperqueue (1 server + (N-1) nodes)
Size int32 `json:"size"`

// Interactive mode keeps the cluster running
// +optional
Interactive bool `json:"interactive"`

// Time limit for the job
// Approximately one year. This cannot be zero or job won't start
// +kubebuilder:default=31500000
Expand Down
3 changes: 3 additions & 0 deletions config/crd/bases/flux-framework.org_hyperqueues.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ spec:
description: Release of Hyperqueue to installed (if hq binary not
found in PATH)
type: string
interactive:
description: Interactive mode keeps the cluster running
type: boolean
resources:
additionalProperties:
anyOf:
Expand Down
4 changes: 2 additions & 2 deletions config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
images:
- name: controller
newName: vanessa/hyperqueue-operator
newTag: test
newName: ghcr.io/converged-computing/hyperqueue-operator
newTag: latest
6 changes: 4 additions & 2 deletions controllers/hyperqueue/hyperqueue.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ func (r *HyperqueueReconciler) getCluster(
"Namespace:", job.Namespace,
"Name:", job.Name,
)
return job, ctrl.Result{}, err
// If there is an error, return the existing (empty)
return existing, ctrl.Result{}, err
}

r.Log.Info(
Expand All @@ -112,7 +113,7 @@ func (r *HyperqueueReconciler) getCluster(
"Namespace:", job.Namespace,
"Name:", job.Name,
)
return job, ctrl.Result{}, err
return existing, ctrl.Result{}, err
}
// Successful - return and requeue
return job, ctrl.Result{Requeue: true}, nil
Expand Down Expand Up @@ -159,6 +160,7 @@ func (r *HyperqueueReconciler) getConfigMap(
data["start-server"] = serverStart
data["start-worker"] = workerStart
}
fmt.Println(data)

// Create the config map with respective data!
cm = &corev1.ConfigMap{
Expand Down
43 changes: 29 additions & 14 deletions controllers/hyperqueue/jobset.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ func (r *HyperqueueReconciler) newJobSet(
cluster *api.Hyperqueue,
) (*jobset.JobSet, error) {

// When we have a success policy
// serverName := cluster.Name + "-server"

// When suspend is true we have a hard time debugging jobs, so keep false
suspend := false
jobs := jobset.JobSet{
Expand All @@ -37,6 +40,12 @@ func (r *HyperqueueReconciler) newJobSet(
},
Spec: jobset.JobSetSpec{

// The job is successful when the broker job finishes with completed (0)
//SuccessPolicy: &jobset.SuccessPolicy{
// Operator: jobset.OperatorAny,
// TargetReplicatedJobs: []string{serverName},
//},

// This might be the control for child jobs (worker)
// But I don't think we need this anymore.
Suspend: &suspend,
Expand All @@ -48,6 +57,7 @@ func (r *HyperqueueReconciler) newJobSet(
// Get leader server job, the parent in the JobSet
serverJob, err := r.getJob(cluster, cluster.Spec.Server, 1, "server", true)
if err != nil {
r.Log.Error(err, "There was an error getting the server ReplicatedJob")
return &jobs, err
}

Expand All @@ -63,6 +73,7 @@ func (r *HyperqueueReconciler) newJobSet(

workerJob, err := r.getJob(cluster, workerNode, workerNodes, "worker", true)
if err != nil {
r.Log.Error(err, "There was an error getting the worker ReplicatedJob")
return &jobs, err
}
jobs.Spec.ReplicatedJobs = []jobset.ReplicatedJob{serverJob, workerJob}
Expand Down Expand Up @@ -113,14 +124,6 @@ func (r *HyperqueueReconciler) getJob(
Replicas: 1,
}

// Do we have a pull secret for the image?
pullSecret := corev1.LocalObjectReference{}
if node.PullSecret != "" {
pullSecret = corev1.LocalObjectReference{
Name: node.PullSecret,
}
}

// Create the JobSpec for the job -> Template -> Spec
jobspec := batchv1.JobSpec{
BackoffLimit: &backoffLimit,
Expand All @@ -138,18 +141,25 @@ func (r *HyperqueueReconciler) getJob(
},
Spec: corev1.PodSpec{
// matches the service
Subdomain: serviceName,
Volumes: getVolumes(cluster),
RestartPolicy: corev1.RestartPolicyOnFailure,
ImagePullSecrets: []corev1.LocalObjectReference{pullSecret},
Subdomain: serviceName,
Volumes: getVolumes(cluster),
RestartPolicy: corev1.RestartPolicyOnFailure,
},
},
}

// Do we have a pull secret for the image?
if node.PullSecret != "" {
jobspec.Template.Spec.ImagePullSecrets = []corev1.LocalObjectReference{
{Name: node.PullSecret},
}
}

// Get resources for the node (server or worker)
resources, err := r.getNodeResources(cluster, node)
r.Log.Info("🌀 Hyperqueue", "Pod.Resources", resources)
r.Log.Info("👑️ Hyperqueue", "Pod.Resources", resources)
if err != nil {
r.Log.Info("🌀 Hyperqueue", "Pod.Resources", resources)
r.Log.Error(err, "❌ Hyperqueue", "Pod.Resources", resources)
return job, err
}
jobspec.Template.Spec.Overhead = resources
Expand All @@ -161,6 +171,11 @@ func (r *HyperqueueReconciler) getJob(
mounts,
entrypoint,
)
// Error creating containers
if err != nil {
r.Log.Error(err, "❌ Hyperqueue", "Pod.Resources", resources)
return job, err
}
jobspec.Template.Spec.Containers = containers
job.Template.Spec = jobspec
return job, err
Expand Down
6 changes: 5 additions & 1 deletion controllers/hyperqueue/templates/components.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,8 @@ workdir=${PWD}
mkdir -p ${workdir}

# End init logic
{{end}}
{{end}}

{{define "exit"}}{{/* When .Spec.Interactive is true, emit "sleep infinity" so the script blocks instead of exiting; otherwise emit nothing. */}}
{{ if .Spec.Interactive }}sleep infinity{{ end }}
{{ end }}
4 changes: 3 additions & 1 deletion controllers/hyperqueue/templates/server.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/sh

# This script handles start logic for the broker
# This script handles shared start logic
{{template "init" .}}

# Start the server
Expand All @@ -12,3 +12,5 @@ else
hq server start &
hq server submit $@
fi

{{template "exit" .}}
3 changes: 2 additions & 1 deletion controllers/hyperqueue/templates/worker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@

# Shared logic to install hq
{{template "init" .}}
hq worker start
hq worker start
{{template "exit" .}}
6 changes: 5 additions & 1 deletion examples/dist/hyperqueue-operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ spec:
default: 0.15.0
description: Release of Hyperqueue to installed (if hq binary not found in PATH)
type: string
interactive:
description: Interactive mode keeps the cluster running
type: boolean
resources:
additionalProperties:
anyOf:
Expand Down Expand Up @@ -694,7 +697,8 @@ spec:
- --leader-elect
command:
- /manager
image: vanessa/hyperqueue-operator:test
image: ghcr.io/converged-computing/hyperqueue-operator:latest
imagePullPolicy: Always
livenessProbe:
httpGet:
path: /healthz
Expand Down

0 comments on commit 6f9e774

Please sign in to comment.