Skip to content

Commit

Permalink
give each calicoctl command 60s to finish rather than hold the juju-machine-lock (#95)
Browse files Browse the repository at this point in the history

* give each calicoctl command 60s to finish rather than hold the juju-machine-lock

* updated tox and workflow version

* use TimeoutExpired exception rather than TimeoutError

* use juju 3.1

* Update integration tests to run with juju 3.x
  • Loading branch information
addyess committed Mar 2, 2023
1 parent 2287a08 commit a164af4
Show file tree
Hide file tree
Showing 7 changed files with 114 additions and 113 deletions.
13 changes: 10 additions & 3 deletions .github/workflows/vsphere-integration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,39 @@ jobs:
steps:
- name: Check out code
uses: actions/checkout@v3

- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: "3.10"

- name: Setup operator environment
uses: charmed-kubernetes/actions-operator@main
with:
provider: vsphere
juju-channel: 3.1/stable
credentials-yaml: ${{ secrets.CREDENTIALS_YAML }}
clouds-yaml: ${{ secrets.CLOUDS_YAML }}
bootstrap-constraints: "arch=amd64 cores=2 mem=4G"
bootstrap-options: "${{ secrets.FOCAL_BOOTSTRAP_OPTIONS }} --model-default datastore=vsanDatastore --model-default primary-network=VLAN_2763"
bootstrap-options: "${{ secrets.JAMMY_BOOTSTRAP_OPTIONS }} --model-default datastore=vsanDatastore --model-default primary-network=VLAN_2763"

- name: Run test
run: tox -e integration
run: tox -e integration -- --basetemp=/home/ubuntu/pytest

- name: Setup Debug Artifact Collection
if: ${{ failure() }}
run: mkdir tmp

- name: Collect Juju Status
if: ${{ failure() }}
run: |
juju status 2>&1 | tee tmp/juju-status.txt
juju-crashdump -s -m controller -a debug-layer -a config -o tmp/
mv juju-crashdump-* tmp/ | true
- name: Upload debug artifacts
if: ${{ failure() }}
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v3
with:
name: test-run-artifacts
path: tmp
27 changes: 17 additions & 10 deletions reactive/calico.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@

from conctl import getContainerRuntimeCtl
from socket import gethostname
from subprocess import check_call, check_output, CalledProcessError, PIPE
from subprocess import (
check_call, check_output, CalledProcessError, PIPE, TimeoutExpired
)

from charms.leadership import leader_get, leader_set
from charms.reactive import when, when_not, when_any, set_state, remove_state
Expand Down Expand Up @@ -355,7 +357,7 @@ def configure_calico_pool():
}

calicoctl_apply(pool)
except CalledProcessError:
except (CalledProcessError, TimeoutExpired):
log(traceback.format_exc())
if config['ipip'] != 'Never' and config['vxlan'] != 'Never':
status.blocked('ipip and vxlan configs are in conflict')
Expand Down Expand Up @@ -437,7 +439,7 @@ def configure_bgp_globals():
try:
try:
bgp_config = calicoctl_get('bgpconfig', 'default')
except CalledProcessError as e:
except (CalledProcessError, TimeoutExpired) as e:
if b'resource does not exist' in e.stderr:
log('default BGPConfiguration does not exist')
bgp_config = {
Expand Down Expand Up @@ -467,7 +469,7 @@ def configure_bgp_globals():
for cidr in config['bgp-service-loadbalancer-ips'].split()
]
calicoctl_apply(bgp_config)
except CalledProcessError:
except (CalledProcessError, TimeoutExpired):
log(traceback.format_exc())
status.waiting('Waiting to retry BGP global configuration')
return
Expand Down Expand Up @@ -499,7 +501,7 @@ def configure_node():
node['spec']['bgp']['routeReflectorClusterID'] = \
route_reflector_cluster_id
calicoctl_apply(node)
except CalledProcessError:
except (CalledProcessError, TimeoutExpired):
log(traceback.format_exc())
status.waiting('Waiting to retry Calico node configuration')
return
Expand Down Expand Up @@ -573,7 +575,7 @@ def configure_bgp_peers():

for peer in peers_to_delete:
calicoctl('delete', 'bgppeer', peer)
except CalledProcessError:
except (CalledProcessError, TimeoutExpired):
log(traceback.format_exc())
status.waiting('Waiting to retry BGP peer configuration')
return
Expand Down Expand Up @@ -612,13 +614,18 @@ def ready():
status.active('Calico is active')


def calicoctl(*args):
def calicoctl(*args, timeout: int = 60):
"""Call calicoctl with specified args.
@param int timeout: If the process does not terminate after timeout seconds,
raise a TimeoutExpired exception
"""
cmd = ['/opt/calicoctl/calicoctl'] + list(args)
env = os.environ.copy()
env.update(get_calicoctl_env())
try:
return check_output(cmd, env=env, stderr=PIPE)
except CalledProcessError as e:
return check_output(cmd, env=env, stderr=PIPE, timeout=timeout)
except (CalledProcessError, TimeoutExpired) as e:
log(e.stderr)
log(e.output)
raise
Expand Down Expand Up @@ -703,7 +710,7 @@ def publish_version_to_juju():
application_version_set(version)
set_state('calico.version-published')

except (FileNotFoundError, CalledProcessError):
except (FileNotFoundError, CalledProcessError, TimeoutExpired):
log('Calico version not available')


Expand Down
78 changes: 0 additions & 78 deletions tests/data/bundle.yaml

This file was deleted.

12 changes: 12 additions & 0 deletions tests/data/charm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
description: A minimal two-machine Kubernetes cluster, appropriate for development.
series: &series {{ series }}
services:
calico:
charm: {{calico_charm}}
resources:
calico: {{resource_path}}/calico-amd64.tar.gz
calico-arm64: {{resource_path}}/calico-arm64.tar.gz
calico-node-image: {{resource_path}}/calico-node-image.tar.gz
options:
ignore-loose-rpf: true
vxlan: Always
57 changes: 48 additions & 9 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,63 @@
from kubernetes_wrapper import Kubernetes
import logging
import pytest
import pytest_asyncio
import random
import string
import yaml

log = logging.getLogger(__name__)


@pytest_asyncio.fixture(scope="module")
def pytest_addoption(parser):
parser.addoption(
"--series",
type=str,
default="",
help="Set series for the machine units",
)


@pytest.fixture(scope="module")
def k8s_core_bundle(ops_test):
return ops_test.Bundle("kubernetes-core", channel="edge")


@pytest.fixture(scope="module")
@pytest.mark.asyncio
async def k8s_core_yaml(ops_test, k8s_core_bundle):
"""Download and render the kubernetes-core bundle, return its full yaml"""
(bundle_path,) = await ops_test.async_render_bundles(k8s_core_bundle)
return yaml.safe_load(bundle_path.read_text())


@pytest.fixture(scope="module")
def series(k8s_core_yaml, request):
series = request.config.getoption("--series")
return series if series else k8s_core_yaml["series"]


@pytest.fixture(scope="module")
@pytest.mark.asyncio
async def kubernetes(ops_test):
kubeconfig_path = ops_test.tmp_path / "kubeconfig"
retcode, stdout, stderr = await ops_test.run(
"juju", "scp", "kubernetes-control-plane/leader:config", kubeconfig_path
k_c_p = ops_test.model.applications["kubernetes-control-plane"]
(leader,) = [u for u in k_c_p.units if (await u.is_leader_from_status())]
action = await leader.run_action("get-kubeconfig")
action = await action.wait()
success = (
action.status == "completed"
and action.results["return-code"] == 0
and "kubeconfig" in action.results
)
if retcode != 0:
log.error(f"retcode: {retcode}")
log.error(f"stdout:\n{stdout.strip()}")
log.error(f"stderr:\n{stderr.strip()}")

if not success:
log.error(f"status: {action.status}")
log.error(f"results:\n{yaml.safe_dump(action.results, indent=2)}")
pytest.fail("Failed to copy kubeconfig from kubernetes-control-plane")

kubeconfig_path = ops_test.tmp_path / "kubeconfig"
with kubeconfig_path.open("w") as f:
f.write(action.results["kubeconfig"])

namespace = "test-calico-integration-" + "".join(
random.choice(string.ascii_lowercase + string.digits)
for _ in range(5)
Expand Down
38 changes: 26 additions & 12 deletions tests/integration/test_calico_integration.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
import asyncio
import logging
import os
from pathlib import Path
import pytest
import shlex
import time
import yaml
log = logging.getLogger(__name__)


@pytest.mark.abort_on_fail
@pytest.mark.skip_if_deployed
async def test_build_and_deploy(ops_test):
async def test_build_and_deploy(ops_test, k8s_core_bundle, series):
log.info("Building charm")
calico_charm = await ops_test.build_charm(".")

resource_path = ops_test.tmp_path / "charm-resources"
resource_path.mkdir()
resource_build_script = os.path.abspath("./build-calico-resource.sh")
Expand All @@ -25,17 +28,23 @@ async def test_build_and_deploy(ops_test):
log.error(f"stdout:\n{stdout.strip()}")
log.error(f"stderr:\n{stderr.strip()}")
pytest.fail("Failed to build charm resources")
bundle = ops_test.render_bundle(
"tests/data/bundle.yaml",

log.info("Build Bundle...")
bundle, *overlays = await ops_test.async_render_bundles(
k8s_core_bundle,
Path("tests/data/charm.yaml"),
calico_charm=calico_charm,
series=series,
resource_path=resource_path
)
# deploy with Juju CLI because libjuju does not support local resource
# paths in bundles

log.info("Deploying bundle")
retcode, stdout, stderr = await ops_test.run(
"juju", "deploy", "-m", ops_test.model_full_name, bundle
model = ops_test.model_full_name
cmd = f"juju deploy -m {model} {bundle} " + " ".join(
f"--overlay={f}" for f in overlays
)
retcode, stdout, stderr = await ops_test.run(*shlex.split(cmd))

if retcode != 0:
log.error(f"retcode: {retcode}")
log.error(f"stdout:\n{stdout.strip()}")
Expand All @@ -60,11 +69,16 @@ async def test_build_and_deploy(ops_test):


async def juju_run(unit, cmd):
result = await unit.run(cmd)
code = result.results["Code"]
stdout = result.results.get("Stdout")
stderr = result.results.get("Stderr")
assert code == "0", f"{cmd} failed ({code}): {stderr or stdout}"
action = await unit.run(cmd)
await action.wait()
code = action.results.get("Code", action.results.get("return-code"))
if code is None:
log.error(f"Failed to find the return code in {action.results}")
return -1
code = int(code)
stdout = action.results.get("Stdout", action.results.get("stdout")) or ""
stderr = action.results.get("Stderr", action.results.get("stderr")) or ""
assert code == 0, f"{cmd} failed ({code}): {stderr or stdout}"
return stdout


Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ deps =
git+https://github.com/canonical/kubernetes-rapper@main#egg=kubernetes-wrapper
# tox only passes through the upper-case versions by default, but some
# programs, such as wget or pip, only honor the lower-case versions
passenv = http_proxy https_proxy no_proxy
passenv = http_proxy, https_proxy, no_proxy
commands = pytest --asyncio-mode=auto --tb native --show-capture=no --log-cli-level=INFO -s {posargs} {toxinidir}/tests/integration

[testenv:lint]
Expand Down

0 comments on commit a164af4

Please sign in to comment.