Brings back runner to enable run_tests.py
briansonnenberg committed Dec 8, 2023
1 parent 5e48585 commit 9bb5ddd
Showing 23 changed files with 1,787 additions and 0 deletions.
22 changes: 22 additions & 0 deletions isotope/runner/README.md
@@ -0,0 +1,22 @@
# Runner

This subdirectory contains the Python3 _module_ for automating topology
tests. The executable entry point ("main") for it is at "../run_tests.py".

## Pseudocode

```txt
read configuration
create cluster
add prometheus
for each topology:
convert topology to Kubernetes YAML
for each environment (none, istio, sidecars only, etc.):
update Prometheus labels
deploy environment
deploy topology
run load test
delete topology
delete environment
delete cluster
```
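For orientation, here is a minimal sketch of how the pseudocode above could map onto the modules added in this commit. It is an assumption-laden illustration: it presumes `../run_tests.py` imports the package as `runner`, that a config file named `example-config.toml` exists, and that the deploy/load-test/teardown steps live in runner modules not shown in this excerpt.

```python
# Hedged sketch only; the actual wiring in run_tests.py may differ.
from runner import cluster, config


def main() -> None:
    cfg = config.from_toml_file('example-config.toml')  # read configuration
    cluster.set_up_if_not_exists(                        # create cluster
        cfg.cluster_project_id, cfg.cluster_name, cfg.cluster_zones,
        cfg.cluster_version, cfg.server_machine_type, cfg.server_disk_size_gb,
        cfg.server_num_nodes, cfg.client_machine_type, cfg.client_disk_size_gb)
    for topology_path in cfg.topology_paths:              # for each topology
        for environment in cfg.environments:              # for each environment
            # Deploy environment + topology, run the load test, then tear down.
            # (Handled by runner modules outside this excerpt.)
            pass


if __name__ == '__main__':
    main()
```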
34 changes: 34 additions & 0 deletions isotope/runner/__init__.py
@@ -0,0 +1,34 @@
# Copyright Istio Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for automating topology testing.
The pseudo-code for the intended calls for this is:
```
read configuration
create cluster
add prometheus
for each topology:
convert topology to Kubernetes YAML
for each environment (none, istio, sidecars only, etc.):
update Prometheus labels
deploy environment
deploy topology
run load test
delete topology
delete environment
delete cluster
```
"""
169 changes: 169 additions & 0 deletions isotope/runner/cluster.py
@@ -0,0 +1,169 @@
# Copyright Istio Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Set up for GKE clusters and Prometheus monitoring."""

import logging
import os

from . import consts, prometheus, resources, sh, wait
from typing import List


def set_up_if_not_exists(
project_id: str, name: str, zones: List[str], version: str,
service_graph_machine_type: str, service_graph_disk_size_gb: int,
service_graph_num_nodes: int, client_machine_type: str,
client_disk_size_gb: int) -> None:
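    """Creates the cluster via set_up only if it does not already exist."""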
sh.run_gcloud(['config', 'set', 'project', project_id], check=True)
zone = zones[0]

    # TODO: This is the default tabular output. Filter the output to just the
    # names of the existing clusters.
output = sh.run_gcloud(
['container', 'clusters', 'list', '--zone', zone], check=True).stdout
# TODO: Also check if the cluster is normal (e.g. not being deleted).
if name in output:
logging.debug('%s already exists; bypassing creation', name)
else:
logging.debug('%s does not exist yet; creating...', name)
set_up(project_id, name, zones, version, service_graph_machine_type,
service_graph_disk_size_gb, service_graph_num_nodes,
client_machine_type, client_disk_size_gb)


def set_up(project_id: str, name: str, zones: List[str], version: str,
service_graph_machine_type: str, service_graph_disk_size_gb: int,
service_graph_num_nodes: int, client_machine_type: str,
client_disk_size_gb: int, deploy_prometheus=False) -> None:
"""Creates and sets up a GKE cluster.
Args:
project_id: full ID for the cluster's GCP project
name: name of the GKE cluster
zone: GCE zone (e.g. "us-central1-a")
version: GKE version (e.g. "1.9.7-gke.3")
service_graph_machine_type: GCE type of service machines
service_graph_disk_size_gb: disk size of service machines in gigabytes
service_graph_num_nodes: number of machines in the service graph pool
client_machine_type: GCE type of client machine
client_disk_size_gb: disk size of client machine in gigabytes
"""
sh.run_gcloud(['config', 'set', 'project', project_id], check=True)

_create_cluster(name, zones, version, 'n1-standard-4', 16, 1)
_create_cluster_role_binding()

if deploy_prometheus:
_create_persistent_volume()
_initialize_helm()
_helm_add_prometheus_operator()
prometheus.apply(
intermediate_file_path=resources.PROMETHEUS_VALUES_GEN_YAML_PATH)

_create_service_graph_node_pool(service_graph_num_nodes,
service_graph_machine_type,
service_graph_disk_size_gb,
zones[0])
_create_client_node_pool(client_machine_type, client_disk_size_gb, zones[0])


def _create_cluster(name: str, zones: List[str], version: str, machine_type: str,
disk_size_gb: int, num_nodes: int) -> None:
logging.info('creating cluster "%s"', name)
node_locations = ','.join(zones)
zone = zones[0]

sh.run_gcloud(
[
'container', 'clusters', 'create', name, '--zone', zone,
'--node-locations', node_locations, '--cluster-version', version,
'--machine-type', machine_type, '--disk-size',
str(disk_size_gb), '--num-nodes',
str(num_nodes)
],
check=True)
sh.run_gcloud(['config', 'set', 'container/cluster', name], check=True)
sh.run_gcloud(
['container', 'clusters', 'get-credentials', '--zone', zone, name],
check=True)


def _create_service_graph_node_pool(num_nodes: int, machine_type: str,
disk_size_gb: int, zone: str) -> None:
logging.info('creating service graph node-pool')
_create_node_pool(consts.SERVICE_GRAPH_NODE_POOL_NAME, num_nodes,
machine_type, disk_size_gb, zone)


def _create_client_node_pool(machine_type: str, disk_size_gb: int,
zone: str) -> None:
logging.info('creating client node-pool')
_create_node_pool(consts.CLIENT_NODE_POOL_NAME, 1, machine_type,
disk_size_gb, zone)


def _create_node_pool(name: str, num_nodes: int, machine_type: str,
disk_size_gb: int, zone: str) -> None:
sh.run_gcloud(
[
'container', 'node-pools', 'create', name, '--machine-type',
machine_type, '--num-nodes',
str(num_nodes), '--disk-size',
str(disk_size_gb), '--zone',
zone
],
check=True)


def _create_cluster_role_binding() -> None:
logging.info('creating cluster-admin-binding')
proc = sh.run_gcloud(['config', 'get-value', 'account'], check=True)
account = proc.stdout
sh.run_kubectl(
[
'create', 'clusterrolebinding', 'cluster-admin-binding',
'--clusterrole', 'cluster-admin', '--user', account
],
check=True)


def _create_persistent_volume() -> None:
logging.info('creating persistent volume')
sh.run_kubectl(
['apply', '-f', resources.PERSISTENT_VOLUME_YAML_PATH], check=True)


def _initialize_helm() -> None:
logging.info('initializing Helm')
sh.run_kubectl(
['create', '-f', resources.HELM_SERVICE_ACCOUNT_YAML_PATH], check=True)
sh.run_with_k8s_api(
['helm', 'init', '--service-account', 'tiller', '--wait'], check=True)
sh.run_with_k8s_api(
[
'helm', 'repo', 'add', 'coreos',
'https://s3-eu-west-1.amazonaws.com/coreos-charts/stable'
],
check=True)


def _helm_add_prometheus_operator() -> None:
logging.info('installing coreos/prometheus-operator')
sh.run_with_k8s_api(
[
'helm', 'install', 'coreos/prometheus-operator', '--name',
'prometheus-operator', '--namespace', consts.MONITORING_NAMESPACE
],
check=True)
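The `sh` helpers used above (`run_gcloud`, `run_kubectl`, `run_with_k8s_api`) belong to the runner package but are not shown in this excerpt. As an assumption-laden sketch, `run_gcloud` presumably wraps `subprocess.run` along these lines, since callers read `.stdout` and pass `check=True`:

```python
# Plausible shape of runner/sh.run_gcloud; an assumption, not the committed code.
import subprocess
from typing import List


def run_gcloud(args: List[str],
               check: bool = False) -> subprocess.CompletedProcess:
    # Prepend the gcloud binary, capture output as text so callers can read
    # .stdout, and optionally raise CalledProcessError on a non-zero exit.
    return subprocess.run(['gcloud', *args],
                          capture_output=True,
                          text=True,
                          check=check)
```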
126 changes: 126 additions & 0 deletions isotope/runner/config.py
@@ -0,0 +1,126 @@
# Copyright Istio Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Read runner configuration from a dict or TOML."""

from typing import Any, Dict, List, Optional

import toml


class RunnerConfig:
"""Represents the intermediary between a config file"""

def __init__(self, topology_paths: List[str], environments: List[str],
istio_archive_url: str, cluster_project_id: str,
cluster_name: str, cluster_zones: List[str],
cluster_version: str, server_machine_type: str,
server_disk_size_gb: int, server_num_nodes: int,
server_image: str, client_machine_type: str,
client_disk_size_gb: int, client_image: str,
client_qps: Optional[int], client_duration: str,
client_num_conc_conns: int) -> None:
self.topology_paths = topology_paths
self.environments = environments
self.istio_archive_url = istio_archive_url
self.cluster_project_id = cluster_project_id
self.cluster_name = cluster_name
self.cluster_zones = cluster_zones
self.cluster_version = cluster_version
self.server_machine_type = server_machine_type
self.server_disk_size_gb = server_disk_size_gb
self.server_num_nodes = server_num_nodes
self.server_image = server_image
self.client_machine_type = client_machine_type
self.client_disk_size_gb = client_disk_size_gb
self.client_image = client_image
self.client_qps = client_qps
self.client_duration = client_duration
self.client_num_conc_conns = client_num_conc_conns

def labels(self) -> Dict[str, str]:
"""Returns the static labels for Prometheus for this configuration."""
return {
'istio_archive_url': self.istio_archive_url,
'cluster_version': self.cluster_version,
            # Prometheus label values must be strings, so join the zone list.
            'cluster_zones': ','.join(self.cluster_zones),
'server_machine_type': self.server_machine_type,
'server_disk_size_gb': str(self.server_disk_size_gb),
'server_num_nodes': str(self.server_num_nodes),
'server_image': self.server_image,
'client_machine_type': self.client_machine_type,
'client_disk_size_gb': str(self.client_disk_size_gb),
'client_image': self.client_image,
'client_qps': str(self.client_qps),
'client_duration': self.client_duration,
'client_num_concurrent_connections':
str(self.client_num_conc_conns),
}


def from_dict(d: Dict[str, Any]) -> RunnerConfig:
topology_paths = d.get('topology_paths', [])
environments = d.get('environments', [])

istio = d['istio']
istio_archive_url = istio['archive_url']

cluster = d['cluster']
cluster_project_id = cluster['project_id']
cluster_name = cluster['name']
cluster_zones = cluster['zones']
cluster_version = cluster['version']

server = d['server']
server_machine_type = server['machine_type']
server_disk_size_gb = server['disk_size_gb']
server_num_nodes = server['num_nodes']
server_image = server['image']

client = d['client']
client_machine_type = client['machine_type']
client_disk_size_gb = client['disk_size_gb']
client_image = client['image']
client_qps = client['qps']
if client_qps == 'max':
client_qps = None
else:
# Must coerce into integer, otherwise not a valid QPS.
client_qps = int(client_qps)
client_duration = client['duration']
client_num_conc_conns = client['num_concurrent_connections']

return RunnerConfig(
topology_paths=topology_paths,
environments=environments,
istio_archive_url=istio_archive_url,
cluster_project_id=cluster_project_id,
cluster_name=cluster_name,
cluster_zones=cluster_zones,
cluster_version=cluster_version,
server_machine_type=server_machine_type,
server_disk_size_gb=server_disk_size_gb,
server_image=server_image,
server_num_nodes=server_num_nodes,
client_machine_type=client_machine_type,
client_disk_size_gb=client_disk_size_gb,
client_image=client_image,
client_qps=client_qps,
client_duration=client_duration,
client_num_conc_conns=client_num_conc_conns)


def from_toml_file(path: str) -> RunnerConfig:
d = toml.load(path)
return from_dict(d)
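As a usage sketch, the dictionary below mirrors the keys that `from_dict` reads; the concrete values are invented for illustration, and the same structure expressed as TOML is what `from_toml_file` expects.

```python
from runner import config  # assuming the package is importable as `runner`

# All values below are hypothetical; only the key layout matches from_dict.
example = {
    'topology_paths': ['topologies/example.yaml'],
    'environments': ['none', 'istio'],
    'istio': {'archive_url': 'https://example.com/istio.tar.gz'},
    'cluster': {
        'project_id': 'my-gcp-project',
        'name': 'isotope-cluster',
        'zones': ['us-central1-a'],
        'version': '1.9.7-gke.3',
    },
    'server': {
        'machine_type': 'n1-standard-4',
        'disk_size_gb': 16,
        'num_nodes': 10,
        'image': 'gcr.io/my-gcp-project/isotope-service:latest',
    },
    'client': {
        'machine_type': 'n1-standard-4',
        'disk_size_gb': 16,
        'image': 'gcr.io/my-gcp-project/isotope-client:latest',
        'qps': 'max',  # 'max' is converted to None (maximum QPS)
        'duration': '5m',
        'num_concurrent_connections': 32,
    },
}

cfg = config.from_dict(example)
assert cfg.client_qps is None
print(cfg.labels())  # static Prometheus labels for this run
```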
35 changes: 35 additions & 0 deletions isotope/runner/consts.py
@@ -0,0 +1,35 @@
# Copyright Istio Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Common constants used throughout the runner module."""

import datetime

DEFAULT_NAMESPACE = 'default'
MONITORING_NAMESPACE = 'monitoring'
ISTIO_NAMESPACE = 'istio-system'
SERVICE_GRAPH_NAMESPACE = 'service-graph'

DEFAULT_NODE_POOL_NAME = 'default-pool'
SERVICE_GRAPH_NODE_POOL_NAME = 'service-graph-pool'
CLIENT_NODE_POOL_NAME = 'client-pool'
CLIENT_NAME = 'client'
CLIENT_PORT = 8080
SERVICE_GRAPH_SERVICE_SELECTOR = 'role=service'
SERVICE_PORT = 8080
ISTIO_INGRESS_GATEWAY_PORT = 80

PROMETHEUS_SCRAPE_INTERVAL = datetime.timedelta(seconds=30)

ISTIO_TELEMETRY_PORT = 42422
