Skip to content

Commit

Permalink
Detect and handle changes in etcd info (#17)
Browse files Browse the repository at this point in the history
* Detect and handle changes in etcd info

The connection and cert info for etcd can change. When it does, the
service config needs to be updated and the service restarted.

* Ensure etcd creds are rewritten

* Ensure NPC config gets updated with cert

* Fix services not restarting on ca/etcd cert change
  • Loading branch information
johnsca authored and George Kraft committed Jul 16, 2019
1 parent 201b009 commit c835ecd
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 15 deletions.
51 changes: 36 additions & 15 deletions reactive/calico.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import hashlib
import os
import json
import gzip
Expand All @@ -12,12 +13,13 @@
remove_state)
from charms.reactive import hook
from charms.reactive import endpoint_from_flag
from charms.reactive import data_changed
from charmhelpers.core import hookenv, unitdata
from charmhelpers.core.hookenv import log, status_set, resource_get
from charmhelpers.core.hookenv import DEBUG, ERROR
from charmhelpers.core.hookenv import unit_private_ip
from charmhelpers.core.templating import render
from charmhelpers.core.host import (arch, service, service_start,
from charmhelpers.core.host import (arch, service, service_restart,
service_running)

# TODO:
Expand Down Expand Up @@ -47,6 +49,8 @@ def upgrade_charm():
remove_state('calico.binaries.installed')
remove_state('calico.cni.configured')
remove_state('calico.image.pulled')
remove_state('calico.service.installed')
remove_state('calico.npc.deployed')
try:
log('Deleting /etc/cni/net.d/10-calico.conf')
os.remove('/etc/cni/net.d/10-calico.conf')
Expand Down Expand Up @@ -119,9 +123,24 @@ def blocked_without_etcd():
def install_etcd_credentials():
    ''' Write the etcd client credentials to disk and record a baseline
    of the etcd connection data. '''
    etcd_endpoint = endpoint_from_flag('etcd.available')
    etcd_endpoint.save_client_credentials(
        ETCD_KEY_PATH,
        ETCD_CERT_PATH,
        ETCD_CA_PATH,
    )

    # Record the initial connection string and credentials under the
    # 'calico.etcd.data' key so that later changes can be detected.
    connection_string = etcd_endpoint.get_connection_string()
    client_credentials = etcd_endpoint.get_client_credentials()
    data_changed('calico.etcd.data', (connection_string, client_credentials))

    set_state('calico.etcd-credentials.installed')


@when('etcd.tls.available', 'calico.service.installed')
def check_etcd_updates():
    ''' Rewrite the etcd credentials and clear the service/NPC flags when
    the etcd connection string or client credentials have changed. '''
    etcd_endpoint = endpoint_from_flag('etcd.available')
    current_data = (
        etcd_endpoint.get_connection_string(),
        etcd_endpoint.get_client_credentials(),
    )

    # Nothing to do while the data matches what was last recorded under
    # the 'calico.etcd.data' key.
    if not data_changed('calico.etcd.data', current_data):
        return

    etcd_endpoint.save_client_credentials(
        ETCD_KEY_PATH,
        ETCD_CERT_PATH,
        ETCD_CA_PATH,
    )
    # Drop both flags; presumably this lets the handlers that set them run
    # again against the rewritten credentials -- confirm against those
    # handlers' decorators.
    for stale_flag in ('calico.service.installed', 'calico.npc.deployed'):
        remove_state(stale_flag)


def get_bind_address():
''' Returns a non-fan bind address for the cni endpoint '''
try:
Expand Down Expand Up @@ -169,17 +188,9 @@ def install_calico_service():
'ip': get_bind_address(),
'cnx_node_image': uri
})
set_state('calico.service.installed')


@when('calico.service.installed')
@when_not('calico.service.started')
def start_calico_service():
''' Start the calico systemd service. '''
status_set('maintenance', 'Starting calico-node service.')
service_start('calico-node')
service_restart('calico-node')
service('enable', 'calico-node')
set_state('calico.service.started')
set_state('calico.service.installed')


@when('calico.binaries.installed', 'etcd.available',
Expand Down Expand Up @@ -239,7 +250,7 @@ def configure_master_cni():
set_state('calico.cni.configured')


@when('etcd.available', 'calico.cni.configured', 'calico.service.started',
@when('etcd.available', 'calico.cni.configured', 'calico.service.installed',
'cni.is-worker', 'kube-api-endpoint.available')
@when_not('calico.npc.deployed')
def deploy_network_policy_controller():
Expand All @@ -259,6 +270,7 @@ def deploy_network_policy_controller():
etcd = endpoint_from_flag('etcd.available')
encoded_creds = hookenv.config('registry-credentials')
registry = hookenv.config('registry')
etcd_cert_hash = get_etcd_cert_hash()
apiserver_ips = get_apiserver_ips()
templates = []

Expand All @@ -277,14 +289,16 @@ def deploy_network_policy_controller():
'etcd_ca': read_file_to_base64(ETCD_CA_PATH)
}),
('calico-kube-controllers.yaml', {
'registry': registry
'registry': registry,
'etcd_cert_hash': etcd_cert_hash
}),
('cnx-manager-tls-secret.yaml', {
'key': read_file_to_base64(key_path),
'cert': read_file_to_base64(cert_path)
}),
('cnx-etcd.yaml', {
'registry': registry
'registry': registry,
'etcd_cert_hash': etcd_cert_hash
}),
('cnx-policy.yaml', {})
]
Expand Down Expand Up @@ -332,7 +346,7 @@ def deploy_network_policy_controller():
set_state('calico.npc.deployed')


@when('calico.service.started', 'calico.pool.configured',
@when('calico.service.installed', 'calico.pool.configured',
'calico.cni.configured')
@when_any('cni.is-master', 'calico.npc.deployed')
def ready():
Expand Down Expand Up @@ -473,3 +487,10 @@ def calicoctl(*args):
elif run.stdout:
log(' '.join(run.stderr.decode()), DEBUG)
log(run.stdout.decode(), DEBUG)


def get_etcd_cert_hash():
    ''' Returns the SHA-256 hex digest of the file at ETCD_CERT_PATH. '''
    # Read as bytes so the digest reflects the exact on-disk contents.
    with open(ETCD_CERT_PATH, 'rb') as cert_file:
        digest = hashlib.sha256(cert_file.read())
    return digest.hexdigest()
5 changes: 5 additions & 0 deletions templates/calico-kube-controllers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ metadata:
namespace: kube-system
labels:
k8s-app: calico-kube-controllers
cdk-restart-on-ca-change: "true"
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ''
spec:
Expand All @@ -20,6 +21,10 @@ spec:
namespace: kube-system
labels:
k8s-app: calico-kube-controllers
annotations:
# annotate etcd cert hash, so when the cert changes, k8s will restart
# the pods in this deployment
cdk-etcd-cert-hash: "{{ etcd_cert_hash }}"
spec:
nodeSelector:
beta.kubernetes.io/os: linux
Expand Down
6 changes: 6 additions & 0 deletions templates/cnx-etcd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ metadata:
labels:
apiserver: "true"
k8s-app: cnx-apiserver
cdk-restart-on-ca-change: "true"
spec:
replicas: 1
strategy:
Expand All @@ -277,6 +278,10 @@ spec:
labels:
apiserver: "true"
k8s-app: cnx-apiserver
annotations:
# annotate etcd cert hash, so when the cert changes, k8s will restart
# the pods in this deployment
cdk-etcd-cert-hash: "{{ etcd_cert_hash }}"
spec:
nodeSelector:
beta.kubernetes.io/os: linux
Expand Down Expand Up @@ -423,6 +428,7 @@ metadata:
namespace: kube-system
labels:
k8s-app: cnx-manager
cdk-restart-on-ca-change: "true"
spec:
replicas: 1
strategy:
Expand Down
3 changes: 3 additions & 0 deletions templates/elasticsearch-operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ metadata:
namespace: calico-monitoring
labels:
operator: prometheus
cdk-restart-on-ca-change: "true"
spec:
replicas: 1
template:
Expand Down Expand Up @@ -185,6 +186,8 @@ kind: Deployment
metadata:
name: elasticsearch-operator
namespace: calico-monitoring
labels:
cdk-restart-on-ca-change: "true"
spec:
replicas: 1
template:
Expand Down
1 change: 1 addition & 0 deletions templates/monitor-calico.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ metadata:
namespace: calico-monitoring
labels:
k8s-app: tigera-fluentd-node
cdk-restart-on-ca-change: "true"
spec:
selector:
matchLabels:
Expand Down

0 comments on commit c835ecd

Please sign in to comment.