Skip to content

Commit

Permalink
Add config modules for controlling IBM PowerVM RMC. (#584)
Browse files Browse the repository at this point in the history
Reliable Scalable Cluster Technology (RSCT) is a set of software
components that together provide a comprehensive clustering
environment(RAS features) for IBM PowerVM based virtual machines. RSCT
includes the Resource Monitoring and Control (RMC) subsystem. RMC is a
generalized framework used for managing, monitoring, and manipulating
resources. RMC runs as a daemon process on individual machines and needs
creation of unique node id and restarts during VM boot.

LP: #1895979

Co-authored-by: Scott Moser <smoser@brickies.net>
  • Loading branch information
Aman306 and smoser committed Oct 28, 2020
1 parent 29f0d13 commit f99d4f9
Show file tree
Hide file tree
Showing 5 changed files with 414 additions and 0 deletions.
159 changes: 159 additions & 0 deletions cloudinit/config/cc_refresh_rmc_and_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# (c) Copyright IBM Corp. 2020 All Rights Reserved
#
# Author: Aman Kumar Sinha <amansi26@in.ibm.com>
#
# This file is part of cloud-init. See LICENSE file for license information.

"""
Refresh IPv6 interface and RMC
------------------------------
**Summary:** Ensure Network Manager is not managing IPv6 interface
This module is IBM PowerVM Hypervisor specific
Reliable Scalable Cluster Technology (RSCT) is a set of software components
that together provide a comprehensive clustering environment(RAS features)
for IBM PowerVM based virtual machines. RSCT includes the Resource
Monitoring and Control (RMC) subsystem. RMC is a generalized framework used
for managing, monitoring, and manipulating resources. RMC runs as a daemon
process on individual machines and needs creation of unique node id and
restarts during VM boot.
For more details, refer to
https://www.ibm.com/support/knowledgecenter/en/SGVKBA_3.2/admin/bl503_ovrv.htm
This module handles
- Refreshing RMC
- Disabling NetworkManager from handling IPv6 interface, as IPv6 interface
is used for communication between RMC daemon and PowerVM hypervisor.
**Internal name:** ``cc_refresh_rmc_and_interface``
**Module frequency:** per always
**Supported distros:** RHEL
"""

from cloudinit import log as logging
from cloudinit.settings import PER_ALWAYS
from cloudinit import util
from cloudinit import subp
from cloudinit import netinfo

import errno

# Module frequency, matching the "per always" stated in the module docstring.
frequency = PER_ALWAYS

LOG = logging.getLogger(__name__)
# Name of the RMC control command. It is looked up on PATH, so ensure that
# /opt/rsct/bin has been added to the standard PATH of the distro.
# The symlink to rmcctrl is /usr/sbin/rsct/bin/rmcctrl .
RMCCTRL = 'rmcctrl'


def handle(name, _cfg, _cloud, _log, _args):
    """Bounce the IPv6-only interfaces and detach them from NetworkManager.

    Does nothing when the ``rmcctrl`` command cannot be found on PATH.
    Otherwise every interface returned by find_ipv6_ifaces() is brought
    down and up again and its ifcfg file is rewritten with
    NM_CONTROLLED=no; NetworkManager is then restarted once.

    All parameters belong to the cloud-init module interface and are not
    used by this function.
    """
    # NOTE(review): refresh_rmc() is defined in this module but is never
    # called from here -- confirm whether that is intentional.
    rmcctrl_path = subp.which(RMCCTRL)
    if not rmcctrl_path:
        LOG.debug("No '%s' in path, disabled", RMCCTRL)
        return

    LOG.debug(
        'Making the IPv6 up explicitly. '
        'Ensuring IPv6 interface is not being handled by NetworkManager '
        'and it is restarted to re-establish the communication with '
        'the hypervisor')

    candidates = find_ipv6_ifaces()
    if not candidates:
        LOG.debug("Did not find any interfaces with ipv6 addresses.")
        return

    # For each candidate: cycle the link, then set NM_CONTROLLED=no in its
    # ifcfg file.  NetworkManager is restarted once, after all edits.
    for nic in candidates:
        refresh_ipv6(nic)
        disable_ipv6(sysconfig_path(nic))
    restart_network_manager()


def find_ipv6_ifaces():
    """Return the names of the interfaces that carry no IPv4 address.

    The loopback interface ("lo") is always excluded.  Every other
    interface reported by netinfo.netdev_info() is kept only when its
    "ipv4" entry is missing or empty.

    Returns:
        A list of interface names, in the order netdev_info() reports them.
    """
    ifaces = []
    for iface, data in netinfo.netdev_info().items():
        if iface == "lo":
            # Loopback must never be bounced or rewritten, so skip it
            # even when it happens to have no IPv4 address configured.
            LOG.debug('Skipping localhost interface')
            continue
        if data.get("ipv4"):
            # skip this interface, as it has ipv4 addrs
            continue
        ifaces.append(iface)
    return ifaces


def refresh_ipv6(interface):
    """Cycle the link state of ``interface``: down first, then up.

    Runs ``ip link set <interface> down`` followed by
    ``ip link set <interface> up``.  An error raised by the first command
    propagates, in which case the second command is not run.
    """
    # The interface is explicitly brought back up so that communication
    # with the hypervisor can be re-established.
    for state in ('down', 'up'):
        subp.subp(['ip', 'link', 'set', interface, state])


def sysconfig_path(iface):
    """Return the network-scripts ifcfg file path for interface ``iface``."""
    return ''.join(('/etc/sysconfig/network-scripts/ifcfg-', iface))


def restart_network_manager():
    """Restart the NetworkManager service through systemctl."""
    restart_cmd = ['systemctl', 'restart', 'NetworkManager']
    subp.subp(restart_cmd)


def disable_ipv6(iface_file):
    """Rewrite an ifcfg file so NetworkManager stops managing the interface.

    The purpose is to keep NetworkManager from interrupting the
    communication between the hypervisor and the VM.  If the file contains
    'IPV6INIT', every line matched by search() is dropped and a single
    ``NM_CONTROLLED=no`` line is appended.  This function only edits the
    file; it does not restart NetworkManager itself.

    Args:
        iface_file: path of the ifcfg file to edit.

    Raises:
        IOError: if the file cannot be read for any reason other than
            not existing.  A missing file is logged and ignored.
    """
    try:
        contents = util.load_file(iface_file)
    except IOError as e:
        if e.errno != errno.ENOENT:
            raise
        LOG.debug("IPv6 interface file %s does not exist\n",
                  iface_file)
        # Nothing to edit.  Returning here is required: ``contents`` was
        # never assigned, so falling through would fail on the next check.
        return

    if 'IPV6INIT' not in contents:
        LOG.debug("Interface file %s did not have IPV6INIT", iface_file)
        return

    LOG.debug("Editing interface file %s ", iface_file)

    # Dropping any NM_CONTROLLED or IPV6 lines from IPv6 interface file.
    lines = [line for line in contents.splitlines() if not search(line)]
    lines.append("NM_CONTROLLED=no")

    with open(iface_file, "w") as fp:
        fp.write("\n".join(lines) + "\n")


def search(contents):
    """Tell whether an ifcfg line is an NM_CONTROLLED or IPv6 setting.

    Args:
        contents: a single line of an interface file.

    Returns:
        True when the line starts with IPV6ADDR, IPADDR6, IPV6INIT or
        NM_CONTROLLED; False otherwise.  The match is anchored at the very
        first character, so a line with leading whitespace does not match.
    """
    # str.startswith accepts a tuple of prefixes: one call instead of a
    # chain of ``or``-ed calls.
    return contents.startswith(
        ("IPV6ADDR", "IPADDR6", "IPV6INIT", "NM_CONTROLLED"))


def refresh_rmc():
    """Stop and then start the RMC subsystem using ``rmcctrl``.

    Refreshing RMC is meant to keep the connection between the RMC daemon
    and the hypervisor healthy, so that taking the IPv6 interface down and
    up does not impact their communication.

    Raises:
        Exception: any error from either command is logged and re-raised.
    """
    # -z : stop Resource Monitoring & Control subsystem and all resource
    #      managers, but the command does not return control to the user
    #      until the subsystem and all resource managers are stopped.
    # -s : start Resource Monitoring & Control subsystem.
    try:
        for flag in ('-z', '-s'):
            subp.subp([RMCCTRL, flag])
    except Exception:
        util.logexc(LOG, 'Failed to refresh the RMC subsystem.')
        raise
143 changes: 143 additions & 0 deletions cloudinit/config/cc_reset_rmc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# (c) Copyright IBM Corp. 2020 All Rights Reserved
#
# Author: Aman Kumar Sinha <amansi26@in.ibm.com>
#
# This file is part of cloud-init. See LICENSE file for license information.


"""
Reset RMC
------------
**Summary:** reset rsct node id
Reset RMC module is IBM PowerVM Hypervisor specific
Reliable Scalable Cluster Technology (RSCT) is a set of software components,
that together provide a comprehensive clustering environment (RAS features)
for IBM PowerVM based virtual machines. RSCT includes the Resource monitoring
and control (RMC) subsystem. RMC is a generalized framework used for managing,
monitoring, and manipulating resources. RMC runs as a daemon process on
individual machines and needs creation of unique node id and restarts
during VM boot.
For more details, refer to
https://www.ibm.com/support/knowledgecenter/en/SGVKBA_3.2/admin/bl503_ovrv.htm
This module handles
- creation of the unique RSCT node id to every instance/virtual machine
and ensure once set, it isn't changed subsequently by cloud-init.
In order to do so, it restarts RSCT service.
Prerequisite of using this module is to install RSCT packages.
**Internal name:** ``cc_reset_rmc``
**Module frequency:** per instance
**Supported distros:** rhel, sles and ubuntu
"""
import os

from cloudinit import log as logging
from cloudinit.settings import PER_INSTANCE
from cloudinit import util
from cloudinit import subp

# Module frequency, matching the "per instance" stated in the module
# docstring.
frequency = PER_INSTANCE

# rmcctrl is expected to be in the system PATH (/opt/rsct/bin).
# RSCT_PATH is prepended to PATH while handle() runs so that recfgct can
# be found as well.
# The symlinks for rmcctrl and recfgct are
# /usr/sbin/rsct/bin/rmcctrl and
# /usr/sbin/rsct/install/bin/recfgct respectively.
RSCT_PATH = '/opt/rsct/install/bin'
RMCCTRL = 'rmcctrl'
RECFGCT = 'recfgct'

LOG = logging.getLogger(__name__)

# File whose first line get_node_id() returns as the node id.
NODE_ID_FILE = '/etc/ct_node_id'


def handle(name, _cfg, cloud, _log, _args):
    """Reset RMC so that a new RSCT node id is generated.

    The module is skipped when the datasource platform type is 'none' or
    when the RSCT_PATH directory does not exist.  Otherwise RSCT_PATH is
    temporarily prepended to PATH, reset_rmc() is run, and the process
    environment is put back exactly as it was found.

    Args:
        name: module name (unused).
        _cfg: cloud-config dictionary (unused).
        cloud: cloud object; only ``cloud.datasource.platform_type`` is
            read.
        _log: logger passed by the caller (unused).
        _args: module arguments (unused).

    Raises:
        Exception: whatever reset_rmc() raises is propagated after PATH
            has been restored.
    """
    # Ensuring node id has to be generated only once during first boot
    if cloud.datasource.platform_type == 'none':
        LOG.debug('Skipping creation of new ct_node_id node')
        return

    if not os.path.isdir(RSCT_PATH):
        LOG.debug("module disabled, RSCT_PATH not present")
        return

    orig_path = os.environ.get('PATH')
    try:
        add_path(orig_path)
        reset_rmc()
    finally:
        # Compare against None rather than truthiness so that a PATH that
        # was set but empty is restored instead of being deleted.
        if orig_path is not None:
            os.environ['PATH'] = orig_path
        else:
            # pop() cannot raise, so cleanup never masks an error coming
            # out of the try body.
            os.environ.pop('PATH', None)


def reconfigure_rsct_subsystems():
    """Run ``recfgct`` and return its standard output.

    Reconfiguring the RSCT subsystems includes removing all RSCT data
    under the /var/ct directory, generating a new node ID, and making it
    appear as if the RSCT components were just installed.

    Returns:
        The first element of the subp.subp() result for the command; its
        stripped form is also logged at debug level.

    Raises:
        subp.ProcessExecutionError: logged and re-raised when the command
            fails.
    """
    try:
        stdout = subp.subp([RECFGCT])[0]
    except subp.ProcessExecutionError:
        util.logexc(LOG, 'Failed to reconfigure the RSCT subsystems.')
        raise
    LOG.debug(stdout.strip())
    return stdout


def get_node_id():
    """Return the first line of NODE_ID_FILE.

    Raises:
        Exception: any failure while loading or splitting the file is
            logged and re-raised.
    """
    try:
        contents = util.load_file(NODE_ID_FILE)
        # Everything before the first newline; the whole text if there
        # is no newline at all.
        first_line, _sep, _rest = contents.partition('\n')
        return first_line
    except Exception:
        util.logexc(LOG, 'Failed to get node ID from file %s.' % NODE_ID_FILE)
        raise


def add_path(orig_path):
    """Put RSCT_PATH at the front of the process PATH.

    This lets cloud-init find and run recfgct, which creates the new
    node id.

    Args:
        orig_path: the current PATH value; None or empty means PATH
            becomes RSCT_PATH alone.

    Returns:
        The new value of ``os.environ['PATH']``.
    """
    if orig_path:
        new_path = RSCT_PATH + ":" + orig_path
    else:
        new_path = RSCT_PATH
    os.environ['PATH'] = new_path
    return os.environ['PATH']


def rmcctrl():
    """Stop the RMC subsystem and all resource managers (``rmcctrl -z``).

    Stopping RMC first allows changes to be made to it afterwards.

    Returns:
        Whatever subp.subp() returns for the command.

    Raises:
        Exception: any failure is logged and re-raised.
    """
    stop_cmd = [RMCCTRL, '-z']
    try:
        result = subp.subp(stop_cmd)
    except Exception:
        util.logexc(LOG, 'Failed to stop the RMC subsystem.')
        raise
    return result


def reset_rmc():
    """Stop RMC, reconfigure RSCT and verify that the node id changed.

    Raises:
        Exception: when the node id read after reconfiguration equals the
            one read before it, or when any of the helper steps fails.
    """
    LOG.debug('Attempting to reset RMC.')

    previous_id = get_node_id()
    LOG.debug('Node ID at beginning of module: %s', previous_id)

    # RMC and all resource managers have to be stopped before the RSCT
    # subsystems are reconfigured.
    rmcctrl()
    reconfigure_rsct_subsystems()

    current_id = get_node_id()
    LOG.debug('Node ID at end of module: %s', current_id)

    # A differing id means a new one was generated: nothing more to do.
    if current_id != previous_id:
        return

    msg = 'New node ID did not get generated.'
    LOG.error(msg)
    raise Exception(msg)
2 changes: 2 additions & 0 deletions config/cloud.cfg.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ cloud_final_modules:
- chef
- mcollective
- salt-minion
- reset_rmc
- refresh_rmc_and_interface
- rightscale_userdata
- scripts-vendor
- scripts-per-once
Expand Down
Loading

0 comments on commit f99d4f9

Please sign in to comment.