tools/ncf.py

# -*- coding: utf-8 -*-
# This is a Python module containing functions to parse and analyze ncf components

# This module is designed to run on the latest major versions of the most popular
# server OSes (Debian, Red Hat/CentOS, Ubuntu, SLES, ...)
# At the time of writing (November 2013) these are Debian 7, Red Hat/CentOS 6,
# Ubuntu 12.04 LTS, SLES 11, ...
# The version of Python in all of these is >= 2.6, which is therefore what this
# module must support

import re
import subprocess
import json
import os.path
import shutil
import sys
import os
import codecs
import uuid
from pprint import pprint

# Verbose output
VERBOSE = 0
CFENGINE_PATH="/opt/rudder/bin/cf-promises"

dirs = [ "10_ncf_internals", "20_cfe_basics", "30_generic_methods", "40_it_ops_knowledge", "50_techniques", "60_services", "ncf-hooks.d" ]

tags = {}
common_tags            = [ "name", "description", "parameter", "bundle_name", "bundle_args"]
tags["generic_method"] = [ "documentation", "class_prefix", "class_parameter", "class_parameter_id", "deprecated", "agent_version", "agent_requirements", "parameter_constraint", "parameter_type", "action", "rename" ]
tags["technique"]      = [ "version" ]
[ value.extend(common_tags) for (k,value) in tags.items() ]

optionnal_tags = {}
optionnal_tags["generic_method"] = [ "deprecated", "documentation", "parameter_constraint", "parameter_type", "agent_requirements", "action", "rename" ]
optionnal_tags["technique"]      = [ "parameter" ]
multiline_tags                   = [ "description", "documentation", "deprecated" ]

class NcfError(Exception):
  def __init__(self, message, details="", cause=None):
    self.message = message
    self.details = details
    # try to get details from inner cause
    try:
      # Will not add to details if cause is None or message is None
      self.details += " caused by : " + cause.message
      # Will not add to details if details is None
      self.details += "\n" + cause.details
    except:
      # We got an error while extending error details, just ignore it and keep current value
      pass

  def __str__(self):
    return repr(self.message)


def format_errors(error_list):
  formated_errors = []
  for error in error_list:
    sys.stderr.write("ERROR: " + error.message + "\n")
    sys.stderr.write(error.details + "\n")
    formated_errors.append( { "message": error.message, "details": error.details } )
  sys.stderr.flush()
  return formated_errors


def get_root_dir():
  return os.path.realpath(os.path.dirname(__file__) + "/../")


# This method emulates the behavior of subprocess check_output method.
# We aim to be compatible with Python 2.6, thus this method does not exist
# yet in subprocess.
def check_output(command, env = {}):
  command_env = dict(env)
  if VERBOSE == 1:
    sys.stderr.write("VERBOSE: About to run command '" + " ".join(command) + "'\n")
  command_env["PATH"] = os.environ['PATH']
  process = subprocess.Popen(command, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=None, env=command_env)
  output, error = process.communicate()
  output = output.decode("UTF-8", "ignore")
  error = error.decode("UTF-8", "ignore")
  retcode = process.poll()
  if retcode == 0:
    sys.stderr.write(error)
  else:
    if VERBOSE == 1:
      sys.stderr.write("VERBOSE: Exception triggered, Command returned error code " + str(retcode) + "\n")
    raise NcfError("Error while running '" + " ".join(command) +"' command.", error)

  if VERBOSE == 1:
    sys.stderr.write("VERBOSE: Command output: '" + output + "'" + "\n")
  return output


def get_all_generic_methods_filenames(alt_path=None):
  result = []
  if alt_path is None:
    filelist1 = get_all_generic_methods_filenames_in_dir(get_root_dir() + "/tree/30_generic_methods")
    filelist2 = get_all_generic_methods_filenames_in_dir("/var/rudder/configuration-repository/ncf/30_generic_methods")
    result = filelist1 + filelist2
  else:
    result = get_all_generic_methods_filenames_in_dir(alt_path)

  return result


def get_all_generic_methods_filenames_in_dir(dir):
  return get_all_cf_filenames_under_dir(dir, False)


def get_all_techniques_filenames(migrate_technique = False):
  basePath = "/var/rudder/configuration-repository"
  if migrate_technique:
    path = os.path.join(basePath,"ncf/50_techniques")
  else:
    path = os.path.join(basePath,"techniques")

  return get_all_cf_filenames_under_dir(path, not migrate_technique)

excluded_dirs = [ "applications", "fileConfiguration", "fileDistribution", "jobScheduling", "systemSettings", "system" ]
def get_all_cf_filenames_under_dir(parent_dir, only_technique_cf):
  filenames = []
  filenames_add = filenames.append
  for root, dirs, files in os.walk(parent_dir):
    for dir in dirs:
      if dir not in excluded_dirs:
        filenames = filenames + get_all_cf_filenames_under_dir(os.path.join(parent_dir,dir),only_technique_cf)

    for file in files:
      if only_technique_cf:
        if file == "technique.cf":
          filenames.append(os.path.join(root, file))
      elif  not file.startswith("_") and file.endswith(".cf"):
        filenames.append(os.path.join(root, file))
  return filenames


def parse_technique_metadata(technique_content):
  return parse_bundlefile_metadata(technique_content, "technique")


def parse_generic_method_metadata(technique_content):
  return parse_bundlefile_metadata(technique_content, "generic_method")


def parse_bundlefile_metadata(content, bundle_type):
  res = {}
  warnings = []
  parameters = []
  param_names = set()
  param_constraints = {}
  param_types = {}
  default_constraint = {
    "allow_whitespace_string" : False
  , "allow_empty_string" : False
  , "max_length" : 16384
  }

  multiline = False
  previous_tag = None
  match_line = ""

  for line in content.splitlines():
    # line should already be unicode
    #unicodeLine = unicode(line,"UTF-8") #line.decode('unicode-escape')

    # Parse metadata tag line
    match = re.match("^\s*#\s*@(\w+)\s*(([a-zA-Z0-9_]+)?\s+(.*?)|.*?)\s*$", line, flags=re.UNICODE)
    if match :
      tag = match.group(1)
      # Check if we are a valid tag
      if tag in tags[bundle_type]:
        # tag "parameter" may be multi-valued
        if tag == "parameter":
          if bundle_type == "generic_method":
            param_name = match.group(3)
            parameters.append({'name': param_name, 'description': match.group(4)})
            param_names.add(param_name)
          else:
            parameter = json.loads(match.group(2))
            parameters.append(parameter)
        if tag == "parameter_constraint":
          constraint = json.loads("{" + match.group(4)+ "}")
          # extend default_constraint if it was not already defined)
          param_constraints.setdefault(match.group(3), default_constraint.copy()).update(constraint)
        if tag == "parameter_type":
          param_type = match.group(4)
          param_types[match.group(3)] = param_type
        else:
          res[tag] = match.group(2)
        previous_tag = tag
        continue


    # Parse line without tag, if previous tag was a multiline tag
    if previous_tag is not None and previous_tag in multiline_tags:
      match = re.match("^\s*# ?(.*)$", line, flags=re.UNICODE)
      if match:
        res[previous_tag] += "\n"+match.group(1)
        continue
      else:
        previous_tag = None

    # manage multiline bundle definition
    if multiline:
      match_line += line
    else:
      match_line = line
    if re.match("[^#]*bundle\s+agent\s+(\w+)\s*\([^)]*$", match_line, flags=re.UNICODE|re.MULTILINE|re.DOTALL):
      multiline = True

    # read a complete bundle definition
    match = re.match("[^#]*bundle\s+agent\s+(\w+)\s*(\(([^)]*)\))?\s*\{?\s*$", match_line, flags=re.UNICODE|re.MULTILINE|re.DOTALL)
    if match:
      multiline = False
      res['bundle_name'] = match.group(1)
      res['bundle_args'] = []

      if match.group(3) is not None and len(match.group(3)):
        res['bundle_args'] += [x.strip() for x in match.group(3).split(',')]

      # Any tags should come before the "bundle agent" declaration
      break

  # The tag "class_parameter_id" is a magic tag, it's value is built from class_parameter and the list of args
  if "class_parameter_id" in tags[bundle_type]:
    try:
      res['class_parameter_id'] = res['bundle_args'].index(res['class_parameter'])+1
    except:
      res['class_parameter_id'] = 0
      name = res['bundle_name'] if 'bundle_name' in res else "unknown"
      raise NcfError("The class_parameter name \"" + res['class_parameter'] + "\" does not seem to match any of the bundle's parameters in " + name)

  # Check that we don't have a constraint that is defined on a non existing parameter:
  wrong_constraint_names = set(param_constraints.keys()) - param_names
  if len(wrong_constraint_names) > 0:
    warning_message = "In technique '' defining constraint on non existing parameters: "+ ", ".join(wrong_constraint_names)
    print(warning_message)
    warnings.append(warning_message)

  # Check that we don't have a type that is defined on a non existing parameter:
  wrong_type_names = set(param_types.keys()) - param_names
  if len(wrong_type_names) > 0:
    warning_message = "In technique '' defining type on non existing parameters: "+ ", ".join(wrong_type_names)
    print(warning_message)
    warnings.append(warning_message)

  # If we found any parameters, store them in the res object
  if len(parameters) > 0:
    for param in parameters:
      parameter_name = param["name"]
      constraints = param_constraints.get(param["name"], default_constraint)
      param_type = param_types.get(param["name"], "string")
      param["constraints"] = constraints
      param["type"] = param_type

  res['parameter'] = parameters

  if bundle_type == "generic_method" and not "agent_version" in res:
    res["agent_version"] = ">= 3.6"

  # Remove trailing line breaks
  for tag in multiline_tags:
    if tag in res:
      res[tag] = res[tag].strip('\n\r')

  all_tags = tags[bundle_type]
  expected_tags = [ tag for tag in all_tags if not tag in optionnal_tags[bundle_type]]
  if not set(res.keys()).issuperset(set(expected_tags)):
    missing_keys = [mkey for mkey in expected_tags if mkey not in set(res.keys())]
    name = res['bundle_name'] if 'bundle_name' in res else "unknown"
    raise NcfError("One or more metadata tags not found before the bundle agent declaration (" + ", ".join(missing_keys) + ") in " + name)

  result = { "result" : res, "warnings" : warnings }
  return result


def class_context_and(a, b):
  """Concatenate two CFEngine class contexts, and simplify useless cases"""

  # Filter 'any' class
  contexts = [ context for context in [a,b] if context != "any" ]

  final_contexts = []
  # Add parenthesis if necessary
  if len(contexts) > 1:
    for context in contexts:
      if '.' in context or '&' in context or '|' in context:
        final_contexts.append('(' + context + ')')
      else:
        final_contexts.append(context)
  else:
    final_contexts = contexts

  # If nothing is left, just use the placeholder "any"
  if len(final_contexts) == 0:
    final_contexts.append('any')

  return '.'.join(final_contexts)

def sanitize_cfpromises_string (value):
  """All quotes in json provided by cf-promises are backslashed, so we need to remove all backslash before a quote from all values"""
  return value.replace('\\"', '"').replace("\\'", "'")

def parse_function_call_class_context(function_call):
  """Extract a function call from class context"""
  function_name = function_call['name']
  function_args = [ sanitize_cfpromises_string(function_arg['value']) for function_arg in function_call['arguments']]
  # This is valid for string parameters only should improve for inner function
  return function_name + '(' + ','.join(function_args) + ')'


def parse_technique_methods(technique_file, gen_methods):
  res = []

  # Check file exists
  if not os.path.exists(technique_file):
    raise NcfError("No such file: " + technique_file)

  env = os.environ.copy()
  env['RES_OPTIONS'] = 'attempts:0'
  out = check_output([CFENGINE_PATH, "-pjson", "-f", technique_file], env=env)
  try:
    promises = json.loads(out)
  except Exception as e:
      raise NcfError("An error occured while parsing technique '"+technique_file+"'", cause = e)

  # Sanity check: if more than one bundle, this is a weird file and I'm quitting
  bundle_count = 0
  for bundle in promises['bundles']:
    if bundle['bundleType'] == "agent":
      bundle_count += 1

  if bundle_count > 1:
    raise NcfError("There is not exactly one bundle in " + technique_file + ", aborting")

  # Sanity check: the bundle must be of type agent
  if promises['bundles'][0]['bundleType'] != 'agent':
    raise NcfError("This bundle is not a bundle agent in " + technique_file + ", aborting")

  methods_promises = [promiseType for promiseType in promises['bundles'][0]['promiseTypes'] if promiseType['name']=="methods"]
  methods = []
  if len(methods_promises) >= 1:
    methods = methods_promises[0]['contexts']

  for context in methods:
    class_context = context['name']

    for method in context['promises']:
      method_name = None
      args = None
      promise_class_context = class_context
      ifvarclass_context = None
      # Promiser is used as report component, but in 5.1 we added an unique identifier to identify each generic method to make it more unique
      # (because if promiser are unique and params too, method is not run, cfengine way of life)
      # Format of the identifier is _directiveId_methodIndex, first is given by the variable from report_data, other one is just and int
      promiser = re.sub("_\${report_data\.directive_id}_\d+$", "", method['promiser'])

      for attribute in method['attributes']:
        if attribute['lval'] == 'usebundle':
          if attribute['rval']['type'] == 'functionCall':
            method_name = attribute['rval']['name']
            args = [ sanitize_cfpromises_string(arg['value']) for arg in attribute['rval']['arguments']]
          if attribute['rval']['type'] == 'string':
            method_name = attribute['rval']['value']
        # Extract class context from 'ifvarclass'
        elif attribute['lval'] == 'ifvarclass' or attribute['lval'] == 'if':
          # Simple string get its value
          if attribute['rval']['type'] == 'string':
            ifvarclass_context = attribute['rval']['value']
          # We have a function call here, and need to treat concat case
          if attribute['rval']['type'] == 'functionCall':
            ifvarclass_function = attribute['rval']['name']
            # Function is concat! We use that to handle variable in classes:
            # variables in classes are expanded at runtime, making invalid character in classes
            # We have to canonify variables only, and not the whole if var class
            # as it would replace all the 'invalid' character from the class ( and '.' , not '!', ...)
            # so a class like:
            # Monday.${bundle2.var}.debian.${bundle.var}.linux
            # will be written
            # concat("Monday.",canonify(${bundle2.var}),".debian.",canonify(${bundle.var}),".linux")
            # But the class we really want to extract is:
            # Monday.${bundle2.var}.debian.${bundle.var}.linux
            if ifvarclass_function == 'concat':
              ifvarclass_args = []
              for arg in attribute['rval']['arguments']:
                # simple string get only the value
                if arg['type'] == 'string':
                  ifvarclass_args.append(arg['value'])
                # This a canonify call, extract only the value of the canonify
                elif arg['type'] == 'functionCall' and arg['name'] == 'canonify':
                  ifvarclass_args.append(arg['arguments'][0]['value'])
                # Extract the function call correctly
                else:
                  function_call = parse_function_call_class_context(arg)
                  ifvarclass_args.append(function_call)
              ifvarclass_context = ''.join(ifvarclass_args)
            # Another function call, extract it directly
            else:
              ifvarclass_context = parse_function_call_class_context(attribute['rval'])

      if ifvarclass_context is not None:
        promise_class_context = class_context_and(class_context, ifvarclass_context)

      if not (method_name.startswith("_") or method_name.startswith("log")):
        if promiser == "method_call":
          promiser = gen_methods[method_name]["name"]
        if args:
          res.append({'class_context': promise_class_context, 'component': promiser, 'method_name': method_name, 'args': args})
        else:
          res.append({'class_context': promise_class_context, 'component': promiser, 'method_name': method_name})

  return res

# FUNCTIONS called directly by the API code
###########################################

def get_all_techniques_metadata(include_methods_calls = True, migrate_technique = False):
  methods_data = get_all_generic_methods_metadata()
  methods = methods_data["data"]["generic_methods"]
  all_metadata = {}

  filenames = get_all_techniques_filenames(migrate_technique)
  method_errors = methods_data["errors"]
  warnings = methods_data["warnings"]
  errors = []
  for file in filenames:
    with codecs.open(file, encoding="utf-8") as fd:
      content = fd.read()
    try:
      # path of file is Category/technique_name/technique_version/technique.cf  
      # to get back the category of our technique we need to go up 3 directories
      category = os.path.basename(os.path.dirname(os.path.dirname(os.path.dirname(file))))
      # if we are migrating a technique (5.0 -> 6.0) from configuration-repository/ncf to techniques/ncf_techniques, we need to set the category manually
      if migrate_technique:
        category = "ncf_techniques"
      result = parse_technique_metadata(content)
      metadata = result["result"]
      metadata["category"] = category
      warnings.extend(result["warnings"])

      if include_methods_calls:
        method_calls = parse_technique_methods(file, methods)
        metadata['method_calls'] = method_calls

      all_metadata[metadata['bundle_name']] = metadata

    except NcfError as e:
      file_abs_path = os.path.abspath(file)
      error = NcfError("Could not parse Technique file '"+ file_abs_path + "'", cause=e)
      errors.append(error)
      continue # skip this file, it doesn't have the right tags in - yuk!

  return { "data": { "techniques" : all_metadata, "generic_methods" : methods }, "errors": method_errors + format_errors(errors), "warnings": warnings }


def get_agents_support(method, content):
  agents = []
  if os.path.exists("/var/rudder/configuration-repository/dsc/ncf/30_generic_methods/" + method + ".ps1"):
    agents.append("dsc")
  if not re.search(r'\n\s*bundle\s+agent\s+'+method+r'\b.*?\{\s*\}', content, re.DOTALL): # this matches an empty bundle content
    agents.append("cfengine-community")
  return agents

def get_all_generic_methods_metadata(alt_path=None):
  all_metadata = {}

  filenames = get_all_generic_methods_filenames(alt_path)
  errors = []
  warnings = []

  for file in filenames:
    with codecs.open(file, encoding="utf-8") as fd:
      content = fd.read()
    try:
      result = parse_generic_method_metadata(content)
      metadata = result["result"]
      warnings.extend(result["warnings"])
      metadata["agent_support"] = get_agents_support(metadata["bundle_name"], content)
      all_metadata[metadata['bundle_name']] = metadata
    except NcfError as e:
      error = NcfError("Could not parse generic method in '" + file + "'", cause=e )
      errors.append(error)
      continue # skip this file, it doesn't have the right tags in - yuk!

  return { "data": { "generic_methods" : all_metadata }, "errors": format_errors(errors), "warnings": warnings }