In [None]:
%sh 
# Diagnostic cell: prints the node IP and the Datadog/Databricks environment
# variables visible to this shell, so the cluster configuration can be checked
# before the init script is installed.

hostip=$(hostname -I | xargs)
echo "IP: ${hostip}"

# DD_API_KEY is a secret: never echo its value into notebook output, where it
# would be stored with the notebook and visible to anyone who can read it.
# Report only whether it is set.
api_key_state="<not set>"
if [ -n "${DD_API_KEY}" ]; then
  api_key_state="<set, value hidden>"
fi

cat <<EOF

DD_SITE=${DD_SITE}
DD_ENV=${DD_ENV}
DD_API_KEY=${api_key_state}
DD_TAGS=${DD_TAGS}
CUSTOM_DD_TAGS=${CUSTOM_DD_TAGS}
host_ip: ${hostip}
databricks_cluster_id: ${DB_CLUSTER_ID}
databricks_cluster_name: ${DB_CLUSTER_NAME}
user: $(whoami)
EOF

In [None]:
%python
# This cell writes the `datadog-install-driver-workers.sh` init script to DBFS. It requires that the
# user running this cell has the Databricks admin role.

# The name of the dbfs directory where the Datadog init script will be stored
initdir = "datadog"

# The script body is a raw string so that the backslashes bash needs (\$, \d, \-, \/) reach the file
# untouched and Python does not flag them as invalid escape sequences. The text starts immediately
# after the opening quotes so that the shebang is the very first line of the written file.
# The final argument (True) asks dbutils.fs.put to overwrite an existing file of the same name.
dbutils.fs.put("dbfs:/"+initdir+"/datadog-install-driver-workers.sh", r"""#!/bin/bash

#######################################################
##########             FOREWORD              ##########
#######################################################
# This init script will install a Datadog agent on all nodes, driver and worker, in a Databricks cluster.
# It can also be used as a global init script, but that requires the environment variables to be hard coded,
# as there is no way to specify environment variables globally. Variables which need to be uncommented and
# hard coded for this use case are in the "Customizable Environment Vars" section below.
#
# Driver-node-specific configuration
#   * Configures a sensible default set of tags: databricks_cluster_id, databricks_cluster_name, spark_node
#   * Configures the Datadog agent using the cluster environment variables
#   * Enables Log collection and Live Processes
#     * Disable Log Collection by setting the `Enable Log Collection` section to `false`
#     * Disable Live Processes by setting the `Enable Live Processes` section to `'false'`
#   * Configures the Datadog Agent's Spark integration to
#     * Collect metrics from the Spark API
#     * Collect Spark logs
#     * Collect Databricks logs
#
# Worker-node-specific configuration
#   * Configures a sensible default set of tags: databricks_cluster_id, databricks_cluster_name, spark_node
#   * Configures the Datadog agent using the cluster environment variables
#   * Enables Live Processes
#     * Disable Live Processes by setting the `Enable Live Processes` section to `'false'`
#
# Environment variables
#   * DD_SITE (required)
#     * Can be one of the following values. Consult the URL you use to access Datadog to choose the appropriate value.
#       * datadoghq.com
#       * datadoghq.eu
#       * us3.datadoghq.com
#       * us5.datadoghq.com
#   * DD_API_KEY (required)
#     * Obtain from ___.datadoghq.___/organization-settings/api-keys (replace domain with the domain you use to access Datadog)
#   * DD_ENV (required)
#     * This is an important tag that is integral to Datadog's Unified Service Tagging standard.
#       * https://docs.datadoghq.com/getting_started/tagging/unified_service_tagging?tab=kubernetes
#     * Examples: prod, dev, qa, local, test
#   * CUSTOM_DD_TAGS (optional)
#     * These can be any tags important to this environment and can represent any concepts that you wish to capture for the Databricks cluster.
#     * You must format the values of these tags to the requirements here - https://docs.datadoghq.com/getting_started/tagging/#defining-tags
#     * The script requires you to format the tags as comma separated key:value pairs. The full value must be surrounded in quotes due to bash
#       processing syntax
#     * Example: "team:data-science,department:compliance"
#
# Notes and considerations
#   * The script requires that your clusters have access to the internet to download https://s3.amazonaws.com/dd-agent/scripts/install_script.sh.
#     This install_script.sh will install the Datadog Agent using the best method for the target OS. For Debian, this adds Datadog GPG keys, installs
#     the Datadog APT repo, apt installs the dependencies of the Datadog Agent, and installs the Datadog Agent from the Datadog repo.
#       * In instances of a private cluster, you'll need to host the Datadog Agent package and its dependencies in an internal repo, make the installers
#         accessible as deb packages, or provide another way to install these packages without downloading them from the internet. Lines containing the
#         install_script.sh URL above will need to be replaced with the install method of your choice.

########################################################
########## Create Datadog Installation Script ##########
########################################################

# NOTE: the heredoc delimiter below is unquoted, so every dollar sign that is meant for
# /tmp/start_datadog.sh must be backslash-escaped; an unescaped one would be expanded while the
# file is being generated instead of when it runs. The file is truncated (>) rather than appended
# to, so re-running this init script never leaves two copies of the installer in one file, and the
# shebang is emitted as the first line of the generated file.
cat <<EOF > /tmp/start_datadog.sh
#!/bin/bash

#######################################################
##########   Customizable Environment Vars   ##########
#######################################################

## For Global Init Script deployments, uncomment the variables below and hard code their values
## For cluster based configs, add the same variables to the cluster Advanced Options > Environment Variables
# DD_SITE=<dd-site>       # Required
# DD_API_KEY=<dd-api-key> # Required
# DD_ENV=<env>            # Required
# CUSTOM_DD_TAGS="<comma separated k:v pairs>" # Optional

#######################################################
##########   Initialize required variables   ##########
#######################################################

hostip=\$(hostname -I | xargs)
echo "host_ip: \${hostip}, databricks_cluster_id: \${DB_CLUSTER_ID}, databricks_cluster_name: \${DB_CLUSTER_NAME}, user: \$(whoami)"

#######################################################
##########    Installation on Driver Node    ##########
#######################################################

if [[ \${DB_IS_DRIVER} = "TRUE" ]]; then

  echo "Installing Datadog agent in the driver (master node) ..."

  ##########################################
  #        Datadog Agent Installation      #
  ##########################################

  # Configure Datadog Agent host tags for the Driver. These tags include the custom tags set up in our environment variables.
  # The custom tags (and their separating comma) are appended only when CUSTOM_DD_TAGS is non-empty.
  DD_TAGS="databricks_cluster_id:\${DB_CLUSTER_ID},databricks_cluster_name:\${DB_CLUSTER_NAME},spark_node:driver\${CUSTOM_DD_TAGS:+,\${CUSTOM_DD_TAGS}}"

  # Datadog Agent v7 (latest). Values are quoted so that a cluster name containing spaces does not split the command.
  DD_AGENT_MAJOR_VERSION=7 DD_API_KEY="\${DD_API_KEY}" DD_SITE="\${DD_SITE}" DD_HOST_TAGS="\${DD_TAGS}" bash -c "\$(curl -L https://s3.amazonaws.com/dd-agent/scripts/install_script.sh)"

  # Wait for the Datadog Agent to be installed
  while [ -z "\$datadoginstalled" ]; do
    if [ -e "/etc/datadog-agent/datadog.yaml" ]; then
      datadoginstalled=TRUE
    fi
    sleep 2
  done
  echo "Datadog Agent is installed"

  ##########################################
  #   Create custom Datadog Agent config   #
  ##########################################

  echo "api_key: \${DD_API_KEY}
site: \${DD_SITE}
tags: [\${DD_TAGS}]
env: \${DD_ENV}
# Enable Live Processes
process_config:
    enabled: 'true'
# Enable Log Collection
logs_enabled: true" > /etc/datadog-agent/datadog.yaml

  ##########################################
  # Create custom Spark integration config #
  ##########################################

  # Discover the port that Spark is using on the Driver node
  while [ -z "\$gotparams" ]; do
    if [ -e "/tmp/driver-env.sh" ]; then
      DB_DRIVER_PORT=\$(grep -i "CONF_UI_PORT" /tmp/driver-env.sh | cut -d'=' -f2)
      gotparams=TRUE
    fi
    sleep 2
  done

  # Creating config file for Spark integration with structured stream metrics ENABLED.
  # You can modify this section with more Spark integration options. For all available options see
  # https://github.com/DataDog/integrations-core/blob/master/spark/datadog_checks/spark/data/conf.yaml.example

  echo "init_config:
instances:
    - spark_url: http://\${DB_DRIVER_IP}:\${DB_DRIVER_PORT}
      spark_cluster_mode: spark_driver_mode
      cluster_name: \${hostip}
      streaming_metrics: true
logs:
    - type: file
      path: /databricks/driver/logs/*
      exclude_paths:
        - /databricks/driver/logs/*.gz
        - /databricks/driver/logs/metrics.json
      source: databricks
      service: databricks
      log_processing_rules:
        - type: multi_line
          name: new_log_start_with_date
          pattern: \d{2,4}[\-\/]\d{2,4}[\-\/]\d{2,4}.*" > /etc/datadog-agent/conf.d/spark.yaml

#######################################################
##########    Installation on Worker Node    ##########
#######################################################

else

  ##########################################
  #        Datadog Agent Installation      #
  ##########################################

  # Configure Datadog Agent host tags for the Worker. These tags include the custom tags set up in our environment variables.
  # The custom tags (and their separating comma) are appended only when CUSTOM_DD_TAGS is non-empty.
  DD_TAGS="databricks_cluster_id:\${DB_CLUSTER_ID},databricks_cluster_name:\${DB_CLUSTER_NAME},spark_node:worker\${CUSTOM_DD_TAGS:+,\${CUSTOM_DD_TAGS}}"

  # Datadog Agent v7 (latest). Values are quoted so that a cluster name containing spaces does not split the command.
  DD_AGENT_MAJOR_VERSION=7 DD_API_KEY="\${DD_API_KEY}" DD_SITE="\${DD_SITE}" DD_HOST_TAGS="\${DD_TAGS}" bash -c "\$(curl -L https://s3.amazonaws.com/dd-agent/scripts/install_script.sh)"

  # Wait for the Datadog Agent to be installed
  while [ -z "\$datadoginstalled" ]; do
    if [ -e "/etc/datadog-agent/datadog.yaml" ]; then
      datadoginstalled=TRUE
    fi
    sleep 2
  done
  echo "Datadog Agent is installed"

  ##########################################
  #   Create custom Datadog Agent config   #
  ##########################################

  echo "api_key: \${DD_API_KEY}
site: \${DD_SITE}
tags: [\${DD_TAGS}]
env: \${DD_ENV}
# Enable Live Processes
process_config:
    enabled: 'true'" > /etc/datadog-agent/datadog.yaml
fi

#######################################################
##########            Post Install           ##########
#######################################################

# Restart the Datadog Agent to pick up configuration changes
sudo service datadog-agent restart

EOF

#########################################################
########## Run the Datadog Installation Script ##########
#########################################################

# Make the generated script executable, then run it in the background (detached from this init
# script) with its output appended to /tmp/datadog_start.log
chmod a+x /tmp/start_datadog.sh
/tmp/start_datadog.sh >> /tmp/datadog_start.log 2>&1 & disown
""", True)