#
# Copyright (c) 2017-2019 Cloudera, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Sample Cloudera Altus Director configuration file based on the Cloudera Azure reference architecture:
# http://www.cloudera.com/documentation/other/reference-architecture/PDF/cloudera_ref_arch_azure.pdf
#
# Highly Available cluster with 3 master nodes and 5 worker nodes
#
#
# Cluster name
#
# Also serves as the fallback for environmentName and deploymentName: if either of those
# is not defined it takes the value of 'name'.
# Must be unique.
#
name: C6-Reference-Azure
#
# Environment name
# Optional; falls back to the value of 'name' when not defined.
#
environmentName: Azure-HA
#
# Deployment name
# Used to name the Cloudera Manager instance in Cloudera Altus Director.
# Optional; falls back to the value of 'name' when not defined.
# Must be unique.
#
deploymentName: Cloudera-Manager-on-Azure
#
# Cloud provider configuration (credentials, region or zone, and more)
#
provider {
    type: azure

    # ID of the Azure region to use. NOTE: the region must support Premium Storage.
    # See: https://azure.microsoft.com/en-us/global-infrastructure/regions/
    region: "region_REPLACE_ME"

    # Azure Cloud Environment to use. Valid values are:
    #   - azure
    #   - azure-us-government
    #   - azure-germany
    azureCloudEnvironment: "azureCloudEnvironment_REPLACE_ME"

    # Azure Active Directory Subscription ID.
    subscriptionId: "subscriptionId_REPLACE_ME"

    # Tenant ID (from AAD).
    tenantId: "tenantId_REPLACE_ME"

    # Azure Active Directory Application Client ID.
    clientId: "clientId_REPLACE_ME"

    # Client Secret.
    clientSecret: "clientSecret_REPLACE_ME"
}
#
# SSH credentials to use to connect to the machines
#
ssh {
    username: "username_REPLACE_ME"
    # Replace with an absolute path to the .pem file; ${HOME} may be used.
    privateKey: privateKey_REPLACE_ME
}
#
# Instance templates
#
# A list of instance templates to use for groups of nodes or management services. Some configuration
# sections serve as bases for other, fully-realized templates via HOCON substitution. For more about
# HOCON substitution: https://github.com/lightbend/config/blob/master/HOCON.md#substitutions
#
# The format of this section:
# - base: core config fields that are common to all templates
# - master-base: core config fields that are common to all master templates
# - master-1: config fields for fully defining the master-1 template
# - master-2: config fields for fully defining the master-2 template
# - worker: config fields for fully defining the worker template
# - edge: config fields for fully defining the edge template (used for CM as well)
#
instances {
# fields that are common to all nodes
base {
    # The VM type. See the Azure RA for more detail.
    type: STANDARD_D32S_V3

    # The image ID used for instances is an alias defined in the plugin configuration file.
    image: cloudera-centos-72-latest

    # Whether to use an Azure Virtual Machine Scale Set (VMSS) for a group of instances.
    # Changing the size of the VMSS must be done through Altus Director, not through the
    # Azure Portal. By default, Altus Director will not use a VMSS.
    # Allowed values: true, false
    # See: https://docs.microsoft.com/en-us/azure/virtual-machine-scale-sets/overview
    automatic: false

    # The Resource Group for the Network Security Group for VMs. The Resource Group you
    # specify must exist within the region you selected.
    # See: https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-group-overview#resource-groups
    networkSecurityGroupResourceGroup: "networkSecurityGroupResourceGroup_REPLACE_ME"

    # The Network Security Group for VMs. It must be within the resource group named by
    # networkSecurityGroupResourceGroup. NSG configuration allows you to limit access to
    # the VM with firewall-like rules.
    # See: https://docs.microsoft.com/en-us/azure/virtual-network/security-overview
    networkSecurityGroup: "networkSecurityGroup_REPLACE_ME"

    # The Resource Group for the Virtual Network for VMs. The Resource Group you specify
    # must exist within the region you selected and should be the same for all instances
    # that will be used in the same cluster.
    # See: https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-group-overview#resource-groups
    virtualNetworkResourceGroup: "virtualNetworkResourceGroup_REPLACE_ME"

    # The Virtual Network for VMs. It must be within the resource group named by
    # virtualNetworkResourceGroup and should be the same for all instances that will be
    # used in the same cluster.
    # See: https://docs.microsoft.com/en-us/azure/virtual-network/
    virtualNetwork: "virtualNetwork_REPLACE_ME"

    # The name of the Subnet that will be used. It must be within the virtual network.
    subnetName: "subnetName_REPLACE_ME"

    # OPTIONAL: The DNS domain configured in your custom DNS server. Example values include:
    # cdh-cluster.internal, cluster.your-company-name.com. The host FQDN is configured on
    # each VM with the following format:
    #   {instanceNamePrefix}-{truncated-UUID}.hostFqdnSuffix
    hostFqdnSuffix: "hostFqdnSuffix_REPLACE_ME"

    # OPTIONAL: Tags to apply to all instances
    tags {
        owner: ${?USER}
    }

    #
    # Bootstrap Scripts
    #
    # Provide scripts, that run as root, which can customize instances immediately
    # after boot and before any other Cloudera Altus Director action. Scripts run
    # in the order listed. Interpreters other than sh/bash may be used.
    #
    # Use exit code 0 to indicate success.
    # Use exit code 91 to indicate an unretryable failure.
    # Altus Director will automatically retry script execution for all other exit codes.
    #
    # Script content can be provided in a simple string, or as part of an object
    # with an ID (for easier tracking) and an optional map of environment
    # variables to set before running the script.
    #
    # bootstrapScripts: ["""#!/bin/sh
#
#echo 'Hello World!'
#exit 0
#
#""",
    #     {
    #         id: bootstrapScript2,
    #         env {
    #             KEY1: VALUE1
    #             KEY2: VALUE2
    #         },
    #         content: "echo The values are $KEY1 and $KEY2"
    #     }
    # ]
    #
    # This script, included from azure-os-generic-bootstrap.conf, is needed to configure DNS.
    bootstrapScripts: [ ${?bootstrap-script.os-generic} ]

    # More complex scripts can be supplied via local filesystem paths. They will
    # run after any scripts supplied in the previous bootstrapScripts section.
    #
    # bootstrapScriptsPaths: ["/tmp/test-script.sh",
    #     {
    #         id: bootstrapScriptFromPath2,
    #         env: {
    #             KEY3: VALUE3
    #         },
    #         path: "/tmp/test-script.py"
    #     }
    # ]

    #
    # Pre-terminate Scripts
    #
    # Provide scripts, that run as root, which can run cleanup steps on instances
    # immediately before termination. If an instance has already been terminated,
    # the scripts will be skipped for it. Scripts run in the order listed.
    # Interpreters other than sh/bash may be used.
    #
    # Use exit code 0 to indicate success.
    # Use exit code 91 to indicate an unretryable failure.
    # Altus Director will automatically retry script execution for all other exit codes.
    #
    # Script content can be provided in a simple string, or as part of an object
    # with an ID (for easier tracking) and an optional map of environment
    # variables to set before running the script.
    #
    # NOTE: the lines inside the triple-quoted string are script content and must stay
    # unindented; any leading whitespace would become part of the script.
    preTerminateScripts: ["""#!/bin/sh
echo 'Goodbye World!'
exit 0
""",
        {
            id: preTerminateScript2,
            env {
                KEY1: VALUE1
                KEY2: VALUE2
            },
            content: "echo The values are $KEY1 and $KEY2"
        }
    ]

    # More complex scripts can be supplied via local filesystem paths. They will
    # run after any scripts supplied in the previous preTerminateScripts section.
    #
    # preTerminateScriptsPaths: ["/tmp/test-pre-terminate-script.sh",
    #     {
    #         id: preTerminateScriptFromPath2,
    #         env: {
    #             KEY3: VALUE3
    #         },
    #         path: "/tmp/test-pre-terminate-script.py"
    #     }
    # ]

    # Flag indicating whether to normalize the instance. Not setting normalization here
    # implies that your bootstrap script will take care of normalization. This is an
    # advanced configuration that requires assistance from Cloudera support.
    #
    # Defaults to true
    #
    # normalizeInstance: true

    # Configuration allowing for granular control over the normalization steps of an
    # instance. By default, all of these steps are on. This is an advanced configuration.
    # None of these steps will run if normalizeInstance is set to false.
    #
    # Normalization includes:
    #   prewarming the parcel directory
    #   downloading, installing, and adjusting packages
    #   minimizing swappiness
    #   increasing the maximum number of open files
    #   resizing the root partition
    #   mounting ephemeral disks
    #
    # normalizationConfig {
    #     prewarmDirectory: true
    #     installPackages: true
    #     miscellaneousServiceAdjustment: true
    #     minimizeSwappiness: true
    #     increaseMaxNumberOfOpenFiles: true
    #     resizeRootPartition: true
    #     mountAllUnmountedDisks: true
    # }

    # By default Director does not do host key verification when performing SSH on an
    # instance. This can be changed for added security by setting the SSH Host Key
    # Retrieval Type.
    #
    # NONE     : Host key fingerprints will not be stored or verified during SSH (default)
    # PROVIDER : Retrieve the host key fingerprints from the cloud provider. This does not
    #            work for all images and will likely increase bootstrap time.
    # INSTANCE : Retrieve the host key fingerprints upon first connection into the instance.
    #            This is less secure than the PROVIDER method since the first connection
    #            isn't guaranteed to be secure.
    # FALLBACK : Attempt to retrieve host key fingerprint from the cloud provider first, if
    #            that fails, attempt to retrieve upon first connection into the instance.
    #
    # sshHostKeyRetrievalType: NONE
}
# fields for master nodes
master: ${instances.base} {
    # Resource Group for VMs. The Resource Group you specify must exist
    # within the region you selected.
    # See: https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-group-overview#resource-groups
    computeResourceGroup: "master_computeResourceGroup_REPLACE_ME"

    # OPTIONAL: Availability Set for VMs. VMs within the same availability set have
    # staggered maintenance times. With a default availability set configuration, no more
    # than 1/5 of the VMs will be offline at a time (rounded up). Sharing an Availability
    # Set between master and worker nodes is strongly discouraged.
    # See: https://docs.microsoft.com/en-us/azure/virtual-machines/linux/manage-availability
    availabilitySet: "master_availabilitySet_REPLACE_ME"

    # Prefix for VM names and hostnames. Each VM name has the following format:
    #   {instanceNamePrefix}-{UUID}
    # where {UUID} is generated by the Cloudera Altus Director server.
    instanceNamePrefix: "master_instanceNamePrefix_REPLACE_ME"

    # OPTIONAL: The storage account type to use. The dataDiskSize parameter should be
    # updated based on this property. The allowed values are:
    #   - Premium_LRS (default)
    #   - Standard_LRS
    # See the RA for the supported ways to use standard storage.
    storageAccountType: "Premium_LRS"

    # OPTIONAL: The size of each data drive. For disks allocated in a premium storage
    # account, only the following GB values are allowed:
    #   512  (P20 disk)
    #   1023 (P30 disk) - for backwards compatibility
    #   1024 (P30 disk) - default
    #   2048 (P40 disk)
    #   4095 (P50 disk)
    # For disks allocated in a standard storage account, any size between 1 and 4095
    # inclusive can be used.
    # See: https://docs.microsoft.com/en-us/azure/storage/common/storage-introduction
    # See: https://docs.microsoft.com/en-us/azure/virtual-machines/windows/disks-types#premium-ssd
    dataDiskSize: 512

    # OPTIONAL: Whether or not to use managed disks. Managed Disks are enabled by default.
    # To use Storage Accounts, set this field to "No". In order for managed disks to be
    # used, the Availability Set must be aligned to use managed disks.
    # Allowed values: Yes, No
    # See: https://docs.microsoft.com/en-us/azure/virtual-machines/windows/managed-disks-overview
    managedDisks: Yes

    # OPTIONAL: Whether or not to use a custom (managed) image. Custom images are only
    # supported when using Managed Disks (managedDisks must be Yes).
    # Allowed values: Yes, No
    useCustomManagedImage: No

    # The custom VM image purchase plan. The purchase plan for the original VM image used
    # to create the custom image. This can be:
    #   - an empty string ("")
    #   - a string with the format /publisher/<publisher>/product/<product>/name/<name>
    #
    # customImagePlan: ""

    # Should each VM have a Public IP Address and DNS Label? If Yes, each VM will have a
    # publicly resolvable hostname with the following format:
    #   {instanceNamePrefix}-{UUID}.{region}.cloudapp.azure.com
    # Allowed values: Yes, No
    publicIP: No
}
# fields for master-1 nodes
master-1: ${instances.master} {
    # OPTIONAL: The number of data drives. The size is specified with dataDiskSize. The
    # default value is 5.
    # Data drives are mounted on /data0 .. /data[n]
    #   /data0 - Dedicated Log Device
    #   For Masters-1:
    #     /data1 - HDFS JournalNode Data
    #     /data2 - Zookeeper Data / DataLog
    #     /data3 - NameNode Data
    #   For Masters-2:
    #     /data1 - HDFS JournalNode Data
    #     /data2 - Zookeeper Data / DataLog
    #   For Workers:
    #     /data1 .. /data[n] will be used for HDFS data
    dataDiskCount: 4
}
# fields for master-2 nodes
master-2: ${instances.master} {
    # Number of data drives for this template; everything else comes from instances.master.
    dataDiskCount: 3
}
# fields for worker nodes
worker: ${instances.base} {
    # Placement and naming
    computeResourceGroup: "worker_computeResourceGroup_REPLACE_ME"
    availabilitySet: "worker_availabilitySet_REPLACE_ME"
    instanceNamePrefix: "worker_instanceNamePrefix_REPLACE_ME"

    # Storage
    storageAccountType: "Standard_LRS"
    dataDiskCount: 11
    dataDiskSize: 1024
    managedDisks: Yes

    # Networking
    publicIP: No
}
# fields for edge nodes
edge: ${instances.base} {
    # Placement and naming
    computeResourceGroup: "edge_computeResourceGroup_REPLACE_ME"
    availabilitySet: "edge_availabilitySet_REPLACE_ME"
    instanceNamePrefix: "edge_instanceNamePrefix_REPLACE_ME"

    # Storage
    storageAccountType: "Standard_LRS"
    dataDiskCount: 1
    dataDiskSize: 512
    managedDisks: Yes

    # Change this to Yes to allow accessing edge/CM nodes via public IP
    publicIP: No
}
}
include "azure-os-generic-bootstrap.conf"
#
# Optional external database server configuration.
#
# Cloudera Altus Director can create databases on existing database servers,
# including MySQL servers running in Azure MySQL.
#
# To properly setup a database see:
# https://www.cloudera.com/documentation/director/latest/topics/director_get_started_azure_set_up_msql_postgres.html
#
databaseServers {
    #
    # Use an existing MySQL server (possibly running in Azure MySQL)
    #
    existingmysql1 {
        # The type of database to use. Allowed values are:
        #   - mysql
        #   - postgresql
        type: mysql

        # The static internal IP (recommended) or internal FQDN of the database server.
        host: mysql_host_REPLACE_ME.mysql.database.azure.com

        # The database server's port.
        port: 3306

        # The user Cloudera Altus Director will use. The user must have privileges to
        # create databases, create users, and grant the users access to those databases.
        # For Azure MySQL, be sure to include the short hostname of the database server.
        user: "root@mysql_host_REPLACE_ME"

        # The password for the database user. This is a placeholder like the other
        # *_REPLACE_ME values in this file and must be filled in before use.
        password: "mysql_password_REPLACE_ME"
    }

    #
    # Use an existing PostgreSQL server
    #
    # existingpostgres1 {
    #     type: postgresql
    #     host: REPLACE_ME # with IP address of database server
    #     port: 5432
    #     user: postgres
    #     password: REPLACE_ME # with the password for the database user
    # }
}
#
# Configuration for Cloudera Manager. Cloudera Altus Director can use an existing Cloudera
# Manager installation, or bootstrap everything from scratch for a new cluster.
#
cloudera-manager {
instance: ${instances.edge} {
    # Additional tags for the Cloudera Manager instance
    tags {
        application: "Cloudera Manager 6"
    }
}
#
# Licensing configuration
#
# There are three mutually exclusive options for setting up Cloudera Manager's license.
# 1. License text may be embedded in this file using the "license" field. Triple quotes (""")
# are recommended for including multi-line text strings.
# 2. The "licensePath" can be used to specify the path to a file containing the license.
# 3. The "enableEnterpriseTrial" flag indicates whether the 60-Day Cloudera Enterprise Trial
# should be activated when no license is present. This must not be set to true if a
# license is included using either "license" or "licensePath".
#
#
# Embed a license for Cloudera Manager
#
# license: """
# -----BEGIN PGP SIGNED MESSAGE-----
# Hash: SHA1
#
# {
# "version" : 1,
# "name" : "License Owner",
# "uuid" : "license id",
# "expirationDate" : 0,
# "features" : [ "FEATURE1", "FEATURE2" ]
# }
# -----BEGIN PGP SIGNATURE-----
# Version: GnuPG v1.4.11 (GNU/Linux)
#
# PGP SIGNATURE
# -----END PGP SIGNATURE-----
# """
#
# Include a license for Cloudera Manager from an external file
#
# licensePath: "/path/to/license.txt.asc"
#
# Activate 60-Day Cloudera Enterprise Trial
#
enableEnterpriseTrial: true
#
# Specify the billing ID.
#
# Altus Director will use the billing ID to report usage information to a metering service for
# usage based billing.
#
# Usage reporting starts as soon as you assign a billing ID and a license to a Cloudera Manager.
# If you remove a billing ID, Director will stop reporting to the metering service.
#
# When usage reporting stops, you will not have access to Cloudera Support with this deployment.
# If you want a billing ID, please contact Cloudera. An enterprise license is required.
#
# billingId: "billingId_REPLACE_ME"
#
# Select the strategy for installing a JDK. Choices are:
#
# - AUTO (default): Altus Director installs the JDK on the Cloudera Manager instance, and
# Cloudera Manager installs the JDK on cluster instances
# - DIRECTOR_MANAGED: Altus Director installs the JDK on all instances
# - NONE: Neither Altus Director nor Cloudera Manager installs the JDK on any instances
#
# javaInstallationStrategy: AUTO
#
# Install the unlimited strength JCE policy files for higher levels of encryption.
# Prior to setting this to true, confirm that you understand the legal ramifications
# of using unlimited JCE policy files in your country.
#
# unlimitedJce: true
#
# Automatic TLS
#
#
# Set up TLS connections automatically between Cloudera Altus Director and Cloudera Manager,
# as well as among the cluster services. Automatic TLS includes installation of
# unlimited strength JCE policy files (see unlimitedJce).
#
# tlsEnabled: true
#
# Pass TLS configuration properties to Cloudera Manager to refine how automatic TLS
# is configured. All TLS configuration properties are optional and have sane defaults.
# Additional properties are available beyond those listed here.
#
# tlsConfigurationProperties {
# subject_suffix: "O=example.com,L=Cityville,ST=CA,C=US"
# ca_sig_hash_algo: "SHA512"
# email_address: "name@example.com"
# }
#
# Optional database configuration
#
# There are three mutually exclusive options for database usage in Cloudera Altus Director.
# 1. With no configuration, databases in the Cloudera Manager embedded PostgreSQL database
# server will be used. This option is NOT supported for production use.
# 2. Alternatively, existing external databases can be used.
# 3. Finally, databases can be created on the fly on existing external database servers.
#
# Note that using an external database here necessitates using an external database
# for the cluster services. This reference configuration is using Option 3.
#
#
# (Option 2) Optional configuration for existing external databases
#
# databases {
# CLOUDERA_MANAGER {
# type: postgresql
#
# host: db.example.com
# port: 123
#
# user: admin
# password: 1231ed
#
# name: scm
# }
#
# ACTIVITYMONITOR { ... }
#
# REPORTSMANAGER { ... }
#
# NAVIGATOR { ... }
#
# NAVIGATORMETASERVER { ... }
# }
#
# (Option 3) Optional configuration for creating external databases on the fly
#
# When a database is created on the fly, Altus Director generates a random database name using the specified
# database name prefix, a random username based on the specified username prefix, and a random password. The
# password is stored by Altus Director and made available to the service that uses the database. If multiple
# services reference the same external database server, Altus Director will create a database for each.
#
# MySQL limits usernames to sixteen characters. Therefore, limit usernamePrefix values for databases on MySQL to
# seven characters; the remaining nine characters are used by the randomized suffix generated by Director.
#
# Note that the databaseServerName must correspond to an external database server named above.
#
databaseTemplates {
    # One template per Cloudera Manager database; each one names the external
    # database server it is created on via databaseServerName.
    CLOUDERA_MANAGER {
        name: scmt
        databaseServerName: existingmysql1
        databaseNamePrefix: scm
        usernamePrefix: scmu
    }

    ACTIVITYMONITOR {
        name: amont
        databaseServerName: existingmysql1
        databaseNamePrefix: amon
        usernamePrefix: amonu
    }

    REPORTSMANAGER {
        name: rmant
        databaseServerName: existingmysql1
        databaseNamePrefix: rman
        usernamePrefix: rmanu
    }

    NAVIGATOR {
        name: navt
        databaseServerName: existingmysql1
        databaseNamePrefix: nav
        usernamePrefix: navu
    }

    NAVIGATORMETASERVER {
        name: navmst
        databaseServerName: existingmysql1
        databaseNamePrefix: navms
        usernamePrefix: navmsu
    }
}
#
# Configuration to override Cloudera Manager package repositories. These are
# optional, and default to the Cloudera Enterprise release corresponding to
# the Altus Director version.
#
# repository: "https://archive.cloudera.com/cm6/6.2/redhat7/yum/"
# repositoryKeyUrl: "https://archive.cloudera.com/cm6/6.2/redhat7/yum/RPM-GPG-KEY-cloudera"
# OR use an existing Cloudera Manager installation
# hostname: "192.168.33.10"
# username: REPLACE_ME # default is admin
# password: REPLACE_ME # default is admin
#
# Configuration for Cloudera Manager and its management services
#
# Configuration properties for CLOUDERA_MANAGER are documented at
# https://www.cloudera.com/documentation/enterprise/6/properties/6.2/topics/cm_props_cmserver.html
#
# Configuration properties for the Cloudera Management services are documented at
# https://www.cloudera.com/documentation/enterprise/6/properties/6.2/topics/cm_props_mgmtservice.html
#
# Configuration properties for Hosts are documented at
# https://www.cloudera.com/documentation/enterprise/6/properties/6.2/topics/cm_props_host.html
#
# Properties here ensure that non-ephemeral disks are used for all directories and files
#
configs {
    # CLOUDERA_MANAGER corresponds to the Cloudera Manager Server configuration options
    CLOUDERA_MANAGER {
        # enable_api_debug: true
        custom_banner_html: "Managed by Cloudera Altus Director"
    }

    # CLOUDERA_MANAGEMENT_SERVICE corresponds to the Service-Wide configuration options
    #
    # CLOUDERA_MANAGEMENT_SERVICE {
    #     enable_alerts : false
    #     enable_config_alerts : false
    # }

    # The management roles below keep their log and data directories under /data0.
    SERVICEMONITOR {
        mgmt_log_dir: /data0/log/cloudera-scm-firehose
        firehose_storage_dir: /data0/lib/cloudera-service-monitor
    }

    ACTIVITYMONITOR {
        mgmt_log_dir: /data0/log/cloudera-scm-firehose
    }

    HOSTMONITOR {
        mgmt_log_dir: /data0/log/cloudera-scm-firehose
        firehose_storage_dir: /data0/lib/cloudera-host-monitor
    }

    REPORTSMANAGER {
        headlamp_scratch_dir: /data0/lib/cloudera-scm-headlamp
        mgmt_log_dir: /data0/log/cloudera-scm-headlamp
    }

    EVENTSERVER {
        mgmt_log_dir: /data0/log/cloudera-scm-eventserver
        eventserver_index_dir: /data0/lib/cloudera-scm-eventserver
    }

    ALERTPUBLISHER {
        mgmt_log_dir: /data0/log/cloudera-scm-alertpublisher
    }

    NAVIGATOR {
        mgmt_log_dir: /data0/log/cloudera-scm-navigator
    }

    NAVIGATORMETASERVER {
        audit_event_log_dir: /data0/log/cloudera-scm-navigator/audit
        data_dir: /data0/lib/cloudera-scm-navigator
        mgmt_log_dir: /data0/log/cloudera-scm-navigator
    }

    # Configuration properties for all hosts
    # HOSTS { ... }
}
#
# Deployment Post-create Scripts
#
# Provide scripts, that run as root, which can customize the Cloudera Manager
# instance after the deployment has been created. Scripts run in the order
# listed. Interpreters other than sh/bash may be used.
#
# Use exit code 0 to indicate success.
# Use exit code 91 to indicate an unretryable failure.
# Altus Director will automatically retry script execution for all other exit codes.
#
# Deployment post-creation scripts have access to the following environment
# variables:
#
# DEPLOYMENT_HOST_PORT
# ENVIRONMENT_NAME
# DEPLOYMENT_NAME
# CM_USERNAME
# CM_PASSWORD
#
# Script content can be provided in a simple string, or as part of an object
# with an ID (for easier tracking) and an optional map of additional environment
# variables to set before running the script. Environment variables set here
# override the values of default ones above.
#
# Three sample scripts: an inline shell script, an object with id/env/content, and an
# inline Python script. Lines inside the triple-quoted strings are script content and
# must stay unindented.
postCreateScripts: ["""#!/bin/sh
echo 'Hello World!'
exit 0
""",
    {
        id: depPostCreateScript2,
        env {
            KEY1: VALUE1
            KEY2: VALUE2
        },
        content: "echo The values are $KEY1 and $KEY2"
    },
    """#!/usr/bin/python
print 'Hello again!'
"""]
#
# More complex scripts can be supplied via local filesystem paths. They will
# run after any scripts supplied in the previous postCreateScripts section.
#
# postCreateScriptsPaths: ["/tmp/test-script.sh",
# {
# id: postCreateScriptFromPath2,
# env: {
# KEY3: VALUE3
# },
# path: "/tmp/test-script.py"
# }
# ]
#
# Optional custom service descriptors for external parcels
#
# csds: [
# "https://archive.cloudera.com/exampleProduct/csd/example-csd.jar"
# ]
#
}
#
# Cluster description
#
cluster {
# The table of products and their versions that need to be installed. Each
# product must have a corresponding parcel in the parcelRepositories
# configured in this section. The specified version for a product will be
# used to find a suitable parcel. Specifying a version that is satisfied by
# more than one parcel among those available will result in a configuration
# error. Specify more granular versions to avoid conflicts.
products {
    # Product name mapped to the version used to select its parcel.
    CDH: 6
    # EXAMPLEPRODUCT: 1
}
#
# Optional override of CDH parcel repositories
#
# This defaults to the Cloudera Enterprise release corresponding to the
# Altus Director version.
#
# parcelRepositories: ["https://archive.cloudera.com/cdh6/6.2/parcels/"]
#
# Services to include in the cluster
#
# NOTE: On CM 5.9+ Sentry and Kafka 2.0 can't coexist in the same cluster.
# If this is needed, use CM 5.8 repository and parcels, or use Kafka
# 2.1 or higher.
#
services: [
    HDFS,
    YARN,
    ZOOKEEPER,
    HBASE,
    HIVE,
    HUE,
    IMPALA,
    OOZIE,
    SPARK_ON_YARN
]
#
# Custom service configurations
#
# Configuration keys containing special characters (e.g., '.', ':') must be enclosed in double
# quotes.
#
# Configuration properties for CDH roles and services are documented at
# https://www.cloudera.com/documentation/enterprise/6/properties/6.2/topics/cm_props_cdh620.html
#
configs {
    # HDFS fencing should be set to true for HA configurations
    HDFS {
        dfs_ha_fencing_methods: "shell(true)"
        dfs_block_local_path_access_user: "impala,hbase,mapred,spark"

        # Optional configuration for Azure Data Lake Storage usage with
        # CDH. This requires that a valid service principal be created and
        # granted permission to access the ADLS account.
        # See: https://www.cloudera.com/documentation/enterprise/latest/topics/admin_adls_config.html
        #
        # core_site_safety_valve: """
        # <property>
        # <name>dfs.adls.oauth2.access.token.provider.type</name>
        # <value>ClientCredential</value>
        # </property>
        # <property>
        # <name>dfs.adls.oauth2.client.id</name>
        # <value>client_id_REPLACE_ME</value>
        # </property>
        # <property>
        # <name>dfs.adls.oauth2.credential</name>
        # <value>client_secret_REPLACE_ME</value>
        # </property>
        # <property>
        # <name>dfs.adls.oauth2.refresh.url</name>
        # <value>refresh_url_REPLACE_ME</value>
        # </property>
        # """
    }

    HIVE {
        audit_event_log_dir: /data0/log/hive/audit
        lineage_event_log_dir: /data0/log/hive/lineage
    }

    HBASE {
        audit_event_log_dir: /data0/log/hbase/audit
    }

    # OOZIE requires a load balancer specifically for high availability.
    # Director does not create or manage the load balancer.
    #
    # The load balancer must be configured with the IPs of the oozie servers
    # after the cluster completes bootstrapping.
    OOZIE {
        oozie_load_balancer: "example.com"
        oozie_load_balancer_http_port: 5002
        #oozie_load_balancer_https_port: 5000
    }
}
#
# Database configuration for Highly Available Cluster Services
#
# As mentioned in the cloudera-manager section, the three mutually exclusive options for database
# usage are (1) No configuration which uses databases in the Cloudera Manager embedded PostgreSQL
# database server; (2) Use existing external databases; or (3) Create databases on an existing
# external database server.
#
# High availability configuration requires external databases to be defined for the
# Hive Metastore, Hue, and Oozie services. These databases may be configured using either option
# 2 or 3.
#
#
# (Option 2) Configuration for existing external databases for services
#
# databases {
# HIVE {
# type: postgresql
# host: db.example.com
# port: 123
# user: hive
# password: pass
# name: hive_db
# }
# HUE {
# type: postgresql
# host: db.example.com
# port: 123
# user: hue
# password: pass
# name: hue_db
# }
# OOZIE {
# type: postgresql
# host: db.example.com
# port: 123
# user: oozie
# password: pass
# name: oozie_db
# }
# }
#
# (Option 3) Configuration for creating external databases on the fly for services
#
databaseTemplates: {
    # One template per service database; each one names the external database
    # server it is created on via databaseServerName.
    HIVE {
        name: hivet
        databaseServerName: existingmysql1
        databaseNamePrefix: hive
        usernamePrefix: hiveu
    }

    HUE {
        name: huet
        databaseServerName: existingmysql1
        databaseNamePrefix: hue
        usernamePrefix: hueu
    }

    OOZIE {
        name: ooziet
        databaseServerName: existingmysql1
        databaseNamePrefix: oozie
        usernamePrefix: oozieu
    }
}
#
# Instance group configurations
#
#
# This reference configuration follows the Cloudera Azure Reference Architecture.
#
masters-1 {
    count: 2

    instance: ${instances.master-1} {
        tags {
            group: masters-1
        }
    }

    roles {
        # ZooKeeper uses majority quorum for r/w, configure odd number of servers.
        ZOOKEEPER: [SERVER]
        HDFS: [NAMENODE, FAILOVERCONTROLLER, JOURNALNODE]
        YARN: [RESOURCEMANAGER]
        HBASE: [MASTER]
    }

    configs {
        # NameNode nameservice, autofailover, and quorum journal name must be
        # configured for high availability
        HDFS {
            NAMENODE {
                dfs_federation_namenode_nameservice: hanameservice
                autofailover_enabled: true
                dfs_namenode_quorum_journal_name: hanameservice
                namenode_log_dir: /data0/log/hadoop-hdfs
                dfs_name_dir_list: /data3/dfs/nn
            }
            FAILOVERCONTROLLER {
                failover_controller_log_dir: /data0/log/hadoop-hdfs
            }
            JOURNALNODE {
                journalnode_log_dir: /data0/log/hadoop-hdfs
                dfs_journalnode_edits_dir: /data1/hdfs
            }
        }

        ZOOKEEPER {
            SERVER {
                zk_server_log_dir: /data0/log/zookeeper
                dataDir: /data2/zookeeper
                dataLogDir: /data2/zookeeper
            }
        }

        YARN {
            RESOURCEMANAGER {
                resource_manager_log_dir: /data0/log/hadoop-yarn
            }
        }

        HBASE {
            MASTER {
                hbase_master_log_dir: /data0/log/hbase
            }
        }
    }
}
masters-2 {
    count: 1

    instance: ${instances.master-2} {
        tags {
            group: masters-2
        }
    }

    # HIVESERVER2 roles need a SPARK role (such as gateway) on the same
    # instance to pick up Spark configurations
    # Likewise, SPARK_ON_YARN roles need a HIVE gateway to read Hive tables
    roles {
        # ZooKeeper uses majority quorum for r/w, configure odd number of servers.
        ZOOKEEPER: [SERVER]
        HDFS: [JOURNALNODE, HTTPFS]
        HIVE: [HIVESERVER2, HIVEMETASTORE, WEBHCAT, GATEWAY]
        YARN: [JOBHISTORY]
        HUE: [HUE_SERVER]
        OOZIE: [OOZIE_SERVER]
        IMPALA: [CATALOGSERVER, STATESTORE]
        SPARK_ON_YARN: [SPARK_YARN_HISTORY_SERVER]
        # HBASETHRIFTSERVER role needed for HUE
        HBASE: [HBASETHRIFTSERVER]
    }

    configs {
        HDFS {
            JOURNALNODE {
                journalnode_log_dir: /data0/log/hadoop-hdfs
                dfs_journalnode_edits_dir: /data1/hdfs
            }
            HTTPFS {
                httpfs_log_dir: /data0/log/hadoop-httpfs
            }
        }

        OOZIE {
            # Oozie plugins must be configured for high availability
            OOZIE_SERVER {
                oozie_plugins_list: "org.apache.oozie.service.ZKLocksService,org.apache.oozie.service.ZKXLogStreamingService,org.apache.oozie.service.ZKJobsConcurrencyService,org.apache.oozie.service.ZKUUIDService"
                oozie_log_dir: /data0/log/oozie
            }
        }

        ZOOKEEPER {
            SERVER {
                zk_server_log_dir: /data0/log/zookeeper
                dataDir: /data2/zookeeper
                dataLogDir: /data2/zookeeper
            }
        }

        HIVE {
            HIVEMETASTORE {
                hive_log_dir: /data0/log/hive
            }
            HIVESERVER2 {
                hive_log_dir: /data0/log/hive
            }
            WEBHCAT {
                hcatalog_log_dir: /data0/log/hcatalog
            }
        }

        YARN {
            JOBHISTORY {
                mr2_jobhistory_log_dir: /data0/log/hadoop-mapreduce
            }
        }

        HUE {
            HUE_SERVER {
                hue_server_log_dir: /data0/log/hue
            }
            KT_RENEWER {
                kt_renewer_log_dir: /data0/log/hue
            }
        }

        IMPALA {
            CATALOGSERVER {
                log_dir: /data0/log/catalogd
            }
            STATESTORE {
                log_dir: /data0/log/statestore
            }
        }

        SPARK_ON_YARN {
            SPARK_YARN_HISTORY_SERVER {
                log_dir: /data0/log/spark
            }
        }

        HBASE {
            HBASETHRIFTSERVER {
                hbase_thriftserver_log_dir: /data0/log/hbase
            }
            #HBASERESTSERVER {
            #    hbase_restserver_log_dir: /data0/log/hbase
            #}
        }
    }
}
workers {
    #
    # Target size of this instance group. Cloudera Altus Director tries to
    # launch this many instances; bootstrap still succeeds if fewer come up,
    # provided at least minCount (below) of them succeed.
    #
    count: 5

    #
    # Smallest acceptable size of this instance group. Altus Director fails
    # cluster bootstrap when minCount instances are not available in this
    # cloud environment. When minCount is omitted it defaults to count.
    #
    minCount: 3

    # NOTE(review): the tag value is "worker" while the group key is
    # "workers" - confirm the singular form is intentional.
    instance: ${instances.worker} {
        tags {
            group: worker
        }
    }

    roles {
        HDFS: [DATANODE]
        YARN: [NODEMANAGER]
        HBASE: [REGIONSERVER]
        IMPALA: [IMPALAD]
    }

    # Per-role overrides. Role logs go under /data0/log; the data, container
    # log, local and scratch directories are spread over /data1 .. /data10.
    configs {
        HDFS {
            DATANODE {
                datanode_log_dir: /data0/log/hadoop-hdfs
                dfs_data_dir_list: "/data1/dfs/dn,/data2/dfs/dn,/data3/dfs/dn,/data4/dfs/dn,/data5/dfs/dn,/data6/dfs/dn,/data7/dfs/dn,/data8/dfs/dn,/data9/dfs/dn,/data10/dfs/dn"
            }
        }

        YARN {
            NODEMANAGER {
                node_manager_log_dir: /data0/log/hadoop-yarn
                yarn_nodemanager_log_dirs: "/data1/log/hadoop-yarn/container,/data2/log/hadoop-yarn/container,/data3/log/hadoop-yarn/container,/data4/log/hadoop-yarn/container,/data5/log/hadoop-yarn/container,/data6/log/hadoop-yarn/container,/data7/log/hadoop-yarn/container,/data8/log/hadoop-yarn/container,/data9/log/hadoop-yarn/container,/data10/log/hadoop-yarn/container"
                yarn_nodemanager_local_dirs: "/data1/yarn,/data2/yarn,/data3/yarn,/data4/yarn,/data5/yarn,/data6/yarn,/data7/yarn,/data8/yarn,/data9/yarn,/data10/yarn"
            }
        }

        HBASE {
            REGIONSERVER {
                hbase_regionserver_log_dir: /data0/log/hbase
            }
        }

        IMPALA {
            IMPALAD {
                log_dir: /data0/log/impalad
                lineage_event_log_dir: /data0/log/impalad/lineage
                audit_event_log_dir: /data0/log/impalad/audit
                scratch_dirs: "/data1/impala/impalad,/data2/impala/impalad,/data3/impala/impalad,/data4/impala/impalad,/data5/impala/impalad,/data6/impala/impalad,/data7/impala/impalad,/data8/impala/impalad,/data9/impala/impalad,/data10/impala/impalad"
            }
        }
    }
}
#
# Cluster Instance-level Post-create Scripts
#
# Provide scripts, that run as root, which can customize each cluster instance
# after the cluster has been created. Scripts run on each cluster instance in
# the order listed, before those listed under postCreateScripts and
# postCreateScriptsPaths. Interpreters other than sh/bash may be used.
#
# Use exit code 0 to indicate success.
# Use exit code 91 to indicate an unretryable failure.
# Altus Director will automatically retry script execution for all other exit codes.
#
# Cluster instance-level post-creation scripts have access to the following
# environment variables:
#
# DEPLOYMENT_HOST_PORT
# ENVIRONMENT_NAME
# DEPLOYMENT_NAME
# CLUSTER_NAME
# CM_USERNAME
# CM_PASSWORD
#
# Script content can be provided in a simple string, or as part of an object
# with an ID (for easier tracking) and an optional map of additional environment
# variables to set before running the script. Environment variables set here
# override the values of default ones above.
#
# Three sample instance-level scripts: a plain sh script, an object-form
# script with an id and extra environment variables, and a Python script.
# The text inside each triple-quoted string is the script itself, so its
# lines must stay flush-left (the shebang has to be the very first bytes).
instancePostCreateScripts: ["""#!/bin/sh
echo 'Hello World!'
exit 0
""",
    {
        id: cluInstancePostCreateScript2,
        # Extra variables exported before the script in "content" runs.
        env {
            KEY1: VALUE1
            KEY2: VALUE2
        },
        content: "echo The values are $KEY1 and $KEY2"
    },
    # print(...) with parentheses is valid under both Python 2 and Python 3.
    # The bare print statement is a SyntaxError on Python 3, which would make
    # the script exit non-zero (not 91) and be retried.
"""#!/usr/bin/python
print('Hello again!')
"""]
#
# More complex scripts can be supplied via local filesystem paths. They will
# run after any scripts supplied in the previous instancePostCreateScripts section.
#
# instancePostCreateScriptsPaths: ["/tmp/test-script.sh",
# {
# id: instancePostCreateScriptFromPath2,
# env: {
# KEY3: VALUE3
# },
# path: "/tmp/test-script.py"
# }
# ]
#
# Cluster Post-create Scripts
#
# Provide scripts, that run as root, which can customize the cluster after it
# has been created. Scripts run only once, at a cluster level, on an arbitrary
# cluster instance after those listed under instancePostCreateScripts and
# instancePostCreateScriptsPaths. Interpreters other than sh/bash may be used.
#
# Use exit code 0 to indicate success.
# Use exit code 91 to indicate an unretryable failure.
# Altus Director will automatically retry script execution for all other exit codes.
#
# Cluster post-creation scripts have access to the following environment
# variables:
#
# DEPLOYMENT_HOST_PORT
# ENVIRONMENT_NAME
# DEPLOYMENT_NAME
# CLUSTER_NAME
# CM_USERNAME
# CM_PASSWORD
#
# Script content can be provided in a simple string, or as part of an object
# with an ID (for easier tracking) and an optional map of additional environment
# variables to set before running the script. Environment variables set here
# override the values of default ones above.
#
# Three sample cluster-level scripts: a plain sh script, an object-form
# script with an id and extra environment variables, and a Python script.
# The text inside each triple-quoted string is the script itself, so its
# lines must stay flush-left (the shebang has to be the very first bytes).
postCreateScripts: ["""#!/bin/sh
echo 'Hello World!'
exit 0
""",
    {
        id: cluPostCreateScript2,
        # Extra variables exported before the script in "content" runs.
        env {
            KEY1: VALUE1
            KEY2: VALUE2
        },
        content: "echo The values are $KEY1 and $KEY2"
    },
    # print(...) with parentheses is valid under both Python 2 and Python 3.
    # The bare print statement is a SyntaxError on Python 3, which would make
    # the script exit non-zero (not 91) and be retried.
"""#!/usr/bin/python
print('Hello again!')
"""]
#
# More complex scripts can be supplied via local filesystem paths. They will
# run after any scripts supplied in the previous postCreateScripts section.
#
# postCreateScriptsPaths: ["/tmp/test-script.sh",
# {
# id: postCreateScriptFromPath2,
# env: {
# KEY3: VALUE3
# },
# path: "/tmp/test-script.py"
# }
# ]
#
# Cluster Pre-terminate Scripts
#
# Provide scripts, that run as root, which can operate on the cluster before it
# is terminated. Scripts run only once, at a cluster level, on an arbitrary
# cluster instance. Interpreters other than sh/bash may be used.
#
# Use exit code 0 to indicate success.
# Use exit code 91 to indicate an unretryable failure.
# Altus Director will automatically retry script execution for all other exit codes.
#
# Cluster pre-terminate scripts have access to the following environment
# variables:
#
# DEPLOYMENT_HOST_PORT
# ENVIRONMENT_NAME
# DEPLOYMENT_NAME
# CLUSTER_NAME
# CM_USERNAME
# CM_PASSWORD
#
# Script content can be provided in a simple string, or as part of an object
# with an ID (for easier tracking) and an optional map of additional environment
# variables to set before running the script. Environment variables set here
# override the values of default ones above.
#
# Three sample pre-terminate scripts: a plain sh script, an object-form
# script with an id and extra environment variables, and a Python script.
# The text inside each triple-quoted string is the script itself, so its
# lines must stay flush-left (the shebang has to be the very first bytes).
preTerminateScripts: ["""#!/bin/sh
echo 'Goodbye World!'
exit 0
""",
    {
        id: cluPreTerminateScript2,
        # Extra variables exported before the script in "content" runs.
        env {
            KEY1: VALUE1
            KEY2: VALUE2
        },
        content: "echo The values are $KEY1 and $KEY2"
    },
    # print(...) with parentheses is valid under both Python 2 and Python 3.
    # The bare print statement is a SyntaxError on Python 3, which would make
    # the script exit non-zero (not 91) and be retried.
"""#!/usr/bin/python
print('Goodbye again!')
"""]
#
# More complex scripts can be supplied via local filesystem paths. They will
# run after any scripts supplied in the previous preTerminateScripts section.
#
# preTerminateScriptsPaths: ["/tmp/test-script.sh",
# {
# id: preTerminateScriptFromPath2,
# env: {
# KEY3: VALUE3
# },
# path: "/tmp/test-script.py"
# }
# ]
#
# Optional Administration Settings
#
# Administration settings configure Director's interaction with the cluster.
#
administrationSettings {
    #
    # Both settings below are commented out, so this block currently sets
    # nothing explicitly. Uncomment a line to set that value.
    #
    # If enabled, Director will attempt to automatically repair
    # clusters whose instances have been terminated in the cloud provider.
    #
    # autoRepairEnabled: false
    #
    # Cooldown period, in seconds, between autorepair passes.
    #
    # autoRepairCooldownPeriodInSeconds: 1800
}
}
You can’t perform that action at this time.