Permalink
1365 lines (1149 sloc) 39.9 KB
#
# Copyright (c) 2015 Cloudera, Inc. All rights reserved.
#
#
# Sample AWS Cloudera Altus Director configuration file based on the Cloudera AWS reference architecture:
# http://www.cloudera.com/documentation/other/reference-architecture/PDF/cloudera_ref_arch_aws.pdf
#
# This is a template of a non-HA deployment for all services. Kerberos configuration properties
# are also provided. Refer to Cloudera Altus Director's documentation for more information.
#
#
# Cluster name. By default the environment and deployment names are generated from it.
#
name: "C6-Reference-AWS"
#
# The environment and deployment name will be generated by default based on the cluster
# name. Another cluster can be bootstrapped under the same deployment by specifying the
# existing environment and deployment name along with a new cluster name. Note that when
# terminating with a configuration file, the deployment is only terminated if there are
# no other clusters under the deployment. Also, the external databases are only terminated
# if there are no other components (deployments or clusters) using the external database.
#
# environmentName: "C6-Reference-AWS Environment"
# deploymentName: "C6-Reference-AWS Deployment"
#
# Cloud provider configuration (credentials, region or zone and optional default image)
#
provider {
    type: aws

    #
    # AWS credentials, read from the OS environment
    # See http://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
    #
    # Access keys written directly here (rather than through variables) must be
    # enclosed in double quotes.
    #
    # Leave the accessKeyId and secretAccessKey fields blank when running on an
    # instance launched with an IAM role.
    #
    # accessKeyId: ${?AWS_ACCESS_KEY_ID}
    # secretAccessKey: ${?AWS_SECRET_ACCESS_KEY}

    #
    # ID of the Amazon AWS region to use
    # See: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
    #
    region: region-REPLACE-ME

    #
    # Region endpoint (if you are using one of the Gov. regions)
    #
    # regionEndpoint: ec2.us-gov-west-1.amazonaws.com

    #
    # ID of the VPC subnet
    # See: http://docs.aws.amazon.com/AmazonVPC/latest/UserGuide/VPC_Subnets.html
    #
    subnetId: subnet-REPLACE-ME

    #
    # Default security group(s), as a comma-separated list of security group IDs,
    # e.g., "sg-abc123,sg-def456"
    # See: http://docs.aws.amazon.com/AmazonVPC/latest/UserGuide/VPC_SecurityGroups.html
    #
    securityGroupsIds: sg-REPLACE-ME

    #
    # Prefix that Cloudera Altus Director uses when naming the instances
    # (this is not part of the hostname)
    #
    instanceNamePrefix: cloudera-director

    #
    # Size of the root volume (in GBs). Cloudera Altus Director automatically expands the
    # filesystem so that all the available disk space can be used by your application
    # See: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/storage_expand_partition.html
    #
    # rootVolumeSizeGB: 100 # defaults to 50 GB if not specified

    #
    # Type of the EBS volume used for the root partition. Defaults to gp2
    # See: http://aws.amazon.com/ebs/details/
    #
    # rootVolumeType: gp2 # OR standard (for EBS magnetic)

    #
    # Whether to associate a public IP address with instances or not. If this is false
    # we expect instances to be able to access the internet using a NAT instance
    #
    # Currently the only way to get optimal S3 data transfer performance is to assign
    # public IP addresses to your instances and not use NAT instances (public subnet setup)
    #
    # See: http://docs.aws.amazon.com/AmazonVPC/latest/UserGuide/vpc-ip-addressing.html
    #
    # associatePublicIpAddresses: true

    # Spot Price (in USD per hour) for Spot instances
    # See: https://aws.amazon.com/ec2/spot/
    # Can be set here and shared across instance groups, and/or overridden/set per instance group
    # spotPriceUSDPerHr: 0.50

    # Block Duration in minutes for Spot instances
    # This value must be a multiple of 60 (60, 120, 180, 240, 300, or 360).
    # See: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-requests.html#fixed-duration-spot-instances
    # Can be set here and shared across instance groups, and/or overridden/set per instance group
    # blockDurationMinutes: 120
}
#
# SSH credentials to use to connect to the instances
#
ssh {
    # Login user (ec2-user is for the RHEL image)
    username: ec2-user
    # Replace with an absolute path to the .pem file
    privateKey: privateKey-REPLACE-ME
}
#
# These instance properties will be applied to all instances.
#
common-instance-properties {
#
# Amazon Machine Image (AMI)
#
# See: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html
#
# Certain AMI virtualization types are incompatible with certain instance types. HVM
# AMI types are recommended since they are compatible with most instance types.
#
# Compatibility matrix: https://aws.amazon.com/amazon-linux-ami/instance-type-matrix/
#
# Red Hat Enterprise Linux AMI IDs: http://aws.amazon.com/partners/redhat/
#
# We support RHEL and CentOS 6.5, 6.7, 6.8, 7.1, 7.2, and 7.3.
# RHEL 7.2 is supported only for Cloudera Manager and CDH 5.7 and higher.
# RHEL 6.8 and 7.3 are supported only for Cloudera Manager and CDH 5.10 and higher.
#
# To use Amazon EC2 D2 instances, you must run a minimum version of RHEL 6.7 or CentOS 6.7
image: ami-REPLACE-ME
#
# Cloudera Manager Rack ID
#
# Each host in Cloudera Manager has a rack ID that is used to encode the datacenter topology
# as a path
# rackId: "/default"
#
# Name of the IAM Role to use for this instance type
# http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html
#
# iamProfileName: iam-profile-REPLACE-ME
# Tags attached to the instances; the owner value is substituted from USER
# when that variable is defined (optional substitution).
tags {
    owner: ${?USER}
}
#
# EBS Volumes
#
# Director can create and attach additional EBS volumes to the instance. These volumes
# will be automatically deleted when the associated instance is terminated. These
# properties don't apply to the root volume.
#
# See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumes.html
#
# ebsVolumeCount : 0
# ebsVolumeType: st1 # Specify either st1, sc1, gp2 or io1 volume type
# ebsVolumeSizeGiB: 500
# ebsIops: 500 # Number of IOPS, only valid and required for io1 volume type
#
# EBS Volume Encryption
#
# Encryption can be enabled on the additional EBS volumes. An optional CMK can
# be specified for volume encryption. Not setting a CMK means the default CMK
# for EBS will be used. The encryption here does not apply to the root volume.
#
# See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSEncryption.html
#
# enableEbsEncryption: false
# ebsKmsKeyId: arn:aws:kms:REPLACE-ME # full ARN of the KMS CMK
#
# EBS Optimized
#
# Specify whether to enable EBS Optimized I/O. This optimization isn't available
# with all instance types. Some instance types are EBS Optimized by default
# regardless of this flag. Additional usage charges may apply when using an
# EBS-optimized instance.
#
# See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSOptimized.html
#
# ebsOptimized : false
# Embedded bootstrap scripts: a list with one complete script per element.
# The text between the triple quotes is taken verbatim, so the '#' lines inside
# it are shell comments that belong to the script, not configuration comments.
bootstrapScripts: ["""#!/bin/sh
# This is an embedded bootstrap script that runs as root and can be used to customize
# the instances immediately after boot and before any other Cloudera Altus Director action
# Use exit code 0 to indicate success
# Use exit code 91 indicate an unretryable failure
# Cloudera Altus Director will automatically retry script execution for all other exit codes
echo 'Hello World!'
exit 0
"""]
# For more complex scripts, embedded bootstrap scripts can be supplied via local
# filesystem paths. They will run after any scripts supplied in the previous
# bootstrapScripts section.
# bootstrapScriptsPaths: ["/tmp/test-script.sh",
# "/tmp/test-script.py"]
#
# Flag indicating whether to normalize the instance. Not setting normalization here implies that your
# bootstrap script will take care of normalization. This is an advanced configuration that will require
# assistance from Cloudera support.
#
# Defaults to true
#
# normalizeInstance: true
#
# Configuration allowing for granular control over the normalization steps of an instance.
# By default, all of these steps are on. This is an advanced configuration. None of these steps
# will run if normalizeInstance is set to false.
#
# Normalization includes:
# prewarming the parcel directory
# downloading, installing, and adjusting packages
# minimizing swappiness
# increasing the maximum number of open files
# resizing the root partition
# mounting ephemeral disks
# normalizationConfig {
# prewarmDirectory: true
# installPackages: true
# miscellaneousServiceAdjustment: true
# minimizeSwappiness: true
# increaseMaxNumberOfOpenFiles: true
# resizeRootPartition: true
# mountAllUnmountedDisks: true
# }
#
# By default Director does not do host key verification when performing SSH on an instance.
# This can be changed for added security by setting the SSH Host Key Retrieval Type.
#
# NONE : Host key fingerprints will not be stored or verified during SSH (default)
# PROVIDER : Retrieve the host key fingerprints from the cloud provider. This does not
# work for all images and will likely increase bootstrap time.
# INSTANCE : Retrieve the host key fingerprints upon first connection into the instance. This
# is less secure than the PROVIDER method since the first connection isn't guaranteed
# to be secure.
# FALLBACK : Attempt to retrieve host key fingerprints from the cloud provider first; if that
# fails, attempt to retrieve upon first connection into the instance.
#
# sshHostKeyRetrievalType: NONE
#
# User Data
#
# Director can pass opaque data or scripts to instances using the EC2 user data mechanism.
# Director will base64-encode the user data automatically when it is provided through the
# userDataUnencoded property. If the data is already base64-encoded, pass it through the
# userData property instead. Only use one property or the other, not both.
#
# See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html
#
# userData: "U2FtcGxlIHVzZXIgZGF0YQo="
# userDataUnencoded: "Sample user data"
}
#
# A list of instance types to use for groups of nodes or management services. Instances
# specified here inherit from the common-instance-properties properties specified above.
#
instances {

    # Each template below starts from common-instance-properties and sets the
    # EC2 instance type.

    m42x: ${common-instance-properties} {
        type: m4.2xlarge

        #
        # Properties inherited from the provider section or from
        # common-instance-properties can be overridden per instance template.
        # These properties include:
        #
        # image: ami-REPLACE-ME
        # securityGroupsIds: sg-REPLACE-ME
        # subnetId: subnet-REPLACE-ME
        # rackId: "/default"
    }

    m44x: ${common-instance-properties} {
        type: m4.4xlarge
    }

    c34x: ${common-instance-properties} {
        type: c3.4xlarge
    }

    c38x: ${common-instance-properties} {
        type: c3.8xlarge
    }

    c44x: ${common-instance-properties} {
        type: c4.4xlarge
    }

    i2x: ${common-instance-properties} {
        type: i2.xlarge
    }

    i22x: ${common-instance-properties} {
        type: i2.2xlarge
    }

    d2x: ${common-instance-properties} {
        type: d2.xlarge
    }

    d22x: ${common-instance-properties} {
        type: d2.2xlarge
    }

    d24x: ${common-instance-properties} {
        type: d2.4xlarge
    }

    # only suitable as a gateway
    t2l: ${common-instance-properties} {
        type: t2.large
    }
}
#
# Optional external database server configuration.
#
# Cloudera Altus Director can provision RDS database servers on-demand or create databases
# on existing database servers. This reference configuration does the former.
#
databaseServers {

    #
    # Provision RDS database server
    #
    rds-mysql-prod1 {
        type: mysql
        user: root
        # Master password for the RDS server that gets provisioned. This is a
        # placeholder like the other REPLACE-ME values in this file: set a strong,
        # deployment-specific password before bootstrapping.
        password: rds-password-REPLACE-ME
        instanceClass: db.m3.medium
        dbSubnetGroupName: REPLACE-ME
        vpcSecurityGroupIds: sg-REPLACE-ME
        allocatedStorage: 10
        # NOTE(review): confirm this MySQL version is supported by the Cloudera Manager /
        # CDH release installed by this configuration.
        engineVersion: 5.5.53
        tags {
            owner: ${?USER}
        }
    }

    #
    # Use an existing MySQL server
    #
    #
    # existingmysql1 {
    #     type: mysql
    #     host: REPLACE-ME # with IP address of database server
    #     port: 3306
    #     user: root
    #     password: rootpassword
    # }
    #

    #
    # Use an existing PostgreSQL server
    #
    #
    # existingpostgres1 {
    #     type: postgresql
    #     host: REPLACE-ME # with IP address of database server
    #     port: 5432
    #     user: postgres
    #     password: rootpassword
    # }
    #
}
#
# Configuration for Cloudera Manager. Cloudera Altus Director can use an existing Cloudera Manager
# or bootstrap everything from scratch for a new cluster
#
cloudera-manager {
instance: ${instances.m42x} {
    #
    # The properties inherited from the provider section, common-instance-properties or
    # instance template can be overridden for the Cloudera Manager instance.
    # These properties include:
    #
    # image: ami-REPLACE-ME
    # securityGroupsIds: sg-REPLACE-ME
    # subnetId: subnet-REPLACE-ME
    # rackId: "/default"

    tags {
        # add any additional tags as needed
        # Kept in line with the Cloudera Manager 6 package repository used by this deployment
        application: "Cloudera Manager 6"
    }
}
#
# Licensing configuration
#
# There are three mutually exclusive options for setting up Cloudera Manager's license.
# 1. License text may be embedded in this file using the "license" field. Triple quotes (""")
# are recommended for including multi-line text strings.
# 2. The "licensePath" can be used to specify the path to a file containing the license.
# 3. The "enableEnterpriseTrial" flag indicates whether the 60-Day Cloudera Enterprise Trial
# should be activated when no license is present. This must not be set to true if a
# license is included using either "license" or "licensePath".
#
# Embed a license for Cloudera Manager
#
# license: """
# -----BEGIN PGP SIGNED MESSAGE-----
# Hash: SHA1
#
# {
# "version" : 1,
# "name" : "License Owner",
# "uuid" : "license id",
# "expirationDate" : 0,
# "features" : [ "FEATURE1", "FEATURE2" ]
# }
# -----BEGIN PGP SIGNATURE-----
# Version: GnuPG v1.4.11 (GNU/Linux)
#
# PGP SIGNATURE
# -----END PGP SIGNATURE-----
# """
#
# Include a license for Cloudera Manager from an external file
#
# licensePath: "/path/to/license.txt.asc"
#
# Specify the billing ID.
#
# Cloudera Altus Director will use the billing ID to report usage information to a metering service
# for usage based billing.
#
# Usage reporting starts as soon as you assign a billing ID and a license to a Cloudera Manager.
# If you remove a billing ID, Director will stop reporting to the metering service.
#
# When usage reporting stops, you will not have access to Cloudera Support with this deployment.
# If you want a billing ID, please contact Cloudera.
# billingId: billingId-REPLACE-ME
#
# Activate 60-Day Cloudera Enterprise Trial
#
enableEnterpriseTrial: true
#
# Install the unlimited strength JCE policy files for higher levels of encryption.
# Prior to setting this to true, confirm that you understand the legal ramifications
# of using unlimited JCE policy files in your country.
#
# unlimitedJce: true
#
# Automatic TLS
#
#
# Set up TLS connections automatically between Cloudera Altus Director and Cloudera Manager,
# as well as among the cluster services. Automatic TLS includes installation of
# unlimited strength JCE policy files (see unlimitedJce).
#
# tlsEnabled: true
#
# Pass TLS configuration properties to Cloudera Manager to refine how automatic TLS
# is configured. All TLS configuration properties are optional and have sane defaults.
# Additional properties are available beyond those listed here.
#
# tlsConfigurationProperties {
# subject_suffix: "O=example.com,L=Cityville,ST=CA,C=US"
# ca_sig_hash_algo: "SHA512"
# email_address: "name@example.com"
# }
#
# Kerberos Credentials
#
# #
# # An administrative Kerberos account capable of creating principals on the KDC that
# # Cloudera Manager will be using. This will typically be in the format:
# # Principal@YOUR.KDC.REALM
# krbAdminUsername: "REPLACE-ME"
#
# # The password for the administrative Kerberos account.
# krbAdminPassword: "REPLACE-ME"
#
#
# Optional database configuration for Cloudera Manager
#
# There are three mutually exclusive options for database usage in Cloudera Altus Director.
# 1. With no configuration, embedded PostgreSQL databases will be used.
# 2. Alternatively, existing external databases can be used.
# 3. Finally, databases can be created on the fly on existing external database servers.
#
# Note that using an external database here necessitates using an external database
# for the cluster services. This reference configuration is using Option 3.
#
#
# 2. Optional configuration for existing external databases
#
# databases {
# CLOUDERA_MANAGER {
# type: postgresql
#
# host: db.example.com
# port: 123
#
# user: admin
# password: 1231ed
#
# name: scm
# }
#
# ACTIVITYMONITOR { ... }
#
# REPORTSMANAGER { ... }
#
# NAVIGATOR { ... }
#
# # Available in Cloudera Manager 5.2+
# NAVIGATORMETASERVER { ... }
# }
#
# 3. Optional configuration for creating external databases on the fly
#
# When a database is created on the fly, Director generates a random database name using the specified database
# name prefix, a random username based on the specified username prefix, and a random password. The password is
# stored by Director and made available to the service that uses the database. If multiple services reference the
# same external database server, Director will create a database for each.
#
# MySQL limits usernames to sixteen characters. Therefore, limit usernamePrefix values for databases on MySQL to
# seven characters; the remaining nine characters are used by the randomized suffix generated by Director.
#
# Note that the databaseServerName must correspond to an external database server named above, which
# can be the provisioned RDS instance or another existing database server.
#
databaseTemplates {

    # One template per Cloudera Manager component database; every template
    # points at the rds-mysql-prod1 database server.

    CLOUDERA_MANAGER {
        name: scmt
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: scm
        usernamePrefix: scmu
    }

    ACTIVITYMONITOR {
        name: amont
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: amon
        usernamePrefix: amonu
    }

    REPORTSMANAGER {
        name: rmant
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: rman
        usernamePrefix: rmanu
    }

    NAVIGATOR {
        name: navt
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: nav
        usernamePrefix: navu
    }

    # Available in Cloudera Manager 5.2+
    NAVIGATORMETASERVER {
        name: navmst
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: navms
        usernamePrefix: navmsu
    }
}
#
# Configuration to override Cloudera Manager package repositories
#
repository: "https://archive.cloudera.com/cm6/6.0/redhat7/yum/"
repositoryKeyUrl: "https://archive.cloudera.com/cm6/6.0/redhat7/yum/RPM-GPG-KEY-cloudera"
# OR use an existing Cloudera Manager installation
# hostname: "192.168.33.10"
# username: <if not default 'admin'>
# password: <if not default 'admin'>
#
# Optional configuration for Cloudera Manager and its management services
#
# Configuration properties for CLOUDERA_MANAGER are documented at
# http://www.cloudera.com/documentation/enterprise/properties/5-13-x/topics/cm_props_cmserver.html
#
# Configuration properties for the Cloudera Management services are documented at
# http://www.cloudera.com/documentation/enterprise/properties/5-13-x/topics/cm_props_mgmtservice.html
#
# Configuration properties for Hosts are documented at
# http://www.cloudera.com/documentation/enterprise/properties/5-13-x/topics/cm_props_host.html
#
configs {

    # CLOUDERA_MANAGER corresponds to the Cloudera Manager Server configuration options
    CLOUDERA_MANAGER {
        enable_api_debug: true
        custom_banner_html: "Managed by Cloudera Altus Director"

        #
        # Kerberos Configurations
        #

        # # The type of KDC Cloudera Manager will be using. Valid values are "MIT KDC"
        # # and "Active Directory"
        # KDC_TYPE: "Active Directory"

        # # The KDC host name or IP address.
        # KDC_HOST: "REPLACE-ME"

        # # The security realm that your KDC uses. This will be of the format of a fully
        # # qualified domain name:
        # # YOUR.KDC.REALM
        # SECURITY_REALM: "REPLACE-ME"

        # # The Active Directory KDC domain. Only applicable to Active Directory KDCs. This
        # # will be in the format of an X.500 Directory Specification:
        # # DC=domain,DC=example,DC=com
        # AD_KDC_DOMAIN: "REPLACE-ME"

        # # Allow Cloudera Manager to deploy Kerberos configurations to hosts. This should
        # # be set to true unless you have an alternate mechanism to generate or retrieve the
        # # Kerberos configuration on your Cloudera Manager node instances.
        # KRB_MANAGE_KRB5_CONF: true

        # # The encryption types your KDC supports. Some of those listed below will require the
        # # unlimited strength JCE policy files.
        # KRB_ENC_TYPES: "aes256-cts aes128-cts des3-hmac-sha1 arcfour-hmac des-hmac-sha1 des-cbc-md5 des-cbc-crc"

        # # There are many more optional Kerberos configuration options available to Cloudera Manager.
        # # Please refer to the Kerberos section on
        # # http://www.cloudera.com/documentation/enterprise/properties/5-13-x/topics/cm_props_cmserver.html
        # # for more details.
    }

    #
    # # CLOUDERA_MANAGEMENT_SERVICE corresponds to the Service-Wide configuration options
    # CLOUDERA_MANAGEMENT_SERVICE {
    #     enable_alerts : false
    #     enable_config_alerts : false
    # }
    #
    # SERVICEMONITOR { ... }
    #
    # ACTIVITYMONITOR { ... }
    #
    # HOSTMONITOR { ... }
    #
    # REPORTSMANAGER { ... }
    #
    # EVENTSERVER { ... }
    #
    # ALERTPUBLISHER { ... }
    #
    # NAVIGATOR { ... }
    #
    # # Available in Cloudera Manager 5.2+
    # NAVIGATORMETASERVER { ... }
    #
    # # Configuration properties for all hosts
    # HOSTS { ... }
}
# Embedded post-creation scripts for the deployment: a list of two scripts, a
# shell script followed by a Python script. Everything between each pair of
# triple quotes is script text taken verbatim; the '#' lines inside are
# comments of the script itself, not configuration comments.
postCreateScripts: ["""#!/bin/sh
# This is an embedded post-creation script that runs as root and can be used to
# customize the Cloudera Manager instance after the deployment has been created.
# Use exit code 0 to indicate success
# Use exit code 91 indicate an unretryable failure
# Cloudera Altus Director will automatically retry script execution for all other exit codes
# Post-creation scripts also have access to the following environment variables:
# DEPLOYMENT_HOST_PORT
# ENVIRONMENT_NAME
# DEPLOYMENT_NAME
# CM_USERNAME
# CM_PASSWORD
echo 'Hello World!'
exit 0
""",
"""#!/usr/bin/python
# Additionally, multiple post-creation scripts can be supplied. They will run
# in the order they are listed here. Interpeters other than bash can be used
# as well.
print 'Hello again!'
"""]
# For more complex scripts, post-creation scripts can be supplied via local
# filesystem paths. They will run after any scripts supplied in the previous
# postCreateScripts section.
# postCreateScriptsPaths: ["/tmp/test-script.sh",
# "/tmp/test-script.py"]
# Optional custom service descriptors for external parcels
# csds: [
# "https://archive.cloudera.com/exampleProduct/csd/example-csd.jar"
# ]
#
# External accounts
#
# # Any external accounts that should be set up within Cloudera Manager. These will allow some cluster
# # services to utilize cloud functionality, such as object stores.
#
# # Note: CM/CDH 5.10 is required for this feature. At the moment, only AWS external accounts are supported.
# externalAccounts {
#
# # External account that uses AWS Access Key Authentication. This type of authentication
# # will require adding the AWS_S3 service.
# AWSAccount1 {
# type: AWS_ACCESS_KEY_AUTH
# configs {
# aws_access_key: REPLACE-ME
# aws_secret_key: REPLACE-ME
#
# #
# # S3 Guard (added in CM/CDH 5.11) can be enabled to guarantee a consistent view of data stored
# # in Amazon S3 by storing additional metadata in an Amazon DynamoDB table.
# # See https://www.cloudera.com/documentation/enterprise/latest/topics/cm_s3guard.html for more
# # details and additional S3 Guard configuration properties.
# #
#
# # s3guard_enable: false
# # s3guard_region: REPLACE-ME
# # s3guard_table_name: s3guard-metadata
# # s3guard_table_auto_create: false
# }
# }
#
# # External account that uses IAM Role Authentication. This type of authentication doesn't
# # require adding the AWS_S3 service unless it has S3 guard enabled.
# AWSAccount2 {
# type: AWS_IAM_ROLES_AUTH
# }
# }
}
#
# Cluster description
#
cluster {
# List the products and their versions that need to be installed.
# These products must have a corresponding parcel in the parcelRepositories
# configured below. The specified version will be used to find a suitable
# parcel. Specifying a version that points to more than one parcel among
# those available will result in a configuration error. Specify more granular
# versions to avoid conflicts.
products {
    # Product name mapped to the version to install
    CDH: 6
    # EXAMPLEPRODUCT: 1
}
#
# Optional override of CDH parcel repositories
#
parcelRepositories: ["https://archive.cloudera.com/cdh6/6.0/parcels/"]
#
# NOTE: Sentry is only supported in Cloudera Manager versions 5.1 onward.
# NOTE: On CM 5.9+ Sentry and Kafka 2.0 can't coexist in the same cluster.
# If this is needed, use CM 5.8 repository and parcels, or use Kafka
# 2.1 or higher.
#
services: [
    HDFS,
    YARN,
    ZOOKEEPER,
    HBASE,
    HIVE,
    HUE,
    OOZIE,
    SPARK_ON_YARN,
    KAFKA,
    SOLR,
    FLUME,
    IMPALA,
    SQOOP_CLIENT,
    KS_INDEXER,
    # Sentry requires Kerberos to be enabled
    # SENTRY,
    KUDU,
    # Requires Sentry and Kerberos (on default configurations)
    # AWS_S3
]
#
# Optional custom service configurations
# Configuration keys containing special characters (e.g., '.', ':') must be enclosed in double quotes.
#
# Configuration properties for CDH roles and services are documented at
# https://www.cloudera.com/documentation/enterprise/properties/5-13-x/topics/cm_props_cdh5130.html
#
#
# configs {
# AWS_S3 {
# cloud_account: AWSAccount1
# }
#
# HDFS {
# dfs_block_size: 134217728
# }
#
# MAPREDUCE {
# mapred_system_dir: /user/home
# mr_user_to_impersonate: mapred1
# }
#
# KAFKA {
# "num.partitions": 3
# }
# }
#
# Optional database configuration for Cluster Services
#
# As mentioned in the cloudera-manager section, the three mutually exclusive options for database
# usage are (1) No configuration which uses embedded PostgreSQL databases; (2) Use existing
# external databases; or (3) Create databases on an existing external database server.
#
#
# 2. Optional configuration for existing external databases for Hive Metastore or Sentry databases
#
# databases {
# HIVE {
# type: postgresql
# host: db.example.com
# port: 123
# user: hive
# password: pass
# name: hive_db
# },
#
# HUE {
# type: postgresql
# host: db.example.com
# port: 123
# user: hue
# password: pass
# name: hue_db
# },
#
# OOZIE {
# type: postgresql
# host: db.example.com
# port: 123
# user: oozie
# password: pass
# name: oozie_db
# },
#
# # Sentry requires Kerberos
# SENTRY {
# type: postgresql
# host: db.example.com
# port: 123
# user: sentry
# password: pass
# name: sentry_db
# },
# }
#
# 3. Optional configuration for creating external databases on the fly for Hive Metastore, Hue,
# Oozie and Sentry databases
#
databaseTemplates {

    # Each databaseServerName must correspond to an external database server named above

    HIVE {
        name: hivet
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: hive
        usernamePrefix: hiveu
    }

    HUE {
        name: huet
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: hue
        usernamePrefix: hueu
    }

    OOZIE {
        name: ooziet
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: oozie
        usernamePrefix: oozieu
    }

    #
    # # Sentry requires Kerberos
    # SENTRY {
    #     name: sentryt
    #     databaseServerName: rds-mysql-prod1
    #     databaseNamePrefix: sentry
    #     usernamePrefix: sentryu
    # }
    #
}
#
# Instance group configurations
#
#
# Given the number of services being deployed in this sample, in order to not overload
# a single instance, the roles for the masters have been split between two instances
# (masters-1 and masters-2). Alternatively, all the master roles can be placed in a single
# instance if fewer services are being deployed or if the instance type is large enough
# to handle all roles.
#
masters-1 {
    count: 1

    instance: ${instances.i22x} {
        #
        # Properties inherited from the provider section, common-instance-properties or
        # the instance template can be overridden for each instance group.
        # These properties include:
        #
        # image: ami-REPLACE-ME
        # securityGroupsIds: sg-REPLACE-ME
        # subnetId: subnet-REPLACE-ME
        # rackId: "/default"

        tags {
            group: master-1
        }
    }

    # Roles placed on this instance group, listed per service
    roles {
        HDFS: [NAMENODE]
        YARN: [RESOURCEMANAGER]
        ZOOKEEPER: [SERVER]
        HBASE: [MASTER]
        HIVE: [HIVESERVER2]
        HUE: [HUE_SERVER]
        OOZIE: [OOZIE_SERVER]
        SPARK_ON_YARN: [SPARK_YARN_HISTORY_SERVER]
        KAFKA: [KAFKA_BROKER, KAFKA_MIRROR_MAKER]
        SOLR: [SOLR_SERVER]
        KUDU: [KUDU_MASTER]
        IMPALA: [CATALOGSERVER]
        KS_INDEXER: [HBASE_INDEXER]
        # SENTRY: [SENTRY_SERVER] # Sentry requires Kerberos
    }

    #
    # Optional custom role configurations
    # Configuration keys containing special characters (e.g., '.', ':') must be enclosed in double quotes.
    #
    # Configuration properties for CDH roles and services are documented at
    # http://www.cloudera.com/documentation/enterprise/properties/5-13-x/topics/cm_props_cdh5130.html
    #
    configs {
        # HDFS {
        #     NAMENODE {
        #         dfs_name_dir_list: /data/nn
        #         namenode_port: 1234
        #     }
        # }
        # KAFKA {
        #     KAFKA_BROKER {
        #         broker_max_heap_size: 512
        #         "log.dirs": /data0/kafka/data
        #     }
        #     KAFKA_MIRROR_MAKER {
        #         "source.bootstrap.servers": "sourcebroker1:9092"
        #         "bootstrap.servers": "destinationbroker1:9092"
        #         whitelist: mytopic1
        #     }
        # }

        KUDU {
            KUDU_MASTER {
                # The master rarely performs IO. If fs_data_dirs is unset, it will
                # use the same directory as fs_wal_dir
                fs_wal_dir: "/data0/kudu/masterwal"
                fs_data_dirs: "/data1/kudu/master"
            }
        }
    }
}
masters-2 {
    count: 1

    instance: ${instances.i22x} {
        tags {
            group: master-2
        }
    }

    # Roles placed on this instance group, listed per service
    roles {
        HDFS: [SECONDARYNAMENODE]
        YARN: [JOBHISTORY]
        HBASE: [HBASETHRIFTSERVER]
        HIVE: [HIVEMETASTORE]
        IMPALA: [STATESTORE]
    }
}
workers {
    count: 9

    #
    # Minimum number of instances required to set up the cluster.
    # Fail and quit if minCount number of instances is not available in this cloud
    # environment. Else, continue setting up the cluster.
    #
    minCount: 5

    instance: ${instances.d22x} {
        # Put all cluster nodes in a placement group for improved network performance
        # Note: this only works for a limited set of instances
        # http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/placement-groups.html
        # placementGroup: REPLACE-ME

        tags {
            group: worker
        }
    }

    # Roles placed on this instance group, listed per service
    roles {
        HDFS: [DATANODE]
        YARN: [NODEMANAGER]
        HBASE: [REGIONSERVER]
        FLUME: [AGENT]
        IMPALA: [IMPALAD]
        KUDU: [KUDU_TSERVER]
    }

    #
    # Optional custom role configurations
    # Configuration keys containing special characters (e.g., '.', ':') must be enclosed in double quotes.
    #
    # Configuration properties for CDH services and roles are documented at
    # http://www.cloudera.com/documentation/enterprise/properties/5-13-x/topics/cm_props_cdh5130.html
    #
    configs {
        # HDFS {
        #     DATANODE {
        #         dfs_data_dir_list: /data/dn
        #     }
        # }
        # HBASE {
        #     REGIONSERVER {
        #         hbase_regionserver_java_heapsize: 4000000000
        #     }
        # }

        KUDU {
            KUDU_TSERVER {
                # Set fs_wal_dir to an SSD drive (if exists) for better performance.
                # Set fs_data_dirs to a comma-separated string containing all remaining
                # disk drives, solid state or otherwise.
                # If there are multiple drives in the machine, it's best to ensure that
                # the WAL directory is not located on the same disk as a tserver data
                # directory.
                fs_wal_dir: "/data0/kudu/tabletwal"
                fs_data_dirs: "/data1/kudu/tablet"
            }
        }
    }
}
# Spot instance group configuration
# See: http://www.cloudera.com/documentation/director/latest/topics/director_aws_using_spot_instances.html
# workers-spot {
# count: 3
# #
# # Minimum number of instances required to set up the cluster.
# # Fail and quit if minCount number of instances is not available in this cloud
# # environment. Else, continue setting up the cluster.
# #
# # For a group using Spot instances, minCount should always be 0.
# minCount: 0
# instance: ${instances.c44x} {
# # Put all cluster nodes in a placement group for improved network performance
# # Note: this only works for a limited set of instances
# # http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/placement-groups.html
# # placementGroup: REPLACE-ME
# # sshUsername: centos # May vary by image
# useSpotInstances: true
# # spotPriceUSDPerHr: 0.50 # Can be set/overridden per instance group
# tags {
# group: worker-spot
# }
# }
# # Only stateless roles can be used with Spot instances
# roles {
# YARN: [NODEMANAGER]
# }
# #
# # Optional custom role configurations
# # Configuration keys containing special characters (e.g., '.', ':') must be enclosed in double quotes.
# #
# # Configuration properties for CDH roles and services are documented at
# # http://www.cloudera.com/documentation/enterprise/properties/5-13-x/topics/cm_props_cdh5130.html
# #
#
# # configs {
# # }
# }
gateways {
    count: 1

    instance: ${instances.t2l} {
        tags {
            group: gateway
        }
    }

    # Gateway roles placed on this instance group, listed per service
    roles {
        HDFS: [GATEWAY]
        HBASE: [GATEWAY]
        HIVE: [GATEWAY]
        SPARK_ON_YARN: [GATEWAY]
        KAFKA: [GATEWAY]
        SOLR: [GATEWAY]
        SQOOP_CLIENT: [GATEWAY]
    }

    #
    # Optional custom role configurations
    # Configuration keys containing special characters (e.g., '.', ':') must be enclosed in double quotes.
    #
    # Configuration properties for CDH roles and services are documented at
    # http://www.cloudera.com/documentation/enterprise/properties/5-13-x/topics/cm_props_cdh5130.html
    #
    # configs {
    #     HIVE {
    #         GATEWAY {
    #             hive_metastore_timeout: 3000
    #             client_config_root_dir: /etc/hive
    #         }
    #     }
    # }
}
# Embedded instance post-creation scripts, given as a list of multi-line strings.
# Each entry is a complete script starting with its own interpreter line; entries
# run in the order listed. The Python sample uses the print() call form so that it
# is valid under both Python 2 and Python 3, whichever /usr/bin/python resolves to.
instancePostCreateScripts: ["""#!/bin/sh
# This is an embedded instance post-creation script that runs as root and can be used to
# customize each cluster instance after the cluster has been created. This script will run
# on every cluster instance. These scripts run before postCreateScripts, which are at cluster level.
# Use exit code 0 to indicate success
# Use exit code 91 to indicate an unretryable failure
# Cloudera Altus Director will automatically retry script execution for all other exit codes
# Instance post-creation scripts also have access to the following environment variables:
# DEPLOYMENT_HOST_PORT
# ENVIRONMENT_NAME
# DEPLOYMENT_NAME
# CLUSTER_NAME
# CM_USERNAME
# CM_PASSWORD
echo 'Hello World!'
exit 0
""",
"""#!/usr/bin/python
# Additionally, multiple instance post-creation scripts can be supplied. They will run
# in the order they are listed here. Interpreters other than bash can be used
# as well.
print('Hello again!')
"""]
# For more complex scripts, instance post-creation scripts can be supplied via local
# filesystem paths. They will run after any scripts supplied in the previous
# instancePostCreateScripts section.
# instancePostCreateScriptsPaths: ["/tmp/test-script.sh",
# "/tmp/test-script.py"]
# Embedded cluster-level post-creation scripts, given as a list of multi-line
# strings. Each entry is a complete script starting with its own interpreter line;
# entries run in the order listed. The Python sample uses the print() call form so
# that it is valid under both Python 2 and Python 3, whichever /usr/bin/python
# resolves to.
postCreateScripts: ["""#!/bin/sh
# This is an embedded post-creation script that runs as root and can be used to
# customize the cluster after it has been created. This will run only once,
# at a cluster level, on an arbitrary cluster instance.
# Use exit code 0 to indicate success
# Use exit code 91 to indicate an unretryable failure
# Cloudera Altus Director will automatically retry script execution for all other exit codes
# Post-creation scripts also have access to the following environment variables:
# DEPLOYMENT_HOST_PORT
# ENVIRONMENT_NAME
# DEPLOYMENT_NAME
# CLUSTER_NAME
# CM_USERNAME
# CM_PASSWORD
echo 'Hello World!'
exit 0
""",
"""#!/usr/bin/python
# Additionally, multiple post-creation scripts can be supplied. They will run
# in the order they are listed here. Interpreters other than bash can be used
# as well.
print('Hello again!')
"""]
# For more complex scripts, post-creation scripts can be supplied via local
# filesystem paths. They will run after any scripts supplied in the previous
# postCreateScripts section.
# postCreateScriptsPaths: ["/tmp/test-script.sh",
# "/tmp/test-script.py"]
# Embedded pre-termination scripts, given as a list of multi-line strings. Each
# entry is a complete script starting with its own interpreter line; entries run
# in the order listed. The Python sample uses the print() call form so that it is
# valid under both Python 2 and Python 3, whichever /usr/bin/python resolves to.
preTerminateScripts: ["""#!/bin/sh
# This is an embedded pre-termination script that runs as root and can be used to
# perform cleanup on the cluster before it is terminated.
# If the exit code is not zero Cloudera Altus Director will fail
# Pre-terminate scripts also have access to the following environment variables:
# DEPLOYMENT_HOST_PORT
# ENVIRONMENT_NAME
# DEPLOYMENT_NAME
# CLUSTER_NAME
# CM_USERNAME
# CM_PASSWORD
echo 'Goodbye World!'
exit 0
""",
"""#!/usr/bin/python
# Additionally, multiple pre-terminate scripts can be supplied. They will run
# in the order they are listed here. Interpreters other than bash can be used
# as well.
print('Goodbye again!')
"""]
# For more complex scripts, pre-terminate scripts can be supplied via local
# filesystem paths. They will run after any scripts supplied in the previous
# preTerminateScripts section.
# preTerminateScriptsPaths: ["/tmp/test-script.sh",
# "/tmp/test-script.py"]
#
# Optional Administration Settings
# Administration settings configure Director's interaction with the cluster.
#
# Both settings below are commented out, so this block is currently empty;
# uncomment a line to set that option explicitly.
administrationSettings {
# If enabled, Director will attempt to automatically repair
# clusters whose instances have been terminated in the cloud provider.
# autoRepairEnabled: false
# NOTE(review): presumably the minimum wait, in seconds, between automatic
# repair attempts; confirm against the Director documentation.
# autoRepairCooldownPeriodInSeconds: 1800
}
}