#
# Copyright (c) 2015-2019 Cloudera, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Sample AWS Cloudera Altus Director configuration file illustrating how to set up a highly available
# cluster based on the Cloudera AWS reference architecture:
# https://www.cloudera.com/content/dam/www/marketing/resources/solution-briefs/cloudera-enterprise-reference-architecture-for-aws-deployments.pdf.landing.html
#
# This configuration file is intended to demonstrate how to set up high availability clusters
# only. Please refer to the aws.reference.conf file and the Cloudera Altus Director documentation for
# further details on how to use Cloudera Altus Director.
#
#
# Cluster name
#
# NOTE(review): the "C6" prefix presumably refers to the CDH 6 product selected in the
# cluster section below -- confirm, and adjust to your own naming scheme.
#
name: "C6-HAReference-AWS"
#
# Cloud provider configuration (credentials, region or zone, and more)
#
provider {
    type: aws

    #
    # AWS credentials, taken from the OS environment
    # See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
    #
    # If specifying the access keys directly and not through variables, make sure to enclose
    # them in double quotes.
    #
    # Leave the accessKeyId and secretAccessKey fields blank when running on an instance
    # launched with an IAM role.
    #
    # accessKeyId: ${?AWS_ACCESS_KEY_ID}
    # secretAccessKey: ${?AWS_SECRET_ACCESS_KEY}

    #
    # ID of the Amazon AWS region to use
    # See: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
    #
    region: region-REPLACE-ME

    #
    # Region endpoint (if you are using a region with custom endpoints, such as GovCloud)
    #
    # regionEndpoint: ec2.us-gov-west-1.amazonaws.com

    #
    # ID of the VPC subnet
    # See: https://docs.aws.amazon.com/vpc/latest/userguide/VPC_Subnets.html
    #
    subnetId: subnet-REPLACE-ME

    #
    # Comma-separated list of security group IDs
    # See: https://docs.aws.amazon.com/vpc/latest/userguide/VPC_SecurityGroups.html
    #
    securityGroupsIds: sg-REPLACE-ME

    #
    # A prefix that Cloudera Altus Director should use when naming the instances (this is not
    # part of the hostname)
    #
    instanceNamePrefix: cloudera-director

    #
    # Size of the root volume (in GB). Cloudera Altus Director will automatically expand
    # the filesystem so that you can use all the available disk space for your application
    #
    # rootVolumeSizeGB: 100 # defaults to 50 GB if not specified

    #
    # Type of the EBS volume used for the root partition. Defaults to gp2
    # See: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html
    #
    # rootVolumeType: gp2 # OR standard (for EBS magnetic)

    #
    # Whether to associate a public IP address with instances or not. If this is false,
    # instances should be able to access the internet using a NAT instance if necessary.
    # See: https://docs.aws.amazon.com/vpc/latest/userguide/vpc-ip-addressing.html
    #
    # associatePublicIpAddresses: true
}
#
# SSH credentials to use to connect to the instances
#
ssh {
    # Login user; ec2-user is the one for RHEL images
    username: ec2-user

    # Replace with an absolute path to the .pem file; ${HOME} may be used
    privateKey: privateKey-REPLACE-ME
}
#
# These instance properties will be applied to all instances.
#
common-instance-properties {
    #
    # Amazon Machine Image (AMI)
    # See: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html
    #
    # Certain AMI virtualization types are incompatible with certain instance types. HVM
    # AMI types are recommended since they are compatible with most instance types.
    #
    # Compatibility matrix: https://aws.amazon.com/amazon-linux-ami/instance-type-matrix/
    # Altus Director compatibility: https://www.cloudera.com/documentation/director/latest/topics/director_deployment_requirements.html
    #
    image: ami-REPLACE-ME

    #
    # Name of the IAM Role to use for this instance type
    # See: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html
    #
    # iamProfileName: iam-profile-REPLACE-ME

    #
    # Tags to apply to each instance
    #
    tags {
        # Optional substitution: taken from the USER environment variable when it is set
        owner: ${?USER}
    }

    #
    # Flag indicating whether to normalize the instance. Not setting normalization here implies
    # that your bootstrap script will take care of normalization. This is an advanced
    # configuration that requires assistance from Cloudera support.
    #
    # Defaults to true
    #
    # normalizeInstance: true

    #
    # Configuration allowing for granular control over the normalization steps of an instance.
    # By default, all of these steps are on. This is an advanced configuration. None of these
    # steps will run if normalizeInstance is set to false.
    #
    # Normalization includes:
    #   - prewarming the parcel directory
    #   - downloading, installing, and adjusting packages
    #   - minimizing swappiness
    #   - increasing the maximum number of open files
    #   - resizing the root partition
    #   - mounting ephemeral disks
    #
    # normalizationConfig {
    #     prewarmDirectory: true
    #     installPackages: true
    #     miscellaneousServiceAdjustment: true
    #     minimizeSwappiness: true
    #     increaseMaxNumberOfOpenFiles: true
    #     resizeRootPartition: true
    #     mountAllUnmountedDisks: true
    # }
}
#
# A list of instance types to use for groups of nodes or management services. Instances
# specified here inherit from the common-instance-properties properties specified above.
#
instances {
    # Every template below starts from common-instance-properties and adds an EC2 type.

    m42x: ${common-instance-properties} {
        type: m4.2xlarge

        #
        # The properties inherited from the provider section or common-instance-properties can
        # be overridden for each instance template.
        # These properties include:
        #
        # image: ami-REPLACE-ME
        # securityGroupsIds: sg-REPLACE-ME
        # subnetId: subnet-REPLACE-ME
        # rackId: "/default"
    }

    m44x: ${common-instance-properties} {
        type: m4.4xlarge
    }

    c34x: ${common-instance-properties} {
        type: c3.4xlarge
    }

    c38x: ${common-instance-properties} {
        type: c3.8xlarge
    }

    c44x: ${common-instance-properties} {
        type: c4.4xlarge
    }

    i2x: ${common-instance-properties} {
        type: i2.xlarge
    }

    i22x: ${common-instance-properties} {
        type: i2.2xlarge
    }

    d2x: ${common-instance-properties} {
        type: d2.xlarge
    }

    d22x: ${common-instance-properties} {
        type: d2.2xlarge
    }

    d24x: ${common-instance-properties} {
        type: d2.4xlarge
    }

    # Only suitable as a gateway
    t2l: ${common-instance-properties} {
        type: t2.large
    }
}
#
# Optional external database server configuration.
#
# Cloudera Altus Director can provision RDS database servers on-demand or create databases
# on existing database servers. This reference configuration does the former.
#
# Services that use databases (e.g., Hive, Hue, and Oozie) require use of an
# external highly available database service in order for the services
# themselves to be highly available.
#
databaseServers {
    #
    # Provision RDS database server
    #
    # Every value containing REPLACE-ME is a placeholder and must be replaced before use.
    # The server name (rds-mysql-prod1) is what the databaseTemplates sections refer to
    # through databaseServerName.
    #
    rds-mysql-prod1 {
        type: mysql
        user: dbrootuser-REPLACE-ME
        password: dbrootpassword-REPLACE-ME
        instanceClass: dbinstanceClass-REPLACE-ME
        dbSubnetGroupName: dbsubnet-REPLACE-ME
        vpcSecurityGroupIds: sg-REPLACE-ME
        allocatedStorage: 10
        # Quoted so the dotted version is unambiguously a string
        engineVersion: "5.7.24"
        # Services that use databases need a highly available database service to be
        # highly available themselves
        multiAZ: true
        tags {
            owner: ${?USER}
        }
    }

    #
    # Use an existing MySQL server
    #
    # existingmysql1 {
    #     type: mysql
    #     host: REPLACE-ME # with IP address of database server
    #     port: 3306
    #     user: root
    #     password: rootpassword
    # }

    #
    # Use an existing PostgreSQL server
    #
    # existingpostgres1 {
    #     type: postgresql
    #     host: REPLACE-ME # with IP address of database server
    #     port: 5432
    #     user: postgres
    #     password: rootpassword
    # }
}
#
# Configuration for Cloudera Manager. Cloudera Altus Director can use an existing Cloudera
# Manager installation, or bootstrap everything from scratch for a new cluster.
#
cloudera-manager {
instance: ${instances.m42x} {
    #
    # The properties inherited from the provider section, common-instance-properties or
    # instance template can be overridden for the Cloudera Manager instance.
    # These properties include:
    #
    # image: ami-REPLACE-ME
    # securityGroupsIds: sg-REPLACE-ME
    # subnetId: subnet-REPLACE-ME
    # rackId: "/default"

    #
    # Additional tags for the Cloudera Manager instance
    #
    tags {
        application: "Cloudera Manager 6"
    }
}
#
# Licensing configuration
#
# There are three mutually exclusive options for setting up Cloudera Manager's license.
# 1. License text may be embedded in this file using the "license" field. Triple quotes (""")
# are recommended for including multi-line text strings.
# 2. The "licensePath" can be used to specify the path to a file containing the license.
# 3. The "enableEnterpriseTrial" flag indicates whether the 60-Day Cloudera Enterprise Trial
# should be activated when no license is present. This must not be set to true if a
# license is included using either "license" or "licensePath".
#
# Embed a license for Cloudera Manager
#
# license: """
# -----BEGIN PGP SIGNED MESSAGE-----
# Hash: SHA1
#
# {
# "version" : 1,
# "name" : "License Owner",
# "uuid" : "license id",
# "expirationDate" : 0,
# "features" : [ "FEATURE1", "FEATURE2" ]
# }
# -----BEGIN PGP SIGNATURE-----
# Version: GnuPG v1.4.11 (GNU/Linux)
#
# PGP SIGNATURE
# -----END PGP SIGNATURE-----
# """
#
# Include a license for Cloudera Manager from an external file
#
# licensePath: "/path/to/license.txt.asc"
#
# Activate 60-Day Cloudera Enterprise Trial when no license is present.
# This must not be set to true if a license is supplied through "license" or "licensePath".
#
enableEnterpriseTrial: true
#
# Specify the billing ID.
#
# Altus Director will use the billing ID to report usage information to a metering service for
# usage based billing.
#
# Usage reporting starts as soon as you assign a billing ID and a license to a Cloudera Manager.
# If you remove a billing ID, Director will stop reporting to the metering service.
#
# When usage reporting stops, you will not have access to Cloudera Support with this deployment.
# If you want a billing ID, please contact Cloudera. An enterprise license is required.
#
# billingId: billingId-REPLACE-ME
#
# Select the strategy for installing a JDK. Choices are:
#
# - AUTO (default): Altus Director installs the JDK on the Cloudera Manager instance, and
# Cloudera Manager installs the JDK on cluster instances
# - DIRECTOR_MANAGED: Altus Director installs the JDK on all instances
# - NONE: Neither Altus Director nor Cloudera Manager installs the JDK on any instances
#
# javaInstallationStrategy: AUTO
#
# Install the unlimited strength JCE policy files for higher levels of encryption.
# Prior to setting this to true, confirm that you understand the legal ramifications
# of using unlimited JCE policy files in your country.
#
# unlimitedJce: true
#
# Automatic TLS
#
#
# Set up TLS connections automatically between Cloudera Altus Director and Cloudera Manager,
# as well as among the cluster services. Automatic TLS includes installation of
# unlimited strength JCE policy files (see unlimitedJce).
#
# tlsEnabled: true
#
# Pass TLS configuration properties to Cloudera Manager to refine how automatic TLS
# is configured. All TLS configuration properties are optional and have sane defaults.
# Additional properties are available beyond those listed here.
#
# tlsConfigurationProperties {
# subject_suffix: "O=example.com,L=Cityville,ST=CA,C=US"
# ca_sig_hash_algo: "SHA512"
# email_address: "name@example.com"
# }
#
# Optional database configuration
#
# There are three mutually exclusive options for database usage in Cloudera Altus Director.
# 1. With no configuration, databases in the Cloudera Manager embedded PostgreSQL database server
# will be used.
# 2. Alternatively, existing external databases can be used.
# 3. Finally, databases can be created on the fly on existing external database servers.
#
# Note that using an external database here necessitates using an external database
# for the cluster services. This reference configuration is using Option 3.
#
#
# (Option 2) Optional configuration for existing external databases
#
# databases {
# CLOUDERA_MANAGER {
# type: postgresql
#
# host: db.example.com
# port: 123
#
# user: admin
# password: 1231ed
#
# name: scm
# }
#
# ACTIVITYMONITOR { ... }
#
# REPORTSMANAGER { ... }
#
# NAVIGATOR { ... }
#
# NAVIGATORMETASERVER { ... }
# }
#
# (Option 3) Optional configuration for creating external databases on the fly
#
# When a database is created on the fly, Director generates a random database name using the
# specified database name prefix, a random username based on the specified username prefix, and a
# random password. The password is stored by Director and made available to the service that uses the database. If
# multiple services reference the same external database server, Director will create a database for each.
#
# MySQL limits usernames to sixteen characters. Therefore, limit usernamePrefix values for databases on MySQL to
# seven characters; the remaining nine characters are used by the randomized suffix generated by Director.
#
# Note that the databaseServerName must correspond to an external database server named above, which
# can be the provisioned RDS instance or another existing database server.
#
databaseTemplates {
    # One template per Cloudera Manager database, all created on the rds-mysql-prod1 server.
    # usernamePrefix values are kept to seven characters or fewer: MySQL limits usernames to
    # sixteen characters and Director's randomized suffix uses nine of them.

    CLOUDERA_MANAGER {
        name: scmt
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: scm
        usernamePrefix: scmu
    }

    ACTIVITYMONITOR {
        name: amont
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: amon
        usernamePrefix: amonu
    }

    REPORTSMANAGER {
        name: rmant
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: rman
        usernamePrefix: rmanu
    }

    NAVIGATOR {
        name: navt
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: nav
        usernamePrefix: navu
    }

    NAVIGATORMETASERVER {
        name: navmt
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: navms
        usernamePrefix: navmu
    }
}
#
# Configuration to override Cloudera Manager package repositories. These are
# optional, and default to the Cloudera Enterprise release corresponding to
# the Altus Director version.
#
# NOTE: Cloudera Manager version 5.5 onward is required for setting up highly available
# clusters with Cloudera Altus Director.
#
# repository: "https://archive.cloudera.com/cm6/6.2/redhat7/yum/"
# repositoryKeyUrl: "https://archive.cloudera.com/cm6/6.2/redhat7/yum/RPM-GPG-KEY-cloudera"
#
# Optional custom service descriptors for external parcels
#
# csds: [
# "https://archive.cloudera.com/exampleProduct/csd/example-csd.jar"
# ]
}
#
# Highly Available Cluster description
#
cluster {
# The table of products and their versions that need to be installed. Each
# product must have a corresponding parcel in the parcelRepositories
# configured in this section. The specified version for a product will be
# used to find a suitable parcel. Specifying a version that is satisfied by
# more than one parcel among those available will result in a configuration
# error. Specify more granular versions to avoid conflicts.
products {
    # Product name -> version used to find a suitable parcel
    CDH: 6
    # EXAMPLEPRODUCT: 1
}
#
# Optional override of CDH parcel repositories
#
# This defaults to the Cloudera Enterprise release corresponding to the Altus
# Director version.
#
# parcelRepositories: ["https://archive.cloudera.com/cdh6/6.2/parcels/"]
#
# Services to include in the cluster
#
services: [
    HDFS, YARN, ZOOKEEPER, HBASE,
    HIVE, HUE, OOZIE, SPARK_ON_YARN,
    KAFKA, SOLR, FLUME, IMPALA,
    SQOOP_CLIENT, KS_INDEXER, KUDU
]
#
# Optional custom service configurations
#
# Configuration keys containing special characters (e.g., '.', ':') must be enclosed in double
# quotes.
#
# Configuration properties for CDH roles and services are documented at
# https://www.cloudera.com/documentation/enterprise/6/properties/6.1/topics/cm_props_cdh610.html
#
configs {
    # HDFS fencing should be set to true for HA configurations
    HDFS {
        dfs_ha_fencing_methods: "shell(true)"
    }

    # OOZIE requires a load balancer specifically for high availability.
    # Director does not create or manage the load balancer.
    #
    # The load balancer must be configured with the IPs of the oozie servers
    # after the cluster completes bootstrapping.
    OOZIE {
        oozie_load_balancer: "example.com"
        oozie_load_balancer_http_port: 5002
        #oozie_load_balancer_https_port: 5000
    }
}
#
# Database configuration for Highly Available Cluster Services
#
# As mentioned in the cloudera-manager section, the three mutually exclusive options for database
# usage are (1) No configuration which uses databases in the Cloudera Manager embedded PostgreSQL
# database server; (2) Use existing external databases; or (3) Create databases on an existing
# external database server.
#
# High availability configuration requires external databases to be defined for the
# Hive Metastore, Hue, and Oozie services. These databases may be configured using either option
# 2 or 3.
#
#
# (Option 2) Configuration for existing external databases for services
#
# databases {
# HIVE {
# type: postgresql
# host: db.example.com
# port: 123
# user: hive
# password: pass
# name: hive_db
# },
#
# HUE {
# type: postgresql
# host: db.example.com
# port: 123
# user: hue
# password: pass
# name: hue_db
# },
#
# OOZIE {
# type: postgresql
# host: db.example.com
# port: 123
# user: oozie
# password: pass
# name: oozie_db
# }
# }
#
# (Option 3) Configuration for creating external databases on the fly for services
#
databaseTemplates {
    # Databases for the services that need an external database for high availability,
    # each created on the rds-mysql-prod1 server.

    HIVE {
        name: hivet
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: hive
        usernamePrefix: hiveu
    }

    HUE {
        name: huet
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: hue
        usernamePrefix: hueu
    }

    OOZIE {
        name: ooziet
        databaseServerName: rds-mysql-prod1
        databaseNamePrefix: oozie
        usernamePrefix: oozieu
    }
}
#
# Instance group configurations
#
#
# This reference configuration file divides the master roles into four separate virtual
# instance groups, "hdfsmasters-1", "hdfsmasters-2", "masters-1", and "masters-2". The split
# between the "hdfsmasters" and "masters" groups is due to a difference in the recovery methods
# for the different services.
#
# HDFS master roles are placed separately from non-HDFS master roles in this sample cluster
# because recovery from NameNode instance failures is performed through Cloudera Manager's
# MigrateRoles command.
# See: https://www.cloudera.com/documentation/enterprise/latest/topics/admin_nn_migrate_roles.html
#
hdfsmasters-1 {
    # Two instances, each with a NameNode, a failover controller and a JournalNode
    count: 2

    instance: ${instances.i22x} {
        #
        # The properties inherited from the provider section or common-instance-properties or
        # instance template can be overridden for each instance group.
        # These properties include:
        #
        # image: ami-REPLACE-ME
        # securityGroupsIds: sg-REPLACE-ME
        # subnetId: subnet-REPLACE-ME
        # rackId: "/default"

        tags {
            group: hdfsmasters-1
        }
    }

    roles {
        HDFS: [NAMENODE, FAILOVERCONTROLLER, JOURNALNODE]
    }

    # NameNode nameservice, autofailover, and quorum journal name must be configured for
    # high availability
    configs {
        HDFS {
            NAMENODE {
                dfs_federation_namenode_nameservice: hanameservice
                autofailover_enabled: true
                dfs_namenode_quorum_journal_name: hanameservice
            }
        }
    }
}
hdfsmasters-2 {
    # A single instance that runs a JournalNode and HttpFS
    count: 1

    instance: ${instances.i22x} {
        tags {
            group: hdfsmasters-2
        }
    }

    roles {
        # HTTPFS role needed for HUE
        HDFS: [JOURNALNODE, HTTPFS]
    }
}
masters-1 {
    count: 2

    instance: ${instances.i22x} {
        tags {
            group: masters-1
        }
    }

    # HIVESERVER2 roles need a SPARK role (such as gateway) on the same
    # instance to pick up Spark configurations
    # Likewise, SPARK_ON_YARN roles need a HIVE gateway to read Hive tables
    roles {
        # ZooKeeper uses majority quorum for r/w, configure odd number of servers.
        ZOOKEEPER: [SERVER]

        HIVE: [HIVESERVER2, HIVEMETASTORE, GATEWAY]
        YARN: [RESOURCEMANAGER]
        SPARK_ON_YARN: [GATEWAY]
        HBASE: [MASTER]
        HUE: [HUE_SERVER]
        OOZIE: [OOZIE_SERVER]

        # Kudu master uses majority quorum for r/w, configure odd number of servers.
        KUDU: [KUDU_MASTER]
    }

    #
    # Custom role configurations
    #
    # Configuration keys containing special characters (e.g., '.', ':') must be enclosed in
    # double quotes.
    #
    configs {
        # Oozie plugins must be configured for high availability
        OOZIE {
            OOZIE_SERVER {
                oozie_plugins_list: "org.apache.oozie.service.ZKLocksService,org.apache.oozie.service.ZKXLogStreamingService,org.apache.oozie.service.ZKJobsConcurrencyService,org.apache.oozie.service.ZKUUIDService"
            }
        }

        # HIVESERVER2 requires a load balancer specifically for high availability.
        # Director does not create or manage the load balancer.
        #
        # The load balancer must be configured with the IPs of the Hive servers
        # after the cluster completes bootstrapping.
        HIVE {
            HIVESERVER2 {
                hiveserver2_load_balancer: "example.com:11000"
            }
        }

        KUDU {
            KUDU_MASTER {
                # The master rarely performs IO. If fs_data_dirs is unset, it will
                # use the same directory as fs_wal_dir
                fs_wal_dir: "/data0/kudu/masterwal"
                fs_data_dirs: "/data1/kudu/master"
            }
        }
    }
}
masters-2 {
    # A single instance holding the remaining master roles
    count: 1

    instance: ${instances.i22x} {
        tags {
            # Tag instances with the exact name of this instance group
            group: masters-2
        }
    }

    roles {
        # ZooKeeper uses majority quorum for r/w, configure odd number of servers.
        ZOOKEEPER: [SERVER]

        YARN: [JOBHISTORY]
        # HBASETHRIFTSERVER role needed for HUE
        HBASE: [HBASETHRIFTSERVER]
        HIVE: [GATEWAY]
        HUE: [HUE_LOAD_BALANCER]
        SPARK_ON_YARN: [SPARK_YARN_HISTORY_SERVER]
        KAFKA: [KAFKA_BROKER, KAFKA_MIRROR_MAKER]
        IMPALA: [CATALOGSERVER, STATESTORE]
        KS_INDEXER: [HBASE_INDEXER]

        # Kudu master uses majority quorum for r/w, configure odd number of servers.
        KUDU: [KUDU_MASTER]
    }

    configs {
        KAFKA {
            # KAFKA_BROKER {
            #     broker_max_heap_size: 512
            #     "log.dirs": /data0/kafka/data
            # }

            KAFKA_MIRROR_MAKER {
                # Without resolvable URLs, this role will fail to start
                "source.bootstrap.servers": "sourcebroker1:9092"
                "bootstrap.servers": "destinationbroker1:9092"
                # Also required for role to start
                whitelist: mytopic1
            }
        }

        KUDU {
            KUDU_MASTER {
                # The master rarely performs IO. If fs_data_dirs is unset, it will
                # use the same directory as fs_wal_dir
                fs_wal_dir: "/data0/kudu/masterwal"
                fs_data_dirs: "/data1/kudu/master"
            }
        }
    }
}
workers {
    count: 9

    #
    # Minimum number of instances required for this instance group.
    # Altus Director will fail bootstrap of a cluster if minCount number of instances are not
    # available in this cloud environment.
    # If minCount is not specified then minCount is set to count.
    #
    minCount: 5

    instance: ${instances.d22x} {
        #
        # Put all cluster nodes in a placement group for improved network performance
        # Note: this only works for a limited set of instances
        # See: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/placement-groups.html
        #
        # placementGroup: REPLACE-ME

        tags {
            group: worker
        }
    }

    roles {
        HDFS: [DATANODE]
        YARN: [NODEMANAGER]
        HBASE: [REGIONSERVER]
        SOLR: [SOLR_SERVER]
        IMPALA: [IMPALAD]
        KUDU: [KUDU_TSERVER]
    }

    configs {
        KUDU {
            KUDU_TSERVER {
                # Set fs_wal_dir to an SSD drive (if exists) for better performance.
                # Set fs_data_dirs to a comma-separated string containing all remaining
                # disk drives, solid state or otherwise.
                # If there are multiple drives in the machine, it's best to ensure that
                # the WAL directory is not located on the same disk as a tserver data
                # directory.
                fs_wal_dir: "/data0/kudu/tabletwal"
                fs_data_dirs: "/data1/kudu/tablet"
            }
        }
    }
}
gateways {
    # A single gateway instance that carries client roles and the Flume agent
    count: 1

    instance: ${instances.t2l} {
        tags {
            group: gateway
        }
    }

    roles {
        HBASE: [GATEWAY]
        HIVE: [GATEWAY]
        KAFKA: [GATEWAY]
        SOLR: [GATEWAY]
        FLUME: [AGENT]
        SQOOP_CLIENT: [GATEWAY]
    }
}
}