Permalink
Fetching contributors…
Cannot retrieve contributors at this time
404 lines (322 sloc) 11.7 KB
#
# Copyright (c) 2017 Cloudera, Inc. All rights reserved.
#
#
# A reference configuration for deploying a cluster that utilizes Cloudera's
# Shared Data Experience (SDX). Please refer to
# https://www.cloudera.com/products/sdx.html for more detailed information.
#
#
# Cluster name. By default the environment and deployment names are generated from it.
#
name: "SDX-AWS Cluster 1"
#
# The environment and deployment name will be generated by default based on the cluster
# name. Another cluster can be bootstrapped under the same deployment by specifying the
# existing environment and deployment name along with a new cluster name. Note that when
# terminating with a configuration file, the deployment is only terminated if there are
# no other clusters under the deployment. Also, the external databases are only terminated
# if there are no other components (deployments or clusters) using the external database.
#
# environmentName: "SDX-AWS Environment"
# deploymentName: "SDX-AWS Deployment"
#
# Cloud provider configuration (credentials, region or zone and optional default image)
#
provider {
    type: aws
    image: ami-HVM-REPLACE-ME
    #
    # AWS credentials can be taken from the OS environment
    # See http://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
    #
    # Access keys written here literally (instead of through variables) must be
    # enclosed in double quotes.
    #
    # Leave the accessKeyId and secretAccessKey fields blank when running on an
    # instance that was launched with an IAM role.
    #
    # accessKeyId: ${?AWS_ACCESS_KEY_ID}
    # secretAccessKey: ${?AWS_SECRET_ACCESS_KEY}
    #
    # ID of the AWS region to use
    # See: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
    #
    region: region-REPLACE-ME
    #
    # Region endpoint (only if you are using one of the Gov. regions)
    #
    # regionEndpoint: ec2.us-gov-west-1.amazonaws.com
    #
    # ID of the VPC subnet
    # See: http://docs.aws.amazon.com/AmazonVPC/latest/UserGuide/VPC_Subnets.html
    #
    subnetId: subnet-REPLACE-ME
    #
    # Comma-separated list of security group IDs
    # See: http://docs.aws.amazon.com/AmazonVPC/latest/UserGuide/VPC_SecurityGroups.html
    #
    securityGroupsIds: sg-REPLACE-ME
    #
    # Prefix that Launchpad uses when naming the instances (it is not part of the hostname)
    #
    instanceNamePrefix: cloudera-director
    #
    # Size of the root volume, in GB. Launchpad automatically expands the filesystem
    # so that all of the available disk space can be used by your application.
    # See: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/storage_expand_partition.html
    #
    # rootVolumeSizeGB: 100 # defaults to 50 GB if not specified
    #
    # Type of the EBS volume used for the root partition. Defaults to gp2
    # See: http://aws.amazon.com/ebs/details/
    #
    # rootVolumeType: gp2 # OR standard (for EBS magnetic)
    #
    # Whether to associate a public IP address with instances. When this is false,
    # instances are expected to reach the internet through a NAT instance.
    #
    # Currently the only way to get optimal S3 data transfer performance is to assign
    # public IP addresses to your instances and not use NAT (public subnet type of setup)
    #
    # See: http://docs.aws.amazon.com/AmazonVPC/latest/UserGuide/vpc-ip-addressing.html
    #
    associatePublicIpAddresses: true
}
#
# SSH credentials to use to connect to the instances
#
ssh {
    # Login user; ec2-user is the one for a RHEL image
    username: ec2-user
    # Replace with an absolute path to the .pem file
    privateKey: privateKey-REPLACE-ME
}
#
# A list of instance types to use for groups of nodes or management services
#
instances {
    #
    # Instance template referenced elsewhere in this file as ${instances.m42x}
    #
    m42x {
        # m4.2xlarge requires an HVM AMI
        type: m4.2xlarge
        #
        # Amazon Machine Image (AMI)
        #
        # See: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html
        # Compatibility matrix: https://aws.amazon.com/amazon-linux-ami/instance-type-matrix/
        #
        # Red Hat Enterprise Linux AMI IDs: http://aws.amazon.com/partners/redhat/
        # Supported: Red Hat Enterprise Linux 6.4 (64-bit), PV or HVM
        #
        image: ami-HVM-REPLACE-ME
        #
        # Name of the IAM Role to use for this instance type
        # http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html
        #
        # iamProfileName: iam-profile-REPLACE-ME
        tags {
            # Optional substitution: resolved from the USER variable when it is set
            owner: ${?USER}
        }
        #
        # The script text below sits inside a triple-quoted string, so its lines
        # (including the closing quotes) must stay unindented to keep the string
        # contents unchanged.
        #
        bootstrapScripts: [ """#!/bin/sh
# This is an embedded bootstrap script that runs as root and can be used to customize
# the instances immediately after boot and before any other Cloudera Altus Director action
# Use exit code 0 to indicate success
# Use exit code 91 indicate an unretryable failure
# Cloudera Altus Director will automatically retry script execution for all other exit codes
echo 'Hello World!'
exit 0
""" ]
        #
        # Flag indicating whether to normalize the instance. Not setting normalization
        # here implies that your bootstrap script will take care of normalization. This
        # is an advanced configuration that will require assistance from Cloudera support.
        #
        # Normalization includes:
        #   downloading and installing packages
        #   minimizing swappiness
        #   increasing the maximum number of open files
        #   mounting ephemeral disks
        #   resizing the root partition.
        #
        # Defaults to true
        #
        # normalizeInstance: true
    }
    #
    # Second instance template, based on the d2.xlarge instance type
    #
    d2x {
        type: d2.xlarge
        image: ami-HVM-REPLACE-ME
        tags {
            owner: ${?USER}
        }
    }
}
#
# Configuration for Cloudera Manager. Launchpad can use an existing instance
# or bootstrap everything from scratch for a new cluster
#
cloudera-manager {
    #
    # Cloudera Manager runs on the m42x instance template, with an extra tag merged in
    #
    instance: ${instances.m42x} {
        tags {
            application: "Cloudera Manager 6"
        }
    }
    #
    # Install the unlimited strength JCE policy files for higher levels of encryption.
    # Before setting this to true, confirm that you understand the legal ramifications
    # of using unlimited JCE policy files in your country.
    #
    # unlimitedJce: true
    #
    # Kerberos Credentials
    #
    #
    # An administrative Kerberos account capable of creating principals on the KDC that
    # Cloudera Manager will be using. This will typically be in the format:
    #   Principal@YOUR.KDC.REALM
    krbAdminUsername: "REPLACE-ME"
    # The password for the administrative Kerberos account.
    krbAdminPassword: "REPLACE-ME"
    # For Cloudera's SDX, Cloudera Manager 5.13+ is required.
    repository: "https://archive.cloudera.com/cm6/6.0/redhat7/yum/"
    #
    # To share data across clusters, an object store like AWS S3 must be configured.
    # An external account must be configured using either IAM roles (recommended) or
    # AWS access and secret keys.
    #
    externalAccounts {
        # With IAM roles there is no need to supply the access key and secret key.
        # This is more secure, as the keys do not need to be rotated regularly.
        SDXIamAccount {
            type: AWS_IAM_ROLES_AUTH
        }
        # If for some reason IAM roles cannot be used, an external account can be
        # defined using regular access and secret keys.
        SDXUnsecureAccount {
            type: AWS_ACCESS_KEY_AUTH
            configs {
                aws_access_key: "AWS-ACCESS-KEY-REPLACE-ME"
                aws_secret_key: "AWS-SECRET-KEY-REPLACE-ME"
            }
        }
    }
    configs {
        CLOUDERA_MANAGER {
            enable_api_debug: true
            custom_banner_html: "Managed by Cloudera Altus Director"
            #
            # Kerberos Configurations
            #
            # The type of KDC Cloudera Manager will be using. Valid values are "MIT KDC"
            # and "Active Directory"
            KDC_TYPE: "Active Directory"
            # The KDC host name or IP address.
            KDC_HOST: "REPLACE-ME"
            # The security realm that your KDC uses. This will be of the format of a fully
            # qualified domain name:
            #   YOUR.KDC.REALM
            SECURITY_REALM: "REPLACE-ME"
            # The Active Directory KDC domain. Only applicable to Active Directory KDCs. This
            # will be in the format of an X.500 Directory Specification:
            #   DC=domain,DC=example,DC=com
            AD_KDC_DOMAIN: "REPLACE-ME"
            # Allow Cloudera Manager to deploy Kerberos configurations to hosts. This should
            # be set to true unless you have an alternate mechanism to generate or retrieve the
            # Kerberos configuration on your Cloudera Manager node instances.
            KRB_MANAGE_KRB5_CONF: true
            # The encryption types your KDC supports. Some of those listed below will require
            # the unlimited strength JCE policy files.
            KRB_ENC_TYPES: "aes256-cts aes128-cts des3-hmac-sha1 arcfour-hmac des-hmac-sha1 des-cbc-md5 des-cbc-crc"
            # There are many more optional Kerberos configuration options available to
            # Cloudera Manager. Please refer to the Kerberos section on
            # http://www.cloudera.com/documentation/enterprise/properties/5-13-x/topics/cm_props_cmserver.html
            # for more details.
        }
    }
}
#
# Cluster description
#
cluster {
    # For Cloudera's SDX, CDH 5.13+ is required.
    parcelRepositories: ["https://archive.cloudera.com/cdh6/6.0/parcels/"]
    products {
        CDH: 6.0
    }
    # To take full advantage of SDX, we recommend installing
    # the following services at minimum:
    #   * AWS_S3
    #   * HDFS
    #   * HIVE
    #   * HUE
    #   * IMPALA
    #   * OOZIE
    #   * SENTRY
    #   * YARN
    #   * ZOOKEEPER
    services: [AWS_S3, HDFS, HIVE, HUE, IMPALA, OOZIE, SENTRY, YARN, ZOOKEEPER]
    # External databases for the HIVE and SENTRY services must be set up for SDX
    # prior to setting up a cluster. For best results, host the databases on RDS.
    # Every value below is a placeholder that must be replaced; each field has its
    # own distinct placeholder token so they can be substituted independently.
    databases {
        HIVE {
            type: hive-db-type-REPLACE-ME
            host: hive-db-host-REPLACE-ME
            port: hive-db-port-REPLACE-ME
            user: hive-db-username-REPLACE-ME
            password: hive-db-password-REPLACE-ME
            name: hive-db-name-REPLACE-ME
        }
        SENTRY {
            type: sentry-db-type-REPLACE-ME
            host: sentry-db-host-REPLACE-ME
            port: sentry-db-port-REPLACE-ME
            user: sentry-db-username-REPLACE-ME
            password: sentry-db-password-REPLACE-ME
            name: sentry-db-name-REPLACE-ME
        }
    }
    # The AWS_S3 connector must be configured to point to a configured external
    # account.
    configs {
        AWS_S3 {
            cloud_account: SDXIamAccount
            # If using AWS_ACCESS_KEY_AUTH type of external account, in order for
            # HIVE to work properly, you'll need to set the key_distribution_policy
            # to UNSECURE.
            # key_distribution_policy: UNSECURE
        }
    }
    #
    # Master group: one instance built from the m42x template
    #
    masters {
        count: 1
        instance: ${instances.m42x} {
            tags {
                group: masters
            }
        }
        roles {
            HDFS: [NAMENODE]
            HIVE: [HIVEMETASTORE, HIVESERVER2]
            HUE: [HUE_SERVER]
            IMPALA: [CATALOGSERVER]
            OOZIE: [OOZIE_SERVER]
            SENTRY: [SENTRY_SERVER]
            YARN: [RESOURCEMANAGER]
            ZOOKEEPER: [SERVER]
        }
        configs {
            HIVE {
                HIVEMETASTORE {
                    # This is necessary for HIVEMETASTORE to propagate metadata changes to SENTRY
                    hive_enable_db_notification: true
                }
            }
        }
    }
    #
    # Worker group: three instances built from the m42x template
    #
    workers {
        count: 3
        instance: ${instances.m42x} {
            tags {
                group: workers
            }
        }
        roles {
            HDFS: [DATANODE]
            IMPALA: [IMPALAD]
            YARN: [NODEMANAGER]
        }
    }
}