# Copied from a GitHub file view (branch: master; original file: 402 lines, 333 sloc, 11 KB).
#
# Copyright (c) 2017-2019 Cloudera, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Sample AWS Cloudera Altus Director configuration file based on the Cloudera AWS reference architecture:
# https://www.cloudera.com/content/dam/www/marketing/resources/solution-briefs/cloudera-enterprise-reference-architecture-for-aws-deployments.pdf.landing.html
#
# This is a template for a minimal deployment of CDSW. Refer to Cloudera Altus Director's documentation
# and aws.reference.conf for more information about deploying clusters with Cloudera Altus Director.
#
#
# Cluster name (this template is for a minimal CDSW deployment)
#
name: cdsw
provider {
    type: aws

    #
    # AWS credentials
    #
    # The commented-out keys below read the credentials from the OS environment.
    # See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
    #
    # When the access keys are written out literally instead of through variables, they
    # must be enclosed in double quotes.
    #
    # When Altus Director runs on an instance that was launched with an IAM role, leave
    # both accessKeyId and secretAccessKey blank.
    #
    # accessKeyId: ${?AWS_ACCESS_KEY_ID}
    # secretAccessKey: ${?AWS_SECRET_ACCESS_KEY}

    #
    # Amazon AWS region ID
    # See: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
    #
    region: region-REPLACE-ME

    #
    # VPC subnet ID
    # See: https://docs.aws.amazon.com/vpc/latest/userguide/VPC_Subnets.html
    #
    subnetId: subnet-REPLACE-ME

    #
    # Security group IDs, as a comma separated list
    # See: https://docs.aws.amazon.com/vpc/latest/userguide/VPC_SecurityGroups.html
    #
    securityGroupsIds: sg-REPLACE-ME

    #
    # Prefix Cloudera Altus Director puts in front of instance names. It does not become
    # part of the hostname.
    #
    instanceNamePrefix: cloudera-director
}
#
# Credentials used to open SSH connections to the instances
#
ssh {
    # Login user; ec2-user is the one for the RHEL image
    username: ec2-user

    # Replace with an absolute path to the .pem file; ${HOME} may be used
    privateKey: privateKey-REPLACE-ME
}
#
# These instance properties will be applied to all instances.
#
common-instance-properties {
    #
    # Amazon Machine Image (AMI)
    #
    # Certain AMI virtualization types are incompatible with certain instance types. HVM
    # AMI types are recommended since they are compatible with most instance types.
    #
    # See: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html
    # Compatibility matrix: https://aws.amazon.com/amazon-linux-ami/instance-type-matrix/
    # Altus Director compatibility: https://www.cloudera.com/documentation/director/latest/topics/director_deployment_requirements.html
    #
    image: ami-REPLACE-ME

    #
    # Tags applied to every instance. ${?USER} is an optional substitution: when USER
    # cannot be resolved, the owner key is simply left unset.
    #
    tags {
        owner: ${?USER}
    }

    #
    # Configuration allowing for granular control over the normalization steps of an instance.
    # By default, all of these steps are on. This is an advanced configuration. None of these steps
    # will run if normalizeInstance is set to false.
    #
    # Normalization includes:
    #   prewarming the parcel directory
    #   downloading, installing, and adjusting packages
    #   minimizing swappiness
    #   increasing the maximum number of open files
    #   resizing the root partition
    #   mounting ephemeral disks
    #
    # See aws.reference.conf for the complete list of valid property keys.
    #
    # Turning off mountAllUnmountedDisks is necessary for CDSW to work, as CDSW handles
    # this step on its own.
    #
    normalizationConfig {
        mountAllUnmountedDisks: false
    }

    #
    # Bootstrap Scripts
    #
    # This configuration opts to install an Oracle JDK via bootstrap script.
    #
    # The '*openjdk*' pattern is quoted so that it reaches yum verbatim and is matched
    # against package names; unquoted, the shell could expand it against files in the
    # script's working directory first.
    #
    # The script always ends with "exit 0", so a failing yum or rpm step does not make
    # the script itself report failure.
    #
    # Everything between the triple quotes is the literal script text, which is why it is
    # not indented with the rest of this file.
    #
    bootstrapScripts: [ """#!/bin/sh
# Remove any natively installed JDKs
yum remove --assumeyes '*openjdk*'
# Install JDK 1.8 required by Spark 2
rpm -ivh "https://archive.cloudera.com/director6/6.0.0/redhat7/RPMS/x86_64/oracle-j2sdk1.8-1.8.0+update141-1.x86_64.rpm"
exit 0
""" ]
}
#
# Named instance templates for groups of nodes or management services. Every template
# starts from the common-instance-properties defined earlier in this file and adds its
# own instance type on top.
#
instances {
    # m4.xlarge template
    m4x: ${common-instance-properties} {
        type: m4.xlarge
    }

    # m4.4xlarge template
    m44x: ${common-instance-properties} {
        type: m4.4xlarge
    }
}
#
# Cloudera Manager settings. Cloudera Altus Director can either use a Cloudera Manager
# installation that already exists, or bootstrap everything from scratch for a new cluster.
#
cloudera-manager {
    #
    # The Cloudera Manager instance is built from the m4x template, with additional
    # tags layered on top.
    #
    instance: ${instances.m4x} {
        tags {
            application: "Cloudera Manager 5"
        }
    }

    #
    # JDK installation strategy. One of:
    #
    #  - AUTO (default): Altus Director installs the JDK on the Cloudera Manager instance, and
    #                    Cloudera Manager installs the JDK on cluster instances
    #  - DIRECTOR_MANAGED: Altus Director installs the JDK on all instances
    #  - NONE: Neither Altus Director nor Cloudera Manager installs the JDK on any instances
    #
    # NONE is selected here because JDK 1.8 is installed with a bootstrap script.
    #
    javaInstallationStrategy: NONE

    #
    # Overrides for the Cloudera Manager package repositories. Both are optional and
    # default to the Cloudera Enterprise release corresponding to the Altus Director
    # version.
    #
    # Support for CDSW requires Cloudera Manager 5.13.1+
    #
    repository: "https://archive.cloudera.com/cm5/redhat/7/x86_64/cm/5.15/"
    repositoryKeyUrl: "https://archive.cloudera.com/cm5/redhat/7/x86_64/cm/RPM-GPG-KEY-cloudera"

    #
    # Optional settings for Cloudera Manager and its management services
    #
    configs {
        #
        # CLOUDERA_MANAGER holds the Cloudera Manager Server configuration options
        #
        CLOUDERA_MANAGER {
            # enable_api_debug: true
            custom_banner_html: "Managed by Cloudera Altus Director"
        }
    }

    #
    # Optional custom service descriptors for external parcels; here, the ones for CDSW
    # and Spark2
    #
    csds: [
        "https://archive.cloudera.com/cdsw1/1.4.2/csd/CLOUDERA_DATA_SCIENCE_WORKBENCH-CDH5-1.4.2.jar",
        "https://archive.cloudera.com/spark2/csd/SPARK2_ON_YARN-2.3.0.cloudera4.jar"
    ]
}
#
# Cluster description
#
cluster {
    # The table of products and their versions that need to be installed. Each
    # product must have a corresponding parcel in the parcelRepositories
    # configured in this section. The specified version for a product will be
    # used to find a suitable parcel. Specifying a version that is satisfied by
    # more than one parcel among those available will result in a configuration
    # error. Specify more granular versions to avoid conflicts.
    products {
        CDH: 5.15
        CDSW: 1.4.2
        SPARK2: 2
    }

    #
    # Optional override of CDH parcel repositories
    #
    # This defaults to the Cloudera Enterprise release corresponding to the
    # Altus Director version.
    #
    parcelRepositories: [
        "https://archive.cloudera.com/cdh5/parcels/5.15/",
        "https://archive.cloudera.com/cdsw1/1.4.2/parcels/",
        "https://archive.cloudera.com/spark2/parcels/2.3.0.cloudera4/"
    ]

    #
    # Services to include in the cluster
    #
    services: [
        HDFS,
        YARN,
        ZOOKEEPER,
        CDSW,
        SPARK2_ON_YARN
    ]

    #
    # Optional custom service configurations
    #
    configs {
        CDSW {
            # The fully qualified domain name for the CDSW host
            "cdsw.domain.config": cdsw.my-domain.com
        }
    }

    #
    # Instance group configurations
    #
    # Each group below sets an instance count, an instance template (one of the entries
    # under "instances", extended with group-specific tags and settings) and the roles
    # assigned per service.
    #

    # One m4x instance carrying the HDFS, YARN and Spark 2 master roles
    masters {
        count: 1

        instance: ${instances.m4x} {
            tags {
                application: "CDH 5.15"
                group: masters
            }
        }

        roles {
            HDFS: [NAMENODE, SECONDARYNAMENODE]
            YARN: [RESOURCEMANAGER, JOBHISTORY]
            SPARK2_ON_YARN: [SPARK2_YARN_HISTORY_SERVER]
        }
    }

    # Three m4x instances carrying the HDFS/YARN worker roles and the ZooKeeper servers
    workers {
        count: 3
        minCount: 3

        instance: ${instances.m4x} {
            tags {
                application: "CDH 5.15"
                group: workers
            }
        }

        roles {
            HDFS: [DATANODE]
            YARN: [NODEMANAGER]
            ZOOKEEPER: [SERVER]
        }
    }

    # One m44x instance carrying the CDSW master roles plus HDFS/YARN/Spark 2 gateways
    cdswmasters {
        count: 1

        instance: ${instances.m44x} {
            tags {
                application: "CDH 5.15 + CDSW 1.4.2"
                group: cdswmasters
            }

            rootVolumeSizeGB: 100

            # Three gp2 EBS volumes: the bootstrap script below formats and mounts
            # /dev/xvdh for application data, and /dev/xvdf and /dev/xvdg are handed to
            # CDSW_DOCKER in the configs section of this group.
            # NOTE(review): this assumes the three volumes appear as /dev/xvdf, /dev/xvdg
            # and /dev/xvdh on the instance -- confirm for the chosen AMI/instance type.
            ebsVolumeCount: 3
            ebsVolumeSizeGiB: 1000
            ebsVolumeType: gp2

            #
            # Setting bootstrapScripts here replaces the list inherited from the instance
            # template (HOCON overrides arrays rather than merging them), which is why the
            # JDK steps are repeated before the volume setup.
            #
            # The '*openjdk*' pattern is quoted so that it reaches yum verbatim and is
            # matched against package names; unquoted, the shell could expand it against
            # files in the script's working directory first.
            #
            # The script formats /dev/xvdh as ext4 unconditionally (mkfs.ext4 -F), mounts
            # it on /var/lib/cdsw and appends a matching line to /etc/fstab. It always ends
            # with "exit 0", so a failing step does not make the script itself report
            # failure.
            #
            # Everything between the triple quotes is the literal script text, which is
            # why it is not indented with the rest of this file.
            #
            bootstrapScripts: [ """#!/bin/sh
# We remove any natively installed JDKs, as both Cloudera Manager and Cloudera Altus Director only support Oracle JDKs
yum remove --assumeyes '*openjdk*'
# Install JDK 1.8 required by Spark 2
rpm -ivh "https://archive.cloudera.com/director6/6.0.0/redhat7/RPMS/x86_64/oracle-j2sdk1.8-1.8.0+update141-1.x86_64.rpm"
# Mount one volume for application data
device="/dev/xvdh"
mount="/var/lib/cdsw"
echo "Making file system"
mkfs.ext4 -F -E lazy_itable_init=1 "$device" -m 0
echo "Mounting $device on $mount"
if [ ! -e "$mount" ]; then
mkdir -p "$mount"
fi
mount -o defaults,noatime "$device" "$mount"
echo "$device $mount ext4 defaults,noatime 0 0" >> /etc/fstab
exit 0
""" ]
        }

        roles {
            HDFS: [GATEWAY]
            YARN: [GATEWAY]
            CDSW: [CDSW_MASTER, CDSW_APPLICATION, CDSW_DOCKER]
            SPARK2_ON_YARN: [GATEWAY]
        }

        configs {
            CDSW {
                CDSW_DOCKER {
                    # Related to the EBS configuration of this group's instance
                    "cdsw.docker.devices.config": "/dev/xvdf /dev/xvdg"
                }
            }
        }
    }

    # Two m44x instances carrying the CDSW worker roles plus HDFS/YARN/Spark 2 gateways
    cdswworkers {
        count: 2
        minCount: 2

        instance: ${instances.m44x} {
            tags {
                application: "CDH 5.15 + CDSW 1.4.2"
                group: cdswworkers
            }

            rootVolumeSizeGB: 100

            # Two gp2 EBS volumes, both handed to CDSW_DOCKER in the configs section of
            # this group.
            ebsVolumeCount: 2
            ebsVolumeSizeGiB: 500
            ebsVolumeType: gp2
        }

        roles {
            HDFS: [GATEWAY]
            YARN: [GATEWAY]
            CDSW: [CDSW_WORKER, CDSW_DOCKER]
            SPARK2_ON_YARN: [GATEWAY]
        }

        configs {
            CDSW {
                CDSW_DOCKER {
                    # Related to the EBS configuration of this group's instance
                    "cdsw.docker.devices.config": "/dev/xvdf /dev/xvdg"
                }
            }
        }
    }
}
# End of sample configuration.