Skip to content
Permalink
Branch: master
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
719 lines (628 sloc) 19.8 KB
#
# Copyright (c) 2018 Cloudera, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Sample Cloudera Altus Director configuration file based on the Cloudera Azure reference architecture:
# http://www.cloudera.com/documentation/other/reference-architecture/PDF/cloudera_ref_arch_azure.pdf
#
# This is a template for a minimal deployment of CDSW. Refer to Cloudera Altus Director's documentation
# and azure.reference.conf for more information about deploying clusters with Cloudera Altus Director.
#
name: cdsw

#
# Cloud provider settings. Every *_REPLACE_ME value must be filled in before use.
#
provider {
    type: azure

    # ID of the Azure region to deploy into. NOTE: the region must support Premium Storage.
    # See: https://azure.microsoft.com/en-us/regions/#services
    region: "region_REPLACE_ME"

    # Azure Cloud Environment to use. Valid values are:
    #   - azure
    #   - azure-us-government
    #   - azure-germany
    azureCloudEnvironment: "azureCloudEnvironment_REPLACE_ME"

    # Azure Management URL (now deprecated; use azureCloudEnvironment instead).
    # mgmtUrl: "https://management.core.windows.net/"

    # Azure Active Directory Subscription ID.
    subscriptionId: "subscriptionId_REPLACE_ME"

    # Tenant ID (from AAD).
    tenantId: "tenantId_REPLACE_ME"

    # Azure Active Directory Application Client ID.
    clientId: "clientId_REPLACE_ME"

    # Client Secret for the application identified by clientId.
    clientSecret: "clientSecret_REPLACE_ME"
}
#
# SSH credentials to use to connect to the machines
#
ssh {
    # Account used to log in to the provisioned machines
    username: azuser

    # Private key for that account. The key text sits inside a triple-quoted string, so its
    # lines are deliberately left at column 0: any indentation would become part of the key.
    privateKey: """-----BEGIN RSA PRIVATE KEY-----
privateKey_REPLACE_ME
-----END RSA PRIVATE KEY-----"""
}
#
# Common variable definitions
#
instances {

    # Settings shared by every instance template below. Each template is declared as
    # ${instances.base} { ... } and only adds or overrides keys.
    base {
        type: STANDARD_DS12_V2
        image: cloudera-centos-74-latest

        # Networking
        networkSecurityGroupResourceGroup: "networkSecurityGroupResourceGroup_REPLACE_ME"
        networkSecurityGroup: "networkSecurityGroup_REPLACE_ME"
        virtualNetworkResourceGroup: "virtualNetworkResourceGroup_REPLACE_ME"
        virtualNetwork: "virtualNetwork_REPLACE_ME"
        subnetName: "subnetName_REPLACE_ME"
        publicIP: No
        hostFqdnSuffix: "cdh-cluster.internal"

        # Storage
        storageAccountType: Premium_LRS
        managedDisks: Yes

        tags {
            # Optional substitution: the tag is omitted when USER is not set
            owner: ${?USER}
        }

        # Scripts defined in the bootstrap-script section of this file
        bootstrapScripts: [
            ${bootstrap-script.os-generic},
            ${bootstrap-script.forcejdk8}
        ]
    }

    # CDH master nodes
    master: ${instances.base} {
        computeResourceGroup: "master_computeResourceGroup_REPLACE_ME"
        availabilitySet: "master_availabilitySet_REPLACE_ME"
        instanceNamePrefix: "master_instanceNamePrefix_REPLACE_ME"
        dataDiskCount: 3
        dataDiskSize: 512
    }

    # CDH worker nodes
    worker: ${instances.base} {
        computeResourceGroup: "worker_computeResourceGroup_REPLACE_ME"
        availabilitySet: "worker_availabilitySet_REPLACE_ME"
        instanceNamePrefix: "worker_instanceNamePrefix_REPLACE_ME"
        dataDiskCount: 4
        dataDiskSize: 1024
    }

    # Edge node (used as the Cloudera Manager instance in the cloudera-manager section)
    edge: ${instances.base} {
        computeResourceGroup: "edge_computeResourceGroup_REPLACE_ME"
        availabilitySet: "edge_availabilitySet_REPLACE_ME"
        instanceNamePrefix: "edge_instanceNamePrefix_REPLACE_ME"
        dataDiskCount: 1
        dataDiskSize: 512
    }

    # CDSW master node
    cdswmaster: ${instances.base} {
        computeResourceGroup: "cdswmaster_computeResourceGroup_REPLACE_ME"
        availabilitySet: "cdswmaster_availabilitySet_REPLACE_ME"
        instanceNamePrefix: "cdswmaster_instanceNamePrefix_REPLACE_ME"

        # Use an empty subnet dedicated to the CDSW master node to get a predictable IP
        # that can be registered in DNS. For example, if the subnet is 10.6.1.0/29 (3
        # available addresses in Azure) the master node will be assigned 10.6.1.4.
        # See http://tiny.cloudera.com/azure-available-ip for details.
        subnetName: "masterSubnetName_REPLACE_ME"

        dataDiskCount: 3
        dataDiskSize: 1024

        # Data disks are not auto-mounted on this node: LUN 0 and LUN 1 are passed to CDSW
        # as raw devices (cdsw.docker.devices.config in the cluster section) and LUN 2 is
        # formatted and mounted by bootstrap-script.setupcdswmount.
        normalizationConfig {
            mountAllUnmountedDisks: false
        }

        # Replaces the base list: same two scripts plus the CDSW application-data mount
        bootstrapScripts: [
            ${bootstrap-script.os-generic},
            ${bootstrap-script.forcejdk8},
            ${bootstrap-script.setupcdswmount}
        ]
    }

    # CDSW worker nodes
    cdswworker: ${instances.base} {
        computeResourceGroup: "cdswworker_computeResourceGroup_REPLACE_ME"
        availabilitySet: "cdswworker_availabilitySet_REPLACE_ME"
        instanceNamePrefix: "cdswworker_instanceNamePrefix_REPLACE_ME"
        dataDiskCount: 2
        dataDiskSize: 1024

        # Data disks are not auto-mounted; both are passed to CDSW as raw devices
        # (cdsw.docker.devices.config in the cluster section).
        normalizationConfig {
            mountAllUnmountedDisks: false
        }
    }
}
#
# Bootstrap scripts
#
# See azure.reference.conf for general information about bootstrap scripts and
# pre-terminate scripts.
#
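# bootstrap-script.os-generic: per-OS host preparation. It installs a dhclient / NetworkManager hook
# that registers the host's A and PTR records with nsupdate and rewrites the search domain in
# /etc/resolv.conf; on RHEL it also disables SELinux, the firewall, and IPv6.
# The os-generic bootstrap script will be run after the VM boots up for the first time. This must
# be used to set up preconditions for successful cluster deployment. Director will restart the
# host after the bootstrap script has run.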
bootstrap-script {
os-generic : """#!/bin/sh
#
# This script will bootstrap these OSes:
# - CentOS 6
# - CentOS 7
# - RHEL 6
# - RHEL 7
#
# Notes and notable differences between OSes:
# - CentOS and RHEL 6 use dhclient
# - CentOS and RHEL 7 use NetworkManager
#
#
# Functions
#
#
# CentOS and RHEL 6 use dhclient. This function installs a dhclient exit hook that registers the
# host's A and PTR records whenever eth0 gets or renews a lease, restarts networking so the hook
# runs, and then checks that the hostname resolves.
#
# The here-document body and its EOF terminator must stay at column 0: the terminator is only
# recognized at the start of a line, and the body is written to the hook file verbatim.
#
# Exits the whole bootstrap script with status 1 if the hostname does not resolve within 5 checks.
#
dhclient_6()
{
    # dhclient-exit-hooks explained in dhclient-script man page: http://linux.die.net/man/8/dhclient-script
    # cat a here-doc representation of the hooks to the appropriate file
    cat > /etc/dhcp/dhclient-exit-hooks <<"EOF"
#!/bin/bash
printf "\ndhclient-exit-hooks running...\n\treason:%s\n\tinterface:%s\n" "${reason:?}" "${interface:?}"
# only execute on the primary nic
if [ "$interface" != "eth0" ]
then
exit 0;
fi
# when we have a new IP, perform nsupdate
if [ "$reason" = BOUND ] || [ "$reason" = RENEW ] || [ "$reason" = REBIND ] || [ "$reason" = REBOOT ]
then
printf "\tnew_ip_address:%s\n" "${new_ip_address:?}"
host=$(hostname -s)
domain=$(nslookup $(grep -i nameserver /etc/resolv.conf | cut -d ' ' -f 2) | grep -i name | cut -d ' ' -f 3 | cut -d '.' -f 2- | rev | cut -c 2- | rev)
IFS='.' read -ra ipparts <<< "$new_ip_address"
ptrrec="$(printf %s "$new_ip_address." | tac -s.)in-addr.arpa"
nsupdatecmds=$(mktemp -t nsupdate.XXXXXXXXXX)
resolvconfupdate=$(mktemp -t resolvconfupdate.XXXXXXXXXX)
echo updating resolv.conf
grep -iv "search" /etc/resolv.conf > "$resolvconfupdate"
echo "search $domain" >> "$resolvconfupdate"
cat "$resolvconfupdate" > /etc/resolv.conf
echo "Attempting to register $host.$domain and $ptrrec"
{
echo "update delete $host.$domain a"
echo "update add $host.$domain 600 a $new_ip_address"
echo "send"
echo "update delete $ptrrec ptr"
echo "update add $ptrrec 600 ptr $host.$domain"
echo "send"
} > "$nsupdatecmds"
nsupdate "$nsupdatecmds"
fi
#done
exit 0;
EOF
    chmod 755 /etc/dhcp/dhclient-exit-hooks
    service network restart

    # Confirm DNS record has been updated, retry if update did not work.
    # Success is recorded in its own flag instead of being inferred from the attempt counter:
    # the counter reaches 5 on the last attempt whether or not that attempt succeeds, so
    # testing the counter alone would report a last-attempt success as a failure.
    dns_registered=0
    attempt=0
    while [ "$attempt" -lt 5 ]
    do
        sleep 5
        attempt=$((attempt+1))
        if hostname | nslookup
        then
            dns_registered=1
            break
        fi
        service network restart
    done
    if [ "$dns_registered" -ne 1 ]; then
        echo "DNS update failed"
        exit 1
    fi
}
# CentOS 6 setup: announce the OS, then run the shared CentOS / RHEL 6 dhclient hook setup.
centos_6()
{
    printf '%s\n' "CentOS 6"
    dhclient_6
}
# RHEL 6 setup: disable SELinux, iptables and IPv6, then run the shared CentOS / RHEL 6
# dhclient hook setup.
rhel_6()
{
    printf '%s\n' "RHEL 6"

    # SELinux: persist "disabled" in the config file (sed keeps a .bak copy) and stop enforcing now
    sed -i.bak "s/^SELINUX=.*$/SELINUX=disabled/" /etc/selinux/config
    setenforce 0

    # Firewall: stop it now and keep it off across reboots
    service iptables stop
    chkconfig iptables off

    # IPv6: append the settings to sysctl.conf, then apply them to the running kernel
    {
        echo "# Disable IPv6"
        echo "net.ipv6.conf.all.disable_ipv6 = 1"
        echo "net.ipv6.conf.default.disable_ipv6 = 1"
    } >> /etc/sysctl.conf
    for ipv6_scope in all default
    do
        sysctl -w net.ipv6.conf.$ipv6_scope.disable_ipv6=1
    done

    dhclient_6
}
#
# CentOS and RHEL 7 use NetworkManager. This function installs a NetworkManager dispatcher script
# that registers the host's A and PTR records when eth0 comes up, restarts networking so the
# script runs, and then checks that the hostname resolves.
#
# The here-document body and its EOF terminator must stay at column 0: the terminator is only
# recognized at the start of a line, and the body is written to the dispatcher file verbatim.
#
# Exits the whole bootstrap script with status 1 if the hostname does not resolve within 5 checks.
#
networkmanager_7()
{
    cat > /etc/NetworkManager/dispatcher.d/12-register-dns <<"EOF"
#!/bin/bash
# NetworkManager Dispatch script
# Deployed by Cloudera Altus Director Bootstrap
#
# Expected arguments:
# $1 - interface
# $2 - action
#
# See for info: http://linux.die.net/man/8/networkmanager
# Register A and PTR records when interface comes up
# only execute on the primary nic
if [ "$1" != "eth0" ] || [ "$2" != "up" ]
then
exit 0;
fi
# when we have a new IP, perform nsupdate
new_ip_address="$DHCP4_IP_ADDRESS"
host=$(hostname -s)
domain=$(nslookup $(grep -i nameserver /etc/resolv.conf | cut -d ' ' -f 2) | grep -i name | cut -d ' ' -f 3 | cut -d '.' -f 2- | rev | cut -c 2- | rev)
IFS='.' read -ra ipparts <<< "$new_ip_address"
ptrrec="$(printf %s "$new_ip_address." | tac -s.)in-addr.arpa"
nsupdatecmds=$(mktemp -t nsupdate.XXXXXXXXXX)
resolvconfupdate=$(mktemp -t resolvconfupdate.XXXXXXXXXX)
echo updating resolv.conf
grep -iv "search" /etc/resolv.conf > "$resolvconfupdate"
echo "search $domain" >> "$resolvconfupdate"
cat "$resolvconfupdate" > /etc/resolv.conf
echo "Attempting to register $host.$domain and $ptrrec"
{
echo "update delete $host.$domain a"
echo "update add $host.$domain 600 a $new_ip_address"
echo "send"
echo "update delete $ptrrec ptr"
echo "update add $ptrrec 600 ptr $host.$domain"
echo "send"
} > "$nsupdatecmds"
nsupdate "$nsupdatecmds"
exit 0;
EOF
    chmod 755 /etc/NetworkManager/dispatcher.d/12-register-dns
    service network restart

    # Confirm DNS record has been updated, retry if update did not work.
    # Success is recorded in its own flag instead of being inferred from the attempt counter:
    # the counter reaches 5 on the last attempt whether or not that attempt succeeds, so
    # testing the counter alone would report a last-attempt success as a failure.
    dns_registered=0
    attempt=0
    while [ "$attempt" -lt 5 ]
    do
        sleep 5
        attempt=$((attempt+1))
        if hostname | nslookup
        then
            dns_registered=1
            break
        fi
        service network restart
    done
    if [ "$dns_registered" -ne 1 ]; then
        echo "DNS update failed"
        exit 1
    fi
}
# CentOS 7 setup: announce the OS, then run the shared CentOS / RHEL 7 NetworkManager setup.
centos_7()
{
    printf '%s\n' "CentOS 7"
    networkmanager_7
}
# RHEL 7 setup: disable SELinux, the firewall services, tuned, chrony and IPv6, reload sysctl
# settings, then run the shared CentOS / RHEL 7 NetworkManager setup.
rhel_7()
{
    echo "RHEL 7"

    # rewrite SELINUX config to disabled (sed keeps a .bak copy) and turn off enforcement
    sed -i.bak "s/^SELINUX=.*$/SELINUX=disabled/" /etc/selinux/config
    setenforce 0

    # stop the iptables service and keep it from starting at boot.
    # systemctl's verb for this is "disable"; "off" is chkconfig syntax and is not
    # a systemctl command, so the service would otherwise stay enabled.
    systemctl stop iptables
    systemctl disable iptables

    # RHEL 7 uses firewalld
    systemctl stop firewalld
    systemctl disable firewalld

    # Disable tuned so it does not overwrite sysctl.conf
    service tuned stop
    systemctl disable tuned

    # Disable chrony so it does not conflict with ntpd installed by Director
    systemctl stop chronyd
    systemctl disable chronyd

    # update config to disable IPv6
    echo "# Disable IPv6" >> /etc/sysctl.conf
    echo "net.ipv6.conf.all.disable_ipv6 = 1" >> /etc/sysctl.conf
    echo "net.ipv6.conf.default.disable_ipv6 = 1" >> /etc/sysctl.conf

    # swappiness is set by Director in /etc/sysctl.conf
    # Poke sysctl to have it pick up the config change.
    sysctl -p

    # execute the CentOS / RHEL 7 network manager setup
    networkmanager_7
}
#
# Main workflow
#
# The setup functions change system configuration, so refuse to run as anyone but root
if [ "$(id -u)" -ne 0 ]; then
    echo "Please run as root."
    exit 1
fi

# Work out the distribution and its major release
os=""
major_release=""
if rpm -q redhat-lsb; then
    # redhat-lsb is installed: ask lsb_release directly
    os=$(lsb_release -si)
    major_release=$(lsb_release -sr | cut -d '.' -f 1)
else
    # Otherwise pattern-match /etc/redhat-release. Each grep also prints the line it
    # matched; a later match overrides an earlier one.
    if grep "CentOS.* 6\\." /etc/redhat-release; then
        os="CentOS"
        major_release="6"
    fi
    if grep "CentOS.* 7\\." /etc/redhat-release; then
        os="CentOS"
        major_release="7"
    fi
    if grep "Red Hat Enterprise Linux Server release 6\\." /etc/redhat-release; then
        os="RedHatEnterpriseServer"
        major_release="6"
    fi
    if grep "Red Hat Enterprise Linux Server release 7\\." /etc/redhat-release; then
        os="RedHatEnterpriseServer"
        major_release="7"
    fi
fi
echo "OS: $os $major_release"

# Run the setup function for the detected OS; anything unrecognized is rejected with status 1
not_supported_msg="OS $os $major_release is not supported."
case "$os" in
    CentOS)
        case "$major_release" in
            6)
                centos_6
                ;;
            7)
                centos_7
                ;;
            *)
                echo "$not_supported_msg"
                exit 1
                ;;
        esac
        ;;
    RedHatEnterpriseServer)
        case "$major_release" in
            6)
                rhel_6
                ;;
            7)
                rhel_7
                ;;
            *)
                echo "$not_supported_msg"
                exit 1
                ;;
        esac
        ;;
    *)
        echo "$not_supported_msg"
        exit 1
        ;;
esac
""",
forcejdk8: """#!/bin/sh
# We remove any natively installed JDKs, as both Cloudera Manager and Cloudera Altus Director only support Oracle JDKs.
# The package glob is quoted so that yum, not the shell, expands it. Unquoted, the shell
# would replace it with any matching file names found in the current directory.
yum remove --assumeyes '*openjdk*'
# Install jdk 1.8 required by Spark2
rpm -ivh "https://archive.cloudera.com/director6/6.0.0/redhat7/RPMS/x86_64/oracle-j2sdk1.8-1.8.0+update141-1.x86_64.rpm"
""",
setupcdswmount: """#!/bin/sh
# Formats the data disk at LUN 2 as ext4, mounts it at /var/lib/cdsw for application data,
# and records the mount in /etc/fstab. Any failing command aborts the script (set -e).
set -e

cdsw_disk="/dev/disk/azure/scsi1/lun2"
cdsw_dir="/var/lib/cdsw"

echo "Making file system"
mkfs.ext4 -F -E lazy_itable_init=1 "$cdsw_disk" -m 0

echo "Mounting $cdsw_disk on $cdsw_dir"
# Create the mount point only when nothing exists at that path yet
[ -e "$cdsw_dir" ] || mkdir -p "$cdsw_dir"
mount -o defaults,noatime "$cdsw_disk" "$cdsw_dir"

# Make the mount persistent across reboots
echo "$cdsw_disk $cdsw_dir ext4 defaults,noatime 0 0" >> /etc/fstab
exit 0
"""
}
cloudera-manager {

    # Cloudera Manager runs on an instance built from the edge template
    instance: ${instances.edge} {
        tags {
            application: "Cloudera Manager 5"
        }
    }

    # Support for CDSW requires Cloudera Manager 5.13.1+
    repository: "https://archive.cloudera.com/cm5/redhat/7/x86_64/cm/5.15/"
    repositoryKeyUrl: "https://archive.cloudera.com/cm5/redhat/7/x86_64/cm/RPM-GPG-KEY-cloudera"

    # Cloudera Manager and management-service settings. All log and storage directories
    # are placed under /data0.
    configs {
        CLOUDERA_MANAGER {
            enable_api_debug: true
            custom_banner_html: "Managed by Cloudera Altus Director"
        }

        SERVICEMONITOR {
            mgmt_log_dir: /data0/log/cloudera-scm-firehose
            firehose_storage_dir: /data0/lib/cloudera-service-monitor
        }

        ACTIVITYMONITOR {
            mgmt_log_dir: /data0/log/cloudera-scm-firehose
        }

        HOSTMONITOR {
            mgmt_log_dir: /data0/log/cloudera-scm-firehose
            firehose_storage_dir: /data0/lib/cloudera-host-monitor
        }

        REPORTSMANAGER {
            headlamp_scratch_dir: /data0/lib/cloudera-scm-headlamp
            mgmt_log_dir: /data0/log/cloudera-scm-headlamp
        }

        EVENTSERVER {
            mgmt_log_dir: /data0/log/cloudera-scm-eventserver
            eventserver_index_dir: /data0/lib/cloudera-scm-eventserver
        }

        ALERTPUBLISHER {
            mgmt_log_dir: /data0/log/cloudera-scm-alertpublisher
        }

        NAVIGATOR {
            mgmt_log_dir: /data0/log/cloudera-scm-navigator
        }

        NAVIGATORMETASERVER {
            audit_event_log_dir: /data0/log/cloudera-scm-navigator/audit
            data_dir: /data0/lib/cloudera-scm-navigator
            mgmt_log_dir: /data0/log/cloudera-scm-navigator
        }
    }

    # NOTE(review): presumably NONE because bootstrap-script.forcejdk8 installs the JDK
    # on every instance - confirm against the Director documentation.
    javaInstallationStrategy: NONE

    # Custom service descriptors for CDSW and Spark2
    csds: [
        "https://archive.cloudera.com/cdsw1/1.4.2/csd/CLOUDERA_DATA_SCIENCE_WORKBENCH-CDH5-1.4.2.jar",
        "https://archive.cloudera.com/spark2/csd/SPARK2_ON_YARN-2.3.0.cloudera4.jar"
    ]
}
#
# Cluster description
#
cluster {

    # Product versions and the parcel repositories they are downloaded from
    products {
        CDH: 5.15
        CDSW: 1.4.2
        SPARK2: 2
    }

    parcelRepositories: [
        "https://archive.cloudera.com/cdh5/parcels/5.15/",
        "https://archive.cloudera.com/cdsw1/1.4.2/parcels/",
        "https://archive.cloudera.com/spark2/parcels/2.3.0.cloudera4/"
    ]

    services: [HDFS, YARN, ZOOKEEPER, CDSW, SPARK2_ON_YARN]

    # Service-wide settings
    configs {
        CDSW {
            "cdsw.domain.config": cdsw.my-domain.com # The fully qualified domain name for the CDSW host
        }
    }

    #
    # CDH masters: NameNode, Secondary NameNode, ResourceManager, history servers
    #
    masters {
        count: 1

        instance: ${instances.master} {
            tags: {
                application: "CDH 5.15"
                group: masters
            }
        }

        roles {
            HDFS: [NAMENODE, SECONDARYNAMENODE]
            YARN: [RESOURCEMANAGER, JOBHISTORY]
            SPARK2_ON_YARN: [SPARK2_YARN_HISTORY_SERVER]
        }

        # Logs go to /data0; NameNode metadata to /data1 and checkpoints to /data2
        configs {
            HDFS {
                NAMENODE {
                    namenode_log_dir: /data0/log/hadoop-hdfs
                    dfs_name_dir_list: /data1/dfs/nn
                }
                SECONDARYNAMENODE {
                    secondarynamenode_log_dir: /data0/log/hadoop-hdfs
                    fs_checkpoint_dir_list: /data2/dfs/snn
                }
            }
            YARN {
                RESOURCEMANAGER {
                    resource_manager_log_dir: /data0/log/hadoop-yarn
                }
                JOBHISTORY {
                    mr2_jobhistory_log_dir: /data0/log/hadoop-mapreduce
                }
            }
            SPARK2_ON_YARN {
                SPARK2_YARN_HISTORY_SERVER {
                    log_dir: /data0/log/spark2
                }
            }
        }
    }

    #
    # CDH workers: DataNode, NodeManager and ZooKeeper server
    #
    workers {
        count: 3
        minCount: 3

        instance: ${instances.worker} {
            tags: {
                application: "CDH 5.15"
                group: workers
            }
        }

        roles {
            HDFS: [DATANODE]
            YARN: [NODEMANAGER]
            ZOOKEEPER: [SERVER]
        }

        # Logs go to /data0, ZooKeeper data to /data1, HDFS and YARN data to /data2 and /data3
        configs {
            HDFS {
                DATANODE {
                    datanode_log_dir: /data0/log/hadoop-hdfs
                    dfs_data_dir_list: "/data2/dfs/dn,/data3/dfs/dn"
                    dfs_datanode_failed_volumes_tolerated: 1
                }
            }
            YARN {
                NODEMANAGER {
                    node_manager_log_dir: /data0/log/hadoop-yarn
                    yarn_nodemanager_log_dirs: "/data2/log/hadoop-yarn/container,/data3/log/hadoop-yarn/container"
                    yarn_nodemanager_local_dirs: "/data2/yarn,/data3/yarn"
                }
            }
            ZOOKEEPER {
                SERVER {
                    zk_server_log_dir: /data0/log/zookeeper
                    dataDir: /data1/zookeeper
                    dataLogDir: /data1/zookeeper
                    maxClientCnxns: 1024
                }
            }
        }
    }

    #
    # CDSW master: CDSW master, application and docker roles plus CDH gateways
    #
    cdswmasters {
        count: 1

        instance: ${instances.cdswmaster} {
            tags: {
                application: "CDH 5.15 + CDSW 1.4.2"
                group: cdswmasters
            }
        }

        roles {
            HDFS: [GATEWAY]
            YARN: [GATEWAY]
            CDSW: [CDSW_MASTER, CDSW_APPLICATION, CDSW_DOCKER]
            SPARK2_ON_YARN: [GATEWAY]
        }

        configs {
            CDSW {
                CDSW_DOCKER {
                    "cdsw.docker.devices.config": "/dev/disk/azure/scsi1/lun0 /dev/disk/azure/scsi1/lun1" # related to the data disk configuration
                }
            }
        }
    }

    #
    # CDSW workers: CDSW worker and docker roles plus CDH gateways
    #
    cdswworkers {
        count: 2
        minCount: 2

        instance: ${instances.cdswworker} {
            tags: {
                application: "CDH 5.15 + CDSW 1.4.2"
                group: cdswworkers
            }
        }

        roles {
            HDFS: [GATEWAY]
            YARN: [GATEWAY]
            CDSW: [CDSW_WORKER, CDSW_DOCKER]
            SPARK2_ON_YARN: [GATEWAY]
        }

        configs {
            CDSW {
                CDSW_DOCKER {
                    "cdsw.docker.devices.config": "/dev/disk/azure/scsi1/lun0 /dev/disk/azure/scsi1/lun1" # related to the data disk configuration
                }
            }
        }
    }
}
You can’t perform that action at this time.