-
Notifications
You must be signed in to change notification settings - Fork 338
/
functions.sh
1517 lines (1395 loc) · 51.5 KB
/
functions.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env bash
#
# Copyright © 2016-2018 Cask Data, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
#
# This file contains functions used by the cdap script. These functions should
# be usable by other scripts and should be prefixed to prevent namespace issues
# with other scripts. This file will also set CDAP variables in the environment
# when sourced.
#
###
#
# Global functions (not prefixed)
#
#
# die [message] [exit code]
# Outputs error message, then exits with the given exit code, or 1
#
die() { local readonly __code=${2:-1}; echo "[ERROR] ${1}" >&2; exit ${__code}; };
#
# program_is_installed <program>
# Checks for program in PATH and returns true if found
#
program_is_installed() { type ${1} >/dev/null 2>&1; local readonly __ret=${?}; return ${__ret}; };
#
# split_jvm_opts <variable> [variable] [variable]
# Splits multiple variables of JVM options into a JVM_OPTS bash array
#
split_jvm_opts() { JVM_OPTS=(${@}); };
#
# rotate_log <file> [num]
# Rotates a given file through num iterations (default 5)
#
rotate_log() {
local readonly __log=${1}
local __num=${2:-5} __prev=0
if [[ -f ${__log} ]]; then
while [[ ${__num} -gt 1 ]]; do
__prev=$((__num - 1))
test -f "${__log}".${__prev} && mv -f "${__log}".{${__prev},${__num}}
__num=${__prev}
done
mv -f "${__log}"{,.${__num}}
fi
return 0
}
#
# compare_versions <version> <version>
# returns: 1 if first is greater, 2 if second is greater, 0 if same
#
compare_versions() {
[[ ${1} == ${2} ]] && return 0
local IFS=.
local i ver1=(${1}) ver2=(${2})
# fill empty fields in ver1 with zeros
for ((i=${#ver1[@]}; i<${#ver2[@]}; i++)); do
ver1[i]=0
done
for ((i=0; i<${#ver1[@]}; i++)); do
if [[ -z ${ver2[i]} ]]; then
# fill empty fields in ver2 with zeros
ver2[i]=0
fi
if ((10#${ver1[i]} > 10#${ver2[i]})); then
return 1
fi
if ((10#${ver1[i]} < 10#${ver2[i]})); then
return 2
fi
done
return 0
}
#
# logecho <message>
#
logecho() {
echo ${@} | tee -a ${__logfile}
}
#
# __readlink <file|directory>
#
__readlink() {
local __target_file=${1}
cd $(dirname ${__target_file})
__target_file=$(basename ${__target_file})
while test -L ${__target_file}; do
__target_file=$(readlink ${__target_file})
cd $(dirname ${__target_file})
__target_file=$(basename ${__target_file})
done
echo "$(pwd -P)/${__target_file}"
}
###
#
# Directory functions
#
# Creates a LOCAL_DIR if it doesn't exist
#
cdap_create_local_dir() { test -d "${LOCAL_DIR}" || mkdir -p "${LOCAL_DIR}"; };
# Creates a LOG_DIR if it doesn't exist
#
cdap_create_log_dir() { test -d "${LOG_DIR}" || mkdir -p "${LOG_DIR}"; };
# Creates a PID_DIR if it doesn't exist
#
cdap_create_pid_dir() { test -d "${PID_DIR}" || mkdir -p "${PID_DIR}"; };
cdap_create_dir() { test -d "${1}" || mkdir -p "${1}"; }
# Locates CDAP_HOME and returns its location
#
cdap_home() {
if [[ -n ${CDAP_HOME} ]] && [[ -d ${CDAP_HOME} ]]; then
echo ${CDAP_HOME}
return 0
fi
local readonly __script=${BASH_SOURCE[0]}
local readonly __dirname=$(dirname "${__script}")
local readonly __script_bin=$(cd "${__dirname}"; pwd -P)
local readonly __comp_home=$(cd "${__script%/*/*}" >&-; pwd -P)
if [[ ${__comp_home%/*} == /opt/cdap ]] && [[ ${__comp_home} != /opt/cdap/sdk* ]] && [[ ${__comp_home} != /opt/cdap/sandbox* ]]; then
__app_home=${__comp_home}
__cdap_home=/opt/cdap
elif [[ ${__comp_home##*/} == cli ]]; then
__app_home=${__comp_home}
__cdap_home=${__comp_home%/*}
else
__app_home=$(dirname "${__script_bin}")
__cdap_home=${__app_home}
fi
echo ${__cdap_home}
}
###
#
# Service/Process manipulation via PID file
#
#
# cdap_status_pidfile <pidfile>
# returns: 3 if file exists but process not running, 2 if no process, 0 if running, 1 otherwise
#
cdap_status_pidfile() {
local readonly __pidfile=${1} __label=${2:-Process}
if [[ -f ${__pidfile} ]]; then
local readonly __pid=$(<${__pidfile})
if kill -0 ${__pid} >/dev/null 2>&1; then
echo "${__label} running as PID ${__pid}"
return 0
else
echo "PID file ${__pidfile} exists, but process ${__pid} does not appear to be running"
return 3
fi
else
echo "${__label} is not running"
return 2
fi
return 1
}
#
# cdap_stop_pidfile <pidfile>
# returns: exit code
#
cdap_stop_pidfile() {
local readonly __ret __pidfile=${1} __label=${2:-Process}
if [[ -f ${__pidfile} ]]; then
local readonly __pid=$(<${__pidfile})
echo -n "$(date) Stopping ${__label} ..."
if kill -0 ${__pid} >/dev/null 2>&1; then
kill ${__pid} >/dev/null 2>&1
while kill -0 ${__pid} >/dev/null 2>&1; do
echo -n .
sleep 1
done
rm -f ${__pidfile}
echo
__ret=0
else
__ret=${?}
fi
echo
fi
return ${__ret}
}
cdap_kill_pidfile() {
local readonly __ret __pidfile=${1} __label=${2:-Process}
if [[ -f ${__pidfile} ]]; then
local readonly __pid=$(<${__pidfile})
echo -n "$(date) Killing ${__label} ..."
if kill -0 ${__pid} >/dev/null 2>&1; then
kill -9 ${__pid} >/dev/null 2>&1
while kill -0 ${__pid} >/dev/null 2>&1; do
echo -n .
sleep 1
done
rm -f ${__pidfile}
echo
__ret=0
else
__ret=${?}
fi
echo
fi
return ${__ret}
}
#
# cdap_check_pidfile <pidfile> [label]
# returns: 1 on error, 0 otherwise
#
cdap_check_pidfile() {
local readonly __pidfile=${1} __label=${2:-Process}
local readonly __ret
cdap_status_pidfile ${__pidfile} ${__label} > /dev/null
__ret=$?
case ${__ret} in
0) echo "$(date) Please stop CDAP ${__label} running as process $(<${__pidfile}) first, or use the restart function" ;;
*) return 0 ;;
esac
return 1
}
###
#
# CDAP helper functions
#
#
# cdap_check_node_version <version>
# returns: 1 if not found or not high enough version, 0 otherwise
#
cdap_check_node_version() {
local readonly __ver=${1/v/} __ret
program_is_installed node || die "Cannot locate node, is Node.js installed?"
local readonly __node=$(node -v 2>/dev/null | sed -e 's/v//g')
compare_versions ${__node} ${__ver}
__ret=$?
case ${__ret} in
0|1) return 0 ;;
*) echo "Node.js ${__node} is not supported. The minimum version supported is ${__ver}" ; return 1 ;;
esac
return 0
}
#
# cdap_check_mapr
# returns: 0 if MapR is detected, 1 otherwise
#
cdap_check_mapr() {
if [[ -f /opt/mapr/MapRBuildVersion ]]; then
return 0
fi
return 1
}
#
# cdap_get_conf <property> <conf-file> [default]
# returns: property value if found, default if not found and default set, otherwise returns 1
#
cdap_get_conf() {
local readonly __pn=${1} __fn=${2} __default=${3} __result __property __sed_fu
# Check for xmllint
[[ $(which xmllint 2>/dev/null) ]] || {
case ${PLATFORM} in
RHEL) die "Cannot locate xmllint, is libxml2 installed?" ;;
UBUNTU) die "Cannot locate xmllint, is libxml2-utils installed?" ;;
esac
# If we get here, die
die "Cannot locate xmllint, are XML tools installed?"
}
# Get property from file, return last result, if multiple are returned
__property="cat //configuration/property[name='${__pn}']/value[text()]"
__sed_fu='/^\//d;s/^.*<value>//;s/<\/value>.*$//'
__result=$(echo "${__property}" | xmllint --shell "${__fn}" | sed "${__sed_fu}" | tail -n 1)
# Found result, echo it and return 0
[[ -n ${__result} ]] && echo ${__result} && return 0
# No result, echo default and return 0
[[ -n ${__default} ]] && echo ${__default} && return 0
return 1
}
#
# cdap_kinit
# Initializes Kerberos ticket using principal/keytab
#
cdap_kinit() {
local readonly __principal=${CDAP_PRINCIPAL:-$(cdap_get_conf "cdap.master.kerberos.principal" "${CDAP_CONF}"/cdap-site.xml)}
local readonly __keytab=${CDAP_KEYTAB:-$(cdap_get_conf "cdap.master.kerberos.keytab" "${CDAP_CONF}"/cdap-site.xml)}
if [[ -z ${__principal} ]] || [[ -z ${__keytab} ]]; then
die "Both cdap.master.kerberos.principal and cdap.master.kerberos.keytab must be configured for Kerberos"
fi
if [[ ! -r ${__keytab} ]]; then
die "Cannot read keytab: ${__keytab}"
fi
if [[ $(which kinit 2>/dev/null) ]]; then
# Replace _HOST in principal w/ FQDN, like Hadoop does
kinit -kt "${__keytab}" "${__principal/_HOST/${HOSTNAME}}"
if [[ ! ${?} ]]; then
die "Failed executing 'kinit -kt \"${__keytab}\" \"${__principal/_HOST/${HOSTNAME}}\"'"
fi
else
die "Cannot locate kinit! Please, ensure the appropriate Kerberos utilities are installed"
fi
return 0
}
#
# cdap_set_java
# Attempts to find JAVA in few ways and sets JAVA variable
#
cdap_set_java () {
local readonly __java __java_version
# Check JAVA_HOME, first
if [[ -n ${JAVA_HOME} ]] && [[ -d ${JAVA_HOME} ]]; then
__java="${JAVA_HOME}"/bin/java
[[ -x ${__java} ]] || die "JAVA_HOME is set to an invalid location: ${JAVA_HOME}"
else
__java=${JAVA:-java}
if [[ ! $(which java 2>/dev/null) ]]; then
die "JAVA_HOME is not set and 'java' was not found in your PATH. Please set JAVA_HOME to the location of your Java install"
fi
fi
__java_version=$("${__java}" -version 2>&1 | grep version | awk '{print $3}' | awk -F '.' '{print $2}')
if [[ -z ${__java_version} ]]; then
die "Could not detect Java version. Aborting..."
elif [[ ${__java_version} -lt 8 ]]; then
die "Java version not supported. Please install Java 8 - other versions of Java are not supported."
fi
export JAVA=${__java}
return 0
}
#
# cdap_set_classpath <home-dir> <conf-dir> [verbose: true/false]
# Assembles CLASSPATH from home-dir, hbase classpath, and conf-dir and optionally echoes if verbose is set true
# NOTE: this function is also sourced and invoked by the CSD control script, found here:
# https://github.com/caskdata/cm_csd/blob/develop/src/scripts/cdap-control.sh
# Any changes to this function must be compatible with the CSD's invocation
#
cdap_set_classpath() {
local readonly __home=${1} __conf=${2} __verbose=${3:-false}
local readonly __homelib=$(find -L "${__home}"/lib -type f 2>/dev/null | sort | tr '\n' ':')
local __cp __hbase_cp
# Get HBase's CLASSPATH
if [[ -n ${HBASE_CLASSPATH} ]] && [[ ${HBASE_CLASSPATH} != '' ]]; then
__cp=${__homelib}:${HBASE_CLASSPATH}:${__conf}/:${__home}/conf/:${EXTRA_CLASSPATH}
elif [[ -n ${HBASE_HOME} ]] && [[ -d ${HBASE_HOME} ]]; then
__hbase_cp=$("${HBASE_HOME}"/bin/hbase classpath)
elif [[ $(which hbase 2>/dev/null) ]]; then
__hbase_cp=$(hbase classpath)
elif [[ -n ${HADOOP_HOME} ]] && [[ -d ${HADOOP_HOME} ]]; then
# For the no hbase case, we still want to setup the Hadoop classpath
__hbase_cp=$("${HADOOP_HOME}"/bin/hadoop classpath)
else
# assume Hadoop/HBase libs are included via EXTRA_CLASSPATH
logecho "[WARN] Could not find Hadoop and HBase libraries, using EXTRA_CLASSPATH"
__cp=${__homelib}:${__conf}/:${__home}/conf/:${EXTRA_CLASSPATH}
fi
# Add HBase's CLASSPATH, if found and not provided
if [[ -n ${__hbase_cp} ]] && [[ -z ${__cp} ]]; then
__cp=${__homelib}:${__hbase_cp}:${__conf}/:${__home}/conf/:${EXTRA_CLASSPATH}
fi
if [[ -n ${CLASSPATH} ]]; then
CLASSPATH+=:${__cp}
else
CLASSPATH=${__cp}
fi
export CLASSPATH
if [[ ${__verbose} == 'true' ]]; then
echo ${CLASSPATH}
fi
return 0
}
#
# cdap_set_hbase
# Sets the correct HBase support library to use, based on what version exists in the CLASSPATH
# NOTE: this function is also sourced and invoked by the CSD control script, found here:
# https://github.com/caskdata/cm_csd/blob/develop/src/scripts/cdap-control.sh
# Any changes to this function must be compatible with the CSD's invocation
#
cdap_set_hbase() {
local readonly __compat __compatlib __class=io.cdap.cdap.data2.util.hbase.HBaseVersion
HBASE_VERSION=${HBASE_VERSION:-$("${JAVA}" -cp ${CLASSPATH} ${__class} 2>/dev/null)}
case ${HBASE_VERSION} in
1.0-cdh5.5*|1.0-cdh5.6*) __compat=hbase-compat-1.0-cdh5.5.0 ;; # 5.5 and 5.6 are compatible
1.0-cdh*) __compat=hbase-compat-1.0-cdh ;;
1.0*) __compat=hbase-compat-1.0 ;;
1.1*) __compat=hbase-compat-1.1 ;;
1.2-cdh*) __compat=hbase-compat-1.2-cdh5.7.0 ;; # 5.7 and 5.8 are compatible
1.2*) __compat=hbase-compat-1.1 ;; # 1.1 and 1.2 are compatible
"") die "Unable to determine HBase version! Aborting." ;;
*)
if [[ $(cdap_get_conf "hbase.version.resolution.strategy" "${CDAP_CONF}"/cdap-site.xml auto.strict) == 'auto.latest' ]]; then
local readonly __latest_hbase_compat
if [[ ${HBASE_VERSION} =~ -cdh ]]; then
__compat=hbase-compat-1.2-cdh5.7.0 # must be updated if a new CDH HBase version is added
else
__compat=hbase-compat-1.1 # must be updated if a new HBase version is added
fi
echo "Using ${__compat} for HBase version ${HBASE_VERSION} due to 'auto.latest' resolution strategy."
else
die "Unknown or unsupported HBase version found: ${HBASE_VERSION}"
fi
;;
esac
__compatlib=$(find -L "${CDAP_HOME}"/${__compat}/lib -type f 2>/dev/null | sort | tr '\n' ':')
export CLASSPATH="${__compatlib}"${CLASSPATH}
return 0
}
#
# cdap_set_hive_classpath
# Determine Hive's CLASSPATH, and set EXPLORE_CLASSPATH.
# Hive classpath is not added as part of system classpath as hive jars bundle unrelated jars like guava,
# and hence need to be isolated.
# NOTE: this function is also sourced and invoked by the CSD control script, found here:
# https://github.com/caskdata/cm_csd/blob/develop/src/scripts/cdap-control.sh
# Any changes to this function must be compatible with the CSD's invocation
#
cdap_set_hive_classpath() {
local __explore=${EXPLORE_ENABLED:-$(cdap_get_conf "explore.enabled" "${CDAP_CONF}"/cdap-site.xml true)}
if [[ ${__explore} == true ]]; then
if [[ -z ${HIVE_HOME} ]] || [[ -z ${HIVE_CONF_DIR} ]] || [[ -z ${HADOOP_CONF_DIR} ]] || [[ -z ${HIVE_EXEC_ENGINE} ]] || [[ -z ${HIVE_CLASSPATH} ]]; then
__secure=${KERBEROS_ENABLED:-$(cdap_get_conf "kerberos.auth.enabled" "${CDAP_CONF}"/cdap-site.xml false)}
if [[ ${__secure} == true ]]; then
cdap_kinit || return 1
fi
if [[ -n ${HIVE_HOME} ]] && [[ -d ${HIVE_HOME} ]] && [[ -x ${HIVE_HOME}/bin/hive ]]; then
HIVE_CMD="${HIVE_HOME}"/bin/hive
else
HIVE_CMD=hive
fi
if [[ $(which ${HIVE_CMD} 2>/dev/null) ]]; then
ERR_FILE=$(mktemp)
HIVE_VAR_OUT=$(${HIVE_CMD} -e 'set -v' 2>${ERR_FILE})
__ret=$?
HIVE_ERR_MSG=$(< ${ERR_FILE})
rm ${ERR_FILE}
if [ ${__ret} -ne 0 ]; then
echo "[ERROR] While determining Hive classpath, failed to get Hive settings using: hive -e 'set -v'"
echo " If you do not want CDAP with Hive functionality, set the 'explore.enabled' property in cdap-site.xml to 'false'"
echo " Otherwise, check that the Hive client is installed, and that Hive and HDFS are running."
echo " stderr:"
echo "${HIVE_ERR_MSG}"
return 1
fi
HIVE_VARS=$(echo ${HIVE_VAR_OUT} | tr ' ' '\n')
# Quotes preserve whitespace
HIVE_HOME=${HIVE_HOME:-$(echo -e "${HIVE_VARS}" | grep '^env:HIVE_HOME=' | cut -d= -f2)}
HIVE_CONF_DIR=${HIVE_CONF_DIR:-$(echo -e "${HIVE_VARS}" | grep '^env:HIVE_CONF_DIR=' | cut -d= -f2)}
HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-$(echo -e "${HIVE_VARS}" | grep '^env:HADOOP_CONF_DIR=' | cut -d= -f2)}
HIVE_CLASSPATH=${HIVE_CLASSPATH:-$(echo -e "${HIVE_VARS}" | grep '^env:CLASSPATH=' | cut -d= -f2)}
HIVE_EXEC_ENGINE=${HIVE_EXEC_ENGINE:-$(echo -e "${HIVE_VARS}" | grep '^hive.execution.engine=' | cut -d= -f2)}
fi
fi
# If Hive classpath is successfully determined, derive explore
# classpath from it and export it to use it in the launch command
if [[ -n ${HIVE_HOME} ]] && [[ -n ${HIVE_CONF_DIR} ]] && [[ -n ${HADOOP_CONF_DIR} ]] && [[ -n ${HIVE_CLASSPATH} ]]; then
EXPLORE_CONF_DIRS="${HIVE_CONF_DIR}:${HADOOP_CONF_DIR}"
EXPLORE_CLASSPATH=${HIVE_CLASSPATH}
if [[ -n ${TEZ_HOME} ]] && [[ -n ${TEZ_CONF_DIR} ]]; then
# tez-site.xml also need to be passed to explore service
EXPLORE_CONF_DIRS="${EXPLORE_CONF_DIRS}:${TEZ_CONF_DIR}"
fi
if [[ ${HIVE_EXEC_ENGINE} == spark ]]; then
# We require SPARK_HOME to be set for CDAP to include the Spark assembly JAR for Explore
cdap_set_spark || die "Unable to get SPARK_HOME, but default Hive engine is Spark"
fi
export EXPLORE_CONF_DIRS EXPLORE_CLASSPATH
fi
fi
}
#
# cdap_set_spark
# Attempts to find SPARK_HOME and setup Spark specifics env by running $SPARK_HOME/conf/spark-env.sh
#
cdap_set_spark() {
local readonly __saved_stty=$(stty -g 2>/dev/null)
# If SPARK_HOME is either not set or is not directory, tries to auto-detect it
if [[ -z ${SPARK_HOME} ]] || [[ ! -d ${SPARK_HOME} ]]; then
if cdap_check_mapr; then
# MapR installs spark to a known location
SPARK_HOME=$(ls -d /opt/mapr/spark/spark-* 2>/dev/null)
if [[ -z ${SPARK_HOME} ]] || [[ ! -d ${SPARK_HOME} ]]; then
return 1
fi
elif [[ $(which spark-shell 2>/dev/null) ]]; then
# If there is no valid SPARK_HOME, we should unset the existing one if it is set
# Otherwise the spark-shell won't run correctly
unset SPARK_HOME
local __spark_shell=$(which spark-shell 2>/dev/null)
local __spark_client_version=None
for __dist in hdp iop; do
if [[ ! -d /usr/${__dist} ]]; then
continue
fi
if [[ $(which ${__dist}-select 2>/dev/null) ]]; then
__spark_name="spark"
if [[ ${SPARK_MAJOR_VERSION} -ne 1 ]]; then
__spark_name="spark${SPARK_MAJOR_VERSION}"
fi
__spark_client_version=$(${__dist}-select status ${__spark_name}-client | awk '{print $3}')
if [[ ${__spark_client_version} == 'None' ]]; then # defaults None, we're hoping for a version
logecho "$(date) Spark client not installed via ${__dist}-select detection"
return 1
elif [[ -x /usr/${__dist}/${__spark_client_version}/${__spark_name}/bin/spark-shell ]]; then
__spark_shell=/usr/${__dist}/${__spark_client_version}/${__spark_name}/bin/spark-shell
else
logecho "$(date) Spark client not installed via on-disk detection"
return 1
fi
fi
done
ERR_FILE=$(mktemp)
SPARK_VAR_OUT=$(echo '(sys.env ++ Map(("sparkVersion", org.apache.spark.SPARK_VERSION),("scalaVersion", scala.util.Properties.releaseVersion.get))).foreach { case (k, v) => println(s"$k=$v") }; sys.exit' | ${__spark_shell} --master local 2>${ERR_FILE})
__ret=$?
# spark-shell invocation above does not properly restore the stty.
stty ${__saved_stty} 2>/dev/null
SPARK_ERR_MSG=$(< ${ERR_FILE})
rm ${ERR_FILE}
if [[ ${__ret} -ne 0 ]]; then
echo "[ERROR] While determining Spark home, failed to get Spark settings using: ${__spark_shell} --master local"
echo " stderr:"
echo "${SPARK_ERR_MSG}"
return 1
fi
SPARK_HOME=$(echo -e "${SPARK_VAR_OUT}" | grep ^SPARK_HOME= | cut -d= -f2)
cdap_set_spark_compat "${SPARK_VAR_OUT}"
fi
fi
if [[ -z ${SPARK_COMPAT} ]]; then
cdap_set_spark_compat_with_spark_shell "${SPARK_HOME}"
fi
export SPARK_HOME
# Find environment variables setup via spark-env.sh
cdap_load_spark_env || logecho "[WARN] Fail to source spark-env.sh to setup environment variables for Spark"
return 0
}
cdap_load_spark_env() {
# The same logic as used by Spark to find spark-env.sh
# When this method is called, SPARK_HOME should already been set,
# as this method should only be called from cdap_set_spark()
if [[ -z ${SPARK_HOME} ]]; then
return 1
fi
CONF_DIR="${SPARK_CONF_DIR:-"$SPARK_HOME"/conf}"
if [[ -f "${CONF_DIR}/spark-env.sh" ]]; then
if [[ -z ${SPARK_ENV_PATTERN} ]]; then
# By default, only get env from spark-env.sh that starts with PY or SPARK
SPARK_ENV_PATTERN="^(PY|SPARK)"
fi
SPARK_ENV=$(source ${CONF_DIR}/spark-env.sh; env | grep -E "${SPARK_ENV_PATTERN}")
__ret=$?
if [[ ${__ret} -ne 0 ]]; then
return 1
fi
while read -r line; do
# Prefix the env variable with _SPARK_ to avoid conflicts
export "_SPARK_${line%%=*}"="${line#*=}"
done <<< "${SPARK_ENV}"
fi
return 0
}
#
# Attempts to determine the spark version and set the SPARK_COMPAT env variable for the CDAP master to use
# by running spark-shell
#
cdap_set_spark_compat_with_spark_shell() {
local readonly __spark_home=${1}
local readonly __saved_stty=$(stty -g 2>/dev/null)
# If SPARK_COMPAT is not already set, try to determine it
if [[ -z ${SPARK_COMPAT} ]]; then
ERR_FILE=$(mktemp)
SPARK_VAR_OUT=$(echo 'Map(("sparkVersion", org.apache.spark.SPARK_VERSION),("scalaVersion", scala.util.Properties.releaseVersion.get)).foreach { case (k, v) => println(s"$k=$v") }; sys.exit' | ${__spark_home}/bin/spark-shell --master local 2>${ERR_FILE})
__ret=$?
# spark-shell invocation above does not properly restore the stty.
stty ${__saved_stty}
SPARK_ERR_MSG=$(< ${ERR_FILE})
rm ${ERR_FILE}
if [[ ${__ret} -ne 0 ]]; then
echo "[ERROR] Failed to get Spark and Scala versions using spark-shell"
echo " stderr:"
echo "${SPARK_ERR_MSG}"
return 1
fi
cdap_set_spark_compat "${SPARK_VAR_OUT}"
fi
return 0
}
#
# Set the SPARK_VERSION and SPARK_COMPAT based on output from spark-shell
#
cdap_set_spark_compat() {
local __output=${1}
SPARK_VERSION=$(echo -e "${__output}" | grep "^sparkVersion=" | cut -d= -f2)
SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION:-$(echo ${SPARK_VERSION} | cut -d. -f1)}
SCALA_VERSION=$(echo -e "${__output}" | grep "^scalaVersion=" | cut -d= -f2)
SCALA_MAJOR_VERSION=$(echo ${SCALA_VERSION} | cut -d. -f1)
SCALA_MINOR_VERSION=$(echo ${SCALA_VERSION} | cut -d. -f2)
SPARK_COMPAT="spark${SPARK_MAJOR_VERSION}_${SCALA_MAJOR_VERSION}.${SCALA_MINOR_VERSION}"
export SPARK_VERSION
export SPARK_COMPAT
return 0
}
#
# cdap_service <service> <action> [arguments]
# Used for interacting with CDAP services where action is one of start/stop/status/restart/condrestart/classpath
#
cdap_service() {
local readonly __service=${1} __action=${2}
shift; shift
local readonly __args=${@}
local readonly __pidfile=${PID_DIR}/${__service}-${IDENT_STRING}.pid
local readonly __gc_log_and_heapdump_dir=${LOG_DIR}/${__service}-${IDENT_STRING}
local readonly __log_prefix=${LOG_DIR}/${__service}-${IDENT_STRING}-${HOSTNAME}
local readonly __logfile=${__log_prefix}.log
local readonly __svc=${__service/-server/}
local readonly __ret
# awk taken from http://stackoverflow.com/a/1541178
local __name=$(echo ${__service/-/ } | awk '{for(i=1;i<=NF;i++){ $i=toupper(substr($i,1,1)) substr($i,2) }}1')
case ${__service} in
auth-server) local readonly __comp_home="security" ;;
kafka-server) local readonly __comp_home=${__svc} ;;
router) local readonly __comp_home="gateway" ;;
*) local readonly __comp_home=${__service} ;;
esac
[[ ${__service} == ui ]] && __name="UI"
cdap_create_log_dir
case ${__action} in
status|stop|kill) cdap_${__action}_pidfile ${__pidfile} "CDAP ${__name}"; __ret=${?} ;;
start|restart|condrestart)
if [[ ${__action} == condrestart ]]; then
cdap_status_pidfile ${__pidfile} "CDAP ${__name}" >/dev/null && \
cdap_stop_pidfile ${__pidfile} "CDAP ${__name}" && \
cdap_${__svc} ${__action} ${__args}
elif [[ ${__action} == restart ]]; then
cdap_stop_pidfile ${__pidfile} "CDAP ${__name}" ; \
cdap_${__svc} ${__action} ${__args}
else
cdap_${__svc} ${__action} ${__args}
fi
__ret=${?}
;;
classpath)
cdap_set_classpath "${CDAP_HOME}"/${__comp_home} "${CDAP_CONF}"
[[ ${__service} == master ]] && cdap_set_java && cdap_set_hbase
echo ${CLASSPATH}
__ret=0
;;
run) cdap_run_class ${__args} ; __ret=${?} ;;
exec) cdap_exec_class ${__args} ; __ret=${?} ;;
usage|-h|--help) echo "Usage: $0 ${__service} {start|stop|restart|status|condrestart|classpath|run|exec}"; __ret=0 ;;
*) die "Usage: $0 ${__service} {start|stop|restart|status|condrestart|classpath|run|exec}" ;;
esac
return ${__ret}
}
#
# cdap_start_bin [args]
# Start a non-Java application with arguments in the background
#
cdap_start_bin() {
local readonly __args=${@}
local readonly __svc=${CDAP_SERVICE/-server/}
local readonly __ret __pid
local readonly __name=$(if [[ ${__svc} == ui ]]; then echo UI ; else echo ${__svc/-/ } | awk '{for(i=1;i<=NF;i++){ $i=toupper(substr($i,1,1)) substr($i,2) }}1' ; fi)
cdap_check_pidfile ${__pidfile} ${__name} || exit 0 # Error output is done in function
cdap_create_pid_dir || die "Could not create PID dir: ${PID_DIR}"
logecho "$(date) Starting CDAP ${__name} service on ${HOSTNAME}"
ulimit -a >>${__logfile} 2>&1
nohup nice -n ${NICENESS} ${MAIN_CMD} ${MAIN_CMD_ARGS} ${__args} </dev/null >>${__logfile} 2>&1 &
__pid=${!}
__ret=${?}
echo ${__pid} >${__pidfile}
if ! kill -0 ${__pid} >/dev/null 2>&1; then
die "${MAIN_CMD} failed to start, please check logs at ${LOG_DIR} for more information"
fi
return ${__ret}
}
#
# cdap_run_bin [args]
# Runs a non-Java application with arguments in the foreground
#
cdap_run_bin() {
local readonly __bin=${1}
shift
local readonly __args=${@}
local readonly __ret
${__bin} ${__args}
__ret=${?}
return ${__ret}
}
#
# cdap_start_java [args]
# Start a Java application from class name with arguments in the background
#
cdap_start_java() {
local readonly __name=$(echo ${CDAP_SERVICE/-/ } | awk '{for(i=1;i<=NF;i++){ $i=toupper(substr($i,1,1)) substr($i,2) }}1')
cdap_check_pidfile ${__pidfile} ${__name} || exit 0 # Error output is done in function
cdap_create_pid_dir || die "Could not create PID dir: ${PID_DIR}"
# Check and set classpath if in development environment.
cdap_check_and_set_classpath_for_dev_environment "${CDAP_HOME}"
# Setup classpaths.
cdap_set_classpath "${CDAP_HOME}"/${__comp_home} "${CDAP_CONF}"
# Setup Java
cdap_set_java || return 1
# Set JAVA_HEAPMAX from variable defined in JAVA_HEAP_VAR, unless defined already
JAVA_HEAPMAX=${JAVA_HEAPMAX:-${!JAVA_HEAP_VAR}}
export JAVA_HEAPMAX
# Split JVM_OPTS array
eval split_jvm_opts ${!JAVA_OPTS_VAR} ${OPTS} ${JAVA_OPTS}
local __defines="-Dcdap.service=${CDAP_SERVICE} ${JAVA_HEAPMAX} -Duser.dir=${LOCAL_DIR} -Djava.io.tmpdir=${TEMP_DIR}"
# Enable GC logging
cdap_create_dir ${__gc_log_and_heapdump_dir}
if [[ ${HEAPDUMP_ON_OOM} == true ]]; then
__defines+=" -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=${__gc_log_and_heapdump_dir}"
fi
__defines+=" -verbose:gc -Xloggc:${__gc_log_and_heapdump_dir}/gc.log -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=1M"
logecho "$(date) Starting CDAP ${__name} service on ${HOSTNAME}"
echo
if [[ ${CDAP_SERVICE} == master ]]; then
# Determine SPARK_HOME
cdap_set_spark || logecho "$(date) Could not determine SPARK_HOME! Spark support unavailable!"
if [[ -n ${SPARK_COMPAT} ]]; then
__defines+=" -Dapp.program.spark.compat=${SPARK_COMPAT}"
fi
# Master requires setting hive classpath
cdap_set_hive_classpath || return 1
local readonly __explore="-Dexplore.conf.dirs=${EXPLORE_CONF_DIRS} -Dexplore.classpath=${EXPLORE_CLASSPATH}"
__defines+=" ${__explore}"
# Add proper HBase compatibility to CLASSPATH
cdap_set_hbase || return 1
# Master requires this local directory
cdap_create_local_dir || die "Could not create Master local directory"
# Check for JAVA_LIBRARY_PATH
if [[ -n ${JAVA_LIBRARY_PATH} ]]; then
__defines+=" -Djava.library.path=${JAVA_LIBRARY_PATH}"
fi
# Check for HDP 2.2+ or IOP, otherwise do nothing and leave up to the user to configure
for __dist in hdp iop; do
if [[ $(which ${__dist}-select 2>/dev/null) ]]; then
local __auto_version=$(${__dist}-select status hadoop-client | awk '{print $3}')
# Check for version configured in OPTS
if [[ ${OPTS} =~ -D${__dist}.version ]]; then
local __conf_version=$(echo ${OPTS} | grep -oP "\-D${__dist}.version=\d+\.\d+\.\d+\.\d+-\d+" | cut -d= -f2)
if [[ ${__conf_version} != ${__auto_version} ]]; then
local __caps=$(echo ${__dist} | awk 'BEGIN { getline; print toupper($0) }')
logecho "$(date) [WARN] ${__caps} version mismatch! Detected: ${__auto_version}, Configured: ${__conf_version}"
logecho "$(date) [WARN] Using configured ${__caps} version: ${__conf_version}"
fi
else
# No version specified in OPTS or incorrect format, appending ours
__defines+=" -D${__dist}.version=${__auto_version}"
logecho "$(date) Detected ${__dist} version ${__auto_version} and adding to CDAP Master command line"
fi
fi
done
# Build and upload coprocessor jars
logecho "$(date) Ensuring required HBase coprocessors are on HDFS"
cdap_setup_coprocessors </dev/null >>${__logfile} 2>&1 || die "Could not setup coprocessors. Please check ${__logfile} for more information."
__startup_checks=${CDAP_STARTUP_CHECKS:-$(cdap_get_conf "master.startup.checks.enabled" "${CDAP_CONF}"/cdap-site.xml true)}
if [[ ${__startup_checks} == true ]]; then
logecho "$(date) Running CDAP Master startup checks -- this may take a few minutes"
"${JAVA}" ${JAVA_HEAPMAX} ${__explore} ${JVM_OPTS[@]} -cp ${CLASSPATH} io.cdap.cdap.master.startup.MasterStartupTool </dev/null >>${__logfile} 2>&1
if [ $? -ne 0 ]; then
die "Master startup checks failed. Please check ${__logfile} to address issues."
fi
fi
fi
"${JAVA}" -version 2>>${__logfile}
ulimit -a >>${__logfile}
__defines+=" ${JVM_OPTS[@]}"
echo "$(date) Running: ${JAVA} ${__defines} -cp ${CLASSPATH} ${MAIN_CLASS} ${MAIN_CLASS_ARGS} ${@}" >>${__logfile}
# Start our JVM
nohup nice -n ${NICENESS} "${JAVA}" ${__defines} -cp ${CLASSPATH} ${MAIN_CLASS} ${MAIN_CLASS_ARGS} ${@} </dev/null >>${__logfile} 2>&1 &
echo $! >${__pidfile}
sleep 2 # Now, wait for JVM spinup
kill -0 $(<${__pidfile}) >/dev/null 2>&1
return $?
}
#
# cdap_run_class <class> [arguments]
# Executes a given class' main method with the CLASSPATH and environment setup
#
cdap_run_class() {
local readonly __class=${1}
shift
local readonly __args=${@}
local readonly __ret
local JAVA_HEAPMAX=${JAVA_HEAPMAX:--Xmx1024m}
[[ -z ${__class} ]] && echo "[ERROR] No class name given!" && die "Usage: ${0} run <fully-qualified-class> [arguments]"
# Check and set classpath if in development environment.
cdap_check_and_set_classpath_for_dev_environment "${CDAP_HOME}"
# Setup classpaths.
cdap_set_classpath "${CDAP_HOME}"/master "${CDAP_CONF}"
# Setup Java
cdap_set_java || return 1
cdap_set_spark || logecho "$(date) [WARN] Could not determine SPARK_HOME! Spark support unavailable!"
cdap_set_hive_classpath || return 1
# Add proper HBase compatibility to CLASSPATH
cdap_set_hbase || exit 1
cdap_create_local_dir || die "Could not create local directory"
if [[ -n ${__args} ]] && [[ ${__args} != '' ]]; then
echo "$(date) Running class ${__class} with arguments: ${__args}"
else
echo "$(date) Running class ${__class}"
fi
"${JAVA}" ${JAVA_HEAPMAX} -Dhive.classpath=${HIVE_CLASSPATH} -Duser.dir=${LOCAL_DIR} -Djava.io.tmpdir=${TEMP_DIR} ${OPTS} -cp ${CLASSPATH} ${__class} ${__args}
__ret=${?}
return ${__ret}
}
#
# cdap_exec_class <class> [arguments]
# Executes a given class' main method with the CLASSPATH and environment setup. It replaces the current process
# with the new Java process
#
cdap_exec_class() {
local readonly __class=${1}
shift
local readonly __args=${@}
local JAVA_HEAPMAX=${JAVA_HEAPMAX:--Xmx1024m}
[[ -z ${__class} ]] && echo "[ERROR] No class name given!" && die "Usage: ${0} run <fully-qualified-class> [arguments]"
# Check and set classpath if in development environment.
cdap_check_and_set_classpath_for_dev_environment "${CDAP_HOME}"
# Setup classpaths.
cdap_set_classpath "${CDAP_HOME}"/master "${CDAP_CONF}"
# Setup Java
cdap_set_java || return 1
cdap_set_spark || logecho "$(date) [WARN] Could not determine SPARK_HOME! Spark support unavailable!"
cdap_set_hive_classpath || return 1
# Add proper HBase compatibility to CLASSPATH
cdap_set_hbase || exit 1
cdap_create_local_dir || die "Could not create local directory"
if [[ -n ${__args} ]] && [[ ${__args} != '' ]]; then
echo "$(date) Running class ${__class} with arguments: ${__args}"
else
echo "$(date) Running class ${__class}"
fi
exec "${JAVA}" ${JAVA_HEAPMAX} -Dhive.classpath=${HIVE_CLASSPATH} -Duser.dir=${LOCAL_DIR} -Djava.io.tmpdir=${TEMP_DIR} ${OPTS} -cp ${CLASSPATH} ${__class} ${__args}
}
#
# cdap_check_and_set_classpath_for_dev_environment <home-dir>
# check and set classpath if in development enviroment
#
cdap_check_and_set_classpath_for_dev_environment() {
local readonly __home=${1}
# Detect if we are in development.
IN_DEV_ENVIRONMENT=${IN_DEV_ENVIRONMENT:-false}
# for developers only, add flow and flow related stuff to class path.
if [[ ${IN_DEV_ENVIRONMENT} == true ]]; then
logecho "Constructing classpath for development environment ..."
[[ -f "${__home}"/build/generated-classpath ]] && CLASSPATH+=":$(<${__home}/build/generated-classpath)"
[[ -d "${__home}"/build/classes ]] && CLASSPATH+=":${__home}/build/classes/main:${__home}/conf/*"
[[ -d "${__home}"/../data-fabric/build/classes ]] && CLASSPATH+=":${__home}/../data-fabric/build/classes/main"
[[ -d "${__home}"/../common/build/classes ]] && CLASSPATH+=":${__home}/../common/build/classes/main"
[[ -d "${__home}"/../gateway/build/classes ]] && CLASSPATH+=":${__home}/../gateway/build/classes/main"
export CLASSPATH
fi
return 0
}
#
# cdap_context
# returns "distributed" or "sdk" based on current CDAP_HOME
#
cdap_context() {
local readonly __context __version=$(cdap_version)
if [[ -e ${CDAP_HOME}/lib/io.cdap.cdap.cdap-standalone-${__version}.jar ]]; then
__context=sdk
else
__context=distributed
fi
echo ${__context}
}
#
# cdap_version [component]
# returns the version of CDAP or <component> in CDAP_HOME, replacing snapshot timestamps with -SNAPSHOT
#
cdap_version() {
local readonly __component=${1}
local readonly __cdap_major __cdap_minor __cdap_patch __cdap_snapshot
local __version
if [[ -z ${__component} ]]; then
__version=$(<"${CDAP_HOME}"/VERSION)
else
__version=$(<"${CDAP_HOME}"/${__component}/VERSION)
fi
__cdap_major=$(echo ${__version} | cut -d. -f1)
__cdap_minor=$(echo ${__version} | cut -d. -f2)
__cdap_patch=$(echo ${__version} | cut -d. -f3)
__cdap_snapshot=$(echo ${__version} | cut -d. -f4)
if [[ -z ${__cdap_snapshot} ]]; then
__version=${__cdap_major}.${__cdap_minor}.${__cdap_patch}
else
__version=${__cdap_major}.${__cdap_minor}.${__cdap_patch}-SNAPSHOT
fi
echo ${__version}
}
#
# cdap_version_command
# returns the version of CDAP SDK or all locally installed CDAP Distributed components
#
cdap_version_command() {
local readonly __context=$(cdap_context)
local __component __name
if [[ ${__context} == sdk ]]; then
echo "CDAP Sandbox version $(cdap_version)"
echo
else # Distributed, possibly CLI-only
if [[ -r ${CDAP_HOME}/VERSION ]]; then
echo "CDAP configuration version $(cdap_version)"
fi
for __component in cli gateway kafka master security ui; do
if [[ -r ${CDAP_HOME}/${__component}/VERSION ]]; then
echo "CDAP ${__component} version $(cdap_version ${__component})"
fi
done
echo
fi
return 0
}
###
#
# CDAP SDK functions
#
#
# cdap_sdk_usage
# Outputs usage for the CDAP SDK
# returns: true
#
cdap_sdk_usage() {
echo
echo "Usage: ${0} sandbox {start|stop|restart|status|usage}"
echo
echo "Additional options with start, restart:"
echo "--enable-debug [ <port> ] to connect to a debug port for CDAP Sandbox (default port is 5005)"
echo "--foreground to run the Sandbox in the foreground, showing logs on STDOUT"