FROM joegagliardo/ubuntu
LABEL maintainer="joegagliardo"
# Wherever possible, related steps are combined into a single RUN command to keep the
# layer count down and minimize the final image size. Small files are also written with
# echo inside a RUN rather than pulled in with ADD, so the build does not depend on
# extra files outside this Dockerfile.
# This section is an easy place to change the desired passwords and versions to install.
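# Any ARG below can be overridden at build time without editing this file,
# for example (image tag illustrative):
#   docker build --build-arg HADOOP_VERSION=2.9.0 --build-arg HIVEUSER_PASSWORD=secret -t bigdata .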
EXPOSE 50020 50090 50070 50010 50075 8031 8032 8033 8040 8042 49707 22 8088 8030 3306 10000 10001 10002
# MYSQL Passwords
ARG HIVEUSER_PASSWORD=hivepassword
ARG HIVE_METASTORE=hivemetastore
ADD examples /examples
ADD datasets /examples
ADD conf /conf
ADD scripts /scripts
# Versions
ARG HADOOP_VERSION=2.9.0
ARG HADOOP_BASE_URL=http://mirrors.sonic.net/apache/hadoop/common
ARG HADOOP_URL=${HADOOP_BASE_URL}/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
ARG PIG_VERSION=0.17.0
ARG PIG_BASE_URL=http://apache.claz.org/pig
ARG PIG_URL=${PIG_BASE_URL}/pig-${PIG_VERSION}/pig-${PIG_VERSION}.tar.gz
ARG HIVE_VERSION=2.3.2
ARG HIVE_BASE_URL=http://apache.claz.org/hive
ARG HIVE_URL=${HIVE_BASE_URL}/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz
ARG SPARK_VERSION=2.2.1
ARG SPARK_BASE_URL=http://apache.claz.org/spark
#ARG SPARK_BASE_URL=https://d3kbcqa49mib13.cloudfront.net
ARG SPARK_URL=${SPARK_BASE_URL}/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz
ARG ZOOKEEPER_VERSION=3.4.11
#ARG ZOOKEEPER_BASE_URL=http://apache.mirrors.lucidnetworks.net/zookeeper/stable
ARG ZOOKEEPER_BASE_URL=http://apache.claz.org/zookeeper
ARG ZOOKEEPER_URL=${ZOOKEEPER_BASE_URL}/zookeeper-${ZOOKEEPER_VERSION}/zookeeper-${ZOOKEEPER_VERSION}.tar.gz
ARG HBASE_VERSION=1.4.0
ARG HBASE_BASE_URL=http://apache.mirrors.pair.com/hbase
ARG HBASE_URL=${HBASE_BASE_URL}/${HBASE_VERSION}/hbase-${HBASE_VERSION}-bin.tar.gz
ARG MONGO_VERSION=3.6.2
ARG MONGO_BASE_URL=https://fastdl.mongodb.org/linux
ARG MONGO_URL=${MONGO_BASE_URL}/mongodb-linux-x86_64-${MONGO_VERSION}.tgz
ARG MONGO_JAVA_DRIVER_VERSION=3.5.0
ARG MONGO_JAVA_DRIVER_BASE_URL=https://repo1.maven.org/maven2/org/mongodb
ARG MONGO_JAVA_DRIVER_URL=${MONGO_JAVA_DRIVER_BASE_URL}/mongo-java-driver/${MONGO_JAVA_DRIVER_VERSION}/mongo-java-driver-${MONGO_JAVA_DRIVER_VERSION}.jar
ARG MONGO_HADOOP_VERSION=2.0.2
ARG MONGO_HADOOP_BASE_URL=https://repo1.maven.org/maven2/org/mongodb/mongo-hadoop
ARG MONGO_HADOOP_CORE_URL=${MONGO_HADOOP_BASE_URL}/mongo-hadoop-core/${MONGO_HADOOP_VERSION}/mongo-hadoop-core-${MONGO_HADOOP_VERSION}.jar
ARG MONGO_HADOOP_HIVE_URL=${MONGO_HADOOP_BASE_URL}/mongo-hadoop-hive/${MONGO_HADOOP_VERSION}/mongo-hadoop-hive-${MONGO_HADOOP_VERSION}.jar
ARG MONGO_HADOOP_PIG_URL=${MONGO_HADOOP_BASE_URL}/mongo-hadoop-pig/${MONGO_HADOOP_VERSION}/mongo-hadoop-pig-${MONGO_HADOOP_VERSION}.jar
ARG MONGO_HADOOP_SPARK_URL=${MONGO_HADOOP_BASE_URL}/mongo-hadoop-spark/${MONGO_HADOOP_VERSION}/mongo-hadoop-spark-${MONGO_HADOOP_VERSION}.jar
ARG MONGO_HADOOP_STREAMING_URL=${MONGO_HADOOP_BASE_URL}/mongo-hadoop-streaming/${MONGO_HADOOP_VERSION}/mongo-hadoop-streaming-${MONGO_HADOOP_VERSION}.jar
ARG CASSANDRA_VERSION=311
ARG CASSANDRA_URL=http://www.apache.org/dist/cassandra
ARG SPARK_CASSANDRA_VERSION=2.0.1-s_2.11
ARG SPARK_CASSANDRA_BASE_URL=http://dl.bintray.com/spark-packages/maven/datastax/spark-cassandra-connector
ARG SPARK_CASSANDRA_URL=${SPARK_CASSANDRA_BASE_URL}/${SPARK_CASSANDRA_VERSION}/spark-cassandra-connector-${SPARK_CASSANDRA_VERSION}.jar
ARG SPARK_CASSANDRA_FILE=spark-cassandra-connector-${SPARK_CASSANDRA_VERSION}.jar
ARG SPARK_HBASE_GIT=https://github.com/hortonworks-spark/shc.git
ARG SPARK_XML_GIT=https://github.com/databricks/spark-xml.git
ARG MONGO_REPO_URL=http://repo.mongodb.org/apt/ubuntu
ARG COCKROACH_VERSION=1.1.4
ARG COCKROACH_BASE_URL=https://binaries.cockroachdb.com
ARG COCKROACH_URL=${COCKROACH_BASE_URL}/cockroach-v${COCKROACH_VERSION}.linux-amd64.tgz
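# Fail fast: verify every download URL above responds with HTTP 2xx before any real
# work, so a moved mirror breaks the build here rather than halfway through an install.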
RUN url_exists() { echo "Checking $1"; if curl -s --head "$1" | head -n 1 | grep -q "HTTP/1.[01] [2].."; then :; else echo "Unreachable URL: $1"; exit 1; fi; } && \
url_exists $HADOOP_URL && \
url_exists $PIG_URL && \
url_exists $HIVE_URL && \
url_exists $SPARK_URL && \
url_exists $ZOOKEEPER_URL && \
url_exists $HBASE_URL && \
url_exists $MONGO_URL && \
url_exists $SPARK_CASSANDRA_URL && \
url_exists $MONGO_JAVA_DRIVER_URL && \
url_exists $MONGO_HADOOP_CORE_URL && \
url_exists $MONGO_HADOOP_PIG_URL && \
url_exists $MONGO_HADOOP_HIVE_URL && \
url_exists $MONGO_HADOOP_SPARK_URL && \
url_exists $MONGO_HADOOP_STREAMING_URL && \
url_exists $COCKROACH_URL
USER root
ENV BOOTSTRAP /etc/bootstrap.sh
#ENV JAVA_HOME /usr/lib/jvm/java-8-oracle
ENV JAVA_HOME /usr/lib/jvm/java-1.9.0-openjdk-amd64
ENV HADOOP_PREFIX /usr/local/hadoop
ENV PIG_HOME /usr/local/pig
ENV HIVE_HOME /usr/local/hive
ENV HCAT_HOME /usr/local/hive/hcatalog
ENV ZOOKEEPER_HOME /usr/local/zookeeper/
ENV SPARK_HOME /usr/local/spark
#ENV SPARK_CLASSPATH '/usr/local/spark/conf/mysql-connector-java.jar'
ENV SPARK_CLASSPATH '/usr/local/spark/conf/mysql-connector-java.jar:$HCAT_HOME/share/hcatalog/hcatalog-core*.jar:$HCAT_HOME/share/hcatalog/hcatalog-pig-adapter*.jar:$HIVE_HOME/lib/hive-metastore-*.jar:$HIVE_HOME/lib/libthrift-*.jar:$HIVE_HOME/lib/hive-exec-*.jar:$HIVE_HOME/lib/libfb303-*.jar:$HIVE_HOME/lib/jdo2-api-*-ec.jar:$HIVE_HOME/conf:$HADOOP_HOME/conf:$HIVE_HOME/lib/slf4j-api-*.jar'
ENV PYTHONPATH ${SPARK_HOME}/python/:$(echo ${SPARK_HOME}/python/lib/py4j-*-src.zip):${PYTHONPATH}
ENV HBASE_HOME /usr/local/hbase
ENV HBASE_CONF_DIR=$HBASE_HOME/conf
ENV PATH $PATH:$HADOOP_PREFIX/bin:$HADOOP_PREFIX/sbin:$PIG_HOME/bin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin:$SPARK_HOME/bin:$HBASE_HOME/bin
RUN echo "# ---------------------------------------------" && \
echo "# passwordless ssh" && \
echo "# ---------------------------------------------" && \
chmod 0777 /examples && \
apt-get update && \
rm -f /etc/ssh/ssh_host_dsa_key /etc/ssh/ssh_host_rsa_key /root/.ssh/id_rsa && \
ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_dsa_key && \
ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key && \
ssh-keygen -q -N "" -t rsa -f /root/.ssh/id_rsa && \
cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys && \
cd /scripts && \
echo "# ---------------------------------------------" && \
echo "# Cockroach DB" && \
echo "# ---------------------------------------------" && \
mkdir /cr && \
cd /cr && \
wget ${COCKROACH_URL} && \
tar xfz cockroach-* && \
mv cockroach-v${COCKROACH_VERSION}.linux-amd64/cockroach /usr/local/bin && \
echo "#! /bin/sh" > /scripts/start-cockroach.sh && \
echo "cd /data" >> /scripts/start-cockroach.sh && \
echo "cockroach start --insecure --host=localhost &" >> /scripts/start-cockroach.sh && \
chmod +x /scripts/start-cockroach.sh && \
echo "#! /bin/sh" > /scripts/cockroach-shell.sh && \
echo "cd /data" >> /scripts/cockroach-shell.sh && \
echo "cockroach sql --insecure" >> /scripts/cockroach-shell.sh && \
chmod +x /scripts/cockroach-shell.sh && \
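# At runtime the generated helpers can be used as, e.g.:
#   /scripts/start-cockroach.sh   (starts a single-node insecure CockroachDB under /data)
#   /scripts/cockroach-shell.sh   (opens the cockroach SQL shell against it)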
cd / && \
rm -r /cr && \
echo "# ---------------------------------------------" && \
echo "# Postgresql" && \
echo "# ---------------------------------------------" && \
DEBIAN_FRONTEND=noninteractive apt-get -yq install postgresql postgresql-contrib postgresql-client && \
chmod +x /scripts/start-postgres.sh && \
chmod +x /scripts/stop-postgres.sh && \
chmod +x /scripts/postgres-client.sh && \
/etc/init.d/postgresql start && \
sudo -u postgres psql -c "create user root with password ''; alter user root with SUPERUSER;" && \
sudo -u postgres psql -c "create database root;" && \
echo "# ---------------------------------------------" && \
echo "# Make folders for HDFS data" && \
echo "# ---------------------------------------------" && \
mkdir -p /data/hdfs/name /data/hdfs/data && \
echo "# ---------------------------------------------" && \
echo "# Hadoop" && \
echo "# ---------------------------------------------" && \
echo ${HADOOP_URL} && \
curl -s ${HADOOP_URL} | tar -xz -C /usr/local/ && \
cd /usr/local && \
ln -s /usr/local/hadoop-${HADOOP_VERSION} /usr/local/hadoop && \
ln -s /usr/local/hadoop-${HADOOP_VERSION} /hadoop && \
sed -i '/^export JAVA_HOME/ s:.*:export JAVA_HOME=/usr\nexport HADOOP_PREFIX=/usr/local/hadoop\nexport HADOOP_HOME=/usr/local/hadoop\n:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh && \
sed -i '/^export HADOOP_CONF_DIR/ s:.*:export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh && \
mv /usr/local/hadoop/etc/hadoop /usr/local/hadoop/etc/hadoop_backup && \
mv /etc/my.cnf /etc/my.cnf.bak && \
ln -s /conf/my.cnf /etc/my.cnf && \
ln -s /conf/hadoop /usr/local/hadoop/etc/hadoop && \
mv /conf/ssh_config /root/.ssh/config && \
chmod 600 /root/.ssh/config && \
chown root:root /root/.ssh/config && \
ln -s /conf/bootstrap-mysql.sh /etc/bootstrap.sh && \
chown root:root /etc/bootstrap.sh && \
chmod 700 /etc/bootstrap.sh && \
chown root:root /conf/bootstrap-mysql.sh && \
chmod 700 /conf/bootstrap-mysql.sh && \
chown root:root /conf/bootstrap-postgres.sh && \
chmod 700 /conf/bootstrap-postgres.sh && \
chmod 700 /scripts/start-hadoop.sh && \
chmod 700 /scripts/stop-hadoop.sh && \
ls -la /usr/local/hadoop/etc/hadoop/*-env.sh && \
chmod +x /usr/local/hadoop/etc/hadoop/*-env.sh && \
ls -la /usr/local/hadoop/etc/hadoop/*-env.sh && \
echo "# fix the 254 error code" && \
sed -i "/^[^#]*UsePAM/ s/.*/#&/" /etc/ssh/sshd_config && \
echo "UsePAM no" >> /etc/ssh/sshd_config && \
echo "Port 2122" >> /etc/ssh/sshd_config && \
service ssh start && \
$HADOOP_PREFIX/etc/hadoop/hadoop-env.sh && \
$HADOOP_PREFIX/sbin/start-dfs.sh && \
$HADOOP_PREFIX/bin/hdfs dfs -mkdir -p /user/root && \
$HADOOP_PREFIX/bin/hdfs dfs -put $HADOOP_PREFIX/etc/hadoop/ input && \
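# Sanity check at runtime, e.g.:
#   hdfs dfs -ls /user/root && hdfs dfs -ls input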
chmod +x /scripts/loglevel-debug.sh && \
chmod +x /scripts/loglevel-info.sh && \
chmod +x /scripts/loglevel-warn.sh && \
chmod +x /scripts/loglevel-error.sh && \
echo "# ---------------------------------------------" && \
echo "# Hive" && \
echo ${HIVE_URL} && \
echo "# ---------------------------------------------" && \
curl ${HIVE_URL} | tar -zx -C /usr/local && \
ln -s /usr/local/apache-hive-${HIVE_VERSION}-bin /usr/local/hive && \
ln -s /usr/share/java/mysql-connector-java.jar /usr/local/hive/lib/mysql-connector-java.jar && \
mv /usr/local/hive/conf /usr/local/hive/conf_backup && \
ln -s /conf/hive /usr/local/hive/conf && \
wget https://jdbc.postgresql.org/download/postgresql-42.1.3.jar && \
mv postgresql-42.1.3.jar /usr/local/hive/jdbc && \
cp /usr/local/hive/jdbc/postgresql-42.1.3.jar /usr/local/hive/lib && \
ln -s /usr/local/hive/hcatalog/share/hcatalog/hive-hcatalog-core-${HIVE_VERSION}.jar /usr/local/hive/hcatalog/share/hcatalog/hive-hcatalog-core.jar && \
echo "# ---------------------------------------------" && \
echo "# Hiveserver2 Python Package" && \
echo "# ---------------------------------------------" && \
apt-get -y install libsasl2-dev && \
pip2 install pyhs2 && \
pip3 install pyhs2 && \
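# Import smoke test (illustrative):
#   python3 -c "import pyhs2"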
echo "# ---------------------------------------------" && \
echo "# Pig " && \
echo ${PIG_URL} && \
echo "# ---------------------------------------------" && \
curl ${PIG_URL} | tar -zx -C /usr/local && \
ln -s /usr/local/pig-${PIG_VERSION} /usr/local/pig && \
mv /usr/local/pig/conf /usr/local/pig/conf_backup && \
ln -s /conf/pig /usr/local/pig/conf && \
mkdir /usr/local/hive/hcatalog/lib && \
ln -s /conf/hive-hcatalog-hbase-storage-handler-0.13.1.jar /usr/local/hive/hcatalog/lib && \
ln -s /conf/slf4j-api-1.6.0.jar /usr/local/hive/lib && \
echo "# ---------------------------------------------" && \
echo "# Make scripts executable" && \
echo "# ---------------------------------------------" && \
chmod +x /scripts/format-namenode.sh && \
chmod +x /scripts/exit-safemode.sh && \
chmod +x /scripts/start-thrift.sh && \
chmod +x /scripts/init-schema-mysql.sh && \
chmod +x /scripts/init-schema-postgres.sh && \
chmod +x /scripts/start-everything.sh && \
chmod +x /scripts/stop-everything.sh && \
echo "# ---------------------------------------------" && \
echo "# Spark" && \
echo ${SPARK_URL} && \
echo "# ---------------------------------------------" && \
curl ${SPARK_URL} | tar -zx -C /usr/local && \
ln -s /usr/local/spark-${SPARK_VERSION}-bin-hadoop2.7 /usr/local/spark && \
ln -s /usr/local/hive/conf/hive-site.xml /usr/local/spark/conf/hive-site.xml && \
ln -s /usr/share/java/mysql-connector-java.jar /usr/local/spark/conf/mysql-connector-java.jar && \
ln -s /usr/share/java/mysql-connector-java.jar /usr/local/spark/jars/mysql-connector-java.jar && \
mv /usr/local/spark/conf /usr/local/spark/conf_backup && \
ln -s /conf/spark /usr/local/spark/conf && \
cd /home && \
echo "# ---------------------------------------------" && \
echo "# Spark HBase" && \
echo ${SPARK_HBASE_GIT} && \
echo "# ---------------------------------------------" && \
git clone ${SPARK_HBASE_GIT} && \
cd shc && \
mvn package -DskipTests && \
mvn clean package test && \
mvn -DwildcardSuites=org.apache.spark.sql.DefaultSourceSuite test && \
echo "# ---------------------------------------------" && \
echo "# HBase" && \
echo ${HBASE_URL} && \
echo "# ---------------------------------------------" && \
curl ${HBASE_URL} | tar -zx -C /usr/local && \
ln -s /usr/local/hbase-${HBASE_VERSION} /usr/local/hbase && \
mv /usr/local/hbase/conf /usr/local/hbase/conf_backup && \
ln -s /conf/hbase /usr/local/hbase/conf && \
ln -s /usr/local/hbase/bin/start-hbase.sh /scripts/starthbase.sh && \
ln -s /usr/local/hbase/bin/stop-hbase.sh /scripts/stophbase.sh && \
echo "# ---------------------------------------------" && \
echo "# Zookeeper" && \
echo ${ZOOKEEPER_URL} && \
echo "# ---------------------------------------------" && \
curl ${ZOOKEEPER_URL} | tar -zx -C /usr/local && \
ln -s /usr/local/zookeeper-${ZOOKEEPER_VERSION} /usr/local/zookeeper && \
mkdir /usr/local/zookeeper/data && \
mv /usr/local/zookeeper/conf /usr/local/zookeeper/conf_backup && \
ln -s /conf/zookeeper /usr/local/zookeeper/conf && \
pip2 install happybase psycopg2 && \
pip3 install happybase psycopg2 && \
echo "# ---------------------------------------------" && \
echo "# Mongo & Cassandra Keys" && \
echo "# ---------------------------------------------" && \
apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 0C49F3730359A14518585931BC711F9BA15703C6 && \
echo "deb [ arch=amd64,arm64 ] ${MONGO_REPO_URL} xenial/mongodb-org/3.4 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.4.list && \
echo "deb ${CASSANDRA_URL}/debian ${CASSANDRA_VERSION}x main" | sudo tee -a /etc/apt/sources.list.d/cassandra.sources.list && \
curl ${CASSANDRA_URL}/KEYS | sudo apt-key add - && \
apt-get update && \
echo "# ---------------------------------------------" && \
echo "# Mongo" && \
echo "# ---------------------------------------------" && \
apt-get -y install mongodb-org && \
pip2 install pymongo && \
pip3 install pymongo && \
mkdir /data/mongo && \
mkdir /data/mongo/data && \
chmod +x /scripts/start-mongo.sh && \
chmod +x /scripts/stop-mongo.sh && \
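# At runtime (script names from /scripts above; assumed to wrap mongod against /data/mongo/data):
#   /scripts/start-mongo.sh
#   mongo --eval "db.version()"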
echo "# ---------------------------------------------" && \
echo "# Cassandra" && \
echo ${CASSANDRA_URL} && \
echo "# ---------------------------------------------" && \
apt-get -y install cassandra && \
chmod +x /scripts/start-cassandra.sh && \
chmod +x /scripts/stop-cassandra.sh && \
echo "# change the data and log folder" && \
mkdir /data/cassandra && \
mkdir /data/cassandra/data && \
mkdir /data/cassandra/log && \
mv /etc/cassandra /etc/cassandra_backup && \
ln -s /conf/cassandra /etc/cassandra && \
chmod +x /examples/cassandra/test-cassandra-table.py && \
echo "# ---------------------------------------------" && \
echo "# Cassandra libraries" && \
echo "# ---------------------------------------------" && \
pip2 install cassandra-driver && \
pip3 install cassandra-driver && \
echo "# ---------------------------------------------" && \
echo "# Helper scripts" && \
echo "# ---------------------------------------------" && \
chmod +x /scripts/create-datadirs.sh && \
chmod +x /scripts/delete-datadirs.sh && \
echo "# ---------------------------------------------" && \
echo "# Spark XML library" && \
echo "# ---------------------------------------------" && \
cd /home && \
git clone ${SPARK_XML_GIT} && \
cd /home/spark-xml && \
sbt/sbt package && \
cp /home/spark-xml/target/scala-2.11/*.jar /usr/local/spark/jars && \
ln -s /usr/local/spark/jars/spark-xml_2.11-0.4.1.jar /usr/local/spark/jars/spark-xml.jar && \
cd /home && \
rm -r /home/spark-xml && \
cd /home && \
git clone https://github.com/minrk/findspark.git && \
cd /home/findspark && \
python2 setup.py install && \
python3 setup.py install && \
cd /home && \
rm -r /home/findspark && \
cd /home && \
echo "# ---------------------------------------------" && \
echo "# Spark Cassandra Connector" && \
echo ${SPARK_CASSANDRA_URL} && \
echo "# ---------------------------------------------" && \
wget ${SPARK_CASSANDRA_URL} && \
mv /home/${SPARK_CASSANDRA_FILE} /usr/local/spark/jars && \
ln -s /usr/local/spark/jars/${SPARK_CASSANDRA_FILE} /usr/local/spark/jars/spark-cassandra-connector.jar && \
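# Jars placed in /usr/local/spark/jars are on Spark's classpath automatically; the
# unversioned symlink also makes an explicit load easy, e.g.:
#   spark-shell --jars /usr/local/spark/jars/spark-cassandra-connector.jar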
echo "# ---------------------------------------------" && \
echo "MONGO-HADOOP" && \
echo "# ---------------------------------------------" && \
cd /home && \
wget --content-disposition ${MONGO_HADOOP_CORE_URL} && \
wget --content-disposition ${MONGO_HADOOP_PIG_URL} && \
wget --content-disposition ${MONGO_HADOOP_HIVE_URL} && \
wget --content-disposition ${MONGO_HADOOP_SPARK_URL} && \
wget --content-disposition ${MONGO_HADOOP_STREAMING_URL} && \
wget --content-disposition ${MONGO_JAVA_DRIVER_URL} && \
mkdir /usr/local/mongo-hadoop && \
mv mongo-hadoop-core-${MONGO_HADOOP_VERSION}.jar /usr/local/mongo-hadoop && \
mv mongo-hadoop-pig-${MONGO_HADOOP_VERSION}.jar /usr/local/mongo-hadoop && \
mv mongo-hadoop-hive-${MONGO_HADOOP_VERSION}.jar /usr/local/mongo-hadoop && \
mv mongo-hadoop-spark-${MONGO_HADOOP_VERSION}.jar /usr/local/mongo-hadoop && \
mv mongo-hadoop-streaming-${MONGO_HADOOP_VERSION}.jar /usr/local/mongo-hadoop && \
mv mongo-java-driver-${MONGO_JAVA_DRIVER_VERSION}.jar /usr/local/mongo-hadoop && \
ln -s /usr/local/mongo-hadoop/mongo-hadoop-core-${MONGO_HADOOP_VERSION}.jar /usr/local/mongo-hadoop/mongo-hadoop-core.jar && \
ln -s /usr/local/mongo-hadoop/mongo-hadoop-pig-${MONGO_HADOOP_VERSION}.jar /usr/local/mongo-hadoop/mongo-hadoop-pig.jar && \
ln -s /usr/local/mongo-hadoop/mongo-hadoop-hive-${MONGO_HADOOP_VERSION}.jar /usr/local/mongo-hadoop/mongo-hadoop-hive.jar && \
ln -s /usr/local/mongo-hadoop/mongo-hadoop-spark-${MONGO_HADOOP_VERSION}.jar /usr/local/mongo-hadoop/mongo-hadoop-spark.jar && \
ln -s /usr/local/mongo-hadoop/mongo-hadoop-streaming-${MONGO_HADOOP_VERSION}.jar /usr/local/mongo-hadoop/mongo-hadoop-streaming.jar && \
ln -s /usr/local/mongo-hadoop/mongo-java-driver-${MONGO_JAVA_DRIVER_VERSION}.jar /usr/local/mongo-hadoop/mongo-java-driver.jar && \
cd /usr/local/mongo-hadoop && \
git clone https://github.com/mongodb/mongo-hadoop.git && \
cd /usr/local/mongo-hadoop/mongo-hadoop/spark/src/main/python && \
python2 setup.py install && \
python3 setup.py install && \
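# Illustrative spark-submit using the unversioned symlinks created above (job.py hypothetical):
#   spark-submit --jars /usr/local/mongo-hadoop/mongo-hadoop-spark.jar,/usr/local/mongo-hadoop/mongo-java-driver.jar job.py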
echo "# ---------------------------------------------" && \
echo "# Miscellaneous" && \
echo "# ---------------------------------------------" && \
echo "alias hist='f(){ history | grep \"\$1\"; unset -f f; }; f'" >> ~/.bashrc && \
echo "alias pyspark0='python -i -c\"exec(\\\"from initSpark import initspark, hdfsPath\nsc, spark, conf = initspark()\nfrom pyspark.sql.types import *\\\")\"'" >> ~/.bashrc && \
echo "export PIG_OPTS=-Dhive.metastore.uris=thrift://bigdata:9083" >> ~/.bashrc && \
echo "export PIG_CLASSPATH=$HCAT_HOME/share/hcatalog/hcatalog-core*.jar:$HCAT_HOME/share/hcatalog/hcatalog-pig-adapter*.jar:$HIVE_HOME/lib/hive-metastore-*.jar:$HIVE_HOME/lib/libthrift-*.jar:$HIVE_HOME/lib/hive-exec-*.jar:$HIVE_HOME/lib/libfb303-*.jar:$HIVE_HOME/lib/jdo2-api-*-ec.jar:$HIVE_HOME/conf:$HADOOP_HOME/conf:$HIVE_HOME/lib/slf4j-api-*.jar" >> ~/.bashrc && \
echo "export HCAT_HOME=/usr/local/hive/hcatalog" >> ~/.bashrc && \
echo "# Final Cleanup" && \
apt-get -y clean && \
apt-get -y autoremove && \
rm -rf /var/lib/apt/lists/* && \
echo "*************"
RUN echo "*************" && \
echo "" >> /scripts/notes.txt
CMD ["/etc/bootstrap.sh", "-d"]
# end of actual build
# echo "# ---------------------------------------------" && \
# echo "# Cockroach DB" && \
# echo "# ---------------------------------------------" && \
# wget ${COCKROACH_URL} && \
# tar xfz cockroach-* && \
# mv cockroach-v${COCKROACH_VERSION}.linux-amd64/cockroach /usr/local/bin && \
# rm -r /scripts/cockroach* && \
# echo "#! /bin/sh" > /scripts/start-cockroach.sh && \
# echo "cd /data" >> /scripts/start-cockroach.sh && \
# echo "cockroach start --insecure --host=localhost &" >> /scripts/start-cockroach.sh && \
# chmod +x /scripts/start-cockroach.sh && \
# echo "#! /bin/sh" > /scripts/cockroach-shell.sh && \
# echo "cd /data" >> /scripts/cockroach-shell.sh && \
# echo "cockroach sql --insecure" >> /scripts/cockroach-shell.sh && \
# chmod +x /scripts/cockroach-shell.sh && \
# hive --service hiveserver2 start
# hive --service hiveserver2 stop
# sudo service hive-server2 start
# !connect jdbc:hive2://localhost:10000
#<property>
#  <name>hadoop.proxyuser.hive.groups</name>
#  <value>*</value>
#</property>
#<property>
#  <name>hadoop.proxyuser.hive.hosts</name>
#  <value>*</value>
#</property>
#<property>
#  <name>hive.server2.enable.doAs</name>
#  <value>false</value>
#</property>
#https://repo1.maven.org/maven2/org/apache/parquet/parquet-pig/1.9.0/parquet-pig-1.9.0.jar
#https://repo1.maven.org/maven2/com/twitter/elephantbird/elephant-bird-pig/4.9/elephant-bird-pig-4.9.jar
# hive on spark
#ln -s /usr/local/spark/jars/scala-library-*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/spark-core_*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/spark-network-common_*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/chill-java*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/chill*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/jackson-module-paranamer*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/jackson-module-scala*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/jersey-container-servlet-core*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/jersey-server*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/json4s-ast*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/kryo-shaded*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/minlog*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/scala-xml*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/spark-launcher*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/spark-network-shuffle*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/spark-unsafe*.jar /usr/local/hive/lib
#ln -s /usr/local/spark/jars/xbean-asm5-shaded*.jar /usr/local/hive/lib
#mkdir /tmp/spark
#set hive.execution.engine=spark;
#use northwind;
#select c.categoryid, c.categoryname, p.productid, p.productname from categories as c join products as p on c.categoryid = p.categoryid;
# export HCAT_HOME=/usr/local/hive/hcatalog
# alias and export pig
# start-hbase.sh
# make scripts executable removed
# chmod +x /scripts/start-hiveserver.sh && \