In [6]:
%load_ext dockermagic

# Hadoop - multi-node cluster setup 
![Hadoop](https://hadoop.apache.org/elephant.png)

## Create Hadoop base image

### Create docker container

- Ubuntu 18.04 (https://ubuntu.com/)
- Docker (https://www.docker.com/)
    - container based virtualization

In [2]:
%%bash

docker run -d -t --rm --name hadoopimg -h hadoopimg ubuntu:18.04

docker ps

4084bf3511e3c8eef467f5afd224267d1e49e0ec8ee5f9124f518052b6abc5f5
CONTAINER ID   IMAGE                                                    COMMAND                  CREATED        STATUS                  PORTS                                                                                  NAMES
4084bf3511e3   ubuntu:18.04                                             "bash"                   1 second ago   Up Less than a second                                                                                          hadoopimg
61b6274d5344   bde2020/hadoop-historyserver:2.0.0-hadoop3.2.1-java8     "/entrypoint.sh /run…"   3 days ago     Up 4 hours (healthy)    8188/tcp                                                                               historyserver
0d86bc20719c   bde2020/hadoop-nodemanager:2.0.0-hadoop3.2.1-java8       "/entrypoint.sh /run…"   3 days ago     Up 4 hours (healthy)    8042/tcp                                                                               nodemanager
7

### Install Dependencies

- Java 8 (OpenJDK) - https://cwiki.apache.org/confluence/display/HADOOP/Hadoop+Java+Versions
- Other packages: ssh pdsh wget apt-utils

In [8]:
%%dockerexec hadoopimg

# Update package list
apt -qq update > /install.log 2>&1

# Install Hadoop dependencies
apt -qq -f -y install openjdk-8-jdk ssh pdsh >> /install.log 2>&1

# Install other dependencies
apt -qq -f -y install vim wget apt-utils python3 python3-pip \
    ipython3 less unzip sudo net-tools >> /install.log 2>&1

### Install Hadoop

- http://hadoop.apache.org/
- Version 3.2.1
- Base directory: /opt
- User/group: hadoop/hadoop
- Package with binaries (version 3.2.1): https://hadoop.apache.org/releases.html

In [9]:
%%dockerexec hadoopimg

# Enable rwx for all on /opt
chmod 777 /opt

# Create user/group hadoop
useradd -m -U -s /bin/bash hadoop

# Enable sudo for hadoop
sed -i "\$ahadoop  ALL=(ALL) NOPASSWD:ALL" /etc/sudoers

In [18]:
%%bash

# Download package
cd ../pkgs
wget -q -c https://downloads.apache.org/hadoop/common/hadoop-3.2.2/hadoop-3.2.2.tar.gz

# Copy installation package to container
docker cp hadoop-3.2.2.tar.gz hadoopimg:/opt

In [19]:
%%dockerexec -u hadoop hadoopimg

rm /opt/hadoop

# Modify user/group permissions and unpack file
sudo chown hadoop:hadoop /opt/hadoop-3.2.2.tar.gz
tar -zxf /opt/hadoop-3.2.2.tar.gz -C /opt
rm /opt/hadoop-3.2.2.tar.gz

# Create link
ln -s /opt/hadoop-3.2.2 /opt/hadoop

### Configure environment variables

- Create file /opt/envvars.sh with environment variables

In [20]:
%%dockerexec -u hadoop hadoopimg

cat > /opt/envvars.sh << EOF
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64
export PDSH_RCMD_TYPE=ssh

export HADOOP_HOME=/opt/hadoop
export HADOOP_COMMON_HOME=\${HADOOP_HOME}
export HADOOP_CONF_DIR=\${HADOOP_HOME}/etc/hadoop
export HADOOP_HDFS_HOME=\${HADOOP_HOME}
export HADOOP_MAPRED_HOME=\${HADOOP_HOME}
export HADOOP_YARN_HOME=\${HADOOP_HOME}

export PATH=\${PATH}:\${HADOOP_HOME}/bin:\${HADOOP_HOME}/sbin     

EOF

cat /opt/envvars.sh

export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64
export PDSH_RCMD_TYPE=ssh

export HADOOP_HOME=/opt/hadoop
export HADOOP_COMMON_HOME=${HADOOP_HOME}
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_HDFS_HOME=${HADOOP_HOME}
export HADOOP_MAPRED_HOME=${HADOOP_HOME}
export HADOOP_YARN_HOME=${HADOOP_HOME}

export PATH=${PATH}:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin     



### Configure passwordless ssh

In [21]:
%%dockerexec -u hadoop hadoopimg

# Disable host key checking
sudo tee -a /etc/ssh/ssh_config << EOF
    StrictHostKeyChecking no
    UserKnownHostsFile /dev/null
EOF

# Create ssh key
ssh-keygen -q -t rsa -P "" -f ~/.ssh/id_rsa

# Copy public key to authorized_keys file
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys

    StrictHostKeyChecking no
    UserKnownHostsFile /dev/null


### Hadoop configuration files

- Hadoop configuration files location: \$HADOOP\_HOME\/etc\/hadoop
- All cluster nodes contain the same files

#### hadoop-env.sh

- Definition of environment variables used by Hadoop processes

In [22]:
%%dockerexec -u hadoop hadoopimg

cat >> /opt/hadoop/etc/hadoop/hadoop-env.sh << EOF
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64
EOF

#### core-site.xml

- Hadoop main configuration
- Default parameters: http://hadoop.apache.org/docs/r3.2.1/hadoop-project-dist/hadoop-common/core-default.xml

In [23]:
%%dockerexec -u hadoop hadoopimg

cat > /opt/hadoop/etc/hadoop/core-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>

<property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop:9000</value>
</property>

<property>
    <name>hadoop.proxyuser.hadoop.groups</name>
    <value>*</value>
</property>

<property>
    <name>hadoop.proxyuser.hadoop.hosts</name>
    <value>*</value>
</property>

</configuration>
EOF

#### hdfs-site.xml

- HDFS configuration
- Default parameters: http://hadoop.apache.org/docs/r3.2.1/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml

In [24]:
%%dockerexec -u hadoop hadoopimg

cat > /opt/hadoop/etc/hadoop/hdfs-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>

<property>
    <name>dfs.namenode.name.dir</name>
    <value>/opt/hadoop/data/nameNode</value>
</property>

<property>
    <name>dfs.datanode.data.dir</name>
    <value>/opt/hadoop/data/dataNode</value>
</property>

<property>
    <name>dfs.replication</name>
    <value>2</value>
</property>

<property>
    <name>dfs.blocksize</name>
    <value>33554432</value>
</property>

<property>
    <name>dfs.hosts.exclude</name>
    <value>/opt/hadoop/etc/hadoop/dfs.exclude</value>
</property>

<property>
    <name>dfs.namenode.heartbeat.recheck-interval</name>
    <value>10000</value>
</property>

</configuration>

EOF

#### yarn-site.xml

- YARN configuration
- Default parameters: http://hadoop.apache.org/docs/r3.2.1/hadoop-yarn/hadoop-yarn-common/yarn-default.xml

In [25]:
%%dockerexec -u hadoop hadoopimg

cat > /opt/hadoop/etc/hadoop/yarn-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<configuration>

<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop</value>
</property>

<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>

<property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>1536</value>
</property>

<property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>1536</value>
</property>

<property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>128</value>
</property>

<property>
    <name>yarn.timeline-service.enabled</name>
    <value>true</value>
</property>

<property>
    <name>yarn.timeline-service.hostname</name>
    <value>hadoop</value>
</property>

<property>
    <name>yarn.system-metrics-publisher.enabled</name>
    <value>true</value>
</property>

<property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
</property>

<property>
    <name>yarn.nm.liveness-monitor.expiry-interval-ms</name>
    <value>10000</value>
</property>

</configuration>
EOF

#### mapred-site.xml

- MapReduce configuration
- Default parameters: http://hadoop.apache.org/docs/r3.2.1/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml

In [26]:
%%dockerexec -u hadoop hadoopimg

cat > /opt/hadoop/etc/hadoop/mapred-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>

<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>

<property>
    <name>mapreduce.application.classpath</name>
    <value>/opt/hadoop/share/hadoop/mapreduce/*:/opt/hadoop/share/hadoop/mapreduce/lib/*</value>
</property>

<property>
    <name>yarn.app.mapreduce.am.resource.mb</name>
    <value>512</value>
</property>

<property>
    <name>mapreduce.map.memory.mb</name>
    <value>256</value>
</property>

<property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>256</value>
</property>

</configuration>
EOF

#### workers

- List of worker nodes (NodeManager and DataNode)

In [27]:
%%dockerexec -u hadoop hadoopimg

cat > /opt/hadoop/etc/hadoop/workers << EOF
hadoop1
hadoop2
hadoop3
EOF

### Commit base image

In [28]:
%%bash

# Create hadoopimg image based on hadoop container
docker commit hadoopimg hadoopimg

# Stop base container
docker stop hadoopimg

sha256:7390b5b85b05c1226578f06e357184cbb7b72b1eb6e9697166adcc6fa80969a6
hadoopimg


## Create cluster

### Run nodes

In [12]:
%%bash

# MASTER

# Ports
# 9870 - Namenode
# 9868 - Secondary Namenode
# 8088 - ResourceManager
# 19888 - MapReduce Job History
# 8188 - Timeline Service
# 4040 - Spark Application UI
# 8080 - Jupyter

cd ..
docker run -d -t --memory 4g --memory-swap 4g --rm --name hadoop -h hadoop -u hadoop \
    -v "$(pwd)"/pkgs:/opt/pkgs -v "$(pwd)"/notebooks:/opt/notebooks -v "$(pwd)"/datasets:/opt/datasets \
    -p 9870:9870 -p 9868:9868 -p 8088:8088 -p 19888:19888 -p 8188:8188 -p 4040:4040 -p 8080:8080 hadoopimg

# WORKERS

# Ports
# 9864 - DataNode WebUI
# 8042 - NodeManager WebUI

# Hadoop1
docker run -d -t --memory 2g --memory-swap 2g --rm --name hadoop1 -h hadoop1 -u hadoop \
    -p 9864:9864 -p 8042:8042 hadoopimg
# Hadoop2
docker run -d -t --memory 2g --memory-swap 2g --rm --name hadoop2 -h hadoop2 -u hadoop \
    -p 9865:9864 -p 8043:8042  hadoopimg
# Hadoop3
docker run -d -t --memory 2g --memory-swap 2g --rm --name hadoop3 -h hadoop3 -u hadoop \
    -p 9866:9864 -p 8044:8042  hadoopimg

docker ps

CONTAINER ID   IMAGE       COMMAND   CREATED         STATUS         PORTS                                                                                                                                                                                                                                                                                                             NAMES
fb54f623a7f6   hadoopimg   "bash"    4 minutes ago   Up 4 minutes   0.0.0.0:8044->8042/tcp, :::8044->8042/tcp, 0.0.0.0:9866->9864/tcp, :::9866->9864/tcp                                                                                                                                                                                                                              hadoop3
a2c64460f6c9   hadoopimg   "bash"    4 minutes ago   Up 4 minutes   0.0.0.0:8043->8042/tcp, :::8043->8042/tcp, 0.0.0.0:9865->9864/tcp, :::9865->9864/tcp                                                                                      

docker: Error response from daemon: Conflict. The container name "/hadoop" is already in use by container "a6c69353170e9a0d158fc0b005bcc2c688ab1c63e0802f7e681fb041e009b103". You have to remove (or rename) that container to be able to reuse that name.
See 'docker run --help'.
docker: Error response from daemon: Conflict. The container name "/hadoop1" is already in use by container "e3ccef4919c84433a4f59ea36c39b7173ed3311e71ed18b3a48c508e78fca0f9". You have to remove (or rename) that container to be able to reuse that name.
See 'docker run --help'.
docker: Error response from daemon: Conflict. The container name "/hadoop2" is already in use by container "a2c64460f6c92e98dfbfe1c078baed5e0fe4c12edb8e01191556ec00bfb3684e". You have to remove (or rename) that container to be able to reuse that name.
See 'docker run --help'.
docker: Error response from daemon: Conflict. The container name "/hadoop3" is already in use by container "fb54f623a7f697bcf45d7da33fd92a4f182ffc21caac7b3948f142ecc86482

### Configure hosts file on all nodes

- /etc/hosts

In [13]:
%%bash
docker ps

CONTAINER ID   IMAGE       COMMAND   CREATED         STATUS         PORTS                                                                                                                                                                                                                                                                                                             NAMES
fb54f623a7f6   hadoopimg   "bash"    4 minutes ago   Up 4 minutes   0.0.0.0:8044->8042/tcp, :::8044->8042/tcp, 0.0.0.0:9866->9864/tcp, :::9866->9864/tcp                                                                                                                                                                                                                              hadoop3
a2c64460f6c9   hadoopimg   "bash"    4 minutes ago   Up 4 minutes   0.0.0.0:8043->8042/tcp, :::8043->8042/tcp, 0.0.0.0:9865->9864/tcp, :::9865->9864/tcp                                                                                      

In [14]:
%%bash

# Get IPs
M=$(docker inspect hadoop | grep \"IPAddress\" | head -1 | awk '{ print $2 }' | tr -d \",)
H1=$(docker inspect hadoop1 | grep \"IPAddress\" | head -1 | awk '{ print $2 }' | tr -d \",)
H2=$(docker inspect hadoop2 | grep \"IPAddress\" | head -1 | awk '{ print $2 }' | tr -d \",)
H3=$(docker inspect hadoop3 | grep \"IPAddress\" | head -1 | awk '{ print $2 }' | tr -d \",)

# Create hosts file
cat > hosts << EOF  
$M hadoop
$H1 hadoop1
$H2 hadoop2
$H3 hadoop3
EOF

cat hosts

# Copy to all nodes
docker exec -i -u root hadoop sh -c 'cat >> /etc/hosts' < hosts
docker exec -i -u root hadoop1 sh -c 'cat >> /etc/hosts' < hosts
docker exec -i -u root hadoop2 sh -c 'cat >> /etc/hosts' < hosts
docker exec -i -u root hadoop3 sh -c 'cat >> /etc/hosts' < hosts

# Remove local file
rm hosts

172.17.0.2 hadoop
172.17.0.3 hadoop1
172.17.0.4 hadoop2
172.17.0.5 hadoop3


### Start ssh server on all nodes

In [15]:
%%bash

for HOST in hadoop hadoop1 hadoop2 hadoop3
do
    echo $HOST
    docker exec -u root $HOST service ssh restart
    docker exec -u root $HOST service ssh status
done

hadoop
 * Restarting OpenBSD Secure Shell server sshd
   ...done.
 * sshd is running
hadoop1
 * Restarting OpenBSD Secure Shell server sshd
   ...done.
 * sshd is running
hadoop2
 * Restarting OpenBSD Secure Shell server sshd
   ...done.
 * sshd is running
hadoop3
 * Restarting OpenBSD Secure Shell server sshd
   ...done.
 * sshd is running


## Format HDFS on Namenode

In [16]:
%%dockerexec hadoop
source /opt/envvars.sh

hdfs namenode -format -force -nonInteractive

2021-06-27 19:01:15,928 INFO namenode.NameNode: STARTUP_MSG: 
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG:   host = hadoop/172.17.0.2
STARTUP_MSG:   args = [-format, -force, -nonInteractive]
STARTUP_MSG:   version = 3.2.2
STARTUP_MSG:   classpath = /opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/commons-lang3-3.7.jar:/opt/hadoop/share/hadoop/common/lib/kerb-core-1.0.1.jar:/opt/hadoop/share/hadoop/common/lib/jsch-0.1.55.jar:/opt/hadoop/share/hadoop/common/lib/jaxb-impl-2.2.3-1.jar:/opt/hadoop/share/hadoop/common/lib/jersey-server-1.19.jar:/opt/hadoop/share/hadoop/common/lib/jaxb-api-2.2.11.jar:/opt/hadoop/share/hadoop/common/lib/snappy-java-1.0.5.jar:/opt/hadoop/share/hadoop/common/lib/kerb-common-1.0.1.jar:/opt/hadoop/share/hadoop/common/lib/httpcore-4.4.13.jar:/opt/hadoop/share/hadoop/common/lib/jersey-json-1.19.jar:/opt/hadoop/share/hadoop/common/lib/javax.servlet-api-3.1.0.jar:/opt/hadoop/share/hadoop/common/lib

## Start Hadoop daemons

- manual execution: ```hdfs --daemon start (namenode|datanode)``` and ```yarn --daemon start (resourcemanager|nodemanager)```
- auxilliary scripts to run all processes on the cluster: start-dfs.sh (HDFS) and start-yarn.sh (YARN)
- some services still need to be executed manually (timelineserver, historyserver)

In [17]:
%%dockerexec hadoop
source /opt/envvars.sh

# HDFS
start-dfs.sh

# YARN
start-yarn.sh

# timelineserver
yarn --daemon start timelineserver

# historyserver
mapred --daemon start historyserver

Starting namenodes on [hadoop]
Starting datanodes
hadoop3: datanode is running as process 131.  Stop it first.
pdsh@hadoop: hadoop3: ssh exited with exit code 1
hadoop2: datanode is running as process 131.  Stop it first.
pdsh@hadoop: hadoop2: ssh exited with exit code 1
hadoop1: datanode is running as process 131.  Stop it first.
pdsh@hadoop: hadoop1: ssh exited with exit code 1
Starting secondary namenodes [hadoop]
hadoop: secondarynamenode is running as process 518.  Stop it first.
pdsh@hadoop: hadoop: ssh exited with exit code 1
Starting resourcemanager
resourcemanager is running as process 780.  Stop it first.
Starting nodemanagers
hadoop1: nodemanager is running as process 239.  Stop it first.
pdsh@hadoop: hadoop1: ssh exited with exit code 1
hadoop3: nodemanager is running as process 239.  Stop it first.
pdsh@hadoop: hadoop3: ssh exited with exit code 1
hadoop2: nodemanager is running as process 239.  Stop it first.
pdsh@hadoop: hadoop2: ssh exited with exit code 1
timelineserve

In [18]:
%%bash

# Listing all processes
for HOST in hadoop hadoop1 hadoop2 hadoop3; do
    echo $HOST
    docker exec $HOST jps
done

hadoop
1218 JobHistoryServer
518 SecondaryNameNode
780 ResourceManager
1148 ApplicationHistoryServer
2253 Jps
1599 NameNode
hadoop1
131 DataNode
538 Jps
239 NodeManager
hadoop2
131 DataNode
539 Jps
239 NodeManager
hadoop3
131 DataNode
538 Jps
239 NodeManager


## Create HDFS directories

In [20]:
%%dockerexec hadoop
source /opt/envvars.sh

hdfs dfs -mkdir -p /user/hadoop
hdfs dfs -chown hadoop:hadoop /user/hadoop
hdfs dfs -mkdir /tmp
hdfs dfs -chmod 777 /tmp

mkdir: `/tmp': File exists


## Install and run Jupyter on master node

In [22]:
%%dockerexec hadoop

# pip3 -q install notebook
pip3 -q install jupyterlab

IP=$(ifconfig eth0 | grep inet | awk '{ print $2 }')

cd /opt

export SHELL=/bin/bash
# nohup /home/hadoop/.local/bin/jupyter-notebook --ip=$IP --port=8080 --NotebookApp.token='' --NotebookApp.password='' --NotebookApp.notebook_dir='/' --no-browser &
nohup /home/hadoop/.local/bin/jupyter-lab --ip=$IP --port=8080 --notebook-dir='/' --ServerApp.token='' --ServerApp.password='' --no-browser &

echo $! > /tmp/jupyter.pid

# To kill
# kill $(cat /tmp/jupyter.pid)

[I 2021-06-27 19:41:49.287 ServerApp] jupyterlab | extension was successfully linked.
[W 2021-06-27 19:41:49.306 ServerApp] All authentication is disabled.  Anyone who can connect to this server will be able to run code.
[I 2021-06-27 19:41:49.308 LabApp] JupyterLab extension loaded from /home/hadoop/.local/lib/python3.6/site-packages/jupyterlab
[I 2021-06-27 19:41:49.308 LabApp] JupyterLab application directory is /home/hadoop/.local/share/jupyter/lab
[I 2021-06-27 19:41:49.313 ServerApp] jupyterlab | extension was successfully loaded.
[I 2021-06-27 19:41:49.313 ServerApp] Serving notebooks from local directory: /
[I 2021-06-27 19:41:49.313 ServerApp] Jupyter Server 1.9.0 is running at:
[I 2021-06-27 19:41:49.313 ServerApp] http://172.17.0.2:8080/lab
[I 2021-06-27 19:41:49.313 ServerApp]  or http://127.0.0.1:8080/lab
[I 2021-06-27 19:41:49.313 ServerApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).


## Access web interfaces

- Jupyterlab
    - http://localhost:8080
- Master - hadoop
    - Resource Manager: http://localhost:8088
    - NameNode: http://localhost:9870
    - Secondary NameNode: http://localhost:9868
    - MapReduce Job History: http://localhost:19888
    - Timeline Service: http://localhost:8188
- Workers
    - hadoop1
        - NodeManager: http://localhost:8042
        - DataNode: http://localhost:9864
    - hadoop2
        - NodeManager: http://localhost:8043
        - DataNode: http://localhost:9865
    - hadoop3
        - NodeManager: http://localhost:8044
        - DataNode: http://localhost:9866

## Run mapreduce Pi example

In [43]:
%%dockerexec hadoop
source /opt/envvars.sh
cd /opt/hadoop/share/hadoop/mapreduce

hadoop jar ./hadoop-mapreduce-examples-3.2.2.jar pi 6 10000

Number of Maps  = 6
Samples per Map = 10000
Wrote input for Map #0
Wrote input for Map #1
Wrote input for Map #2
Wrote input for Map #3
Wrote input for Map #4
Wrote input for Map #5
Starting Job
2021-06-26 02:24:45,818 INFO client.RMProxy: Connecting to ResourceManager at hadoop/172.17.0.2:8032
2021-06-26 02:24:45,958 INFO client.AHSProxy: Connecting to Application History server at hadoop/172.17.0.2:10200
2021-06-26 02:24:46,095 INFO mapreduce.JobResourceUploader: Disabling Erasure Coding for path: /tmp/hadoop-yarn/staging/hadoop/.staging/job_1624673771620_0001
2021-06-26 02:24:46,246 INFO input.FileInputFormat: Total input files to process : 6
2021-06-26 02:24:46,375 INFO mapreduce.JobSubmitter: number of splits:6
2021-06-26 02:24:46,534 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1624673771620_0001
2021-06-26 02:24:46,535 INFO mapreduce.JobSubmitter: Executing with tokens: []
2021-06-26 02:24:46,690 INFO conf.Configuration: resource-types.xml not found
2021-06-26 02:

# SHUTDOWN PROCEDURE

## Stop Jupyter

In [None]:
%%dockerexec hadoop

kill $(cat /tmp/jupyter.pid)

## Stop Hadoop daemons

In [None]:
%%dockerexec hadoop
source /opt/envvars.sh

stop-dfs.sh
stop-yarn.sh
yarn --daemon stop timelineserver
mapred --daemon stop historyserver

## Stop Docker containers

In [None]:
%%bash

for HOST in hadoop hadoop1 hadoop2 hadoop3; do
    docker stop $HOST
done

docker ps