Skip to content

Commit

Permalink
[docker] docker dev environment
Browse files Browse the repository at this point in the history
  • Loading branch information
nhandyal committed Feb 21, 2021
1 parent 53f5474 commit 801f1ae
Show file tree
Hide file tree
Showing 7 changed files with 280 additions and 5 deletions.
116 changes: 116 additions & 0 deletions sparkler-core/bin/dockler.dev.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#!/usr/bin/env python3

import argparse
import os
import subprocess
import sys
from typing import Any


# Absolute path of the repository root. Starting from this file's own path,
# the three ".." components step to the file's directory, then to that
# directory's parent, then one level further up; realpath normalizes the result.
REPO_ROOT: str = os.path.realpath(os.path.join(__file__, '..', '..', '..'))


def main() -> None:
    """Parse the command-line switches and run each requested docker action.

    Every switch is an independent boolean flag, so several can be combined
    in a single invocation. The actions always execute in the fixed order
    clean, build, run, login, whatever order the flags were typed in.
    """
    cli = argparse.ArgumentParser(
        description="CLI to manage docker containers for development",
    )

    # (flag, help text) pairs; all flags are plain on/off switches.
    switches = (
        ("--build", "Builds a dev image"),
        ("--run", "Run a dev container"),
        ("--login", "Login to a running dev container"),
        ("--clean", "Remove all dev images and containers"),
    )
    for flag, help_text in switches:
        cli.add_argument(flag, help=help_text, default=False, action="store_true")

    options = cli.parse_args()

    # Statement order below is the execution order.
    if options.clean:
        clean()
    if options.build:
        build()
    if options.run:
        run_container()
    if options.login:
        login()


def build() -> None:
    """Build the ``sparkler-dev:latest`` development image.

    Runs ``docker build`` with the repository root as working directory,
    using ``sparkler-core`` as the build context and the ``Dockerfile.dev``
    under ``sparkler-core/sparkler-deployment/docker``. The command is handed
    to ``_shell_exec_check_output``, which exits the process with the
    command's return code when it is non-zero.
    """
    # NOTE: this used to be preceded by a second ``build`` definition that only
    # assembled a ``docker exec`` string and never ran it. It was shadowed by
    # this definition and therefore unreachable, so it has been removed.
    cmd = " ".join([
        "docker build",
        "--tag sparkler-dev:latest",
        "--file sparkler-core/sparkler-deployment/docker/Dockerfile.dev",
        "sparkler-core",
    ])

    _shell_exec_check_output(cmd, cwd=REPO_ROOT)


def run_container() -> None:
    """Start a detached ``sparkler-dev`` container from the dev image.

    The container is named ``sparkler-dev``, gets the repository's
    ``sparkler-core`` directory mounted at ``/data/sparkler-core``, and
    publishes ports 8983 and 4041 on the host. After the command has been
    run, a hint on how to open a shell in the container is printed.
    """
    docker_args = (
        "docker run",
        "--detach",
        "--name sparkler-dev",
        f"--volume '{REPO_ROOT}/sparkler-core:/data/sparkler-core'",
        "--publish 8983:8983",
        "--publish 4041:4041",
        "sparkler-dev:latest",
    )
    command_line = " ".join(docker_args)

    _shell_exec_check_output(command_line, cwd=REPO_ROOT)
    print("Container running. Login via 'docker exec -it sparkler-dev /bin/bash'")


def login() -> None:
    """Print the command that opens a shell in the running dev container.

    This function does not attach to the container itself; it only prints
    the ``docker exec`` command for the user to run.
    """
    # The message previously ended in a stray, unmatched single quote, which
    # made the printed command unusable when pasted into a shell.
    print("docker exec -it sparkler-dev /bin/bash")


def clean() -> None:
    """Stop and remove the ``sparkler-dev`` container, then remove its image.

    The three docker commands are joined into one shell command line and
    separated by ``;``, so each one runs even if the one before it failed
    (for example when the container does not exist).
    """
    teardown_steps = (
        "docker container stop sparkler-dev;",
        "docker container rm sparkler-dev;",
        "docker image rm sparkler-dev:latest",
    )
    command_line = " ".join(teardown_steps)

    _shell_exec_check_output(command_line, cwd=REPO_ROOT)


def _eprint(msg: str) -> None:
sys.stderr.write("%s\n" % msg)


def _shell_exec_check_output(cmd: str, **kwargs: Any) -> None:
    """Echo ``cmd`` to stderr, run it through the shell, and exit on failure.

    Args:
        cmd: The full command line, executed with ``shell=True``.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``subprocess.run`` (the callers in this file pass ``cwd``).

    If the command finishes with a non-zero return code, the whole process
    exits with that same code via ``sys.exit``. The command's output is not
    captured.
    """
    _eprint(f"Exec: {cmd}")
    completed = subprocess.run(cmd, shell=True, **kwargs)
    exit_code = completed.returncode
    if exit_code == 0:
        return
    sys.exit(exit_code)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
16 changes: 11 additions & 5 deletions sparkler-core/bin/sparkler.sh
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
#!/usr/bin/env bash

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
DIR="$DIR/.."
SOURCE="${BASH_SOURCE[0]}"
while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink
DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
SOURCE="$(readlink "$SOURCE")"
[[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located
done
DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
SPARKLER_CORE_DIR="$DIR/.."

JAR=`echo $DIR/sparkler-app-*-SNAPSHOT.jar`
JAR=`echo $SPARKLER_CORE_DIR/build/sparkler-app-*-SNAPSHOT.jar`
if [ ! -f "$JAR" ]
then
echo "ERROR: Can't find Sparkler Jar at $JAR.
Looks like the jar is not built. Please refer to build instructions"
Looks like the jar is not built. Please refer to build instructions. Or see ./dockler.sh"
exit 2
fi

# run
# -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005
java -Xms1g -cp $DIR/conf:$JAR -Dpf4j.pluginsDir=$DIR/plugins edu.usc.irds.sparkler.Main $@
java -Xms1g -cp $DIR/conf:$JAR -Dpf4j.pluginsDir=$SPARKLER_CORE_DIR/build/plugins edu.usc.irds.sparkler.Main $@
11 changes: 11 additions & 0 deletions sparkler-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,17 @@
<plugin>
<artifactId>maven-release-plugin</artifactId>
</plugin>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>${maven.clean.plugin.version}</version>
<configuration>
<filesets>
<fileset>
<directory>build</directory>
</fileset>
</filesets>
</configuration>
</plugin>
</plugins>
<pluginManagement>
<plugins>
Expand Down
92 changes: 92 additions & 0 deletions sparkler-core/sparkler-deployment/docker/Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Pull base image.

# NOTE: always run the docker build command from the root of the sparkler repository;
# the build context is the sparkler-core directory.
# Build command:
# docker build -t sparkler-dev:latest -f sparkler-core/sparkler-deployment/docker/Dockerfile.dev sparkler-core

FROM openjdk:13

# Setting Maven Version that needs to be installed
ARG MAVEN_VERSION=3.6.3

ENV MAVEN_VERSION=${MAVEN_VERSION}
ENV M2_HOME /usr/share/maven
ENV maven.home $M2_HOME
ENV M2 $M2_HOME/bin
ENV PATH $M2:$PATH

# Install required tools
# which: otherwise 'mvn version' prints '/usr/share/maven/bin/mvn: line 93: which: command not found'
RUN yum update -y && \
yum install -y which wget procps lsof git vim && \
yum clean all

# configure root user
RUN ["usermod", "-p", "$6$W8SF/w7v$xVsCcv9ZLrpm/QvzojWDYFOrfaQiZrXOcfC.PhU2k0tWRzY41glUHixNkzuPx399k9lueK.Fi8RyBzw5F6Jnu0", "root"]

# Define working directory.
WORKDIR /data

# Maven
RUN curl -fsSL https://archive.apache.org/dist/maven/maven-3/$MAVEN_VERSION/binaries/apache-maven-$MAVEN_VERSION-bin.tar.gz | tar xzf - -C /usr/share \
&& mv /usr/share/apache-maven-$MAVEN_VERSION /usr/share/maven \
&& ln -s /usr/share/maven/bin/mvn /usr/bin/mvn

## Setup Solr
RUN wget http://archive.apache.org/dist/lucene/solr/8.5.0/solr-8.5.0.tgz -O /data/solr.tgz && \
cd /data/ && tar xzf /data/solr.tgz && \
mv /data/solr-* /data/solr && rm /data/solr.tgz

# Setup Solr
# This is done in start.dev.sh on container start
#
# RUN wget http://archive.apache.org/dist/lucene/solr/8.5.0/solr-8.5.0.tgz -O /data/solr.tgz && \
# cd /data/ && tar xzf /data/solr.tgz && \
# mv /data/solr-* /data/solr && rm /data/solr.tgz

# add sparkler contents
# Once the container is running, call mvn build to generate the build directory
#
# ADD ./build /data/sparkler

# sparkler ui with banana dashboard
# when building locally, the war is expected at
# /data/sparkler-core/sparkler-ui/sparkler-dashboard/sparkler-ui.war
RUN ["ln", "-s", "/data/sparkler-core/sparkler-ui/sparkler-dashboard/sparkler-ui.war", "/data/solr/server/solr-webapp/sparkler"]
RUN ["rm", "-f", "/data/solr/server/etc/jetty.xml"]
RUN ["ln", "-s", "/data/sparkler-core/conf/solr/sparkler-jetty-context.xml", "/data/solr/server/contexts/sparkler-jetty-context.xml"]

## Patch Solr's Jetty for Banana with the CSP directive header
RUN ["ln", "-s", "/data/sparkler-core/sparkler-deployment/docker/jetty-csp-patch/jetty.xml", "/data/solr/server/etc/jetty.xml"]

# configure start script
COPY sparkler-deployment/docker/scripts/start.dev.sh /data/start.dev.sh
RUN ["chmod", "+x", "/data/start.dev.sh"]

# add solr and sparkler to path
RUN ["ln", "-s", "/data/solr/bin/solr", "/usr/bin/solr"]
RUN ["ln", "-s", "/data/sparkler-core/bin/sparkler.sh", "/usr/bin/sparkler"]

# configure welcome message
COPY sparkler-deployment/docker/scripts/greeting.dev.sh /usr/local/bin/greeting.dev.sh
RUN chmod +x /usr/local/bin/greeting.dev.sh
RUN echo "sh /usr/local/bin/greeting.dev.sh" >> /root/.bashrc

# Define default command
CMD ["/data/start.dev.sh"]
37 changes: 37 additions & 0 deletions sparkler-core/sparkler-deployment/docker/scripts/greeting.dev.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env bash

# Prints the welcome banner and a short cheat sheet of useful URLs and commands.
#
# The heredoc delimiter is quoted ('EOF') so the body is emitted verbatim:
# the ASCII art contains a lone backtick, which an unquoted heredoc would
# treat as the start of an (unterminated) command substitution.
cat << 'EOF'
_____ _ _
/ ____| | | | |
| (___ _ __ __ _ _ __| | _| | ___ _ __
\___ \| '_ \ / _` | '__| |/ / |/ _ \ '__|
____) | |_) | (_| | | | <| | __/ |
|_____/| .__/ \__,_|_| |_|\_\_|\___|_|
| |
|_|
You can access solr at http://localhost:8983/solr when solr is running
You can access the spark master UI at http://localhost:4041 when spark master is running
Some useful queries:
- Get stats on groups, status, depth:
http://localhost:8983/solr/crawldb/query?q=*:*&rows=0&facet=true&&facet.field=crawl_id&facet.field=status&facet.field=group&facet.field=discover_depth
Inside docker, you can do the following:
solr - command line tool for administering solr
start -force -> start solr
stop -force -> stop solr
status -force -> get status of solr
restart -force -> restart solr
sparkler - command line interface to sparkler
inject - inject seed urls
crawl - launch a crawl job
build sparkler
cd sparkler-core && mvn install
EOF
12 changes: 12 additions & 0 deletions sparkler-core/sparkler-deployment/docker/scripts/start.dev.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash

# Start-up script for the dev container: prepares the solr core, starts solr,
# then blocks forever.
#
# This file is running as root (presumably why every solr command below is
# given -force -- confirm against the solr CLI documentation).

# Create the "crawldb" solr core from the config set in
# /data/sparkler-core/build/conf/solr/crawldb/. Solr is started for the
# core creation and stopped again afterwards; the steps are chained with &&,
# so each one only runs if the previous one succeeded.
/data/solr/bin/solr start -force && \
/data/solr/bin/solr create_core -force -c crawldb -d /data/sparkler-core/build/conf/solr/crawldb/ && \
/data/solr/bin/solr stop -force

# Start solr again and leave it running.
/data/solr/bin/solr start -force

# Block indefinitely so this script never exits
# (presumably to keep the container alive -- confirm).
tail -f '/dev/null'
1 change: 1 addition & 0 deletions sparkler-core/sparkler-ui/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
</dependencies>

<build>
<finalName>${project.artifactId}</finalName>
<plugins>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
Expand Down

0 comments on commit 801f1ae

Please sign in to comment.