Skip to content

Commit

Permalink
Update for PennState WARCshop
Browse files Browse the repository at this point in the history
* Update to Ubuntu 16.04
* Update Spark and Hadoop
* Update Spark Notebook
* Update warcbase
* Update documentation and lesson plan
* Update README
  • Loading branch information
ruebot committed Apr 27, 2017
1 parent 00e53dd commit 1231822
Show file tree
Hide file tree
Showing 7 changed files with 279 additions and 104 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
downloads
scripts/custom.sh
package.box
ubuntu-xenial-16.04-cloudimg-console.log
166 changes: 121 additions & 45 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.vm.hostname = "warcbase"

# Every Vagrant virtual environment requires a box to build off of.
config.vm.box = "ubuntu/trusty64"
config.vm.box = "ubuntu/xenial64"

config.vm.network :forwarded_port, guest: 9000, host: 9000 # Spark Notebook

Expand Down
2 changes: 1 addition & 1 deletion coursework/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# IIPC General Assembly 2016, Workshop

Ian Milligan (Waterloo) and Nick Ruest (York) will be leading this workshop through warcbase at IIPC GA 2016. Our lesson can be [found here](https://github.com/web-archive-group/warcbase_workshop_vagrant/blob/master/coursework/lessonplan.md).
Ian Milligan (Waterloo) and Nick Ruest (York) will be leading this workshop through warcbase at IIPC GA 2016. Our lesson can be [found here](https://github.com/web-archive-group/warcbase_workshop_vagrant/blob/master/coursework/lessonplan.md).
175 changes: 134 additions & 41 deletions coursework/lessonplan.md

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion scripts/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ sudo echo "LANGUAGE=en_US.UTF-8" >> /etc/environment
sudo echo "LC_ALL=en_US.UTF-8" >> /etc/environment
sudo echo "LC_CTYPE=en_US.UTF-8" >> /etc/environment

#######################################################################
# Workaround for https://bugs.launchpad.net/cloud-images/+bug/1569237
echo "ubuntu:ubuntu" | chpasswd
#######################################################################

# Update
apt-get -y update && apt-get -y upgrade

Expand Down Expand Up @@ -49,4 +54,4 @@ MAN_FILES=$(wget -qO- "http://sourceforge.net/projects/zsh/files/zsh/5.0.2/zsh-5
for MAN_FILE in $MAN_FILES; do gzip /usr/share/man/man1/"${MAN_FILE##*/}"; done

# More helpful packages
apt-get -y install htop tree zsh
apt-get -y install htop tree zsh unzip
30 changes: 15 additions & 15 deletions scripts/warcbase.sh
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
#/bin/bash

# warcbase
cd /home/vagrant
cd /home/ubuntu
mkdir project

# Apache Spark
cd /home/vagrant/project
wget http://d3kbcqa49mib13.cloudfront.net/spark-1.5.1-bin-hadoop2.6.tgz
tar -xvf spark-1.5.1-bin-hadoop2.6.tgz
rm spark-1.5.1-bin-hadoop2.6.tgz
cd /home/ubuntu/project
wget "http://d3kbcqa49mib13.cloudfront.net/spark-1.6.1-bin-hadoop2.6.tgz"
tar -xvf spark-1.6.1-bin-hadoop2.6.tgz
rm spark-1.6.1-bin-hadoop2.6.tgz

# Spark Notebook
cd /home/vagrant/project
wget https://s3.eu-central-1.amazonaws.com/spark-notebook/tgz/spark-notebook-master-scala-2.10.4-spark-1.5.1-hadoop-2.6.0-cdh5.4.2.tgz
tar -xvf spark-notebook-master-scala-2.10.4-spark-1.5.1-hadoop-2.6.0-cdh5.4.2.tgz
rm spark-notebook-master-scala-2.10.4-spark-1.5.1-hadoop-2.6.0-cdh5.4.2.tgz
cd /home/ubuntu/project
wget "https://s3.eu-central-1.amazonaws.com/spark-notebook/zip/spark-notebook-0.6.3-scala-2.10.5-spark-1.6.1-hadoop-2.6.0.zip"
unzip spark-notebook-0.6.3-scala-2.10.5-spark-1.6.1-hadoop-2.6.0.zip
rm spark-notebook-0.6.3-scala-2.10.5-spark-1.6.1-hadoop-2.6.0.zip

# warcbase dependencies (Vagrant isn't playing nicely with Maven, or I don't have the paths set up correctly)
cd /tmp
Expand All @@ -34,9 +34,9 @@ wget http://central.maven.org/maven2/org/apache/commons/commons-compress/1.9/com
wget http://central.maven.org/maven2/org/apache/commons/commons-compress/1.9/commons-compress-1.9.pom

# warcbase
cd /home/vagrant/project
cd /home/ubuntu/project
git clone http://github.com/lintool/warcbase.git
cd /home/vagrant/project/warcbase
cd /home/ubuntu/project/warcbase
mvn install:install-file -Dfile=/usr/share/java/bsh-2.0b4.jar -DpomFile=/usr/share/maven-repo/org/beanshell/bsh/2.0b4/bsh-2.0b4.pom
mvn install:install-file -Dfile=/usr/share/java/commons-cli-1.2.jar -DpomFile=/usr/share/maven-repo/commons-cli/commons-cli/1.2/commons-cli-1.2.pom
mvn install:install-file -Dfile=/tmp/commons-logging-api-1.1.jar -DpomFile=/tmp/commons-logging-api-1.1.pom
Expand All @@ -46,12 +46,12 @@ mvn install:install-file -Dfile=/tmp/commons-lang-2.6.jar -DpomFile=/tmp/commons
mvn install:install-file -Dfile=/tmp/commons-collections-3.2.1.jar -DpomFile=/tmp/commons-collections-3.2.1.pom
mvn install:install-file -Dfile=/tmp/hamcrest-core-1.3.jar -DpomFile=/tmp/hamcrest-core-1.3.pom
mvn install:install-file -Dfile=/tmp/commons-compress-1.9.jar -DpomFile=/tmp/commons-compress-1.9.pom
mvn clean package appassembler:assemble -DskipTests
mvn clean package -pl warcbase-core -DskipTests

# sample files
cd /home/vagrant/project
cd /home/ubuntu/project
git clone https://github.com/lintool/warcbase-resources.git

# make sure permissions are fine
cd /home/vagrant
chown -hR vagrant:vagrant *
cd /home/ubuntu
chown -hR ubuntu:ubuntu *

0 comments on commit 1231822

Please sign in to comment.