diff --git a/rudra/deployments/emr_scripts/maven_emr.py b/rudra/deployments/emr_scripts/maven_emr.py index 56aa0ca..d744231 100644 --- a/rudra/deployments/emr_scripts/maven_emr.py +++ b/rudra/deployments/emr_scripts/maven_emr.py @@ -26,7 +26,7 @@ def run_job(self, input_dict): bucket=self.bucket_name, log_file=log_file_name) - logger.info("Logs are gonna store at {}".format(log_uri)) + logger.info("Logs will be stored at {}".format(log_uri)) emr_config_obj = EMRConfig(name=name, s3_bootstrap_uri=bootstrap_uri, @@ -37,6 +37,8 @@ def run_job(self, input_dict): hyper_params=self.hyper_params) configs = emr_config_obj.get_config() + configs["Applications"] = [] + logger.info("Configurations for Maven EMR are: {}".format(configs)) status = self.aws_emr.run_flow(configs) logger.info("EMR job is running {}".format(status)) status_code = status.get('ResponseMetadata', {}).get('HTTPStatusCode') diff --git a/scripts/bootstrap_maven.sh b/scripts/bootstrap_maven.sh new file mode 100644 index 0000000..2e10d9f --- /dev/null +++ b/scripts/bootstrap_maven.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. 
+
+# Enable debugging (-x traces each command) and stop at the first failing command (-e).
+set -e -x
+
+sudo yum install -y zip gcc-c++ git python36-pip python36-requests httpd httpd-devel python36-devel wget
+sudo pip install --upgrade pip  # NOTE(review): upgrades the default 'pip'; the installs below use 'python3.6 -m pip' -- confirm this is the intended interpreter
+sudo python3.6 -m pip install pandas
+sudo python3.6 -m pip install Cython==0.29.1
+sudo python3.6 -m pip install hpfrec==0.2.2.9
+sudo python3.6 -m pip install git+https://github.com/fabric8-analytics/fabric8-analytics-rudra
+
+# Now set the PYTHONPATH. NOTE(review): unless this script is sourced, the export only reaches this shell and its children -- confirm the job actually sees it.
+export PYTHONPATH='/home/hadoop'