Skip to content

Commit

Permalink
fix(emr): set up jupyterlab after emr cluster bootstrap (#13691)
Browse files Browse the repository at this point in the history
  • Loading branch information
hongbo-miao committed Jan 7, 2024
1 parent 234792d commit 0aef135
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,3 @@ sudo curl --silent --fail --show-error --location --remote-name --output-dir /us
sudo curl --silent --fail --show-error --location --remote-name --output-dir /usr/lib/spark/jars/ https://repo1.maven.org/maven2/org/datasyslab/geotools-wrapper/1.5.0-28.2/geotools-wrapper-1.5.0-28.2.jar
"/usr/local/python${PYTHON_VERSION}/bin/python${PYTHON_VERSION%.*}" -m pip install \
apache-sedona[spark]==1.5.0

echo "# Install JupyterLab-scoped dependencies"
sudo /emr/notebook-env/bin/conda create --name="python${PYTHON_VERSION}" python=${PYTHON_VERSION} --yes
sudo "/emr/notebook-env/envs/python${PYTHON_VERSION}/bin/python" -m pip install \
apache-sedona[spark]==1.5.0 \
attrs==23.1.0 \
descartes==1.1.0 \
ipykernel==6.28.0 \
matplotlib==3.8.2 \
pandas==2.1.4 \
shapely==2.0.2

echo "# Add JupyterLab kernel"
sudo "/emr/notebook-env/envs/python${PYTHON_VERSION}/bin/python" -m ipykernel install --name="python${PYTHON_VERSION}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
#
# Creates a conda environment under /emr/notebook-env for the pinned Python
# version, installs the notebook-scoped Python packages into it, and registers
# that environment as a Jupyter kernel.
#
# Every privileged command runs through sudo.
set -euo pipefail

# Pinned interpreter version; also used as the conda env / kernel name suffix.
readonly PYTHON_VERSION=3.11.7
readonly NOTEBOOK_ENV_ROOT=/emr/notebook-env
readonly KERNEL_NAME="python${PYTHON_VERSION}"
readonly KERNEL_PYTHON="${NOTEBOOK_ENV_ROOT}/envs/${KERNEL_NAME}/bin/python"

echo "# Install JupyterLab-scoped dependencies"
sudo "${NOTEBOOK_ENV_ROOT}/bin/conda" create \
  --name="${KERNEL_NAME}" \
  "python=${PYTHON_VERSION}" \
  --yes

# The requirement specifiers are single-quoted so that "[spark]" is passed to
# pip literally instead of being treated as a shell glob bracket expression.
sudo "${KERNEL_PYTHON}" -m pip install \
  'apache-sedona[spark]==1.5.0' \
  'attrs==23.1.0' \
  'descartes==1.1.0' \
  'ipykernel==6.28.0' \
  'matplotlib==3.8.2' \
  'pandas==2.1.4' \
  'shapely==2.0.2'

echo "# Add JupyterLab kernel"
sudo "${KERNEL_PYTHON}" -m ipykernel install --name="${KERNEL_NAME}"
18 changes: 18 additions & 0 deletions terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ module "hm_sedona_s3_validate_python_version_script" {
amazon_s3_key = "amazon-emr/clusters/hm-amazon-emr-cluster-sedona/steps/validate_python_version.py"
local_file_path = "./data/amazon-emr/hm-amazon-emr-cluster-sedona/steps/validate_python_version.py"
}
# Publishes the local set_up_jupyterlab.sh through the hm_amazon_s3_object
# module; the resulting object's `uri` output is passed to the EMR step that
# runs the script.
module "hm_sedona_s3_set_up_jupyterlab_script" {
  source = "./modules/hm_amazon_s3_object"

  local_file_path  = "./data/amazon-emr/hm-amazon-emr-cluster-sedona/steps/set_up_jupyterlab.sh"
  amazon_s3_bucket = "hongbomiao-bucket"
  amazon_s3_key    = "amazon-emr/clusters/hm-amazon-emr-cluster-sedona/steps/set_up_jupyterlab.sh"
}
module "hm_sedona_emr" {
source = "./modules/hm_amazon_emr_cluster"
amazon_emr_cluster_name = "hm-sedona"
Expand All @@ -137,6 +143,18 @@ module "hm_sedona_emr" {
properties = {}
}
]
},
{
  # EMR step that executes set_up_jupyterlab.sh on the cluster.
  # A failure of this step does not stop the remaining steps (CONTINUE).
  name              = "Set Up JupyterLab"
  action_on_failure = "CONTINUE"
  hadoop_jar_step = [
    {
      jar = "s3://us-west-2.elasticmapreduce/libs/script-runner/script-runner.jar"
      # script-runner.jar takes the script location as its first argument and
      # forwards any remaining arguments to the script itself. The previous
      # args ("bash", "--deploy-mode", "client", <uri>) made it treat "bash" as
      # the script path; "--deploy-mode client" is a spark-submit option and
      # does not apply to a shell script.
      args       = [module.hm_sedona_s3_set_up_jupyterlab_script.uri]
      main_class = ""
      properties = {}
    }
  ]
}
]
configurations_json_string = <<EOF
Expand Down

0 comments on commit 0aef135

Please sign in to comment.