diff --git a/Makefile b/Makefile
index 7a65bc7..f7e5b5a 100644
--- a/Makefile
+++ b/Makefile
@@ -7,10 +7,10 @@ SHELL := /bin/sh
 
 # Set variables if testing locally
 ifeq ($(IS_RELEASE_BUILD),)
-    SPARK_VERSION := 2.4
+    SPARK_VERSION := 3.0
     PROCESSOR := cpu
     FRAMEWORK_VERSION := py37
-    SM_VERSION := 0.1
+    SM_VERSION := 1.0
     USE_CASE := processing
     BUILD_CONTEXT := ./spark/${USE_CASE}/${SPARK_VERSION}/py3
     AWS_PARTITION := aws
@@ -84,8 +84,8 @@ test-sagemaker: install-sdk build-tests
     # History server tests can't run in parallel since they use the same container name.
     pytest -s -vv test/integration/history \
     --repo=$(DEST_REPO) --tag=$(VERSION) --durations=0 \
-    --spark-version=$(SPARK_VERSION)
-    --framework_version=$(FRAMEWORK_VERSION) \
+    --spark-version=$(SPARK_VERSION) \
+    --framework-version=$(FRAMEWORK_VERSION) \
     --role $(ROLE) \
     --image_uri $(IMAGE_URI) \
     --region ${REGION} \
@@ -93,9 +93,10 @@ test-sagemaker: install-sdk build-tests
     # OBJC_DISABLE_INITIALIZE_FORK_SAFETY: https://github.com/ansible/ansible/issues/32499#issuecomment-341578864
     OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES pytest --workers auto -s -vv test/integration/sagemaker \
     --repo=$(DEST_REPO) --tag=$(VERSION) --durations=0 \
-    --spark-version=$(SPARK_VERSION)
-    --framework_version=$(FRAMEWORK_VERSION) \
+    --spark-version=$(SPARK_VERSION) \
+    --framework-version=$(FRAMEWORK_VERSION) \
     --role $(ROLE) \
+    --account-id ${INTEG_TEST_ACCOUNT} \
     --image_uri $(IMAGE_URI) \
     --region ${REGION} \
     --domain ${AWS_DOMAIN}
@@ -104,8 +105,8 @@ test-sagemaker: install-sdk build-tests
 test-prod:
     pytest -s -vv test/integration/tag \
     --repo=$(DEST_REPO) --tag=$(VERSION) --durations=0 \
-    --spark-version=$(SPARK_VERSION)
-    --framework_version=$(FRAMEWORK_VERSION) \
+    --spark-version=$(SPARK_VERSION) \
+    --framework-version=$(FRAMEWORK_VERSION) \
     --role $(ROLE) \
     --image_uri $(IMAGE_URI) \
     --region ${REGION} \
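These pytest flags only resolve if they match what the test suite registers. For reference, a minimal `conftest.py` sketch of how hyphenated options such as `--framework-version` and `--account-id` are typically declared; the dest/fixture names here are assumptions, not the repository's actual conftest:

```python
# Illustrative conftest.py sketch only -- option names mirror the Makefile above,
# but the dest/fixture names are assumptions.
import pytest


def pytest_addoption(parser):
    parser.addoption("--spark-version", dest="spark_version", default="3.0")
    parser.addoption("--framework-version", dest="framework_version", default="py37")
    parser.addoption("--account-id", dest="account_id", default=None)


@pytest.fixture
def account_id(request):
    # Exposes the CLI option to tests such as test_sagemaker_pyspark_sse_kms_s3.
    return request.config.getoption("account_id")
```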
diff --git a/new_images.yml b/new_images.yml
index cdf8822..c5dcdda 100644
--- a/new_images.yml
+++ b/new_images.yml
@@ -1,6 +1,6 @@
 ---
 new_images:
-  - spark: "2.4.4"
+  - spark: "3.0.0"
     use-case: "processing"
     processors: ["cpu"]
     python: ["py37"]
diff --git a/setup.py b/setup.py
index 5ab6b7a..24cb4ee 100644
--- a/setup.py
+++ b/setup.py
@@ -44,7 +44,7 @@
     ],
     setup_requires=["setuptools", "wheel"],
     # Be frugal when adding dependencies. Prefer Python's standard library.
-    install_requires = install_reqs,
+    install_requires=install_reqs,
     extras_require={
         "test": test_install_reqs,
diff --git a/spark/processing/3.0/py3/container-bootstrap-config/bootstrap.sh b/spark/processing/3.0/py3/container-bootstrap-config/bootstrap.sh
new file mode 100644
index 0000000..e4e23bb
--- /dev/null
+++ b/spark/processing/3.0/py3/container-bootstrap-config/bootstrap.sh
@@ -0,0 +1 @@
+echo "Not implemented"
\ No newline at end of file
diff --git a/spark/processing/3.0/py3/docker/Dockerfile.cpu b/spark/processing/3.0/py3/docker/Dockerfile.cpu
new file mode 100644
index 0000000..a61b4f4
--- /dev/null
+++ b/spark/processing/3.0/py3/docker/Dockerfile.cpu
@@ -0,0 +1,100 @@
+FROM amazonlinux:2
+ARG REGION
+ENV AWS_REGION ${REGION}
+RUN yum clean all
+RUN yum update -y
+RUN yum install -y awscli bigtop-utils curl gcc gzip unzip python3 python3-setuptools python3-pip python-devel python3-devel python-psutil gunzip tar wget liblapack* libblas* libopencv* libopenblas*
+
+# Install nginx. amazonlinux:2.0.20200304.0 does not have nginx, so install epel-release first.
+RUN wget https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
+RUN yum install -y epel-release-latest-7.noarch.rpm
+RUN yum install -y nginx
+
+RUN rm -rf /var/cache/yum
+
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# http://blog.stuart.axelbrooke.com/python-3-on-spark-return-of-the-pythonhashseed
+ENV PYTHONHASHSEED 0
+ENV PYTHONIOENCODING UTF-8
+ENV PIP_DISABLE_PIP_VERSION_CHECK 1
+
+# Install EMR Spark/Hadoop
+ENV HADOOP_HOME /usr/lib/hadoop
+ENV HADOOP_CONF_DIR /usr/lib/hadoop/etc/hadoop
+ENV SPARK_HOME /usr/lib/spark
+
+COPY yum/emr-apps.repo /etc/yum.repos.d/emr-apps.repo
+
+# Install Hadoop/Spark dependencies from EMR's yum repository for Spark optimizations,
+# replacing the REGION placeholder in the repository URL first.
+RUN sed -i "s/REGION/${AWS_REGION}/g" /etc/yum.repos.d/emr-apps.repo
+
+# These packages are a subset of what EMR installs in a cluster with the
+# "hadoop", "spark", and "hive" applications.
+# They include EMR-optimized libraries and extras.
+RUN yum install -y aws-hm-client \
+    aws-java-sdk \
+    aws-sagemaker-spark-sdk \
+    emr-goodies \
+    emr-scripts \
+    emr-s3-select \
+    emrfs \
+    hadoop \
+    hadoop-client \
+    hadoop-hdfs \
+    hadoop-hdfs-datanode \
+    hadoop-hdfs-namenode \
+    hadoop-httpfs \
+    hadoop-kms \
+    hadoop-lzo \
+    hadoop-yarn \
+    hadoop-yarn-nodemanager \
+    hadoop-yarn-proxyserver \
+    hadoop-yarn-resourcemanager \
+    hadoop-yarn-timelineserver \
+    hive \
+    hive-hcatalog \
+    hive-hcatalog-server \
+    hive-jdbc \
+    hive-server2 \
+    python37-numpy \
+    python37-sagemaker_pyspark \
+    s3-dist-cp \
+    spark-core \
+    spark-datanucleus \
+    spark-external \
+    spark-history-server \
+    spark-python
+
+
+# Point Spark at the proper python binary
+ENV PYSPARK_PYTHON=/usr/bin/python3
+
+# Set up the Spark/YARN/HDFS user as root
+ENV PATH="/usr/bin:/opt/program:${PATH}"
+ENV YARN_RESOURCEMANAGER_USER="root"
+ENV YARN_NODEMANAGER_USER="root"
+ENV HDFS_NAMENODE_USER="root"
+ENV HDFS_DATANODE_USER="root"
+ENV HDFS_SECONDARYNAMENODE_USER="root"
+
+# Set up the bootstrapping program and Spark configuration
+COPY *.whl /opt/program/
+RUN /usr/bin/python3 -m pip install /opt/program/*.whl
+COPY hadoop-config /opt/hadoop-config
+COPY nginx-config /opt/nginx-config
+COPY aws-config /opt/aws-config
+
+# Set up the container bootstrapper
+COPY container-bootstrap-config /opt/container-bootstrap-config
+RUN chmod +x /opt/container-bootstrap-config/bootstrap.sh
+RUN /opt/container-bootstrap-config/bootstrap.sh
+
+# With this config, the Spark history server will not run as a daemon; otherwise there
+# would be no server running and the container would terminate immediately.
+ENV SPARK_NO_DAEMONIZE TRUE
+
+WORKDIR $SPARK_HOME
+
+ENTRYPOINT ["smspark-submit"]
diff --git a/spark/processing/3.0/py3/hadoop-config/core-site.xml b/spark/processing/3.0/py3/hadoop-config/core-site.xml
new file mode 100644
index 0000000..52db7b2
--- /dev/null
+++ b/spark/processing/3.0/py3/hadoop-config/core-site.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+    <property>
+        <name>fs.defaultFS</name>
+        <value>hdfs://nn_uri/</value>
+        <description>NameNode URI</description>
+    </property>
+    <property>
+        <name>fs.s3a.aws.credentials.provider</name>
+        <value>com.amazonaws.auth.DefaultAWSCredentialsProviderChain</value>
+        <description>AWS S3 credential provider</description>
+    </property>
+    <property>
+        <name>fs.s3.impl</name>
+        <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
+        <description>s3a filesystem implementation</description>
+    </property>
+    <property>
+        <name>fs.AbstractFileSystem.s3a.impl</name>
+        <value>org.apache.hadoop.fs.s3a.S3A</value>
+        <description>s3a filesystem implementation</description>
+    </property>
+</configuration>
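For quick local verification, the same s3a settings can also be passed to a SparkSession through the `spark.hadoop.*` prefix instead of core-site.xml. A minimal sketch for experimentation only; the bucket and key are placeholders, and the hadoop-aws/AWS SDK jars installed above are assumed to be on the classpath:

```python
from pyspark.sql import SparkSession

# Mirrors the core-site.xml properties above via the spark.hadoop.* prefix.
spark = (
    SparkSession.builder.appName("s3a-smoke-test")
    .config("spark.hadoop.fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    .config(
        "spark.hadoop.fs.s3a.aws.credentials.provider",
        "com.amazonaws.auth.DefaultAWSCredentialsProviderChain",
    )
    .getOrCreate()
)

# Placeholder S3 location; replace with real input data.
df = spark.read.json("s3a://my-bucket/spark/input/sales/data.jsonl")
df.show()
```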
diff --git a/spark/processing/3.0/py3/hadoop-config/hdfs-site.xml b/spark/processing/3.0/py3/hadoop-config/hdfs-site.xml
new file mode 100644
index 0000000..6ccfb8f
--- /dev/null
+++ b/spark/processing/3.0/py3/hadoop-config/hdfs-site.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+    <property>
+        <name>dfs.datanode.data.dir</name>
+        <value>file:///opt/amazon/hadoop/hdfs/datanode</value>
+        <description>Comma separated list of paths on the local filesystem of a DataNode where it should store its blocks.</description>
+    </property>
+    <property>
+        <name>dfs.namenode.name.dir</name>
+        <value>file:///opt/amazon/hadoop/hdfs/namenode</value>
+        <description>Path on the local filesystem where the NameNode stores the namespace and transaction logs persistently.</description>
+    </property>
+</configuration>
diff --git a/spark/processing/3.0/py3/hadoop-config/spark-defaults.conf b/spark/processing/3.0/py3/hadoop-config/spark-defaults.conf
new file mode 100644
index 0000000..c1f1c17
--- /dev/null
+++ b/spark/processing/3.0/py3/hadoop-config/spark-defaults.conf
@@ -0,0 +1,6 @@
+spark.driver.extraClassPath /usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/usr/share/aws/sagemaker-spark-sdk/lib/sagemaker-spark-sdk.jar:/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar
+spark.driver.extraLibraryPath /usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
+spark.executor.extraClassPath /usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/usr/share/aws/sagemaker-spark-sdk/lib/sagemaker-spark-sdk.jar:/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar
+spark.executor.extraLibraryPath /usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
+spark.driver.host=sd_host
+spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=2
diff --git a/spark/processing/3.0/py3/hadoop-config/spark-env.sh b/spark/processing/3.0/py3/hadoop-config/spark-env.sh
new file mode 100644
index 0000000..1b58aa1
--- /dev/null
+++ b/spark/processing/3.0/py3/hadoop-config/spark-env.sh
@@ -0,0 +1,3 @@
+# EMPTY FILE TO AVOID OVERRIDING ENV VARS
+# Specifically, without copying this empty file, SPARK_HISTORY_OPTS would be overridden,
+# spark.history.ui.port would default to 18082, and spark.eventLog.dir would default to the local fs
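`sd_host` above, like `nn_uri`, `rm_hostname`, and `nm_hostname` in the Hadoop XML files, is a placeholder that is meant to be filled in with real host addresses when the container starts. A rough illustration of that kind of substitution, not the container's actual bootstrap code:

```python
import socket

# Illustrative placeholder substitution; the real bootstrapper logic may differ.
def render_spark_defaults(path="/usr/lib/spark/conf/spark-defaults.conf"):
    host_ip = socket.gethostbyname(socket.gethostname())
    with open(path) as f:
        content = f.read()
    with open(path, "w") as f:
        f.write(content.replace("sd_host", host_ip))
```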
diff --git a/spark/processing/3.0/py3/hadoop-config/yarn-site.xml b/spark/processing/3.0/py3/hadoop-config/yarn-site.xml
new file mode 100644
index 0000000..3790582
--- /dev/null
+++ b/spark/processing/3.0/py3/hadoop-config/yarn-site.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+    <property>
+        <name>yarn.resourcemanager.hostname</name>
+        <value>rm_hostname</value>
+        <description>The hostname of the RM.</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.hostname</name>
+        <value>nm_hostname</value>
+        <description>The hostname of the NM.</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.webapp.address</name>
+        <value>nm_webapp_address</value>
+    </property>
+    <property>
+        <name>yarn.nodemanager.vmem-pmem-ratio</name>
+        <value>5</value>
+        <description>Ratio between virtual memory to physical memory.</description>
+    </property>
+    <property>
+        <name>yarn.resourcemanager.am.max-attempts</name>
+        <value>1</value>
+        <description>The maximum number of application attempts.</description>
+    </property>
+    <property>
+        <name>yarn.nodemanager.env-whitelist</name>
+        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,YARN_HOME,AWS_CONTAINER_CREDENTIALS_RELATIVE_URI</value>
+        <description>Environment variable whitelist</description>
+    </property>
+</configuration>
diff --git a/spark/processing/3.0/py3/nginx-config/default.conf b/spark/processing/3.0/py3/nginx-config/default.conf
new file mode 100644
index 0000000..a8a50a5
--- /dev/null
+++ b/spark/processing/3.0/py3/nginx-config/default.conf
@@ -0,0 +1,17 @@
+server {
+    listen 15050;
+    server_name localhost;
+    client_header_buffer_size 128k;
+    large_client_header_buffers 4 128k;
+
+    location ~ ^/history/(.*)/(.*)/jobs/$ {
+        proxy_pass http://localhost:18080/history/$1/jobs/;
+        proxy_redirect http://localhost:18080/history/$1/jobs/ $domain_name/proxy/15050/history/$1/jobs/;
+        expires off;
+    }
+
+    location / {
+        proxy_pass http://localhost:18080;
+        expires off;
+    }
+}
\ No newline at end of file
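The proxy above fronts the Spark history server (listening on 18080) on port 15050. A small sketch of how a test could check that the history server behind it is reachable, using the standard Spark monitoring REST API; the base URL and timeout are arbitrary:

```python
import requests

# The history server listens on 18080 inside the container; nginx proxies it on 15050.
def history_server_is_up(base_url="http://localhost:18080"):
    try:
        resp = requests.get(f"{base_url}/api/v1/applications", timeout=5)
        return resp.status_code == 200
    except requests.ConnectionError:
        return False
```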
diff --git a/spark/processing/3.0/py3/nginx-config/nginx.conf b/spark/processing/3.0/py3/nginx-config/nginx.conf
new file mode 100644
index 0000000..1e3a51c
--- /dev/null
+++ b/spark/processing/3.0/py3/nginx-config/nginx.conf
@@ -0,0 +1,66 @@
+# For more information on configuration, see:
+#   * Official English Documentation: http://nginx.org/en/docs/
+#   * Official Russian Documentation: http://nginx.org/ru/docs/
+
+user nginx;
+worker_processes auto;
+error_log /var/log/nginx/error.log;
+pid /run/nginx.pid;
+
+# Load dynamic modules. See /usr/share/doc/nginx/README.dynamic.
+include /usr/share/nginx/modules/*.conf;
+
+events {
+    worker_connections 1024;
+}
+
+http {
+    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
+                    '$status $body_bytes_sent "$http_referer" '
+                    '"$http_user_agent" "$http_x_forwarded_for"';
+
+    access_log /var/log/nginx/access.log main;
+
+    sendfile on;
+    tcp_nopush on;
+    tcp_nodelay on;
+    keepalive_timeout 65;
+    types_hash_max_size 2048;
+
+    include /etc/nginx/mime.types;
+    default_type application/octet-stream;
+
+    # Load modular configuration files from the /etc/nginx/conf.d directory.
+    # See http://nginx.org/en/docs/ngx_core_module.html#include
+    # for more information.
+    include /etc/nginx/conf.d/*.conf;
+
+    server {
+        listen 80 default_server;
+        listen [::]:80 default_server;
+        server_name _;
+        root /usr/share/nginx/html;
+
+        # Load configuration files for the default server block.
+        include /etc/nginx/default.d/*.conf;
+
+        location /proxy/15050 {
+            proxy_pass http://localhost:15050/;
+        }
+
+        location ~ ^/proxy/15050/(.*) {
+            proxy_pass http://localhost:15050/$1;
+        }
+
+        location / {
+        }
+
+        error_page 404 /404.html;
+        location = /40x.html {
+        }
+
+        error_page 500 502 503 504 /50x.html;
+        location = /50x.html {
+        }
+    }
+}
\ No newline at end of file
diff --git a/spark/processing/3.0/py3/smspark-0.1-py3-none-any.whl b/spark/processing/3.0/py3/smspark-0.1-py3-none-any.whl
new file mode 100644
index 0000000..3d4c3d6
Binary files /dev/null and b/spark/processing/3.0/py3/smspark-0.1-py3-none-any.whl differ
diff --git a/spark/processing/3.0/py3/yum/emr-apps.repo b/spark/processing/3.0/py3/yum/emr-apps.repo
new file mode 100644
index 0000000..6466eba
--- /dev/null
+++ b/spark/processing/3.0/py3/yum/emr-apps.repo
@@ -0,0 +1,7 @@
+[emr-apps]
+name = EMR Application Repository
+gpgkey = https://s3-REGION.amazonaws.com/repo.REGION.emr.amazonaws.com/apps-repository/emr-6.1.0/72a9ec2e-9bf6-4d7d-9244-86a0ab1e50d6/repoPublicKey.txt
+enabled = 1
+baseurl = https://s3-REGION.amazonaws.com/repo.REGION.emr.amazonaws.com/apps-repository/emr-6.1.0/72a9ec2e-9bf6-4d7d-9244-86a0ab1e50d6
+priority = 5
+gpgcheck = 0
diff --git a/src/smspark/bootstrapper.py b/src/smspark/bootstrapper.py
index ff1ff65..0369662 100644
--- a/src/smspark/bootstrapper.py
+++ b/src/smspark/bootstrapper.py
@@ -19,12 +19,13 @@
 import shutil
 import socket
 import subprocess
-from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
+from typing import Any, Dict, List, Sequence, Tuple, Union
 
 import psutil
 import requests
 from smspark.config import Configuration
 from smspark.defaults import default_resource_config
+from smspark.errors import AlgorithmError
 from smspark.waiter import Waiter
 
 
@@ -36,11 +37,18 @@ class Bootstrapper:
     HADOOP_CONFIG_PATH = "/opt/hadoop-config/"
     HADOOP_PATH = "/usr/lib/hadoop"
     SPARK_PATH = "/usr/lib/spark"
+    HIVE_PATH = "/usr/lib/hive"
     PROCESSING_CONF_INPUT_PATH = "/opt/ml/processing/input/conf/configuration.json"
     PROCESSING_JOB_CONFIG_PATH = "/opt/ml/config/processingjobconfig.json"
     INSTANCE_TYPE_INFO_PATH = "/opt/aws-config/ec2-instance-type-info.json"
     EMR_CONFIGURE_APPS_URL = "https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html"
+    JAR_DEST = SPARK_PATH + "/jars"
+    # jets3t-0.9.0.jar is used by hadoop 2.8.5 (https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common)
+    # and 2.10.0 (https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common/2.10.0). However, it's not
+    # needed in 3.2.1 (https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common/3.2.1)
+    # TODO: use a map with spark version as the key to maintain the optional jars
+    OPTIONAL_JARS = {"jets3t-0.9.0.jar": HADOOP_PATH + "/lib"}
 
     def __init__(self, resource_config: Dict[str, Any] = default_resource_config):
         logging.basicConfig(level=logging.INFO)
@@ -63,28 +71,43 @@ def bootstrap_history_server(self) -> None:
 
     def copy_aws_jars(self) -> None:
         self.logger.info("copying aws jars")
-        jar_dest = Bootstrapper.SPARK_PATH + "/jars"
         for f in glob.glob("/usr/share/aws/aws-java-sdk/*.jar"):
-            shutil.copyfile(f, os.path.join(jar_dest, os.path.basename(f)))
-        hadoop_aws_jar = "hadoop-aws-2.8.5-amzn-6.jar"
-        jets3t_jar = "jets3t-0.9.0.jar"
-        shutil.copyfile(
-            os.path.join(Bootstrapper.HADOOP_PATH, hadoop_aws_jar), os.path.join(jar_dest, hadoop_aws_jar),
-        )
-        # this jar required for using s3a client
+            shutil.copyfile(f, os.path.join(self.JAR_DEST, os.path.basename(f)))
+        hadoop_aws_jar = self._get_hadoop_jar()
         shutil.copyfile(
-            os.path.join(Bootstrapper.HADOOP_PATH + "/lib", jets3t_jar), os.path.join(jar_dest, jets3t_jar),
+            os.path.join(Bootstrapper.HADOOP_PATH, hadoop_aws_jar), os.path.join(self.JAR_DEST, hadoop_aws_jar)
         )
+
+        self._copy_optional_jars()
         # copy hmclient (glue data catalog hive metastore client) jars to classpath:
         # https://github.com/awslabs/aws-glue-data-catalog-client-for-apache-hive-metastore
         for f in glob.glob("/usr/share/aws/hmclient/lib/*.jar"):
-            shutil.copyfile(f, os.path.join(jar_dest, os.path.basename(f)))
+            shutil.copyfile(f, os.path.join(self.JAR_DEST, os.path.basename(f)))
+
+    # TODO: use glob.glob
+    def _get_hadoop_jar(self) -> str:
+        for file_name in os.listdir(Bootstrapper.HADOOP_PATH):
+            if file_name.startswith("hadoop-aws") and file_name.endswith(".jar"):
+                self.logger.info(f"Found hadoop jar {file_name}")
+                return file_name
+
+        raise AlgorithmError("Error finding hadoop jar", caused_by=FileNotFoundError())
+
+    def _copy_optional_jars(self) -> None:
+        for jar, jar_path in self.OPTIONAL_JARS.items():
+            if os.path.isfile(os.path.join(jar_path, jar)):
+                self.logger.info(f"Copying optional jar {jar} from {jar_path} to {self.JAR_DEST}")
+                shutil.copyfile(
+                    os.path.join(jar_path, jar), os.path.join(self.JAR_DEST, jar),
+                )
+            else:
+                self.logger.info(f"Optional jar {jar} in {jar_path} does not exist")
 
     def copy_cluster_config(self) -> None:
         self.logger.info("copying cluster config")
 
         def copy_config(src: str, dst: str) -> None:
-            self.logger.info("copying {} to {}".format(src, dst))
+            self.logger.info(f"copying {src} to {dst}")
             shutil.copyfile(src, dst)
 
         copy_config(
@@ -395,7 +418,7 @@ def get_yarn_spark_resource_config(
             {
                 "spark.driver.memory": f"{driver_mem_mb}m",
                 "spark.driver.memoryOverhead": f"{driver_mem_ovr_mb}m",
-                "spark.driver.defaultJavaOptions": f"{driver_java_opts}m",
+                "spark.driver.defaultJavaOptions": f"{driver_java_opts}",
                 "spark.executor.memory": f"{executor_mem_mb}m",
                 "spark.executor.memoryOverhead": f"{executor_mem_ovr_mb}m",
                 "spark.executor.cores": f"{executor_cores}",
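The `# TODO: use glob.glob` left on `_get_hadoop_jar` could be addressed roughly as follows; a sketch only, keeping the same AlgorithmError behavior:

```python
import glob
import os

# Sketch of _get_hadoop_jar based on glob rather than os.listdir (per the TODO above).
# Method body only; assumes the Bootstrapper class context shown in the diff.
def _get_hadoop_jar(self) -> str:
    matches = glob.glob(os.path.join(Bootstrapper.HADOOP_PATH, "hadoop-aws*.jar"))
    if not matches:
        raise AlgorithmError("Error finding hadoop jar", caused_by=FileNotFoundError())
    jar_name = os.path.basename(matches[0])
    self.logger.info(f"Found hadoop jar {jar_name}")
    return jar_name
```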
"--input {}".format(input_data) output = "--output {}".format(output_data) - host_jars_dir = "./test/resources/code/scala/hello-scala-spark/lib_managed/jars/org.json4s/json4s-native_2.11" + host_jars_dir = "./test/resources/code/scala/hello-scala-spark/lib_managed/jars/org.json4s/json4s-native_2.12" container_jars_dir = "/opt/ml/processing/input/jars" jars_mount = f"{host_jars_dir}:{container_jars_dir}" jars_arg = f"--jars {container_jars_dir}" class_arg = "--class com.amazonaws.sagemaker.spark.test.HelloScalaSparkApp" - app_jar = "/opt/ml/processing/input/code/scala/hello-scala-spark/target/scala-2.11/hello-scala-spark_2.11-1.0.jar" + app_jar = "/opt/ml/processing/input/code/scala/hello-scala-spark/target/scala-2.12/hello-scala-spark_2.12-1.0.jar" docker_compose_cmd = ( f"JARS_MOUNT={jars_mount} " f"CMD='{jars_arg} {class_arg} {verbose_opt} {app_jar} {input} {output}' " diff --git a/test/integration/sagemaker/test_spark.py b/test/integration/sagemaker/test_spark.py index 5f1c772..be25bd9 100644 --- a/test/integration/sagemaker/test_spark.py +++ b/test/integration/sagemaker/test_spark.py @@ -210,6 +210,69 @@ def test_sagemaker_pyspark_sse_s3(role, image_uri, sagemaker_session, region, sa assert len(output_contents) != 0 +def test_sagemaker_pyspark_sse_kms_s3(role, image_uri, sagemaker_session, region, sagemaker_client, account_id): + spark = PySparkProcessor( + base_job_name="sm-spark-py", + image_uri=image_uri, + role=role, + instance_count=2, + instance_type="ml.c5.xlarge", + max_runtime_in_seconds=1200, + sagemaker_session=sagemaker_session, + ) + + # This test expected AWS managed s3 kms key to be present. The key will be in + # KMS > AWS managed keys > aws/s3 + kms_key_id = None + kms_client = sagemaker_session.boto_session.client("kms", region_name=region) + for alias in kms_client.list_aliases()["Aliases"]: + if "s3" in alias["AliasName"]: + kms_key_id = alias["TargetKeyId"] + + if not kms_key_id: + raise ValueError("AWS managed s3 kms key(alias: aws/s3) does not exist") + + bucket = sagemaker_session.default_bucket() + timestamp = datetime.now().isoformat() + input_data_key = f"spark/input/sales/{timestamp}/data.jsonl" + input_data_uri = f"s3://{bucket}/{input_data_key}" + output_data_uri_prefix = f"spark/output/sales/{timestamp}" + output_data_uri = f"s3://{bucket}/{output_data_uri_prefix}" + s3_client = sagemaker_session.boto_session.client("s3", region_name=region) + with open("test/resources/data/files/data.jsonl") as data: + body = data.read() + s3_client.put_object( + Body=body, Bucket=bucket, Key=input_data_key, ServerSideEncryption="aws:kms", SSEKMSKeyId=kms_key_id + ) + + spark.run( + submit_app="test/resources/code/python/hello_py_spark/hello_py_spark_app.py", + submit_py_files=["test/resources/code/python/hello_py_spark/hello_py_spark_udfs.py"], + arguments=["--input", input_data_uri, "--output", output_data_uri], + configuration={ + "Classification": "core-site", + "Properties": { + "fs.s3a.server-side-encryption-algorithm": "SSE-KMS", + "fs.s3a.server-side-encryption.key": f"arn:aws:kms:{region}:{account_id}:key/{kms_key_id}", + }, + }, + ) + processing_job = spark.latest_job + waiter = sagemaker_client.get_waiter("processing_job_completed_or_stopped") + waiter.wait( + ProcessingJobName=processing_job.job_name, + # poll every 15 seconds. timeout after 15 minutes. 
+        WaiterConfig={"Delay": 15, "MaxAttempts": 60},
+    )
+
+    s3_objects = s3_client.list_objects(Bucket=bucket, Prefix=output_data_uri_prefix)["Contents"]
+    assert len(s3_objects) != 0
+    for s3_object in s3_objects:
+        object_metadata = s3_client.get_object(Bucket=bucket, Key=s3_object["Key"])
+        assert object_metadata["ServerSideEncryption"] == "aws:kms"
+        assert object_metadata["SSEKMSKeyId"] == f"arn:aws:kms:{region}:{account_id}:key/{kms_key_id}"
+
+
 def test_sagemaker_scala_jar_multinode(role, image_uri, configuration, sagemaker_session, sagemaker_client):
     """Test SparkJarProcessor using Scala application jar with external runtime dependency jars staged by SDK"""
     spark = SparkJarProcessor(
@@ -233,10 +296,10 @@ def test_sagemaker_scala_jar_multinode(role, image_uri, configuration, sagemaker
     scala_project_dir = "test/resources/code/scala/hello-scala-spark"
 
     spark.run(
-        submit_app="{}/target/scala-2.11/hello-scala-spark_2.11-1.0.jar".format(scala_project_dir),
+        submit_app="{}/target/scala-2.12/hello-scala-spark_2.12-1.0.jar".format(scala_project_dir),
         submit_class="com.amazonaws.sagemaker.spark.test.HelloScalaSparkApp",
         submit_jars=[
-            "{}/lib_managed/jars/org.json4s/json4s-native_2.11/json4s-native_2.11-3.6.9.jar".format(scala_project_dir)
+            "{}/lib_managed/jars/org.json4s/json4s-native_2.12/json4s-native_2.12-3.6.9.jar".format(scala_project_dir)
         ],
         arguments=["--input", input_data_uri, "--output", output_data_uri],
         configuration=configuration,
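The alias scan in `test_sagemaker_pyspark_sse_kms_s3` matches any alias containing "s3". A slightly stricter standalone sketch that targets the AWS managed `aws/s3` alias directly; illustrative only, with no pagination handling:

```python
import boto3

# Resolve the AWS managed S3 key (alias/aws/s3) to its key ARN; illustrative only.
def get_s3_managed_kms_key_arn(region):
    kms = boto3.client("kms", region_name=region)
    key_id = None
    for alias in kms.list_aliases()["Aliases"]:
        if alias["AliasName"] == "alias/aws/s3":
            key_id = alias.get("TargetKeyId")
            break
    if not key_id:
        raise ValueError("AWS managed s3 kms key (alias: aws/s3) does not exist")
    return kms.describe_key(KeyId=key_id)["KeyMetadata"]["Arn"]
```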
f"{exp_executor_cores}",