diff --git a/dev/Dockerfile b/dev/Dockerfile
index 702dff3492..3470e5b991 100644
--- a/dev/Dockerfile
+++ b/dev/Dockerfile
@@ -42,9 +42,25 @@ ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_2.12
 ENV ICEBERG_VERSION=1.9.1
 ENV PYICEBERG_VERSION=0.10.0
 
-RUN curl --retry 5 -s -C - https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \
- && tar xzf spark-${SPARK_VERSION}-bin-hadoop3.tgz --directory /opt/spark --strip-components 1 \
- && rm -rf spark-${SPARK_VERSION}-bin-hadoop3.tgz
+# Try the primary Apache mirror (downloads.apache.org) first, then fall back to the archive.
+# Success is tracked with an explicit flag rather than by testing for the file: "-C -" keeps
+# partial output, so a file left by a failed attempt must not be mistaken for a finished download.
+RUN set -eux; \
+    FILE=spark-${SPARK_VERSION}-bin-hadoop3.tgz; \
+    URLS="https://downloads.apache.org/spark/spark-${SPARK_VERSION}/${FILE} https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${FILE}"; \
+    downloaded=0; \
+    for url in $URLS; do \
+        echo "Attempting download: $url"; \
+        if curl --retry 3 --retry-delay 5 -fsSL -C - "$url" -o "$FILE"; then \
+            echo "Downloaded from: $url"; \
+            downloaded=1; \
+            break; \
+        fi; \
+        echo "Failed to download from: $url"; \
+    done; \
+    if [ "$downloaded" -ne 1 ]; then echo "Failed to download Spark from all mirrors" >&2; exit 1; fi; \
+    tar xzf "$FILE" --directory /opt/spark --strip-components 1; \
+    rm -f "$FILE"
 
 # Download iceberg spark runtime
 RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar \