diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 2c39ae8b..51a6158d 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -19,6 +19,12 @@ jobs:
         uses: actions/setup-python@v1
         with:
           python-version: ${{ matrix.python }}
+      - uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ hashFiles('**/requirements-test.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
@@ -54,24 +60,37 @@ jobs:
         uses: actions/setup-python@v1
         with:
           python-version: ${{ matrix.python }}
+      - name: Cache pip + Spark
+        id: cache-spark
+        uses: actions/cache@v3
+        with:
+          path: |
+            /home/runner/work/spark/
+            ~/.cache/pip
+          key: ${{ runner.os }}-spark-${{ matrix.SPARK_VERSION }}-hadoop${{ matrix.HADOOP_VERSION }}-java${{ matrix.JAVA_VERSION }}-${{ hashFiles('**/requirements-test.txt') }}
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install -e .
           pip install -r requirements-test.txt
+      - name: Download spark
+        if: steps.cache-spark.outputs.cache-hit != 'true'
+        env:
+          BUILD_DIR: "/home/runner/work/"
+          SPARK_VERSION: "${{ matrix.SPARK_VERSION }}"
+          HADOOP_VERSION: "${{ matrix.HADOOP_VERSION }}"
+        run: |
+          curl https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz --output ${BUILD_DIR}spark.tgz
       - name: Install Spark
         env:
-          BUILD_DIR: "/home/runner/work/" #${{ github.workspace }}
-          JAVA_HOME: "/usr/lib/jvm/java-${{ matrix.JAVA_VERSION }}-openjdk-amd64"
+          BUILD_DIR: "/home/runner/work/"
           SPARK_VERSION: "${{ matrix.SPARK_VERSION }}"
           HADOOP_VERSION: "${{ matrix.HADOOP_VERSION }}"
-          SPARK_HOME: "/home/runner/work/spark/" #${{ github.workspace }}/spark/
-          SPARK_LOCAL_IP: "localhost"
+          SPARK_HOME: "/home/runner/work/spark/"
         run: |
           sudo apt-get update
           sudo apt-get -y install openjdk-${{ matrix.JAVA_VERSION }}-jdk
-          curl https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz --output ${BUILD_DIR}/spark.tgz
-          tar -xvzf ${BUILD_DIR}/spark.tgz && mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} ${SPARK_HOME}
+          tar -xvzf ${BUILD_DIR}spark.tgz && mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} ${SPARK_HOME}
           pip install pytest-spark>=0.6.0 pyarrow>=0.8.0 pyspark==${SPARK_VERSION}
       - name: Test with pytest (spark-specific)
         env:
@@ -79,7 +98,7 @@ jobs:
           JAVA_HOME: "/usr/lib/jvm/java-${{ matrix.JAVA_VERSION }}-openjdk-amd64"
           SPARK_VERSION: "${{ matrix.SPARK_VERSION }}"
           HADOOP_VERSION: "${{ matrix.HADOOP_VERSION }}"
-          SPARK_HOME: "/home/runner/work/spark/" #${{ github.workspace }}/spark/
+          SPARK_HOME: "/home/runner/work/spark/"
           SPARK_LOCAL_IP: "localhost"
         run: |
           pytest -m spark