Skip to content

Commit

Permalink
3 test workflow (#4)
Browse files Browse the repository at this point in the history
* build: support docker builds for both cuda and cpu-only pytorch

* build: add python pipeline to make deps command

* refactor: python item parser to support arbitrary files

* feat: add script to download model files

* refactor: clean up old requirements

* ci: add build and test workflow

* fix: sync python lock file

* ci: skip test environment tear down

* ci: fix docker commands for test setup/teardown to work in a headless environment

* fix: run docker python tasks inside venv to prevent lambda base image dependency conflicts

* test: add small delay to test environment setup for CI

* test: increase delay for test env setup for CI

* fix: database migration run scripts argument handling

* test: disable approximate vector search in testing only

* test: display full test logs

* test: remove variable cursor from paginated item recommendations test
  • Loading branch information
ae9is committed May 29, 2024
1 parent 3df418c commit e063f7e
Show file tree
Hide file tree
Showing 21 changed files with 925 additions and 621 deletions.
3 changes: 3 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ MODEL_API_URL=http://localhost:5000

PYTHON_ENV=development
CUDA_VISIBLE_DEVICES=0
TORCH_VERSION=cpu

MODEL_DIR=data/models/blair-roberta-base
MODEL_NAME=hyp1231/blair-roberta-base

ITEM_META_FILE=data/import/meta_Musical_Instruments.jsonl
REVIEWS_FILE=data/import/Musical_Instruments.jsonl
88 changes: 88 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
name: main

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]
  # Manual run from GitHub UI
  workflow_dispatch:
  # Wednesdays at 0400
  # schedule:
  #   - cron: '0 4 * * 3'

jobs:
  build-and-test:
    timeout-minutes: 10
    runs-on: ubuntu-latest
    strategy:
      matrix:
        java-version: [17]
    env:
      ENV_FILE: .env
      # Fallback values only — expected to be overwritten by .env below
      TORCH_VERSION: cpu
      MODEL_NAME: hyp1231/blair-roberta-base
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Export every KEY=VALUE pair from .env into the job environment
      - name: Load environment variables
        run: cat ${{ env.ENV_FILE }} >> $GITHUB_ENV

      # v4 for consistency with the other actions here; v2 is deprecated
      # (runs on an end-of-life Node version)
      - name: Use Java ${{ matrix.java-version }}
        uses: actions/setup-java@v4
        with:
          java-version: ${{ matrix.java-version }}
          cache: 'gradle'
          distribution: 'liberica'

      - name: Setup pdm
        uses: pdm-project/setup-pdm@v4
      - name: Restore cached venv
        id: cache-venv-restore
        uses: actions/cache/restore@v4
        with:
          path: |
            .venv
          key: venv-${{ runner.os }}-${{ hashFiles('pdm.lock') }}
          restore-keys: |
            venv-${{ runner.os }}-
            venv-
      # Always run install: a restore-keys partial hit leaves the venv stale
      - name: Install dependencies
        run: |
          pdm install
          pdm install-${{ env.TORCH_VERSION }}
      # Skip the (no-op, warning-emitting) save when the exact key was restored
      - name: Save venv to cache
        id: cache-venv-save
        if: steps.cache-venv-restore.outputs.cache-hit != 'true'
        uses: actions/cache/save@v4
        with:
          path: |
            .venv
          key: ${{ steps.cache-venv-restore.outputs.cache-primary-key }}

      - name: Restore cached model files
        id: cache-model-restore
        uses: actions/cache/restore@v4
        with:
          path: |
            data/models
          key: models-${{ env.MODEL_NAME }}
      # No restore-keys above, so cache-hit == 'true' means an exact match
      # and the download can be skipped entirely
      - name: Download model files
        if: steps.cache-model-restore.outputs.cache-hit != 'true'
        run: make get-model
      - name: Save model files to cache
        id: cache-model-save
        if: steps.cache-model-restore.outputs.cache-hit != 'true'
        uses: actions/cache/save@v4
        with:
          path: |
            data/models
          key: ${{ steps.cache-model-restore.outputs.cache-primary-key }}

      - name: Install
        run: make deps

      - name: Build
        run: make docker-build

      - name: Test
        run: make test-ci
11 changes: 7 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
FROM public.ecr.aws/lambda/python:3.12 as build
# No python 3.12 yet in AL2023, so we use the Lambda team's image for now.
# ref: https://github.com/amazonlinux/amazon-linux-2023/issues/483
# However, note to actually run on Lambda would need to swap out GPU torch for CPU-only in pyproject.toml.
# To actually run on Lambda or other platforms without CUDA, swap out GPU torch for CPU-only.
#FROM public.ecr.aws/amazonlinux/amazonlinux:2023 as build

ENV TASK_ROOT=/var/task

# Non-root user and group (only with AL2023 not Lambda base images)
#RUN dnf install -y shadow-utils
#RUN groupadd -g 888 python && useradd -r -u 888 -g python python
#ENV TASK_ROOT=/var/task
#RUN mkdir -p "${TASK_ROOT}"
#RUN chown python:python "${TASK_ROOT}"
#WORKDIR "${TASK_ROOT}"
Expand All @@ -18,6 +19,7 @@ FROM public.ecr.aws/lambda/python:3.12 as build
#USER 888
RUN python3.12 -m venv "${TASK_ROOT}"
ENV PATH="${TASK_ROOT}/bin:${PATH}"
RUN source "${TASK_ROOT}/bin/activate"
RUN python3.12 -m ensurepip
RUN python3.12 -m pip install --no-cache-dir --disable-pip-version-check -U gunicorn uvicorn[standard]

Expand All @@ -27,8 +29,9 @@ ARG MODEL_DIR=${MODEL_DIR:-data/models/blair-roberta-base}
COPY --chown=python:python "${MODEL_DIR}"/* ./amazonrev/model/

# Project dependencies
COPY --chown=python:python requirements.prod.txt ./
RUN python3.12 -m pip install --no-cache-dir --disable-pip-version-check -U -r requirements.prod.txt
ARG TORCH_VERSION=${TORCH_VERSION:-cpu}
COPY --chown=python:python requirements.prod.${TORCH_VERSION}.txt ./
RUN python3.12 -m pip install --no-cache-dir --disable-pip-version-check -U -r requirements.prod.${TORCH_VERSION}.txt

# Copy project source
COPY --chown=python:python src/main/python/amazonrev/*.py ./amazonrev/
Expand Down
25 changes: 20 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,28 @@ TESTDB_PROCESS_ID = $(shell docker ps | grep reviews-pg-test | awk '{print $$1}'
clean:
${GRADLE} clean

# A small delay between bringing up docker compose services and executing the database scripts
# is needed for CI workflow. (Docker compose exits before the containers are fully up.)
test-env-up:
docker compose -f docker-compose-test.yml up -d
sleep 5
bash docker-db-up.sh reviews-pg-test 1 1

test-env-down:
bash docker-db-down.sh reviews-pg-test 1 1
docker compose -f docker-compose-test.yml down

test-py:
test-py: test-env-up
MODEL_API_URL=http://localhost:5001 pdm test

test-java:
test-java: test-env-up
SPRING_DATASOURCE_URL=jdbc:postgresql://localhost:5433/reviews MODEL_API_URL=http://localhost:5001 ${GRADLE} test --rerun-tasks

test: test-env-up test-java test-py test-env-down
test-ci: test-java test-py

# In make v4.4+ can just replace this with .WAIT
test: test-env-up test-ci WAIT test-env-down
WAIT: test-java test-py

build:
${GRADLE} build
Expand All @@ -32,17 +39,25 @@ run:

deps:
${GRADLE} dependencies
pdm install

get-model:
pdm get-model

parse:
${GRADLE} runParser

embeddings:
pdm parser

docker-build:
docker-build: docker-build-java docker-build-py

docker-build-java:
${GRADLE} bootBuildImage --imageName=${NAME}/graphql-api

docker-build-py:
printf "PYTHON_ENV=${PYTHON_ENV}\nMODEL_DIR=./model\n" > .env.dockerfile
docker build -t ${NAME}/model-api --build-arg MODEL_DIR=${MODEL_DIR} -f Dockerfile .
docker build -t ${NAME}/model-api --build-arg MODEL_DIR=${MODEL_DIR} --build-arg TORCH_VERSION=${TORCH_VERSION} -f Dockerfile .

docker-login:
aws ecr get-login-password --region ${AWS_REGION} --profile ${AWS_PROFILE} | docker login --username AWS --password-stdin ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ Open http://localhost:8080/graphiql?path=/graphql

<img src="img/screenshot-reviews.png" width=400 />

### GPU-accelerated containers

To run the Python API inside a Docker container with CUDA enabled in PyTorch, the container host must first set up Docker with the NVIDIA Container Toolkit.

See: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/index.html

## Test

Build the Python API Docker image first using:
Expand Down
5 changes: 5 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,8 @@ dependencies {
tasks.named('test') {
useJUnitPlatform()
}

test {
// ref: https://docs.gradle.org/current/dsl/org.gradle.api.tasks.testing.logging.TestLogging.html
testLogging.showStandardStreams = true
}
2 changes: 2 additions & 0 deletions data/models/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ See:
- [BLaIR-roberta-large](https://huggingface.co/hyp1231/blair-roberta-large)

Set environment variable `MODEL_DIR` appropriately in `.env`.

Note: we bypass the default Hugging Face cache directory (`~/.cache/huggingface/hub`) to make it easier to bundle the model files into the Python API Docker image.
6 changes: 3 additions & 3 deletions docker-db-down.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
# Export and drop database tables.
# Executes against a running docker postgres container.
container="${1:-postgres}"
with_test_data="${2:-0}"
force="${3:-0}"
with_test_data=$2
force=$3
DB_PROCESS_ID=`docker ps | grep "${container}" | awk '{print $1}' | head -n 1`

if [ "${force}" ]; then
Expand All @@ -30,7 +30,7 @@ started=`date`
for scriptpath in `ls -r migrations/*.down.sql`; do
script=`basename "${scriptpath}"`
echo "Running script: ${script} at `date`..."
docker exec --user postgres -it ${DB_PROCESS_ID} psql -d ${POSTGRES_DB} -f "/export/${script}"
docker exec --user postgres ${DB_PROCESS_ID} psql -d ${POSTGRES_DB} -f "/export/${script}"
done
stopped=`date`
echo "Started at: ${started}"
Expand Down
10 changes: 7 additions & 3 deletions docker-db-up.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
# Executes against a running docker postgres container.
# Make sure to generate loadable data files first by running `make parse`.
container="${1:-postgres}"
with_test_data="${2:-0}"
force="${3:-0}"
with_test_data=$2
force=$3
DB_PROCESS_ID=`docker ps | grep "${container}" | awk '{print $1}' | head -n 1`

if [ "${force}" ]; then
Expand All @@ -28,6 +28,10 @@ fi

# Make sure to fix permissions on your mounted volume if docker is run as root
cp migrations/*.sql "${export_dir}" || { echo "Cannot copy new migrations to docker mount point, quitting!"; exit 1; }
if [ "${with_test_data}" ]; then
# Handle test-only database creation differences
rename --force 's/.test.sql/.sql/' "${export_dir}"/*.test.sql
fi

if [ "${force}" ]; then
wipe=1
Expand All @@ -44,7 +48,7 @@ started=`date`
for scriptpath in migrations/*.up.sql; do
script=`basename "${scriptpath}"`
echo "Running script: ${script} at `date`..."
docker exec --user postgres -it ${DB_PROCESS_ID} psql -d ${POSTGRES_DB} -f "/export/${script}"
docker exec --user postgres ${DB_PROCESS_ID} psql -d ${POSTGRES_DB} -f "/export/${script}"
done
stopped=`date`
echo "Started at: ${started}"
Expand Down
13 changes: 13 additions & 0 deletions migrations/016_create_item_embed.up.test.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- Test-only variant of the item_embed migration: the *.test.sql suffix is
-- renamed over the regular *.sql file by docker-db-up.sh during test setup.
-- Uses postgres extension pgvector: https://github.com/pgvector/pgvector
-- Hidden size 768 pre-defined when training model: models/blair-roberta-base/config.json
CREATE EXTENSION IF NOT EXISTS vector;
CREATE TABLE IF NOT EXISTS item_embed (
  item_id BIGINT GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
  title vector(768)
);
-- Bulk-load the exported embeddings. Delimiter/quote are the non-printing
-- ASCII record/unit separator bytes (\x1e/\x1f) — presumably chosen so they
-- cannot appear inside title text; confirm against the exporter. HEADER
-- skips the first row of the file.
COPY item_embed FROM '/export/item_embed.csv' WITH CSV DELIMITER E'\x1e' QUOTE E'\x1f' NULL AS '' HEADER;
-- Disable approximate search for testing:
--CREATE INDEX ON item_embed USING hnsw (
--  title vector_ip_ops
--);
--ANALYZE item_embed;
2 changes: 2 additions & 0 deletions migrations/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@ The export+drop and create+load steps have been combined here for convenience, a
Foreign key constraints are used, along with bigint primary keys (instead of uuid/varchar). This could be changed to support sharding or to ease complex migrations.

If you prefer, you may use the migrate tool instead of the shell scripts: https://github.com/golang-migrate/migrate

In [docker-db-up.sh](/docker-db-up.sh), *.test.sql versions of files replace their *.sql counterparts for testing setup only.
Loading

0 comments on commit e063f7e

Please sign in to comment.