-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* build: support docker builds for both cuda and cpu-only pytorch * build: add python pipeline to make deps command * refactor: python item parser to support arbitrary files * feat: add script to download model files * refactor: clean up old requirements * ci: add build and test workflow * fix: sync python lock file * ci: skip test environment tear down * ci: fix docker commands for test setup/teardown to work in a headless environment * fix: run docker python tasks inside venv to prevent lambda base image dependency conflicts * test: add small delay to test environment setup for CI * test: increase delay for test env setup for CI * fix: database migration run scripts argument handling * test: disable approximate vector search in testing only * test: display full test logs * test: remove variable cursor from paginated item recommendations test
- Loading branch information
Showing
21 changed files
with
925 additions
and
621 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
# CI workflow: builds the docker images and runs the test suite.
# Runs on pushes and pull requests targeting `main`, or manually from the UI.
name: main

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]
  # Manual run from GitHub UI
  workflow_dispatch:
  # Wednesdays at 0400
  # schedule:
  #   - cron: '0 4 * * 3'

jobs:
  build-and-test:
    timeout-minutes: 10
    runs-on: ubuntu-latest
    strategy:
      matrix:
        java-version: [17]
    env:
      ENV_FILE: .env
      # Defaults only: the "Load environment variables" step appends the
      # contents of ENV_FILE to $GITHUB_ENV, which should overwrite these.
      TORCH_VERSION: cpu
      MODEL_NAME: hyp1231/blair-roberta-base
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Makes every line of the env file available to the steps below.
      # NOTE(review): $GITHUB_ENV expects KEY=VALUE lines -- confirm the env
      # file contains no comment or blank lines that would be rejected.
      - name: Load environment variables
        run: cat "${{ env.ENV_FILE }}" >> "$GITHUB_ENV"

      # setup-java pinned to v4 to match the other v4 actions in this file
      # (v2 is a stale major version).
      - name: Use Java ${{ matrix.java-version }}
        uses: actions/setup-java@v4
        with:
          java-version: ${{ matrix.java-version }}
          cache: 'gradle'
          distribution: 'liberica'

      - name: Setup pdm
        uses: pdm-project/setup-pdm@v4
      # The venv cache is keyed on the lock file hash; restore-keys allow a
      # stale venv to be restored as a starting point when the lock changed.
      - name: Restore cached venv
        id: cache-venv-restore
        uses: actions/cache/restore@v4
        with:
          path: |
            .venv
          key: venv-${{ runner.os }}-${{ hashFiles('pdm.lock') }}
          restore-keys: |
            venv-${{ runner.os }}-
            venv-
      # `install-<TORCH_VERSION>` is presumably a pdm script defined in the
      # project (e.g. install-cpu) -- verify against pyproject.toml.
      - name: Install dependencies
        run: |
          pdm install
          pdm install-${{ env.TORCH_VERSION }}
      # Skip the save on an exact key hit: that cache entry already exists and
      # cannot be overwritten. A restore-keys (partial) hit still saves.
      - name: Save venv to cache
        id: cache-venv-save
        if: steps.cache-venv-restore.outputs.cache-hit != 'true'
        uses: actions/cache/save@v4
        with:
          path: |
            .venv
          key: ${{ steps.cache-venv-restore.outputs.cache-primary-key }}

      - name: Restore cached model files
        id: cache-model-restore
        uses: actions/cache/restore@v4
        with:
          path: |
            data/models
          key: models-${{ env.MODEL_NAME }}
      # Still runs on a cache hit, as before.
      # NOTE(review): presumably `make get-model` skips files that are already
      # present -- confirm, otherwise the cache above saves no download time.
      - name: Download model files
        run: make get-model
      # Same exact-hit guard as the venv cache above.
      - name: Save model files to cache
        id: cache-model-save
        if: steps.cache-model-restore.outputs.cache-hit != 'true'
        uses: actions/cache/save@v4
        with:
          path: |
            data/models
          key: ${{ steps.cache-model-restore.outputs.cache-primary-key }}

      - name: Install
        run: make deps

      - name: Build
        run: make docker-build

      - name: Test
        run: make test-ci
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
-- Item embedding storage, backed by the pgvector postgres extension:
-- https://github.com/pgvector/pgvector
CREATE EXTENSION IF NOT EXISTS vector;

-- One embedding per item. The vector width (768) is the hidden size that was
-- pre-defined when training the model: models/blair-roberta-base/config.json
CREATE TABLE IF NOT EXISTS item_embed (
    item_id BIGINT GENERATED BY DEFAULT AS IDENTITY,
    title   vector(768),
    PRIMARY KEY (item_id)
);

-- Bulk-load the exported embeddings. The file has a header row and uses the
-- ASCII record separator (0x1e) as delimiter and the unit separator (0x1f) as
-- quote character; empty fields are loaded as NULL.
-- NOTE(review): COPY supplies item_id explicitly, so the identity sequence is
-- not advanced -- confirm nothing later inserts rows without an item_id.
COPY item_embed (item_id, title)
FROM '/export/item_embed.csv'
WITH (
    FORMAT csv,
    DELIMITER E'\x1e',
    QUOTE E'\x1f',
    NULL '',
    HEADER true
);

-- Disable approximate search for testing:
-- (the HNSW inner-product index below is intentionally left commented out)
--CREATE INDEX ON item_embed USING hnsw (
--    title vector_ip_ops
--);
--ANALYZE item_embed;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.