diff --git a/.github/workflows/next.yml b/.github/workflows/next.yml index 10d3e66..4560309 100644 --- a/.github/workflows/next.yml +++ b/.github/workflows/next.yml @@ -20,9 +20,10 @@ jobs: with: node-version: '18.x' - name: test + id: scraper_config run: | export SCRAPER_CONFIG=$(cat ./config.next.json | jq -r tostring) - echo $SCRAPER_CONFIG + echo "scraper_config=${SCRAPER_CONFIG}" >> $GITHUB_OUTPUT - name: Install Vercel CLI run: npm install --global vercel@latest - run: rm -rf .vercel @@ -38,5 +39,5 @@ jobs: uses: addnab/docker-run-action@v3 with: image: algolia/docsearch-scraper - options: --env-file=algolia.env -e "CONFIG=$(cat ./config.next.json | jq -r tostring)" + options: --env-file=algolia.env -e "CONFIG=${{ steps.scraper_config.outputs.scraper_config }}" run: pipenv run python -m src.index diff --git a/config.next.json b/config.next.json index b58f0e5..42ce0e3 100644 --- a/config.next.json +++ b/config.next.json @@ -15,5 +15,16 @@ "lvl4": "article h5, article td:first-child", "lvl5": "article h6", "content": "article p, article li, article td:last-child" + }, + "strip_chars": " .,;:#", + "custom_settings": { + "attributesToRetrieve": [ + "hierarchy", + "content", + "anchor", + "url", + "url_without_anchor", + "type" + ] } } \ No newline at end of file