diff --git a/.github/workflows/github_pages.yml b/.github/workflows/github_pages.yml index d8358ef9..9160bf43 100644 --- a/.github/workflows/github_pages.yml +++ b/.github/workflows/github_pages.yml @@ -16,7 +16,7 @@ jobs: uses: actions/checkout@v2 with: repository: algolia/algoliasearch-crawler-github-actions - ref: v0.6.0 + ref: v0.7.3 - name: Sleep for 30s run: sleep 30 - name: Github-pages-MAIN => Algolia crawler creation and recrawl (Push on Main branch) diff --git a/.github/workflows/netlify.yml b/.github/workflows/netlify.yml index fc08320b..a15933f2 100644 --- a/.github/workflows/netlify.yml +++ b/.github/workflows/netlify.yml @@ -18,7 +18,7 @@ jobs: uses: actions/checkout@v2 with: repository: algolia/algoliasearch-crawler-github-actions - ref: v0.6.0 + ref: v0.7.3 - name: Sleep for 30s run: sleep 30 - name: Netlify-PR => Algolia crawler creation and recrawl on preview (Pull Request) diff --git a/.github/workflows/vercel_pr.yml b/.github/workflows/vercel_pr.yml index 08d3bd31..c674ec2c 100644 --- a/.github/workflows/vercel_pr.yml +++ b/.github/workflows/vercel_pr.yml @@ -16,13 +16,13 @@ jobs: uses: dorshinar/get-deployment-url@master timeout-minutes: 1 with: - token: ${{ secrets.GIT_HUB_TOKEN }} + token: ${{ github.token }} # checkout the private repo containing the action to run - name: Checkout GitHub Action Repo uses: actions/checkout@v2 with: repository: algolia/algoliasearch-crawler-github-actions - ref: v0.6.0 + ref: v0.7.3 - name: Vercel-PR => Algolia crawler creation and recrawl on preview (Pull Request) uses: ./ id: crawler_pr diff --git a/.github/workflows/vercel_push.yml b/.github/workflows/vercel_push.yml index 3ceba6d5..283668e8 100644 --- a/.github/workflows/vercel_push.yml +++ b/.github/workflows/vercel_push.yml @@ -16,7 +16,7 @@ jobs: uses: actions/checkout@v2 with: repository: algolia/algoliasearch-crawler-github-actions - ref: v0.6.0 + ref: v0.7.3 - name: Vercel-MAIN => Algolia crawler creation and recrawl on preview (Push on Main branch) uses: ./ id: crawler_push diff --git a/action.yml b/action.yml index 52724c9f..aefbfe87 100644 --- a/action.yml +++ b/action.yml @@ -21,6 +21,7 @@ inputs: crawler-name: description: 'Name of the crawler' required: true + default: '[Github] ${{ github.repository }} ${{ github.ref }}' algolia-app-id: description: 'Algolia Application ID' required: true diff --git a/build/index.js b/build/index.js index 2e90354c..faa9c48e 100644 --- a/build/index.js +++ b/build/index.js @@ -402,7 +402,8 @@ var CRAWLER_USER_ID = core.getInput('crawler-user-id'); var CRAWLER_API_KEY = core.getInput('crawler-api-key'); var CRAWLER_API_BASE_URL = core.getInput('crawler-api-base-url'); var GITHUB_TOKEN = core.getInput('github-token'); -var CRAWLER_NAME = core.getInput('crawler-name').replace(/\//g, '-'); +var CRAWLER_NAME = core.getInput('crawler-name').replace(/[ /]/g, '-'); +var INDEX_NAME = CRAWLER_NAME.replace(/[/~,[\]`&|;$*\\]/g, ''); var ALGOLIA_APP_ID = core.getInput('algolia-app-id'); var ALGOLIA_API_KEY = core.getInput('algolia-api-key'); var SITE_URL = core.getInput('site-url'); @@ -418,7 +419,6 @@ function getConfig() { appId: ALGOLIA_APP_ID, apiKey: ALGOLIA_API_KEY, indexPrefix: 'crawler_', - maxUrls: 50, rateLimit: 8, startUrls: [SITE_URL], ignoreQueryParams: ['source', 'utm_*'], @@ -427,7 +427,7 @@ function getConfig() { ignoreRobotsTxtRules: false, actions: [ { - indexName: CRAWLER_NAME + "_index", + indexName: INDEX_NAME + "_index", pathsToMatch: [SITE_URL + "**"], recordExtractor: { __type: 'function', diff --git a/src/index.ts b/src/index.ts index 4e749fa0..85eedf9d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -13,7 +13,11 @@ const CRAWLER_API_BASE_URL = core.getInput('crawler-api-base-url'); const GITHUB_TOKEN = core.getInput('github-token'); // CRAWLER CONFIGURATION -const CRAWLER_NAME = core.getInput('crawler-name').replace(/\//g, '-'); +const CRAWLER_NAME = core.getInput('crawler-name'); +const INDEX_NAME = CRAWLER_NAME.replace(/[ /]/g, '-').replace( + /[/~,[\]`&|;$*\\]/g, + '' +); const ALGOLIA_APP_ID = core.getInput('algolia-app-id'); const ALGOLIA_API_KEY = core.getInput('algolia-api-key'); const SITE_URL = core.getInput('site-url'); @@ -40,7 +44,6 @@ function getConfig(): ConfigJson { appId: ALGOLIA_APP_ID, apiKey: ALGOLIA_API_KEY, indexPrefix: 'crawler_', - maxUrls: 50, // @todo TO BE REMOVED rateLimit: 8, startUrls: [SITE_URL], ignoreQueryParams: ['source', 'utm_*'], @@ -49,7 +52,7 @@ function getConfig(): ConfigJson { ignoreRobotsTxtRules: false, actions: [ { - indexName: `${CRAWLER_NAME}_index`, + indexName: `${INDEX_NAME}_index`, pathsToMatch: [`${SITE_URL}**`], recordExtractor: { __type: 'function',