From bcf94d367db8c9d72007ad4ce3c9d471b6c94b85 Mon Sep 17 00:00:00 2001
From: The Open Buddhist University
Date: Fri, 23 Jun 2023 12:18:22 +0700
Subject: [PATCH] archive external urls too (#127)

---
Reviewer notes (commentary only; not part of the commit message):

* Adds job-level env vars to the Archive job: LOGFILE (the path of one
  log file inside the downloaded logs archive) and GH_TOKEN (taken from
  the BUILD_ACTION_TOKEN secret).
* "Install Dependencies" now, before its existing commands, asks the
  GitHub API for the first entry in the run list of workflow 9334935,
  downloads that run's logs as logs.zip, extracts LOGFILE from it,
  renames it to lycheeout.txt, and runs extracturls.py followed by
  filterurls.py inside scripts/archivable_urls.
* Adds an "Archive Archivable External Links" step that reads
  scripts/archivable_urls/filteredurls.txt, splits it on whitespace and
  passes the resulting list to archive_urls() from scripts.archive_site.
* Renames the step "Run the Site Archiver" to "Archive Internal Pages";
  its command is unchanged.

 .github/workflows/archive.yml | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/archive.yml b/.github/workflows/archive.yml
index ba4cc0221f..bd9a4700d1 100644
--- a/.github/workflows/archive.yml
+++ b/.github/workflows/archive.yml
@@ -5,6 +5,9 @@ on:
     - cron: "40 3 15 5,11 *"
 jobs:
   Archive:
+    env:
+      LOGFILE: "Links/7_lychee main_content prod(^content).txt"
+      GH_TOKEN: ${{ secrets.BUILD_ACTION_TOKEN }}
     runs-on: ubuntu-latest
     steps:
       - name: Checkout the Code
@@ -13,9 +16,21 @@ jobs:
           ref: main
       - name: Install Dependencies
         run: |
+          cd scripts/archivable_urls
+          RUNID=$(gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" "/repos/buddhist-uni/buddhist-uni.github.io/actions/workflows/9334935/runs" -q '.workflow_runs[0].id')
+          echo "Last runid was $RUNID"
+          gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" "/repos/buddhist-uni/buddhist-uni.github.io/actions/runs/$RUNID/logs" > logs.zip
+          unzip logs.zip "$LOGFILE"
+          mv "$LOGFILE" "lycheeout.txt"
+          python extracturls.py
+          python filterurls.py # creates scripts/archivable_urls/filteredurls.txt
           cd ~
           printf "${{ secrets.ARCHIVE_ORG_AUTH }}" > archive.org.auth
           pip install tqdm
-      - name: Run the Site Archiver
+      - name: Archive Archivable External Links
+        shell: bash
+        run: |
+          python -c "from scripts.archive_site import *; urls = Path('scripts/archivable_urls/filteredurls.txt').read_text().split(); archive_urls(urls)"
+      - name: Archive Internal Pages
         run: |
           python scripts/archive_site.py