Skip to content

Archive.org Saver

Archive.org Saver #11

Workflow file for this run

# Workflow: pulls the log of the latest run of another workflow, extracts and
# filters the URLs found in it, then runs the repository's archiving scripts
# over those URLs and over the site itself.
name: Archive.org Saver
on:
  # Allow manual runs from the Actions tab.
  workflow_dispatch:
  schedule:
    # 03:40 on the 26th of May and of November (GitHub evaluates cron in UTC).
    - cron: "40 3 26 5,11 *"
jobs:
  Archive:
    env:
      # Path, inside the downloaded logs archive, of the log file to extract.
      LOGFILE: "Links/7_lychee main_content prod(^content).txt"
      # Token read by the `gh` CLI calls in the steps below.
      GH_TOKEN: ${{ secrets.BUILD_ACTION_TOKEN }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout the Code
        uses: actions/checkout@v4
        with:
          ref: main
      - name: Install Dependencies
        env:
          # Hand the secret to the shell as an environment variable instead of
          # pasting it into the script text, so quotes, `$`, backticks, `%`
          # and backslashes in its value are never interpreted.
          ARCHIVE_ORG_AUTH: ${{ secrets.ARCHIVE_ORG_AUTH }}
        run: |
          cd scripts/archivable_urls
          # Most recent run of workflow 9334935 (presumably the link checker
          # that produces the lychee log named in $LOGFILE -- confirm).
          RUNID=$(gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" "/repos/buddhist-uni/buddhist-uni.github.io/actions/workflows/9334935/runs" -q '.workflow_runs[0].id')
          echo "Last runid was $RUNID"
          # Download that run's logs as a zip and pull out the single log file.
          gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" "/repos/buddhist-uni/buddhist-uni.github.io/actions/runs/$RUNID/logs" > logs.zip
          unzip logs.zip "$LOGFILE"
          mv "$LOGFILE" "lycheeout.txt"
          python extracturls.py
          python filterurls.py # creates scripts/archivable_urls/filteredurls.txt
          # The auth file is written to the runner's home directory
          # (presumably where the archiving scripts look for it -- confirm).
          cd ~
          # '%s' writes the value verbatim with no trailing newline.
          # NOTE(review): if the stored secret relied on printf expanding
          # escapes such as \n, switch the format to '%b'.
          printf '%s' "$ARCHIVE_ORG_AUTH" > archive.org.auth
          pip install tqdm titlecase pyyaml
      - name: Archive Archivable External Links
        shell: bash
        run: |
          cd scripts
          # Feed every whitespace-separated URL in filteredurls.txt to archive_urls().
          python -c "from archivedotorg import *; urls = Path('archivable_urls/filteredurls.txt').read_text().split(); archive_urls(urls)"
      - name: Archive Internal Pages
        run: |
          cd scripts
          python archive_site.py