# Scrape Templates
---
### ▶▶ Run All
---

In [1]:
! python -m pip install -r ../requirements.txt

Defaulting to user installation because normal site-packages is not writeable


In [2]:
# Make the src directory importable from this notebook

import sys

src_path = "../src/"

# Put the directory at the end of the module search path.
sys.path.extend([src_path])
print(f"📘 Added src directory to the path: {src_path}")

📘 Added src directory to the path: ../src/


In [3]:
# Get username

import os
import re

# Start from the 'Username' environment variable, if it is set.
username = os.environ.get('Username')

# Ask for the username when the environment value is missing or is not a staff id.
# A staff id is matched as the letter "e" followed by exactly seven digits;
# fullmatch is used so that trailing characters (e.g. "e12345678") are rejected.
if not username or not re.fullmatch(r"e\d{7}", username):
    # Surrounding whitespace from typing/pasting is dropped from the entered value.
    username = input("Enter your staff e-number: ").strip()

print(f'🔹 Username: {username}')

🔹 Username: e9402338


In [4]:
# Load VU Template site

import requests
import getpass
from requests_ntlm import HttpNtlmAuth
from utils import templates_url, StopExecution

print(f"🔵 Creating session for user: {username}")
session = requests.Session()

# Keep prompting for a password until the site loads (HTTP 200), the user
# aborts by entering an empty password, or an unrecoverable error occurs.
# On success `session` and `response` are left available for the later cells.
while True:
  password = getpass.getpass("Enter your staff password: ")

  # An empty password is treated as a request to abort.
  if password == '':
    password = None  # Clear password
    print("🟤 Session aborted by user")
    raise StopExecution

  session.auth = HttpNtlmAuth(username, password)
  # Only the local name is cleared here; the auth object above was built from the value.
  password = None  # Clear password
  print("🔵 Attempting authentication...")

  # Only the network call is guarded, so the status handling below cannot be
  # affected by the connection-error handler.
  try:
    response = session.get(templates_url)
  except requests.exceptions.ConnectionError as e:
    if "Failed to resolve" in str(e) or "[Errno 11001] getaddrinfo failed" in str(e):
      print(
        "🔴 Unable to connect to VU intranet.\n"
        "🟠 Please ensure you are:\n"
        "    > Connected directly to a VU network, or\n"
        "    > Connected remotely using Cisco AnyConnect")
    else:
      # Any other connection failure is reported instead of being swallowed,
      # which previously caused a silent re-prompt for the password.
      print(f"🔴 Connection error: {e}")
    raise StopExecution

  if response.status_code == 200:
    print(f"🟢 Authenticated user: {username}")
    print(f"🟢 Successfully loaded the VU Template site")
    break

  if response.status_code == 401:
    # Wrong credentials: go round the loop and ask again.
    print("🔴 Authorization failed.")
    print("🟠 Please check your credentials and try again.")
    continue

  # Any other status is not retried.
  print(f"🔴 Unexpected response: {response.status_code}")
  raise StopExecution


🔵 Creating session for user: e9402338
🔵 Attempting authentication...
🟢 Authenticated user: e9402338
🟢 Successfully loaded the VU Template site


In [5]:
# Parse HTML of webpage

from bs4 import BeautifulSoup

# Build the parse tree from the text of the response fetched in the previous cell,
# using the "html.parser" backend.
soup = BeautifulSoup(response.text, features="html.parser")

print("🟢 Successfully parsed HTML page")

🟢 Successfully parsed HTML page


In [6]:
# Find matching templates

from utils import template_titles

# Collect every <a> element that carries an href attribute.
link_filter = {'href': True}
anchors = soup.find_all('a', **link_filter)

def clean_text(text):
  """Return `text` trimmed at both ends, with each run of whitespace collapsed to one space."""
  # split() with no arguments discards leading/trailing whitespace and
  # separates on any whitespace run, so no explicit strip is required.
  words = text.split()
  return ' '.join(words)

# Keep the href of every anchor whose table row starts with a wanted template title.
matching_hrefs = []
for a in anchors:
  # Anchors that are not inside a table row cannot be template links.
  parent_tr = a.find_parent('tr')
  if parent_tr is None:
    continue

  # The title is read from the first <td> of the row; rows without one are skipped.
  first_td = parent_tr.find('td')
  if first_td is None:
    continue

  template_title = clean_text(first_td.get_text())
  if template_title in template_titles:
    matching_hrefs.append(a['href'])

print("🟢 Found", len(matching_hrefs), "matching templates")

🟢 Found 10 matching templates


In [8]:
# Download template files

from datetime import datetime
import os
import shutil
import urllib.parse
from utils import Paths, base_url, StopExecution

def download_file(url, file_path):
  """Download `url` with the notebook's `session` and write the body to `file_path`.

  Args:
    url: Address of the file to fetch.
    file_path: Destination path; the file is opened in binary write mode.

  Raises:
    requests.exceptions.HTTPError: If the server answers with an error status,
      so that an error page is never saved as if it were the requested file.
  """
  with session.get(url, stream=True) as r:
    # Check the status before opening the destination, so a failed request
    # leaves no file behind.
    r.raise_for_status()
    with open(file_path, 'wb') as f:
      # iter_content yields the decoded body in chunks, unlike the raw stream,
      # which would pass through any transfer compression untouched.
      for chunk in r.iter_content(chunk_size=64 * 1024):
        f.write(chunk)

# create VU Templates directory, if it doesn't exist yet
os.makedirs(Paths.VU_Templates, exist_ok=True)

# Nothing to do when the previous cell found no template links.
if not matching_hrefs:
  print("🔴 Found no matching files to download")
  raise StopExecution

for href in matching_hrefs:
  # Name the local file after the path component of the link only, so that a
  # query string or fragment on the href does not end up in the file name.
  url_path = urllib.parse.urlsplit(href).path
  file_name = urllib.parse.unquote(os.path.basename(url_path))
  file_path = os.path.join(Paths.VU_Templates, file_name)
  download_file(base_url + href, file_path)
  print(f"🔹 Downloaded {file_name} ...")

# Save a timestamp to a file
with open(os.path.join(Paths.VU_Templates, "last_downloaded.txt"), "w") as f:
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  f.write(timestamp)

print(f'\n🟢 Completed downloading templates to: {Paths.VU_Templates}')
print(f'🔹 Last downloaded on: {timestamp}')

🔹 Downloaded VETLearningandAssessmentPlan.docx ...
🔹 Downloaded VET Learning Activity template v1.0.docx ...
🔹 Downloaded WrittenAssesssmentTemplate v2.0.docx ...
🔹 Downloaded VETUnitGuide.docx ...
🔹 Downloaded VETAssessorGuide.docx ...
🔹 Downloaded VETAssessmentMapping(CurrentUoC).docx ...
🔹 Downloaded TAFEAssessmentCoverSheet.pdf ...
🔹 Downloaded PracticalObservationAssessmentTemplatePortrait v2.0.docx ...
🔹 Downloaded PracticalObservationAssessmentTemplateLandscape v2.0.docx ...
🔹 Downloaded SkillsRecognitionAssessorKit.docx ...

🟢 Completed downloading templates to: ../Templates/VU/
🔹 Last downloaded on: 2025-03-27 10:57:01
