In [1]:
import json
import requests
import re
import os
import threading
import time

from concurrent.futures import ThreadPoolExecutor, as_completed

from utils.download import *
from utils.cookies import load_cookie_dict
from utils.CLI import print_progress_bar
from utils.API_scraping import sizeof_fmt

In [2]:

# JSON mapping each course name to its list of files.
course_file_path = "./meta-downloads/course-files.json"
# JSON text is UTF-8; pinning the encoding keeps decoding independent of the
# platform's locale default.
with open(course_file_path, "r", encoding="utf-8") as f:
  course_files = json.load(f)

# JSON mapping each course name to its list of lectures.
lecture_link_path = "./meta-downloads/lecture-links.json"
with open(lecture_link_path, "r", encoding="utf-8") as f:
  lecture_links = json.load(f)

# Cookie dict handed to every file_download call in the download cells.
canvas_cookie_file = "canvas-cookies.json"
request_cookies = load_cookie_dict(canvas_cookie_file)

In [3]:
"""
Of format:
{
  Coursename: [
    [
      "Filename",
      "Filesize",
      "File download Link"
    ],
    ...
  ],
}

The dict maps each course name to that course's files
-> each course maps to an array of files.
  -> Each file is itself an array with the structure (name, size, link)
"""
# Pretty-print the whole course -> files mapping for a visual check (2-space indent).
print(json.dumps(course_files, indent=2))

{
  "NPB 173 001 SQ 2025": [
    [
      "NPB173_lecture1_2025.pdf",
      "2.1MB",
      "https://canvas.ucdavis.edu/files/27274322/download?download_frd=1"
    ],
    [
      "NPB173_lecture2_2025.pdf",
      "2.9MB",
      "https://canvas.ucdavis.edu/files/27281704/download?download_frd=1"
    ],
    [
      "NPB173_lecture3_2025.pdf",
      "11.8MB",
      "https://canvas.ucdavis.edu/files/27324438/download?download_frd=1"
    ]
  ],
  "ECS 170 001 SQ 2025": [
    [
      "1.Class-intro.pdf",
      "2.4MB",
      "https://canvas.ucdavis.edu/files/27264716/download?download_frd=1"
    ],
    [
      "2. Intelligent Agents.pdf",
      "708.0KB",
      "https://canvas.ucdavis.edu/files/27285854/download?download_frd=1"
    ],
    [
      "3. Introduction to Search.pdf",
      "569.3KB",
      "https://canvas.ucdavis.edu/files/27301932/download?download_frd=1"
    ],
    [
      "4. Uninformed Search.pdf",
      "1.3MB",
      "https://canvas.ucdavis.edu/files/27320262/download?downloa

In [4]:
"""
Of format: (name, original_url, download_url)
{
  Coursename: [
    [
      "Lecture name",
      "Lecture URL",
      "Lecture download URL"
    ],
    ...
  ],
}

The dict maps each course name to that course's lectures
-> each course maps to an array of lectures.
  -> Each lecture is itself an array with the structure (name, OG link, download link)
"""

# Pretty-print the whole course -> lectures mapping for a visual check (2-space indent).
print(json.dumps(lecture_links, indent=2))

{
  "NPB 173 001 SQ 2025": [
    [
      "NPB-173: 2025-04-01 08:57",
      "/media/NPB-173%3A+2025-04-01+08%3A57/1_a0adrqp5/374210312",
      "https://www.kaltura.com/api_v3/service/attachment_attachmentasset/action/serve/attachmentAssetId/1_5g8ce8s5?ks=djJ8MTc3MDQwMXy1EC0ljwVlxHJU4vPqJYZ2Q7kEbnkDsTIOQ7QEyMgZY9dNarysUnmLMqa8b6YsvSMqdQ9KnkDOIx-fB7mQB2cIlVZsfEMBE5hKYwJ_IbKVCSTyYj-VdhNyb7_-xyTgyyXN8hoPbgcMMf7dXTMiem_AtIBUvXn5TU-N_cNW9G4OwcnGXkde5X_jRNFK9Al5gTLmkH8YD7Mn4kBus5YrweFTV5nLDeRsftGNtRNv1TJUCfGFrrltVpus3pmQDbVLbEFPH5cElH2xIqcDXNWd36STfba0vJ6N_sbczVThCI6spl45i05sqlcA-sOAE4y2HnsvbZih-Pv0LRDPQ62UlXTiSEXxq8TZ8lhWpGI7X2RJB9GAbrupT4F2JYMuxmS4cmQcWD7iPbg2IQbcvMoZmNDfz2AjFUThGQCgC7x4r40tXPu9XtDEeWmlvDliGCZ3QyMEI3IWaH2MnJhM4O5TW5XOkYA6MsHbLuwEtmxRCVKdQK732PByb8NEPlpRWnqYW2f3K535AHkFDMDyzPgNopHU8ZLhoMbc2k5s4ScD3zSX9Q=="
    ],
    [
      "NPB-173: 2025-04-03 08:57",
      "/media/NPB-173%3A+2025-04-03+08%3A57/1_xg32i4yt/374210312",
      "https://www.kaltura.com/api_v3/service/attachment

In [5]:

# Count every scraped file and collect the short names of courses with none.
no_files = [
  course[:8].strip()
  for course, files in course_files.items()
  if len(files) == 0
]
total_files = sum(len(files) for files in course_files.values())
print(total_files)
# Courses listed here might be worth scraping by hand.
print(f"Classes where no resources where found, {len(no_files)}: {no_files}")

1409
Classes where no resources where found, 10: ['ECS 189G', 'ECS 154A', 'PSC 120', 'CMN 152V', 'ECS 050', 'MAT 021B', 'PSC 001Y', 'PSC 100Y', 'PSC 130', 'PSC 103A']


In [6]:

# Count every scraped lecture and collect the short names of courses with none.
no_lectures = [
  course[:8].strip()
  for course, lectures in lecture_links.items()
  if len(lectures) == 0
]
total_lectures = sum(len(lectures) for lectures in lecture_links.values())
print(total_lectures)
# Courses listed here might be worth scraping by hand.
print(f"Classes with a Media Gallery but no lectures, {len(no_lectures)}: {no_lectures}")

417
Classes with a Media Gallery but no lectures, 13: ['ECS 170', 'ECS 189G', 'ECH 080', 'EAE 001', 'ENL 003', 'MAT 022A', 'MAT 107', 'MAT 022A', 'PHI 112', 'PHY 009A', 'PSC 103A', 'WLD 943', 'WLD 941']


In [8]:

# Feel free to define your own interface. I would recommend it even.


# Interactive picker: walks every course and records the chosen files in
# download_file_dict, keyed by the short course name (first 8 characters, stripped).
# Only the first letter of each answer is read, so "y" and "yes" behave the same.
download_file_dict = {}
for i, (coursename, coursefiles) in enumerate(course_files.items()):
  short_name = coursename[:8].strip()
  print(f"{i+1}/{len(course_files)} - ", end="")

  if len(coursefiles) == 0:
    print(f"No files for {short_name}")
    continue

  print(f"{short_name}: Pick which of the {len(coursefiles)} to download")
  time.sleep(0.2) # helps the output work better. VS Code breaking TUI
  # Compare the first typed letter instead of searching the whole answer:
  # a substring test treats "yes" as an exit request because it contains "e".
  choice = input("Check files? y/n, or e to exit  - ").strip().lower()[:1]
  if choice in ('e', 'b'):
    break

  if choice == 'y':
    # (name, size, link)
    for coursefile in coursefiles:
      print(f"\t{coursefile[0]:<40} {coursefile[1] if coursefile[1] else 'Unknown' :>6}")
    time.sleep(0.2)
    download_select = input("Download ALL (a), REGEX PATTERN (r),  PDF/PPTX selector (p), or SELECT (s). ").strip().lower()[:1]

    download_files = []
    if download_select == 'a':
      download_files = coursefiles
    elif download_select == 'r':
      download_files = regex_selector(coursefiles)
    elif download_select == 'p':
      download_files = pdf_pptx_selector(coursefiles)
    elif download_select == 's':
      download_files = prompt_selector(coursefiles)
    else:
      print("No valid selector found, moving on")
    # Several courses can share one short name (e.g. repeated offerings), so
    # add to any earlier selection instead of overwriting it.
    # NOTE(review): same-named files from merged courses are submitted with the
    # same short name at download time; confirm file_download copes with that.
    download_file_dict.setdefault(short_name, []).extend(download_files)
    print(f"\tSelected {len(download_files)}: {[file[0] for file in download_files]}")




1/47 - NPB 173: Pick which of the 3 to download
	NPB173_lecture1_2025.pdf                  2.1MB
	NPB173_lecture2_2025.pdf                  2.9MB
	NPB173_lecture3_2025.pdf                 11.8MB
	Selected 3: ['NPB173_lecture1_2025.pdf', 'NPB173_lecture2_2025.pdf', 'NPB173_lecture3_2025.pdf']
2/47 - ECS 170: Pick which of the 5 to download
	1.Class-intro.pdf                         2.4MB
	2. Intelligent Agents.pdf                708.0KB
	3. Introduction to Search.pdf            569.3KB
	4. Uninformed Search.pdf                  1.3MB
	UCD_ECS_170_001_SQ_2025_Python_Tutorial_0403.ipynb 506.3KB
	Download: 1.Class-intro.pdf? y
	Download: 2. Intelligent Agents.pdf? y
	Download: 3. Introduction to Search.pdf? y
	Download: 4. Uninformed Search.pdf? y
	Download: UCD_ECS_170_001_SQ_2025_Python_Tutorial_0403.ipynb? n
	Selected 4: ['1.Class-intro.pdf', '2. Intelligent Agents.pdf', '3. Introduction to Search.pdf', '4. Uninformed Search.pdf']
3/47 - No files for ECS 189G
4/47 - No files for ECS 154

In [None]:
def parse_size(size_str):
  """Convert a human-readable size such as "2.1MB" or "708.0 KB" into bytes.

  Args:
    size_str: Text made of a number followed by a unit (B, KB, MB, GB or TB),
      case-insensitive, with optional whitespace between the two. May be None
      or empty.

  Returns:
    The size in bytes as a float, using 1024-based units, or 0 when the text
    is missing or cannot be parsed.
  """
  if not size_str:
    return 0
  # The number pattern allows at most one decimal point, so malformed input
  # like "1.2.3MB" is rejected here instead of making float() raise.
  match = re.match(r"(\d+\.?\d*|\.\d+)\s*([KMGT]?B)", size_str.strip().upper())
  if not match:
    return 0
  number, unit = match.groups()
  units = {"B": 1, "KB": 1024, "MB": 1024**2, "GB": 1024**3, "TB": 1024**4}
  return float(number) * units[unit]

# Placeholders that this cell never fills in.
zero_resources = []
large = []

# Add up the listed size of every selected file, one course at a time.
total_size = sum(
  sum(parse_size(entry[1]) for entry in selected)
  for selected in download_file_dict.values()
)

print("\n-----")
print(f"Total file download would be: {sizeof_fmt(total_size)}")
print(json.dumps(download_file_dict, indent=2))


-----
Total file download would be: 1.6GB
{
  "NPB 173": [
    [
      "NPB173_lecture1_2025.pdf",
      "2.1MB",
      "https://canvas.ucdavis.edu/files/27274322/download?download_frd=1"
    ],
    [
      "NPB173_lecture2_2025.pdf",
      "2.9MB",
      "https://canvas.ucdavis.edu/files/27281704/download?download_frd=1"
    ],
    [
      "NPB173_lecture3_2025.pdf",
      "11.8MB",
      "https://canvas.ucdavis.edu/files/27324438/download?download_frd=1"
    ]
  ],
  "ECS 170": [
    [
      "1.Class-intro.pdf",
      "2.4MB",
      "https://canvas.ucdavis.edu/files/27264716/download?download_frd=1"
    ],
    [
      "2. Intelligent Agents.pdf",
      "708.0KB",
      "https://canvas.ucdavis.edu/files/27285854/download?download_frd=1"
    ],
    [
      "3. Introduction to Search.pdf",
      "569.3KB",
      "https://canvas.ucdavis.edu/files/27301932/download?download_frd=1"
    ],
    [
      "4. Uninformed Search.pdf",
      "1.3MB",
      "https://canvas.ucdavis.edu/files/273202

In [13]:
# Guards the result/progress printing; the lecture download cell reuses it.
print_lock = threading.Lock()

file_futures = []

# Number of selected resources, including any that are skipped below.
total_resources = sum(len(resources) for resources in download_file_dict.values())
with ThreadPoolExecutor(max_workers=10) as executor:
  for coursename, course_resources in download_file_dict.items():
    for resource in course_resources:
      # resource = (name, size, download link)
      name, _, download_link = resource
      if name == "Error" or download_link is None:
        # print(resource)
        continue

      if download_link and download_link[0] == "/": # If a relative URL
        download_link = "https://canvas.ucdavis.edu" + download_link

      future = executor.submit(
          file_download,
          'course-downloads',
          coursename[:8].strip(),
          f"{name}",
          download_link,
          request_cookies
      )
      file_futures.append(future)

  # Measure progress against the downloads actually submitted; skipped
  # entries would otherwise keep the bar from ever reaching 100%.
  submitted_downloads = len(file_futures)
  for i, future in enumerate(as_completed(file_futures)):
    try:
      report = ("Download result:", future.result())
    except Exception as e:
      report = ("Error during download:", e)
    # A failed download is still a finished one, so the bar advances either way.
    with print_lock:
      print(*report)
      print_progress_bar(i + 1, submitted_downloads, prefix=" "*25)


Download result: True
Download result: True     |----------------------------------------------------------------------------------------------------| 0.1% 
Download result: True     |----------------------------------------------------------------------------------------------------| 0.2% 
Download result: True     |----------------------------------------------------------------------------------------------------| 0.3% 
Download result: True     |----------------------------------------------------------------------------------------------------| 0.5% 
Download result: True     |----------------------------------------------------------------------------------------------------| 0.6% 
Download result: True     |----------------------------------------------------------------------------------------------------| 0.7% 
Download result: True     |----------------------------------------------------------------------------------------------------| 0.8% 
Download result: True     |------

In [None]:

# Download every lecture attachment, saving each one as "<lecture name>.txt".
# Relies on print_lock created in the file download cell.
lecture_futures = []
with ThreadPoolExecutor(max_workers=10) as executor:
  for coursename, course_lectures in lecture_links.items():
    for lecture in course_lectures:
      # lecture = (name, OG link, download link)
      name, _, download_link = lecture
      if name == "Error" or download_link is None:
        # print(lecture)
        continue

      future = executor.submit(
          file_download,
          'course-lectures',
          coursename[:8].strip(),
          f"{name}.txt",
          download_link,
          request_cookies
      )
      lecture_futures.append(future)

  # Progress is measured against the lectures actually submitted;
  # len(lecture_links) would only count courses.
  submitted_lectures = len(lecture_futures)
  # enumerate supplies the completion counter; without it `i` was either
  # undefined or a stale value left over from an earlier cell.
  for i, future in enumerate(as_completed(lecture_futures)):
    try:
      report = ("Download result:", future.result())
    except Exception as e:
      report = ("Error during download:", e)
    # A failed download is still a finished one, so the bar advances either way.
    with print_lock:
      print(*report)
      print_progress_bar(i + 1, submitted_lectures)

['Error', '/media/BIS-002A%3A+2024-02-23+10%3A57/1_ysuo7sz2/328158312', 'https://aggievideo.canvas.ucdavis.edu/media/BIS-002A%3A+2024-02-23+10%3A57/1_ysuo7sz2/328158312']
['Error', '/media/ECN-001A%3A+2023-10-24+18%3A07/1_0od9s2hv/318463342', 'https://aggievideo.canvas.ucdavis.edu/media/ECN-001A%3A+2023-10-24+18%3A07/1_0od9s2hv/318463342']
['Error', '/media/ECN-001A%3A+2023-11-02+18%3A07/1_mfyf7ngt/318463342', 'https://aggievideo.canvas.ucdavis.edu/media/ECN-001A%3A+2023-11-02+18%3A07/1_mfyf7ngt/318463342']
['Error', '/media/ECN-001A%3A+2023-11-21+18%3A07/1_59lh4oc2/318463342', 'https://aggievideo.canvas.ucdavis.edu/media/ECN-001A%3A+2023-11-21+18%3A07/1_59lh4oc2/318463342']
['Error', '/media/ECS-122A%3A+2024-10-31+19%3A37/1_39a17rud/358323452', 'https://aggievideo.canvas.ucdavis.edu/media/ECS-122A%3A+2024-10-31+19%3A37/1_39a17rud/358323452']
['Error', '/media/ECS-132%3A+2024-10-31+15%3A07/1_gp94u2xa/358333622', 'https://aggievideo.canvas.ucdavis.edu/media/ECS-132%3A+2024-10-31+15%3A07