# Update Superconductor VAE from GitHub

Run this notebook to pull the latest code from GitHub into your Google Drive repo.
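Preserves checkpoints and training logs in `outputs/` and keeps the tensor cache; local edits to tracked code are overwritten. If the dataset or preprocessing config changed, run the optional cache-invalidation cell below so the cache is rebuilt on the next training run.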

**Keep this notebook on Drive** — reuse it anytime you need to update.

In [None]:
# Mount Google Drive so the repo stored there is reachable from this runtime.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [None]:
# === CONFIGURATION ===
# Remote repository to pull from, and the branch to track.
GITHUB_URL = 'https://github.com/jamesconde/superconductor-vae.git'
BRANCH = 'main'

# Where the repo lives on the mounted Drive — edit this if yours is elsewhere.
REPO_PATH = '/content/drive/My Drive/Colab Notebooks/SuperconductorVAE/superconductor-vae'

In [None]:
import os
import shutil
import subprocess
from pathlib import Path

# File types under outputs/ that are carried over to a fresh clone.
PRESERVED_SUFFIXES = ('.pt', '.json', '.csv', '.log')


def _run_git(args, cwd=None):
    """Run ``git <args>``, echo its output, and fail loudly on error.

    Args:
        args: list of git arguments, e.g. ``["fetch", "origin", "main"]``.
        cwd: directory to run the command in, or None for the current one.

    Raises:
        RuntimeError: if git exits with a non-zero status.
    """
    result = subprocess.run(["git", *args], cwd=cwd, capture_output=True, text=True)
    output = (result.stdout + result.stderr).strip()
    if output:
        print(output)
    if result.returncode != 0:
        command = "git " + " ".join(args)
        raise RuntimeError(f"{command!r} failed with exit code {result.returncode}")


repo = Path(REPO_PATH)
is_git_repo = (repo / '.git').exists()

if is_git_repo:
    # --- Fast path: git pull ---
    # Each step is checked so "Done!" is only printed when the update really happened.
    print("Git repo detected — pulling latest changes...")
    _run_git(["fetch", "origin", BRANCH], cwd=str(repo))
    _run_git(["reset", "--hard", f"origin/{BRANCH}"], cwd=str(repo))
    print("Done!")

else:
    # --- First time or tarball extraction: clone fresh, preserve outputs ---
    print("No .git directory — cloning fresh from GitHub...")
    print("Preserving outputs/ (checkpoints, logs, norm_stats)...")

    preserve_dir = Path("/content/_preserved_outputs")
    cache_backup = Path("/content/_preserved_cache")
    staging_dir = Path("/content/_fresh_clone")

    # Back up the outputs we want to keep. The backup directories are NOT wiped
    # first: if an earlier run was interrupted after the old repo was removed,
    # they hold the only remaining copy and get restored below.
    preserve_dir.mkdir(parents=True, exist_ok=True)
    outputs_dir = repo / "outputs"

    if outputs_dir.exists():
        # Walk recursively so checkpoints/logs in subdirectories survive too.
        for f in sorted(outputs_dir.rglob("*")):
            if f.is_file() and f.suffix in PRESERVED_SUFFIXES:
                rel = f.relative_to(outputs_dir)
                dest = preserve_dir / rel
                dest.parent.mkdir(parents=True, exist_ok=True)
                print(f"  Preserving: {rel} ({f.stat().st_size / 1e6:.1f} MB)")
                shutil.copy2(str(f), str(dest))

    # Also preserve tensor cache if it exists
    cache_dir = repo / "data" / "processed" / "cache"
    if cache_dir.exists():
        print(f"  Preserving tensor cache...")
        shutil.copytree(str(cache_dir), str(cache_backup), dirs_exist_ok=True)
    preserved_cache = cache_backup.exists()

    # Clone into a staging directory FIRST. If the clone fails (network, auth,
    # wrong branch) _run_git raises and the existing repo on Drive is untouched.
    shutil.rmtree(str(staging_dir), ignore_errors=True)  # git clone needs an empty target
    _run_git(["clone", "--branch", BRANCH, GITHUB_URL, str(staging_dir)])

    # Only now replace the old repo with the verified fresh clone.
    if repo.exists():
        shutil.rmtree(str(repo))
    repo.parent.mkdir(parents=True, exist_ok=True)
    shutil.move(str(staging_dir), str(repo))

    # Restore outputs (everything present in the backup, including leftovers
    # from a previously interrupted run).
    restored_root = repo / "outputs"
    restored_root.mkdir(exist_ok=True)
    preserved_files = []
    for src in sorted(p for p in preserve_dir.rglob("*") if p.is_file()):
        rel = src.relative_to(preserve_dir)
        dest = restored_root / rel
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(str(src), str(dest))
        preserved_files.append(str(rel))
        print(f"  Restored: {rel}")

    # Restore cache
    if preserved_cache:
        cache_dest = repo / "data" / "processed" / "cache"
        cache_dest.mkdir(parents=True, exist_ok=True)
        shutil.copytree(str(cache_backup), str(cache_dest), dirs_exist_ok=True)
        print("  Restored tensor cache")

    # Cleanup temp — reached only after everything was restored successfully.
    shutil.rmtree(str(preserve_dir), ignore_errors=True)
    shutil.rmtree(str(cache_backup), ignore_errors=True)
    shutil.rmtree(str(staging_dir), ignore_errors=True)

    print(f"\nDone! Repo is now a git clone at: {REPO_PATH}")
    print("Future updates will use fast 'git pull'.")

In [None]:
# === Optional: Invalidate tensor cache ===
# Only needed when the dataset CSV or the preprocessing config has changed.
# Removing the metadata file makes the next training run rebuild the cache (~2 min).

cache_meta = Path(REPO_PATH).joinpath("data", "processed", "cache", "cache_meta.json")
if not cache_meta.exists():
    print("No cache to invalidate (already fresh or doesn't exist).")
else:
    cache_meta.unlink()
    print("Tensor cache invalidated — will rebuild on next training run.")

In [None]:
# === Verify ===
# Sanity check after updating: each section prints a label, then shells out
# to show the corresponding state of the repo at REPO_PATH.
# Top-level listing of the repo directory.
print("Repo contents:")
!ls "{REPO_PATH}/"
print()
# The three most recent commits, one line each.
print("Latest commit:")
!cd "{REPO_PATH}" && git log --oneline -3
print()
# Long listing of outputs/, limited to the first 10 lines; error output from ls
# (e.g. when the directory does not exist) is discarded via 2>/dev/null.
print("Outputs:")
!ls -lh "{REPO_PATH}/outputs/" 2>/dev/null | head -10
print()
print("Now close this notebook and open train_colab.ipynb to start training.")