In [8]:
# Confirm that PyTorch can see a CUDA device on this runtime.
# The bare expression on the last line is displayed as the cell's output.
import torch
torch.cuda.is_available()

True

In [9]:
import importlib
import importlib.util
import subprocess
import sys

print("Installing required packages...")

# Install torch only when the kernel does not already provide it.
# The previous version always pointed pip at the CPU-only wheel index
# (https://download.pytorch.org/whl/cpu). On a runtime without torch that would
# install a build with no CUDA support, which defeats the GPU training done
# later in this notebook. The default index is used instead, and an existing
# (e.g. CUDA-enabled) installation is left untouched.
print("  Installing torch...", end=" ", flush=True)
if importlib.util.find_spec("torch") is None:
    # sys.executable ensures pip targets the interpreter backing this kernel.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "torch"])
print("✓")

# Install other packages
packages = [
    "tiktoken",
    "psutil",
    "regex",
    "pytest",
    "numpy",
    "einops",
    "datasets",
    "tqdm"
]

for package in packages:
    print(f"  Installing {package}...", end=" ", flush=True)
    # check_call raises CalledProcessError on a non-zero pip exit status, so a
    # failed install stops the cell instead of printing a misleading check mark.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", package])
    print("✓")

print("\n✓ All packages installed successfully!")

# Packages installed while the kernel is running may not be visible to the
# import system's cached directory listings; refresh them before importing.
importlib.invalidate_caches()

# Verify torch installation
import torch
print(f"✓ PyTorch version: {torch.__version__}")
print(f"✓ CUDA available: {torch.cuda.is_available()}")

Installing required packages...
  Installing torch... ✓
  Installing tiktoken... ✓
  Installing psutil... ✓
  Installing regex... ✓
  Installing pytest... ✓
  Installing numpy... ✓
  Installing einops... ✓
  Installing datasets... ✓
  Installing tqdm... ✓

✓ All packages installed successfully!
✓ PyTorch version: 2.9.0+cu128
✓ CUDA available: True


In [10]:
import sys

# tqdm.auto selects the widget-based notebook progress bar when running under a
# Jupyter kernel with ipywidgets available, and falls back to the plain-text bar
# otherwise. The previous version imported `tqdm_notebook` but never used it,
# so `tqdm` stayed the plain-text bar despite the message printed below.
from tqdm.auto import tqdm
# Kept under its original name for any later cell that refers to it, and used
# below to detect which implementation tqdm.auto actually selected.
from tqdm.notebook import tqdm as tqdm_notebook

# Register the tqdm-backed `progress_apply` family on pandas objects, using the
# progress-bar class selected above.
tqdm.pandas()

# Report notebook-optimized bars only when they are really in effect.
if 'ipykernel' in sys.modules and issubclass(tqdm, tqdm_notebook):
    print("✓ Running in Jupyter - using notebook-optimized progress bars")

✓ Running in Jupyter - using notebook-optimized progress bars


In [11]:
# Work from /content so the checkout lands at /content/cs288-sp26-a2.
%cd /content
# Remove any previous checkout so the clone below always starts from a clean tree.
!rm -rf cs288-sp26-a2
# Fetch the assignment repository (default branch; no specific commit is pinned).
!git clone https://github.com/andersvestrum/cs288-sp26-a2.git
# Enter the repository so later cells can use repo-relative paths such as part4/...
%cd cs288-sp26-a2

/content
Cloning into 'cs288-sp26-a2'...
remote: Enumerating objects: 183, done.[K
remote: Counting objects: 100% (183/183), done.[K
remote: Compressing objects: 100% (129/129), done.[K
remote: Total 183 (delta 81), reused 151 (delta 49), pack-reused 0 (from 0)[K
Receiving objects: 100% (183/183), 2.89 MiB | 29.26 MiB/s, done.
Resolving deltas: 100% (81/81), done.
/content/cs288-sp26-a2


In [None]:
# Run the repository's Part 4 dataset setup script (path is relative to the repo root,
# so the working directory must be the cloned repository).
# Per the recorded output it downloads TinyStories, writing tinystories_full.txt and a
# 100k subset under part4/fixtures/, and then starts downloading SQuAD v1.1.
# NOTE(review): the recorded output is cut off mid-line, so it does not show whether
# the SQuAD step completed — confirm by re-running.
!python part4/setup_datasets.py


CS288 Part 4 - Dataset Setup

Downloading TinyStories dataset...
Total stories: 2,119,719
  Processed 100,000 stories...
  Processed 200,000 stories...
  Processed 300,000 stories...
  Processed 400,000 stories...
  Processed 500,000 stories...
  Processed 600,000 stories...
  Processed 700,000 stories...
  Processed 800,000 stories...
  Processed 900,000 stories...
  Processed 1,000,000 stories...
  Processed 1,100,000 stories...
  Processed 1,200,000 stories...
  Processed 1,300,000 stories...
  Processed 1,400,000 stories...
  Processed 1,500,000 stories...
  Processed 1,600,000 stories...
  Processed 1,700,000 stories...
  Processed 1,800,000 stories...
  Processed 1,900,000 stories...
  Processed 2,000,000 stories...
  Processed 2,100,000 stories...

Saved to: /content/cs288-sp26-a2/part4/fixtures/tinystories_full.txt
File size: 1845.1 MB
Also created 100k subset: /content/cs288-sp26-a2/part4/fixtures/tinystories_100k.txt

Downloading SQuAD v1.1 dataset...
Training examples: 87,5

In [None]:
# Generate augmented QA training data first
!python part4/augment_qa_data.py

# Train with --small config (uses augmented in-domain QA data)
!python part4/train_baseline.py --small

python3: can't open file '/content/cs288-sp26-a2/part4/augment_qa_data.py': [Errno 2] No such file or directory
CS288 Part 4 - Baseline Training

Configuration: small
Device: cuda

STEP 1: Training BPE Tokenizer
Input: /content/cs288-sp26-a2/part4/fixtures/tinystories_100k.txt
Vocab size: 4096
Special tokens: ['<|endoftext|>', '<|pad|>']

Tokenizer trained!
  Vocab size: 4096
  Merges: 3838

Test encoding:
  Input:   'Once upon a time, there was a little girl.'
  Tokens:  11 tokens
  Decoded: 'Once upon a time, there was a little girl.'

STEP 2: Pretraining Language Model

Model architecture:
  d_model: 256
  num_layers: 6
  num_heads: 8
  d_ff: 1024
  context_length: 512
  Parameters: 8,391,936

Training data:
  File: /content/cs288-sp26-a2/part4/fixtures/tinystories_100k.txt
  Documents: 88670
  Batches/epoch: 2771

Training for 3 epoch(s)...
  Training 3 epochs, 2771 batches/epoch
  Epoch 1/3: loss=2.7517 (201.2s)
  Epoch 2/3: loss=1.9493 (199.7s)
  Epoch 3/3: loss=1.8095 (199.9s)



In [None]:
import os
import shutil

from google.colab import drive

# Make Google Drive available under /content/drive so outputs can be saved there.
drive.mount('/content/drive')

# Source: the repo's Part 4 output directory. Destination: a folder in the user's Drive.
src_dir = '/content/cs288-sp26-a2/part4/outputs'
dst_dir = '/content/drive/MyDrive/cs288-a2-outputs'
os.makedirs(dst_dir, exist_ok=True)

# Copy each prediction file with its metadata (copy2); a missing source file
# raises and stops the cell.
prediction_files = ('finetuned_predictions.json', 'prompting_predictions.json')
for name in prediction_files:
    shutil.copy2(os.path.join(src_dir, name), os.path.join(dst_dir, name))
    print(f"✓ Copied {name} to Google Drive")

print(f"\nFiles saved to: {dst_dir}")

Mounted at /content/drive
✓ Copied finetuned_predictions.json to Google Drive
✓ Copied prompting_predictions.json to Google Drive

Files saved to: /content/drive/MyDrive/cs288-a2-outputs
