# 🧪 Transfer Learning Assignment (PyTorch)

Автопроверка включена. Заполняйте поля в самом первом блоке.
Где требуется — **не меняйте имена переменных**.

In [1]:
# @title 1) Student Info & Config
# All code comments are in English.

# === ОБЯЗАТЕЛЬНО ЗАПОЛНИТЬ ===
full_name = "Doe John"     # например: "Тощев Александр"
student_group = "11-111"      # например: "208"
assignment_id = "HW_MISMATCH_01"
assert full_name != "Фамилия Имя", "Заполните full_name"
assert student_group != "Группа", "Заполните student_group"
print("✔ Student Info OK")

# Typical human accuracy (benchmark) for MNIST may be ~97-99%.
HUMAN_ACCURACY = 98.0  # @param {type:"number"}

print("Student:", full_name)
print("Human reference accuracy (%):", HUMAN_ACCURACY)

from datetime import datetime, timezone, timedelta

# Установите окна приёма (пример):

start_at_iso = "2025-10-20T09:00-04:00"  #@param {type:"string"}
due_at_iso   = "2025-11-03T23:59-04:00"  #@param {type:"string"}
start_dt = datetime.fromisoformat(start_at_iso)
due_dt   = datetime.fromisoformat(due_at_iso)
# Для протокола: время сдачи берём текущее (можно заменить на mtime файла)
import os
from datetime import datetime, timezone

# 📅 Add submission date based on file modification time
try:
    nb_path = __file__ if "__file__" in globals() else "Transfer_Learning_Assignment.ipynb"
    mtime = os.path.getmtime(nb_path)
    submission_dt = datetime.fromtimestamp(mtime, tz=timezone.utc)
except Exception:
    submission_dt = datetime.utcnow().replace(tzinfo=timezone.utc)

def penalty_fraction(start_dt, due_dt, submission_dt):
    """Возвращает долю штрафа [0..1].
    0 — без штрафа (<= due_dt). Линейно растёт от due_dt к due_dt + (due_dt - start_dt).
    Не выходит за 1.0.
    """
    if submission_dt <= due_dt:
        return 0.0
    total = (due_dt - start_dt).total_seconds()
    late  = (submission_dt - due_dt).total_seconds()
    if total <= 0:
        return 1.0 if late > 0 else 0.0
    return min(1.0, max(0.0, late / total))

print(f"Окно приёма: {start_dt.isoformat()} — {due_dt.isoformat()} (UTC)")
print(f"Время сдачи: {submission_dt.isoformat()} (UTC)")

# Init raw score holder
raw_score = 0.0
max_points = 100


✔ Student Info OK
Student: Doe John
Human reference accuracy (%): 98.0
Окно приёма: 2025-10-20T09:00:00-04:00 — 2025-11-03T23:59:00-04:00 (UTC)
Время сдачи: 2025-10-27T10:45:16.230701+00:00 (UTC)


  submission_dt = datetime.utcnow().replace(tzinfo=timezone.utc)


In [2]:
# @title 2) Environment Check
import torch, torchvision
print("Torch:", torch.__version__)
print("Torchvision:", torchvision.__version__)
print("CUDA available:", torch.cuda.is_available())

Torch: 2.8.0+cu126
Torchvision: 0.23.0+cu126
CUDA available: True


In [3]:
# @title 3) Setup & Utilities
import torch, torch.nn as nn, torch.optim as optim
from torch.optim.lr_scheduler import OneCycleLR
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from datetime import datetime
import time, json, math, random, os

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
NUM_WORKERS = 2
BATCH_SIZE = # YOUR CODE HERE
NUM_EPOCHS_BASE = # YOUR CODE HERE
SEED = 42
random.seed(SEED); torch.manual_seed(SEED)

print("Device:", DEVICE)

def count_trainable_params(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

@torch.no_grad()
def evaluate(model, dl, criterion):
    model.eval()
    total, correct, loss_sum = 0, 0, 0.0
    for x, y in dl:
        x, y = x.to(DEVICE), y.to(DEVICE)
        logits = model(x)
        loss = criterion(logits, y)
        loss_sum += loss.item() * y.size(0)
        pred = logits.argmax(1)
        correct += (pred == y).sum().item()
        total += y.size(0)
    return loss_sum/total, correct/total

def train_epoch(model, dl, optimizer, criterion, scheduler=None):
    model.train()
    loss_sum, total = 0.0, 0
    for x, y in dl:
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()
        loss_sum += loss.item() * y.size(0)
        total += y.size(0)
    return loss_sum/total

def run_experiment(name, model, train_dl, val_dl, optimizer, criterion, scheduler=None, epochs=NUM_EPOCHS_BASE):
    t0 = time.time()
    history = {'train_loss': [], 'val_loss': [], 'val_acc': []}
    best = {'val_acc': 0.0, 'state': None}
    for ep in range(1, epochs+1):
        tr_loss = train_epoch(model, train_dl, optimizer, criterion, scheduler)
        val_loss, val_acc = evaluate(model, val_dl, criterion)
        history['train_loss'].append(tr_loss)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        if val_acc > best['val_acc']:
            best['val_acc'] = val_acc
            best['state'] = {k: v.detach().cpu() for k, v in model.state_dict().items()}
        print(f"[{name}] Epoch {ep}/{epochs} | train_loss={tr_loss:.4f} val_loss={val_loss:.4f} val_acc={val_acc:.4f}")
    elapsed = time.time() - t0
    return history, best, elapsed

Device: cuda


In [4]:
# @title 4) Data (CIFAR10 or ImageFolder)
from torchvision import transforms, datasets

DATASET = "CIFAR10"  # @param ["CIFAR10", "IMAGEFOLDER"]
train_tfms = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
val_tfms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])

if DATASET == "CIFAR10":
    train_ds = datasets.CIFAR10(root='./data', train=True, download=True, transform=train_tfms)
    val_ds   = datasets.CIFAR10(root='./data', train=False, download=True, transform=val_tfms)
    NUM_CLASSES = 10
else:
    # Put your data into data/train and data/val with class subfolders
    train_ds = datasets.ImageFolder('data/train', transform=train_tfms)
    val_ds   = datasets.ImageFolder('data/val', transform=val_tfms)
    NUM_CLASSES = len(train_ds.classes)

train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
val_dl   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)

len_train, len_val = len(train_ds), len(val_ds)
print("Classes:", NUM_CLASSES, "| Train size:", len_train, "| Val size:", len_val)

100%|██████████| 170M/170M [01:08<00:00, 2.49MB/s]


Classes: 10 | Train size: 50000 | Val size: 10000


## 5) Task 1 — **Baseline from Scratch** (max 15 pts)
Train **ResNet18 without pretrained weights** or your small CNN.
We score by validation accuracy thresholds.

In [5]:
# @title Run Task 1 (Scratch)
import torch.nn as nn
from torchvision import models

def build_scratch_model(num_classes=NUM_CLASSES):
    m = models.resnet18(weights=None)
    m.fc = nn.Linear(m.fc.in_features, num_classes)
    return m

criterion = nn.CrossEntropyLoss()
scratch_model = build_scratch_model().to(DEVICE)
opt = # YOUR CODE HERE
hist_scratch, best_scratch, time_scratch = run_experiment("scratch", scratch_model, train_dl, val_dl, opt, criterion, epochs=NUM_EPOCHS_BASE)
scratch_acc = max(hist_scratch['val_acc'])

# Scoring (thresholds for CIFAR10 quick run)
t1 = 0
if scratch_acc >= 0.35: t1 = 15
elif scratch_acc >= 0.25: t1 = 10
elif scratch_acc >= 0.15: t1 = 5
else: t1 = 0

raw_score += t1
print(f"Task1 val_acc={scratch_acc:.4f} → +{t1} pts (raw_score={raw_score})")

[scratch] Epoch 1/3 | train_loss=1.4484 val_loss=1.1164 val_acc=0.6025
[scratch] Epoch 2/3 | train_loss=0.9191 val_loss=0.8314 val_acc=0.7080
[scratch] Epoch 3/3 | train_loss=0.7054 val_loss=0.6511 val_acc=0.7822
Task1 val_acc=0.7822 → +15 pts (raw_score=15.0)


## 6) Task 2 — **Feature Extraction** (max 30 pts)
Freeze all convolutional layers of a pretrained ResNet18 and train only the classifier head.

In [6]:
# @title Run Task 2 (Feature Extraction)
def build_feature_extractor(num_classes=NUM_CLASSES):
    m = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
    for p in m.parameters():
        p.requires_grad = False
    m.fc = nn.Linear(m.fc.in_features, num_classes)
    return m

fe_model = build_feature_extractor().to(DEVICE)
fe_params = [p for p in fe_model.parameters() if p.requires_grad]
opt_fe = # YOUR CODE HERE
hist_fe, best_fe, time_fe = run_experiment("feature_extract", fe_model, train_dl, val_dl, opt_fe, criterion, epochs=NUM_EPOCHS_BASE)
fe_acc = max(hist_fe['val_acc'])

# Scoring: expect higher than scratch
t2 = 0
if fe_acc >= 0.60: t2 = 30
elif fe_acc >= 0.50: t2 = 22
elif fe_acc >= 0.40: t2 = 15
else: t2 = 5

raw_score += t2
print(f"Task2 val_acc={fe_acc:.4f} → +{t2} pts (raw_score={raw_score})")

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 223MB/s]


[feature_extract] Epoch 1/3 | train_loss=0.9338 val_loss=0.7106 val_acc=0.7571
[feature_extract] Epoch 2/3 | train_loss=0.7291 val_loss=0.6771 val_acc=0.7653
[feature_extract] Epoch 3/3 | train_loss=0.7016 val_loss=0.6627 val_acc=0.7722
Task2 val_acc=0.7722 → +30 pts (raw_score=45.0)


## 7) Task 3 — **Partial Fine-Tuning** (max 30 pts)
Unfreeze `layer4` + head, use discriminative LRs and OneCycleLR; light augment already added.

In [7]:
# @title Run Task 3 (Partial Fine-Tuning)
def build_partial_ft(num_classes=NUM_CLASSES):
    m = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
    for p in m.parameters(): p.requires_grad = False
    for p in m.layer4.parameters(): p.requires_grad = True
    m.fc = nn.Linear(m.fc.in_features, num_classes)
    return m

pft_model = build_partial_ft().to(DEVICE)
params = [
    {'params': [p for n,p in pft_model.named_parameters() if p.requires_grad and not n.startswith('fc')], 'lr': 1e-4},
    {'params': pft_model.fc.parameters(), 'lr': 1e-3},
]
opt_pft = # YOUR CODE HERE
steps_per_epoch = max(1, len(train_dl))
sched_pft = OneCycleLR(opt_pft, max_lr=[1e-4, 1e-3], epochs=NUM_EPOCHS_BASE, steps_per_epoch=steps_per_epoch)
hist_pft, best_pft, time_pft = run_experiment("partial_finetune", pft_model, train_dl, val_dl, opt_pft, criterion, scheduler=sched_pft, epochs=NUM_EPOCHS_BASE)
pft_acc = max(hist_pft['val_acc'])

t3 = 0
if pft_acc >= 0.65: t3 = 30
elif pft_acc >= 0.55: t3 = 22
elif pft_acc >= 0.45: t3 = 15
else: t3 = 5

raw_score += t3
print(f"Task3 val_acc={pft_acc:.4f} → +{t3} pts (raw_score={raw_score})")

[partial_finetune] Epoch 1/3 | train_loss=0.8432 val_loss=0.3972 val_acc=0.8642
[partial_finetune] Epoch 2/3 | train_loss=0.3250 val_loss=0.2891 val_acc=0.9008
[partial_finetune] Epoch 3/3 | train_loss=0.2177 val_loss=0.2650 val_acc=0.9094
Task3 val_acc=0.9094 → +30 pts (raw_score=75.0)


## 8) Task 4 — **Full Fine-Tuning** (max 15 pts)
Unfreeze the whole model; use OneCycleLR (or cosine annealing).

In [8]:
# @title Run Task 4 (Full Fine-Tuning)
def build_full_ft(num_classes=NUM_CLASSES):
    m = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
    m.fc =  # YOUR CODE HERE
    return m

fft_model = build_full_ft().to(DEVICE)
opt_fft = optim.AdamW(fft_model.parameters(), lr=5e-4, weight_decay=1e-4)
sched_fft = OneCycleLR(opt_fft, max_lr=5e-4, epochs=NUM_EPOCHS_BASE, steps_per_epoch=max(1, len(train_dl)))
hist_fft, best_fft, time_fft = run_experiment("full_finetune", fft_model, train_dl, val_dl, opt_fft, criterion, scheduler=sched_fft, epochs=NUM_EPOCHS_BASE)
fft_acc = max(hist_fft['val_acc'])

t4 = 0
if fft_acc >= 0.68: t4 = 15
elif fft_acc >= 0.58: t4 = 11
elif fft_acc >= 0.48: t4 = 7
else: t4 = 3

raw_score += t4
print(f"Task4 val_acc={fft_acc:.4f} → +{t4} pts (raw_score={raw_score})")

[full_finetune] Epoch 1/3 | train_loss=0.5848 val_loss=0.4217 val_acc=0.8570
[full_finetune] Epoch 2/3 | train_loss=0.2858 val_loss=0.2204 val_acc=0.9242
[full_finetune] Epoch 3/3 | train_loss=0.1223 val_loss=0.1389 val_acc=0.9521
Task4 val_acc=0.9521 → +15 pts (raw_score=90.0)


## 9) Bonus (up to +10)
- Domain shift or Grad-CAM analysis.
- Set `bonus_points` (0..10) below if completed and documented in the notebook.

In [9]:
# @title Bonus (manual)
bonus_points = 0.0  # set 0..10 after completing bonus work
raw_score = min(100.0, raw_score + float(bonus_points))
print("raw_score (with optional bonus, capped at 100) →", raw_score)

raw_score (with optional bonus, capped at 100) → 90.0


In [10]:
# @title 10) Summary of Runs
summary = {
    "scratch_acc": float(max(hist_scratch['val_acc'])),
    "feature_extract_acc": float(max(hist_fe['val_acc'])),
    "partial_ft_acc": float(max(hist_pft['val_acc'])),
    "full_ft_acc": float(max(hist_fft['val_acc'])),
    "epochs_per_task": int(NUM_EPOCHS_BASE),
    "device": DEVICE,
}
print(json.dumps(summary, indent=2))

{
  "scratch_acc": 0.7822,
  "feature_extract_acc": 0.7722,
  "partial_ft_acc": 0.9094,
  "full_ft_acc": 0.9521,
  "epochs_per_task": 3,
  "device": "cuda"
}


In [11]:
# @title 11) Finalize & Grade (Penalty + JSON)
import json

# применяем штраф
try:
    pf = penalty_fraction(start_dt, due_dt, submission_dt)
except NameError:
    from datetime import timezone
    pf = 0.0
# ✅ Итоговый результат
max_points=100
final_score = max(0.0, raw_score * (1.0 - min(1.0, pf)))

print(f"Сырой балл: {raw_score}/{max_points}")
print(f"Штраф (доля): {pf:.4f}")
print(f"Итоговый балл после штрафа: {final_score:.2f}/{max_points}")

# Последняя строка — JSON, который читает harness
final = {
    "name": full_name,
    "group": student_group,
    "assignment": assignment_id,
    "score": float(final_score)
}

print(json.dumps(final, ensure_ascii=False))

Сырой балл: 90.0/100
Штраф (доля): 0.0000
Итоговый балл после штрафа: 90.00/100
{"name": "Doe John", "group": "11-111", "assignment": "HW_MISMATCH_01", "score": 90.0}
