# **Day 1 – Dataset Curation & Blurring Pipeline**

### **Mount Google Drive and Setup Paths**

In [None]:
from google.colab import drive
import os

# Attach Google Drive so that everything written below lives on Drive
drive.mount('/content/drive')

# Project root on Drive; all dataset folders sit under <project_root>/data
project_root = '/content/drive/MyDrive/ImageSharpening_KD'
data_root = os.path.join(project_root, 'data')

# Relative dataset folders: the raw images plus mirrored sharp/blurry splits
dirs = ['whole_dataset'] + [
    f'{kind}/{split}'
    for kind in ('sharp', 'blurry')
    for split in ('train/train', 'train/test', 'benchmark')
]

# Build the folder tree (exist_ok makes re-running this cell harmless)
for rel_dir in dirs:
    os.makedirs(os.path.join(data_root, rel_dir), exist_ok=True)

print("Folder structure created.")


Mounted at /content/drive
Folder structure created.


### **Download DIV2K Dataset**

In [None]:
# Download the DIV2K high-resolution training archive into the current working directory
!wget -O DIV2K_train_HR.zip https://data.vision.ee.ethz.ch/cvl/DIV2K/DIV2K_train_HR.zip
# Extract quietly into temp_DIV2K/ (the images end up in temp_DIV2K/DIV2K_train_HR)
!unzip -q DIV2K_train_HR.zip -d temp_DIV2K

import shutil

# Move all original PNGs to whole_dataset
whole_dataset_path = os.path.join(data_root, 'whole_dataset')
extracted_dir = 'temp_DIV2K/DIV2K_train_HR'
for file in os.listdir(extracted_dir):
    # Pass the full destination *file* path rather than the directory:
    # shutil.move() raises shutil.Error when the target directory already
    # contains an entry with the same name, which made this cell fail on re-run.
    shutil.move(os.path.join(extracted_dir, file),
                os.path.join(whole_dataset_path, file))

print("DIV2K images moved to whole_dataset/")


--2025-06-24 15:13:54--  https://data.vision.ee.ethz.ch/cvl/DIV2K/DIV2K_train_HR.zip
Resolving data.vision.ee.ethz.ch (data.vision.ee.ethz.ch)... 129.132.52.178, 2001:67c:10ec:36c2::178
Connecting to data.vision.ee.ethz.ch (data.vision.ee.ethz.ch)|129.132.52.178|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3530603713 (3.3G) [application/zip]
Saving to: ‘DIV2K_train_HR.zip’


2025-06-24 15:16:53 (18.9 MB/s) - ‘DIV2K_train_HR.zip’ saved [3530603713/3530603713]

DIV2K images moved to whole_dataset/


### **Blur and Crop to Patches (256x256)**

In [None]:
import cv2
import numpy as np
from PIL import Image
import random
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# Patch extraction settings
PATCH_SIZE = 256     # side length, in pixels, of each square patch
JPEG_QUALITY = 90    # quality value passed to PIL when the patches are saved

# Gather every PNG in whole_dataset; sorting keeps the seeded splits reproducible
png_names = [name for name in os.listdir(whole_dataset_path) if name.endswith('.png')]
image_paths = sorted(os.path.join(whole_dataset_path, name) for name in png_names)

# First split: hold out 10% of the images as the benchmark set
train_paths, benchmark_paths = train_test_split(
    image_paths, test_size=0.1, random_state=42
)

# Second split: divide the remaining images 80% train / 20% test
train_split, test_split = train_test_split(
    train_paths, test_size=0.2, random_state=42
)

def process_and_save(image_path, sharp_dir, blurry_dir):
    """Tile one image into square patches and save sharp/blurry JPEG pairs.

    The image is cut into non-overlapping PATCH_SIZE x PATCH_SIZE tiles
    (partial tiles at the right/bottom edges are dropped). For every tile a
    degraded copy is produced by bicubic downscaling to a quarter of the side
    length and bicubic upscaling back. Both versions are written as JPEG with
    quality JPEG_QUALITY under the same file name
    ``<image stem>_<row offset>_<col offset>.jpg``.

    Args:
        image_path: Path of the source image, read with ``cv2.imread``.
        sharp_dir: Existing directory that receives the original tiles.
        blurry_dir: Existing directory that receives the degraded tiles.

    Raises:
        ValueError: If ``cv2.imread`` cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising
        raise ValueError(f"Could not read image: {image_path}")
    h, w = img.shape[:2]

    # Derive the stem with splitext so the output always ends in ".jpg",
    # whatever the source extension or its letter case is.
    stem = os.path.splitext(os.path.basename(image_path))[0]
    low_res = (PATCH_SIZE // 4, PATCH_SIZE // 4)
    full_res = (PATCH_SIZE, PATCH_SIZE)

    # Crop into non-overlapping PATCH_SIZE x PATCH_SIZE patches
    for i in range(0, h - PATCH_SIZE + 1, PATCH_SIZE):
        for j in range(0, w - PATCH_SIZE + 1, PATCH_SIZE):
            patch = img[i:i+PATCH_SIZE, j:j+PATCH_SIZE]

            # Sharp patch: convert BGR -> RGB before handing it to PIL
            sharp_img = Image.fromarray(cv2.cvtColor(patch, cv2.COLOR_BGR2RGB))

            # Blurry patch: bicubic down-scale followed by bicubic up-scale
            downscaled = cv2.resize(patch, low_res, interpolation=cv2.INTER_CUBIC)
            upscaled = cv2.resize(downscaled, full_res, interpolation=cv2.INTER_CUBIC)
            blurry_img = Image.fromarray(cv2.cvtColor(upscaled, cv2.COLOR_BGR2RGB))

            # Image stem + tile offsets identify the pair in both folders
            name = f'{stem}_{i}_{j}.jpg'
            sharp_img.save(os.path.join(sharp_dir, name), quality=JPEG_QUALITY)
            blurry_img.save(os.path.join(blurry_dir, name), quality=JPEG_QUALITY)

# One entry per split: (image paths, progress-bar label, sub-folder under sharp/ and blurry/)
split_jobs = [
    (train_split, "Processing training patches", 'train/train'),
    (test_split, "Processing testing patches", 'train/test'),
    (benchmark_paths, "Processing benchmark patches", 'benchmark'),
]

# Run the patch pipeline split by split, in the order listed above
for split_paths, label, subdir in split_jobs:
    sharp_out = os.path.join(data_root, f'sharp/{subdir}')
    blurry_out = os.path.join(data_root, f'blurry/{subdir}')
    for path in tqdm(split_paths, desc=label):
        process_and_save(path, sharp_out, blurry_out)

print("✅ All sharp-blurry patches created and stored.")


Processing training patches: 100%|██████████| 576/576 [13:12<00:00,  1.38s/it]
Processing testing patches: 100%|██████████| 144/144 [03:16<00:00,  1.37s/it]
Processing benchmark patches: 100%|██████████| 80/80 [02:06<00:00,  1.58s/it]

✅ All sharp-blurry patches created and stored.





### **Dataset Structure & Image Count Validation**

In [None]:
import os

# Define dataset folders to check
folder_check_list = [
    'sharp/train/train',
    'sharp/train/test',
    'sharp/benchmark',
    'blurry/train/train',
    'blurry/train/test',
    'blurry/benchmark'
]

print("🔍 Verifying dataset storage in Google Drive...\n")

# List every folder exactly once and remember its .jpg count, so the total and
# the empty-folder check below are both derived from the same numbers.
image_counts = {}
for folder in folder_check_list:
    path = os.path.join(data_root, folder)
    image_counts[folder] = sum(1 for f in os.listdir(path) if f.endswith('.jpg'))
    print(f"📁 {folder:<25} → {image_counts[folder]} images")

total_images = sum(image_counts.values())
print("\n✅ Total image patches saved:", total_images)

# Optional: check if drive path really exists (no temp fallback)
if "/content/drive" in data_root and os.path.exists(data_root):
    print("✅ Data saved permanently in Google Drive ✔️")
else:
    print("❌ WARNING: Dataset not saved in Drive! Check mount path.")

# Optional: assert all folders have data.
# A folder is reported as empty when it holds no .jpg patches; stray
# non-image files no longer make it look populated.
empty_folders = [f for f in folder_check_list if image_counts[f] == 0]
if empty_folders:
    print("\n⚠️ Empty folders found:")
    for f in empty_folders:
        print(f"   - {f}")
else:
    print("✅ All folders contain image data.")


🔍 Verifying dataset storage in Google Drive...

📁 sharp/train/train         → 20111 images
📁 sharp/train/test          → 5005 images
📁 sharp/benchmark           → 2842 images
📁 blurry/train/train        → 20111 images
📁 blurry/train/test         → 5005 images
📁 blurry/benchmark          → 2842 images

✅ Total image patches saved: 55916
✅ Data saved permanently in Google Drive ✔️
✅ All folders contain image data.


# **DAY 2 – SwinIR Teacher Inference**

### **Enable GPU**

In [None]:
import torch

# Report GPU availability. The device name is only queried when CUDA is
# present, because torch.cuda.get_device_name(0) raises on a CPU-only runtime.
gpu_available = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(0) if gpu_available else None
gpu_available, gpu_name


(True, 'Tesla T4')

In [None]:
# Install the Python dependencies for the teacher-inference section (quiet mode, versions not pinned)
!pip install -q basicsr einops opencv-python scikit-image Pillow tqdm

# Clone only if not already cloned
import os
if not os.path.exists('/content/SwinIR'):
    !git clone https://github.com/JingyunLiang/SwinIR.git
# Make the cloned repository the notebook's working directory
%cd /content/SwinIR


/content/SwinIR


### **Set Up Paths & Output Folders**

In [None]:
from google.colab import drive
import os

# Attach Google Drive
drive.mount('/content/drive')

# Project locations on Drive
project_root = '/content/drive/MyDrive/ImageSharpening_KD'
data_root = os.path.join(project_root, 'data')
output_root = os.path.join(project_root, 'outputs/teacher_output')

# One teacher-output folder per dataset split (no-op when they already exist)
for split_name in ('train', 'test', 'benchmark'):
    os.makedirs(os.path.join(output_root, split_name), exist_ok=True)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from basicsr.archs.swinir_arch import SwinIR

# Architecture settings for the SwinIR teacher, collected in one place.
# The checkpoint is later loaded with strict=True, so these must match it.
swinir_config = dict(
    upscale=4,
    in_chans=3,
    img_size=64,
    window_size=8,
    img_range=1.0,
    depths=[6, 6, 6, 6, 6, 6],
    embed_dim=180,
    num_heads=[6, 6, 6, 6, 6, 6],
    mlp_ratio=2,
    upsampler='nearest+conv',  # ✅ this upsampler choice was the key fix
    resi_connection='1conv',
)

model = SwinIR(**swinir_config)


In [None]:
# Select the compute device here so this cell does not depend on a `device`
# variable left behind by an earlier session.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# NOTE(review): `model_path` is not assigned in any cell visible in this
# notebook. Fail with an actionable message instead of a bare NameError.
if 'model_path' not in globals():
    raise NameError(
        "model_path is not defined: set it to the path of the SwinIR "
        "checkpoint (.pth) before running this cell"
    )

# Load checkpoint
# NOTE(review): torch.load unpickles the file; only load checkpoints obtained
# from a trusted source.
checkpoint = torch.load(model_path, map_location=device)
checkpoint = checkpoint.get('params_ema', checkpoint)  # fallback if no 'params_ema'

# strict=True: every key must match the architecture built above
model.load_state_dict(checkpoint, strict=True)
model.eval().to(device)

print("✅ Model loaded with 'nearest+conv' upsampler.")


✅ Model loaded with 'nearest+conv' upsampler.


In [None]:
import os
from PIL import Image
from tqdm import tqdm
import torch
import torchvision.transforms as transforms
import numpy as np
from skimage.metrics import structural_similarity as compare_ssim
from skimage.metrics import peak_signal_noise_ratio as compare_psnr

### **Define Paths & Transforms**

In [None]:
# Base locations on Drive
project_root = '/content/drive/MyDrive/ImageSharpening_KD'
data_root = os.path.join(project_root, 'data')
output_root = os.path.join(project_root, 'outputs/teacher_output')
log_dir = os.path.join(project_root, 'logs')
os.makedirs(log_dir, exist_ok=True)

# Where each split lives below the sharp/ and blurry/ trees
split_subdirs = {
    'train': 'train/train',
    'test': 'train/test',
    'benchmark': 'benchmark',
}

# Blurry inputs fed to the teacher, keyed by split
input_folders = {
    split: os.path.join(data_root, f'blurry/{subdir}')
    for split, subdir in split_subdirs.items()
}

# Sharp ground-truth counterparts, keyed by split
gt_folders = {
    split: os.path.join(data_root, f'sharp/{subdir}')
    for split, subdir in split_subdirs.items()
}

# Destination of the teacher predictions, keyed by split
output_folders = {
    split: os.path.join(output_root, split)
    for split in split_subdirs
}

# Make sure every prediction folder exists
for out_dir in output_folders.values():
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# torchvision converters used by run_inference: PIL image -> tensor for the
# model input, and tensor -> PIL image for the model output
to_tensor = transforms.ToTensor()
to_pil = transforms.ToPILImage()


### **Define the Inference Function**

In [None]:
def run_inference(input_dir, output_dir, gt_dir, max_images=100, log_file=None):
    """Run the teacher model on a folder of blurry images and score the output.

    Relies on the notebook-level names ``model``, ``device``, ``to_tensor``
    and ``to_pil``. Each prediction is saved as JPEG (quality 95) in
    ``output_dir`` under the input's file name, then compared with the
    same-named image in ``gt_dir``.

    Args:
        input_dir: Folder with the blurry ``.jpg`` inputs.
        output_dir: Existing folder that receives the predictions.
        gt_dir: Folder with ground-truth images named like the inputs.
        max_images: Only the first ``max_images`` files (sorted by name) are
            processed; ``None`` processes all of them.
        log_file: Optional path; when given, the averaged scores are written
            to it, overwriting any existing file.

    Returns:
        Tuple ``(avg_ssim, avg_psnr)`` averaged over the processed images.

    Raises:
        ValueError: If no ``.jpg`` file is selected from ``input_dir``.
    """
    files = sorted(f for f in os.listdir(input_dir) if f.endswith('.jpg'))[:max_images]
    if not files:
        # Fail early with a clear message; averaging over zero files below
        # would otherwise end in a ZeroDivisionError.
        raise ValueError(f"No .jpg images found in {input_dir}")

    total_ssim = 0
    total_psnr = 0

    for fname in tqdm(files, desc=f"Processing {os.path.basename(input_dir)}"):
        # Load input and GT
        blurry = Image.open(os.path.join(input_dir, fname)).convert('RGB')
        sharp_gt = Image.open(os.path.join(gt_dir, fname)).convert('RGB')

        # Convert to tensor and add the batch dimension
        blurry_tensor = to_tensor(blurry).unsqueeze(0).to(device)

        # Run SwinIR model; squeeze(0) removes only the batch dimension
        with torch.no_grad():
            pred_tensor = model(blurry_tensor).squeeze(0).cpu().clamp(0, 1)

        # Convert output to image
        pred_img = to_pil(pred_tensor)
        pred_img.save(os.path.join(output_dir, fname), quality=95)

        # Resize GT to match the model output size.
        # NOTE(review): the model is built with upscale=4, so the prediction is
        # presumably larger than the ground-truth patch and the metrics are
        # computed against a bicubically interpolated GT — confirm this is the
        # intended comparison.
        sharp_gt_resized = sharp_gt.resize(pred_img.size, Image.BICUBIC)

        # Convert both to numpy for SSIM/PSNR
        pred_np = np.array(pred_img)
        gt_np = np.array(sharp_gt_resized)

        # Calculate metrics
        ssim_val = compare_ssim(pred_np, gt_np, channel_axis=-1)
        psnr_val = compare_psnr(gt_np, pred_np)

        total_ssim += ssim_val
        total_psnr += psnr_val

    avg_ssim = total_ssim / len(files)
    avg_psnr = total_psnr / len(files)

    print(f"\n📊 {os.path.basename(output_dir).capitalize()} Results → SSIM: {avg_ssim:.4f} | PSNR: {avg_psnr:.2f} dB")

    if log_file:
        with open(log_file, 'w') as f:
            f.write(f"SSIM: {avg_ssim:.4f}\nPSNR: {avg_psnr:.2f} dB\n")

    return avg_ssim, avg_psnr


### **Run Inference on All Sets**

In [None]:
# Image caps per split, kept small enough for the Colab free tier
image_caps = {'train': 500, 'test': 200}
for split_name, cap in image_caps.items():
    run_inference(
        input_folders[split_name],
        output_folders[split_name],
        gt_folders[split_name],
        max_images=cap,
    )

# Benchmark run: the only one that also writes its averaged scores to a log file.
# Left as the cell's final expression so the returned (SSIM, PSNR) tuple is displayed.
benchmark_log = os.path.join(log_dir, 'teacher_benchmark_scores.txt')
run_inference(
    input_folders['benchmark'],
    output_folders['benchmark'],
    gt_folders['benchmark'],
    max_images=100,
    log_file=benchmark_log,
)


Processing train: 100%|██████████| 500/500 [23:03<00:00,  2.77s/it]



📊 Train Results → SSIM: 0.7238 | PSNR: 24.61 dB


Processing test: 100%|██████████| 200/200 [09:34<00:00,  2.87s/it]



📊 Test Results → SSIM: 0.7851 | PSNR: 28.57 dB


Processing benchmark: 100%|██████████| 100/100 [05:12<00:00,  3.13s/it]


📊 Benchmark Results → SSIM: 0.7549 | PSNR: 26.53 dB





(0.7549359185850991, 26.530614450982355)

# **Day 3 – Student Model (Mini-UNet) + Baseline L1 Training**

In [None]:
import os
import sys

# Mount Drive only when it is not attached yet
if os.path.ismount('/content/drive'):
    print("📁 Drive already mounted.")
else:
    from google.colab import drive
    drive.mount('/content/drive')
    print("✅ Drive mounted successfully.")

# Register the project's code folder on sys.path exactly once
code_path = '/content/drive/MyDrive/ImageSharpening_KD/code'
if code_path in sys.path:
    print(f"📂 Code path already in sys.path: {code_path}")
else:
    sys.path.append(code_path)
    print(f"✅ Code path added: {code_path}")


📁 Drive already mounted.
📂 Code path already in sys.path: /content/drive/MyDrive/ImageSharpening_KD/code


In [None]:
# Run the student training script stored on Drive as a shell command
!python /content/drive/MyDrive/ImageSharpening_KD/code/train_student.py


Epoch 1/5 - Avg L1 Loss: 0.2897
Epoch 2/5 - Avg L1 Loss: 0.1643
Epoch 3/5 - Avg L1 Loss: 0.1167
Epoch 4/5 - Avg L1 Loss: 0.0822
Epoch 5/5 - Avg L1 Loss: 0.0788


# **Day 4 – Knowledge Distillation: Student Learns from Teacher**

In [None]:
# Attach Google Drive so the training script and data are reachable
from google.colab import drive
drive.mount('/content/drive')

# Make the project's code folder importable from this notebook kernel.
# NOTE(review): the script below is started with `!python`, i.e. as a shell
# command, so this sys.path entry presumably does not reach it — confirm
# whether the append is still needed.
import sys
sys.path.append('/content/drive/MyDrive/ImageSharpening_KD/code')

# Run the distillation training script stored on Drive
!python /content/drive/MyDrive/ImageSharpening_KD/code/train_distill.py


Mounted at /content/drive
Epoch 1/10 - Total Loss: 0.2547
Epoch 2/10 - Total Loss: 0.1497
Epoch 3/10 - Total Loss: 0.0949
Epoch 4/10 - Total Loss: 0.0711
Epoch 5/10 - Total Loss: 0.0648
Epoch 6/10 - Total Loss: 0.0572
Epoch 7/10 - Total Loss: 0.0579
Epoch 8/10 - Total Loss: 0.0569
Epoch 9/10 - Total Loss: 0.0563
Epoch 10/10 - Total Loss: 0.0546
Epoch 11/10 - Total Loss: 0.0511
Epoch 12/10 - Total Loss: 0.0524
Epoch 13/10 - Total Loss: 0.0484
Epoch 14/10 - Total Loss: 0.0504
Epoch 15/10 - Total Loss: 0.0463


In [None]:
# Attach Google Drive so the evaluation script and data are reachable
from google.colab import drive
drive.mount('/content/drive')

# Make the project's code folder importable from this notebook kernel
# (appended unconditionally, so re-running the cell adds a duplicate entry)
import sys
sys.path.append('/content/drive/MyDrive/ImageSharpening_KD/code')

# Run the student evaluation script stored on Drive
!python /content/drive/MyDrive/ImageSharpening_KD/code/evaluate_student_kd.py


Mounted at /content/drive
Evaluating: 100% 2842/2842 [21:51<00:00,  2.17it/s]

📊 Student Evaluation on Benchmark:
✅ SSIM: 0.6469
✅ PSNR: 24.33 dB


## **Another approach for increasing the SSIM score**

In [None]:
from google.colab import drive
import sys
import os

# Attach Google Drive unless it is already mounted
if os.path.ismount('/content/drive'):
    print("Drive already mounted ✅")
else:
    drive.mount('/content/drive')

# Put the project's code folder on sys.path, but only the first time
code_path = '/content/drive/MyDrive/ImageSharpening_KD/code'
path_missing = code_path not in sys.path
if path_missing:
    sys.path.append(code_path)
    print("Code path added ✅")


Mounted at /content/drive
Code path added ✅


In [None]:
# Required for perceptual loss
# NOTE(review): torch is imported earlier in this notebook without any install
# step, so this unpinned reinstall may be unnecessary — confirm before keeping it.
!pip install --quiet torch torchvision


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m127.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m29.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m57.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Run the VGG-based distillation training script stored on Drive as a shell command
!python /content/drive/MyDrive/ImageSharpening_KD/code/train_distill_vgg.py


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100% 528M/528M [00:03<00:00, 182MB/s]
Epoch 1/75 - Loss: 0.5438
Epoch 2/75 - Loss: 0.3896
Epoch 3/75 - Loss: 0.3226
Epoch 4/75 - Loss: 0.2765
Epoch 5/75 - Loss: 0.2436
Epoch 6/75 - Loss: 0.2219
Epoch 7/75 - Loss: 0.2145
Epoch 8/75 - Loss: 0.2123
Epoch 9/75 - Loss: 0.2083
Epoch 10/75 - Loss: 0.2060
💾 Saved checkpoint: /content/drive/MyDrive/ImageSharpening_KD/models/student_kd_vgg_epoch10.pt
Epoch 11/75 - Loss: 0.2041
Epoch 12/75 - Loss: 0.1969
Epoch 13/75 - Loss: 0.1997
Epoch 14/75 - Loss: 0.2004
Epoch 15/75 - Loss: 0.1976
Epoch 16/75 - Loss: 0.1993
Epoch 17/75 - Loss: 0.1978
Epoch 18/75 - Loss: 0.1945
Epoch 19/75 - Loss: 0.1924
Epoch 20/75 - Loss: 0.1946
💾 Saved checkpoint: /content/drive/MyDrive/ImageSharpening_KD/models/student_kd_vgg_epoch20.pt
Epoch 21/75 - Loss: 0.1901
Epoch 22/75 - Loss: 0.1944
Epoch 23/75 - Loss: 0.1909
Epoch 24/75 - Loss: 0.1893
Epoch 

In [None]:
# Run the student evaluation script again.
# NOTE(review): this is the same script as in the earlier evaluation cell;
# which checkpoint it loads is decided inside the script — confirm it points
# at the newly trained weights.
!python /content/drive/MyDrive/ImageSharpening_KD/code/evaluate_student_kd.py


Evaluating: 100% 2842/2842 [1:09:25<00:00,  1.47s/it]

📊 Student Evaluation on Benchmark:
✅ SSIM: 0.6560
✅ PSNR: 25.02 dB


In [None]:
from google.colab import drive
import sys, os

# Attach Google Drive
drive.mount('/content/drive')

# Put the project's code folder back on sys.path when it is missing
code_path = '/content/drive/MyDrive/ImageSharpening_KD/code'
needs_code_path = code_path not in sys.path
if needs_code_path:
    sys.path.append(code_path)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Make the project's code folder importable from this notebook kernel
# (appended unconditionally, so re-running the cell adds a duplicate entry)
sys.path.append('/content/drive/MyDrive/ImageSharpening_KD/code')

# Run the batch test/visualisation script stored on Drive as a shell command
!python /content/drive/MyDrive/ImageSharpening_KD/code/batch_test_show.py

🔁 Processing blurry images...
1.jpg → SSIM: 0.6614
Figure(1200x400)
2.jpg → SSIM: 0.7611
Figure(1200x400)
3.jpg → SSIM: 0.7052
Figure(1200x400)
4.jpg → SSIM: 0.6793
Figure(1200x400)
5.jpg → SSIM: 0.7354
Figure(1200x400)


### **Exporting SwinIR Project Without Dataset**