In [1]:
import os
import glob
import torch
import torch.nn as nn
from torchvision import transforms, models
from PIL import Image
from tqdm import tqdm # Progress bar

# ================= CONFIGURATION =================
# Path to the CORRUPTED testing videos
TEST_DATA_DIR = '/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset/testing_videos'

# Path where we will save the CLEANED videos
CLEAN_DATA_DIR = '/kaggle/working/cleaned_testing_videos'

MODEL_PATH = '/kaggle/input/flipercorrectorvlg/pytorch/default/1/rotnet_model(1).pth'
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# =================================================

def clean_dataset():
    print(f"Processing on: {DEVICE}")
    
    # 1. Load the Trained RotNet
    model = models.resnet18(pretrained=False) # No need to download weights again
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 2) # Matches our binary training
    
    model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
    model = model.to(DEVICE)
    model.eval()
    
    # Standard transform for the model input
    # Note: We do NOT augment here, just resize/norm
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # 2. Find all images
    # We walk through the directory to keep structure
    image_paths = sorted(glob.glob(os.path.join(TEST_DATA_DIR, '**', '*.jpg'), recursive=True))
    print(f"Found {len(image_paths)} frames to process.")
    
    # 3. Processing Loop
    flip_count = 0
    
    for img_path in tqdm(image_paths, desc="Cleaning"):
        # A. Setup paths
        # Get relative path (e.g., "01/frame_0001.jpg") to maintain structure
        rel_path = os.path.relpath(img_path, TEST_DATA_DIR)
        save_path = os.path.join(CLEAN_DATA_DIR, rel_path)
        
        # Create folder if not exists
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        
        # B. Predict Rotation
        image = Image.open(img_path).convert('RGB')
        input_tensor = preprocess(image).unsqueeze(0).to(DEVICE)
        
        with torch.no_grad():
            outputs = model(input_tensor)
            _, predicted = torch.max(outputs, 1)
            label = predicted.item()
            
        # C. Fix and Save
        # Label 0 = Upright (Keep as is)
        # Label 1 = Flipped (Needs 180 rotation to fix)
        
        if label == 1:
            # It was detected as Upside Down, so we rotate it -180 (or 180) to fix
            fixed_image = image.transpose(Image.FLIP_TOP_BOTTOM) 
            flip_count += 1
        else:
            fixed_image = image
            
        # Save the fixed image
        fixed_image.save(save_path)

    print("-" * 30)
    print("Cleaning Complete!")
    print(f"Total Images: {len(image_paths)}")
    print(f"Images Flipped/Fixed: {flip_count}")
    print(f"Cleaned dataset saved to: {CLEAN_DATA_DIR}")

if __name__ == "__main__":
    clean_dataset()

Processing on: cuda




Found 11706 frames to process.


Cleaning: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 11706/11706 [02:28<00:00, 78.80it/s]

------------------------------
Cleaning Complete!
Total Images: 11706
Images Flipped/Fixed: 1195
Cleaned dataset saved to: /kaggle/working/cleaned_testing_videos





In [2]:
import os
import shutil
import glob
import re
import cv2
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import sys

# ================= CONFIGURATION =================
# 1. WHERE ARE YOUR NOISY FRAMES?
# Adjust this to the root folder containing '01', '02', etc.
INPUT_ROOT = "/kaggle/working/cleaned_testing_videos" 

# 2. WHERE TO SAVE CLEAN FRAMES?
OUTPUT_ROOT = "/kaggle/working/denoised_dataset_test"

# 3. SETTINGS (The Winning Formula)
NOISE_SIGMA = 40 / 255.0  
BATCH_SIZE = 16
NUM_WORKERS = 4
# =================================================

# --- UTILS ---
def natural_sort_key(s):
    return [int(text) if text.isdigit() else text.lower() for text in re.split('([0-9]+)', s)]

def install_and_setup():
    if not os.path.exists("fastdvdnet"):
        print("üõ†Ô∏è Cloning FastDVDnet...")
        os.system("git clone https://github.com/m-tassano/fastdvdnet.git")
        os.system("pip install tensorboardX")
    
    if not os.path.exists("fastdvdnet/model/model.pth"):
        os.makedirs("fastdvdnet/model", exist_ok=True)
        os.system("wget -O fastdvdnet/model/model.pth https://github.com/m-tassano/fastdvdnet/raw/master/model.pth")

# --- DATASET ---
class FrameSequenceDataset(Dataset):
    def __init__(self, frame_paths):
        self.frame_paths = frame_paths
        self.total = len(frame_paths)
        
    def __len__(self):
        return self.total
    
    def __getitem__(self, idx):
        # Sliding Window of 5 frames
        indices = [max(0, min(self.total - 1, idx + offset)) for offset in range(-2, 3)]
        
        frames = []
        for i in indices:
            path = self.frame_paths[i]
            img = cv2.imread(path)
            if img is None:
                img = np.zeros((360, 640, 3), dtype=np.uint8) # Fallback size
            
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = img.astype(np.float32) / 255.0
            frames.append(img)
            
        stack = np.concatenate(frames, axis=2) # (H, W, 15)
        tensor = torch.from_numpy(stack).permute(2, 0, 1) # (15, H, W)
        return tensor

# --- MAIN LOOP ---
def run_mass_cleaning():
    install_and_setup()
    
    # Import Model
    sys.path.append("fastdvdnet")
    try:
        from models import FastDVDnet
    except ImportError:
        from fastdvdnet.models import FastDVDnet

    # Find all video folders (01, 02, ... 21)
    video_folders = sorted(glob.glob(os.path.join(INPUT_ROOT, "*")))
    # Filter to ensure they are directories
    video_folders = [f for f in video_folders if os.path.isdir(f)]
    
    print(f"üåç Found {len(video_folders)} videos to clean.")

    # Setup Model Once
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = FastDVDnet(num_input_frames=5)
    
    state_dict = torch.load("fastdvdnet/model/model.pth", map_location=device)
    new_state = {k.replace('module.', ''): v for k, v in state_dict.items()}
    model.load_state_dict(new_state)
    
    if torch.cuda.device_count() > 1:
        print(f"üî• Dual GPU Active")
        model = nn.DataParallel(model)
        
    model.to(device)
    model.eval()

    # --- LOOP OVER VIDEOS ---
    for vid_path in video_folders:
        vid_id = os.path.basename(vid_path)
        print(f"\nüé¨ Processing Video: {vid_id}")
        
        # 1. Get Frames
        files = glob.glob(os.path.join(vid_path, "*"))
        files = [f for f in files if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
        files.sort(key=lambda x: natural_sort_key(os.path.basename(x)))
        
        if not files:
            print(f"‚ö†Ô∏è Skipping {vid_id} (No images found)")
            continue
            
        # 2. Setup Output Folder
        save_dir = os.path.join(OUTPUT_ROOT, vid_id)
        if os.path.exists(save_dir): shutil.rmtree(save_dir)
        os.makedirs(save_dir)
        
        # 3. Process
        dataset = FrameSequenceDataset(files)
        loader = DataLoader(
            dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True
        )
        
        with torch.no_grad():
            for batch_idx, data in enumerate(tqdm(loader, desc=f"Cleaning {vid_id}")):
                data = data.to(device)
                B, C, H, W = data.shape
                
                noise_sigma = torch.full((B, 1, H, W), NOISE_SIGMA).to(device)
                
                clean_batch = model(data, noise_sigma)
                clean_batch = clean_batch.permute(0, 2, 3, 1).cpu().numpy()
                
                for i in range(B):
                    img = np.clip(clean_batch[i] * 255, 0, 255).astype(np.uint8)
                    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                    
                    # Standardized Name: frame_0000.jpg
                    global_idx = batch_idx * BATCH_SIZE + i
                    save_name = f"frame_{global_idx:04d}.jpg"
                    
                    cv2.imwrite(os.path.join(save_dir, save_name), img_bgr)
                    
    print(f"\n‚úÖ‚úÖ‚úÖ ALL VIDEOS CLEANED! Saved to: {OUTPUT_ROOT}")

if __name__ == "__main__":
    run_mass_cleaning()

üõ†Ô∏è Cloning FastDVDnet...


Cloning into 'fastdvdnet'...


Collecting tensorboardX
  Downloading tensorboardx-2.6.4-py3-none-any.whl.metadata (6.2 kB)
Downloading tensorboardx-2.6.4-py3-none-any.whl (87 kB)
   ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 87.2/87.2 kB 4.3 MB/s eta 0:00:00
Installing collected packages: tensorboardX
Successfully installed tensorboardX-2.6.4


--2026-01-02 15:07:48--  https://github.com/m-tassano/fastdvdnet/raw/master/model.pth
Resolving github.com (github.com)... 140.82.116.3
Connecting to github.com (github.com)|140.82.116.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/m-tassano/fastdvdnet/master/model.pth [following]
--2026-01-02 15:07:49--  https://raw.githubusercontent.com/m-tassano/fastdvdnet/master/model.pth
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9971551 (9.5M) [application/octet-stream]
Saving to: ‚Äòfastdvdnet/model/model.pth‚Äô

     0K .......... .......... .......... .......... ..........  0% 8.15M 1s
    50K .......... .......... .......... .......... ..........  1% 9.41M 1s
   100K .......... .......... ....

üåç Found 21 videos to clean.

üé¨ Processing Video: 01


Cleaning 01: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 32/32 [00:26<00:00,  1.22it/s]



üé¨ Processing Video: 02


Cleaning 02: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 76/76 [00:56<00:00,  1.34it/s]



üé¨ Processing Video: 03


Cleaning 03: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:34<00:00,  1.35it/s]



üé¨ Processing Video: 04


Cleaning 04: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 60/60 [00:44<00:00,  1.36it/s]



üé¨ Processing Video: 05


Cleaning 05: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 63/63 [00:46<00:00,  1.35it/s]



üé¨ Processing Video: 06


Cleaning 06: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [00:29<00:00,  1.34it/s]



üé¨ Processing Video: 07


Cleaning 07: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 37/37 [00:28<00:00,  1.29it/s]



üé¨ Processing Video: 08


Cleaning 08: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3/3 [00:02<00:00,  1.06it/s]



üé¨ Processing Video: 09


Cleaning 09: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 23/23 [00:18<00:00,  1.25it/s]



üé¨ Processing Video: 10


Cleaning 10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 46/46 [00:34<00:00,  1.34it/s]



üé¨ Processing Video: 11


Cleaning 11: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [00:23<00:00,  1.30it/s]



üé¨ Processing Video: 12


Cleaning 12: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 46/46 [00:34<00:00,  1.34it/s]



üé¨ Processing Video: 13


Cleaning 13: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 33/33 [00:25<00:00,  1.29it/s]



üé¨ Processing Video: 14


Cleaning 14: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 31/31 [00:24<00:00,  1.28it/s]



üé¨ Processing Video: 15


Cleaning 15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 46/46 [00:35<00:00,  1.30it/s]



üé¨ Processing Video: 16


Cleaning 16: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:35<00:00,  1.34it/s]



üé¨ Processing Video: 17


Cleaning 17: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 27/27 [00:20<00:00,  1.30it/s]



üé¨ Processing Video: 18


Cleaning 18: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:14<00:00,  1.25it/s]



üé¨ Processing Video: 19


Cleaning 19: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 15/15 [00:12<00:00,  1.20it/s]



üé¨ Processing Video: 20


Cleaning 20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:13<00:00,  1.29it/s]



üé¨ Processing Video: 21


Cleaning 21: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [00:05<00:00,  1.13s/it]


‚úÖ‚úÖ‚úÖ ALL VIDEOS CLEANED! Saved to: /kaggle/working/denoised_dataset_test





In [3]:
import os
import shutil
import glob
import re
import cv2
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import sys

# ================= CONFIGURATION =================
# 1. WHERE ARE YOUR NOISY FRAMES?
# Adjust this to the root folder containing '01', '02', etc.
INPUT_ROOT = "/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset/training_videos" 

# 2. WHERE TO SAVE CLEAN FRAMES?
OUTPUT_ROOT = "/kaggle/working/denoised_dataset_train"

# 3. SETTINGS (The Winning Formula)
NOISE_SIGMA = 40 / 255.0  
BATCH_SIZE = 16
NUM_WORKERS = 4
# =================================================

# --- UTILS ---
def natural_sort_key(s):
    return [int(text) if text.isdigit() else text.lower() for text in re.split('([0-9]+)', s)]

def install_and_setup():
    if not os.path.exists("fastdvdnet"):
        print("üõ†Ô∏è Cloning FastDVDnet...")
        os.system("git clone https://github.com/m-tassano/fastdvdnet.git")
        os.system("pip install tensorboardX")
    
    if not os.path.exists("fastdvdnet/model/model.pth"):
        os.makedirs("fastdvdnet/model", exist_ok=True)
        os.system("wget -O fastdvdnet/model/model.pth https://github.com/m-tassano/fastdvdnet/raw/master/model.pth")

# --- DATASET ---
class FrameSequenceDataset(Dataset):
    def __init__(self, frame_paths):
        self.frame_paths = frame_paths
        self.total = len(frame_paths)
        
    def __len__(self):
        return self.total
    
    def __getitem__(self, idx):
        # Sliding Window of 5 frames
        indices = [max(0, min(self.total - 1, idx + offset)) for offset in range(-2, 3)]
        
        frames = []
        for i in indices:
            path = self.frame_paths[i]
            img = cv2.imread(path)
            if img is None:
                img = np.zeros((360, 640, 3), dtype=np.uint8) # Fallback size
            
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = img.astype(np.float32) / 255.0
            frames.append(img)
            
        stack = np.concatenate(frames, axis=2) # (H, W, 15)
        tensor = torch.from_numpy(stack).permute(2, 0, 1) # (15, H, W)
        return tensor

# --- MAIN LOOP ---
def run_mass_cleaning():
    install_and_setup()
    
    # Import Model
    sys.path.append("fastdvdnet")
    try:
        from models import FastDVDnet
    except ImportError:
        from fastdvdnet.models import FastDVDnet

    # Find all video folders (01, 02, ... 21)
    video_folders = sorted(glob.glob(os.path.join(INPUT_ROOT, "*")))
    # Filter to ensure they are directories
    video_folders = [f for f in video_folders if os.path.isdir(f)]
    
    print(f"üåç Found {len(video_folders)} videos to clean.")

    # Setup Model Once
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = FastDVDnet(num_input_frames=5)
    
    state_dict = torch.load("fastdvdnet/model/model.pth", map_location=device)
    new_state = {k.replace('module.', ''): v for k, v in state_dict.items()}
    model.load_state_dict(new_state)
    
    if torch.cuda.device_count() > 1:
        print(f"üî• Dual GPU Active")
        model = nn.DataParallel(model)
        
    model.to(device)
    model.eval()

    # --- LOOP OVER VIDEOS ---
    for vid_path in video_folders:
        vid_id = os.path.basename(vid_path)
        print(f"\nüé¨ Processing Video: {vid_id}")
        
        # 1. Get Frames
        files = glob.glob(os.path.join(vid_path, "*"))
        files = [f for f in files if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
        files.sort(key=lambda x: natural_sort_key(os.path.basename(x)))
        
        if not files:
            print(f"‚ö†Ô∏è Skipping {vid_id} (No images found)")
            continue
            
        # 2. Setup Output Folder
        save_dir = os.path.join(OUTPUT_ROOT, vid_id)
        if os.path.exists(save_dir): shutil.rmtree(save_dir)
        os.makedirs(save_dir)
        
        # 3. Process
        dataset = FrameSequenceDataset(files)
        loader = DataLoader(
            dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True
        )
        
        with torch.no_grad():
            for batch_idx, data in enumerate(tqdm(loader, desc=f"Cleaning {vid_id}")):
                data = data.to(device)
                B, C, H, W = data.shape
                
                noise_sigma = torch.full((B, 1, H, W), NOISE_SIGMA).to(device)
                
                clean_batch = model(data, noise_sigma)
                clean_batch = clean_batch.permute(0, 2, 3, 1).cpu().numpy()
                
                for i in range(B):
                    img = np.clip(clean_batch[i] * 255, 0, 255).astype(np.uint8)
                    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                    
                    # Standardized Name: frame_0000.jpg
                    global_idx = batch_idx * BATCH_SIZE + i
                    save_name = f"frame_{global_idx:04d}.jpg"
                    
                    cv2.imwrite(os.path.join(save_dir, save_name), img_bgr)
                    
    print(f"\n‚úÖ‚úÖ‚úÖ ALL VIDEOS CLEANED! Saved to: {OUTPUT_ROOT}")

if __name__ == "__main__":
    run_mass_cleaning()

üåç Found 16 videos to clean.

üé¨ Processing Video: 01


Cleaning 01: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 41/41 [00:30<00:00,  1.32it/s]



üé¨ Processing Video: 02


Cleaning 02: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:37<00:00,  1.34it/s]



üé¨ Processing Video: 03


Cleaning 03: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 49/49 [00:37<00:00,  1.32it/s]



üé¨ Processing Video: 04


Cleaning 04: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 31/31 [00:24<00:00,  1.27it/s]



üé¨ Processing Video: 05


Cleaning 05: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 45/45 [00:33<00:00,  1.35it/s]



üé¨ Processing Video: 06


Cleaning 06: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 45/45 [00:33<00:00,  1.33it/s]



üé¨ Processing Video: 07


Cleaning 07: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 31/31 [00:24<00:00,  1.27it/s]



üé¨ Processing Video: 08


Cleaning 08: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 53/53 [00:39<00:00,  1.35it/s]



üé¨ Processing Video: 09


Cleaning 09: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 45/45 [00:33<00:00,  1.33it/s]



üé¨ Processing Video: 10


Cleaning 10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 46/46 [00:34<00:00,  1.34it/s]



üé¨ Processing Video: 11


Cleaning 11: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 48/48 [00:36<00:00,  1.32it/s]



üé¨ Processing Video: 12


Cleaning 12: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:08<00:00,  1.20it/s]



üé¨ Processing Video: 13


Cleaning 13: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 22/22 [00:17<00:00,  1.28it/s]



üé¨ Processing Video: 14


Cleaning 14: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 31/31 [00:24<00:00,  1.28it/s]



üé¨ Processing Video: 15


Cleaning 15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 22/22 [00:17<00:00,  1.23it/s]



üé¨ Processing Video: 16


Cleaning 16: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 15/15 [00:12<00:00,  1.22it/s]


‚úÖ‚úÖ‚úÖ ALL VIDEOS CLEANED! Saved to: /kaggle/working/denoised_dataset_train





In [4]:
!cd "/kaggle/working/"

In [5]:
import cv2
import numpy as np
import os
import glob
from tqdm import tqdm

# CONFIG
# Use your Neural Cleaned videos for best flow calculation
SOURCE_DIR = '/kaggle/working/denoised_dataset_train'
DEST_DIR = '/kaggle/working/training_optical_flow'

def extract_optical_flow():
    if not os.path.exists(DEST_DIR): os.makedirs(DEST_DIR)
    
    print("Generating Optical Flow Maps...")
    
    for vid in tqdm(sorted(os.listdir(SOURCE_DIR))):
        vid_path = os.path.join(SOURCE_DIR, vid)
        save_path = os.path.join(DEST_DIR, vid)
        if not os.path.isdir(vid_path): continue
        os.makedirs(save_path, exist_ok=True)
        
        frames = sorted(glob.glob(os.path.join(vid_path, '*.jpg')))
        prev_frame = cv2.imread(frames[0])
        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
        
        # Save first flow as black (no motion) to keep frame count same
        h, w = prev_gray.shape
        blank_flow = np.zeros((h, w, 3), dtype=np.uint8)
        cv2.imwrite(os.path.join(save_path, os.path.basename(frames[0])), blank_flow)
        
        for i in range(1, len(frames)):
            curr_frame = cv2.imread(frames[i])
            curr_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)
            
            # Calculate Dense Optical Flow (Farneback)
            flow = cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None, 
                                                0.5, 3, 15, 3, 5, 1.2, 0)
            
            # Visualize Flow as RGB Image
            # Magnitude and Angle
            mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
            
            # HSV encoding
            hsv = np.zeros_like(prev_frame)
            hsv[..., 1] = 255
            # Hue = Angle, Value = Magnitude (Speed)
            hsv[..., 0] = ang * 180 / np.pi / 2
            hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
            
            # Convert to RGB for saving
            rgb_flow = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
            
            cv2.imwrite(os.path.join(save_path, os.path.basename(frames[i])), rgb_flow)
            
            prev_gray = curr_gray

if __name__ == "__main__":
    extract_optical_flow()

Generating Optical Flow Maps...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 16/16 [11:05<00:00, 41.59s/it]


In [6]:
import os
import glob
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import gc

# ================= CONFIGURATION =================
# Path to your PRE-GENERATED Flow images
FLOW_TRAIN_DIR = '/kaggle/working/training_optical_flow' 
MODEL_SAVE_PATH = 'st_autoencoder_flow.pth'

BATCH_SIZE = 32  # Increased batch size since we are just loading images now
EPOCHS = 15
CLIP_LEN = 16
IMG_SIZE = 128
CHANNELS = 3     # RGB Flow maps
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# =================================================

# --- 1. DATASET (Loads Pre-Computed Flow) ---
class PrecomputedFlowDataset(Dataset):
    def __init__(self, root_dir, transform=None, clip_length=16):
        self.clips = []
        self.transform = transform
        
        # Find video folders inside the flow directory
        # e.g. /training_optical_flow/01/, /training_optical_flow/02/
        video_folders = sorted([f for f in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, f))])
        
        for vid in video_folders:
            vid_path = os.path.join(root_dir, vid)
            frames = sorted(glob.glob(os.path.join(vid_path, '*.jpg'))) # or *.png
            
            # Ensure enough frames for input + target
            if len(frames) < 2 * clip_length: continue
            
            # Stride 1 or 2 (Using 2 here to match previous logic)
            for i in range(0, len(frames) - (2 * clip_length) + 1, 2): 
                input_paths = frames[i : i + clip_length]
                target_paths = frames[i + clip_length : i + (2 * clip_length)]
                self.clips.append((input_paths, target_paths))
                
    def __len__(self): return len(self.clips)

    def __getitem__(self, idx):
        input_paths, target_paths = self.clips[idx]
        
        def load_clip(paths):
            # Load images. They are already RGB Flow maps.
            # Convert to RGB to ensure 3 channels
            clip = [Image.open(p).convert('RGB') for p in paths]
            
            if self.transform:
                clip = [self.transform(img) for img in clip]
                
            # Stack: List of (C, H, W) -> (T, C, H, W)
            # Permute to (C, T, H, W) for 3D Conv
            return torch.stack(clip, dim=0).permute(1, 0, 2, 3)
            
        return load_clip(input_paths), load_clip(target_paths)

# --- 2. MODEL (Same 3-Channel ST-AutoEncoder) ---
class STAutoEncoder_Flow(nn.Module):
    def __init__(self): 
        super(STAutoEncoder_Flow, self).__init__()
        # ENCODER (Input = 3 Channels)
        self.conv1 = nn.Conv3d(3, 32, 3, padding=1); self.bn1 = nn.BatchNorm3d(32); self.pool1 = nn.MaxPool3d(2, 2)
        self.conv2 = nn.Conv3d(32, 48, 3, padding=1); self.bn2 = nn.BatchNorm3d(48); self.pool2 = nn.MaxPool3d(2, 2)
        self.conv3 = nn.Conv3d(48, 64, 3, padding=1); self.bn3 = nn.BatchNorm3d(64); self.pool3 = nn.MaxPool3d(2, 2)
        self.conv4 = nn.Conv3d(64, 64, 3, padding=1); self.bn4 = nn.BatchNorm3d(64)
        self.relu = nn.LeakyReLU(0.1)
        
        # DECODER - Reconstruction
        self.rec_deconv1 = nn.ConvTranspose3d(64, 48, 3, 2, 1, 1); self.rec_bn1 = nn.BatchNorm3d(48)
        self.rec_deconv2 = nn.ConvTranspose3d(48, 32, 3, 2, 1, 1); self.rec_bn2 = nn.BatchNorm3d(32)
        self.rec_deconv3 = nn.ConvTranspose3d(32, 32, 3, 2, 1, 1); self.rec_bn3 = nn.BatchNorm3d(32)
        self.rec_final = nn.Conv3d(32, 3, 3, padding=1) 
        
        # DECODER - Prediction
        self.pred_deconv1 = nn.ConvTranspose3d(64, 48, 3, 2, 1, 1); self.pred_bn1 = nn.BatchNorm3d(48)
        self.pred_deconv2 = nn.ConvTranspose3d(48, 32, 3, 2, 1, 1); self.pred_bn2 = nn.BatchNorm3d(32)
        self.pred_deconv3 = nn.ConvTranspose3d(32, 32, 3, 2, 1, 1); self.pred_bn3 = nn.BatchNorm3d(32)
        self.pred_final = nn.Conv3d(32, 3, 3, padding=1)
        
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Encoder
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.pool1(x)
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.pool2(x)
        x = self.relu(self.bn3(self.conv3(x)))
        x = self.pool3(x)
        l = self.relu(self.bn4(self.conv4(x))) 
        
        # Reconstruction
        r = self.relu(self.rec_bn1(self.rec_deconv1(l)))
        r = self.relu(self.rec_bn2(self.rec_deconv2(r)))
        r = self.relu(self.rec_bn3(self.rec_deconv3(r)))
        r = self.sigmoid(self.rec_final(r))
        
        # Prediction
        p = self.relu(self.pred_bn1(self.pred_deconv1(l)))
        p = self.relu(self.pred_bn2(self.pred_deconv2(p)))
        p = self.relu(self.pred_bn3(self.pred_deconv3(p)))
        p = self.sigmoid(self.pred_final(p))
        
        return r, p

# --- 3. TRAINING LOOP ---
def train_precomputed_flow():
    torch.cuda.empty_cache(); gc.collect()
    print(f"Training STAE on Precomputed Flow Maps ({DEVICE})...")
    
    transform = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor() 
    ])
    
    # Using the new Dataset class
    dataset = PrecomputedFlowDataset(FLOW_TRAIN_DIR, transform=transform, clip_length=CLIP_LEN)
    
    # We can now use more workers because we aren't using OpenCV on CPU
    loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
    
    model = STAutoEncoder_Flow()
    if torch.cuda.device_count() > 1: model = nn.DataParallel(model)
    model = model.to(DEVICE)
    
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    criterion = nn.MSELoss()
    
    for epoch in range(EPOCHS):
        model.train()
        loop = tqdm(loader, desc=f"Ep {epoch+1}/{EPOCHS}")
        epoch_loss = 0
        
        for inp, tgt in loop:
            inp, tgt = inp.to(DEVICE), tgt.to(DEVICE)
            optimizer.zero_grad()
            
            rec, pred = model(inp)
            
            loss = criterion(rec, inp) + criterion(pred, tgt)
            
            loss.backward()
            optimizer.step()
            
            epoch_loss += loss.item()
            loop.set_postfix(loss=loss.item())
            
    # Save
    state = model.module.state_dict() if isinstance(model, nn.DataParallel) else model.state_dict()
    torch.save(state, MODEL_SAVE_PATH)
    print(f"‚úÖ DONE. Model saved to {MODEL_SAVE_PATH}")

if __name__ == "__main__":
    train_precomputed_flow()

Training STAE on Precomputed Flow Maps (cuda)...


Ep 1/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 137/137 [03:05<00:00,  1.35s/it, loss=0.0133]
Ep 2/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 137/137 [03:04<00:00,  1.35s/it, loss=0.0056]
Ep 3/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 137/137 [03:03<00:00,  1.34s/it, loss=0.00383]
Ep 4/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 137/137 [03:03<00:00,  1.34s/it, loss=0.00266]
Ep 5/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 137/137 [03:02<00:00,  1.34s/it, loss=0.00301]
Ep 6/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 137/137 [03:02<00:00,  1.33s/it, loss=0.00269]
Ep 7/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 137/137 [03:02<00:00,  1.33s/it, loss=0.00257]
Ep 8/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 137/137 [03:03<00:00,  1.34s/it, loss=0.00201]
Ep 9/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 137/137 [03:02<00:00,  1.33s/it, loss=0.00183]
Ep 10/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 137/137 [03:02<00:00,  1.33s/it, loss=0.0023]
Ep 11/15: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 137/137 [03:03<00:00,  1.

‚úÖ DONE. Model saved to st_autoencoder_flow.pth





In [7]:
import cv2
import numpy as np
import os
import glob
from tqdm import tqdm

# CONFIG
# Use your Neural Cleaned videos for best flow calculation
SOURCE_DIR = '/kaggle/working/denoised_dataset_test'
DEST_DIR = '/kaggle/working/testing_optical_flow'

def extract_optical_flow():
    if not os.path.exists(DEST_DIR): os.makedirs(DEST_DIR)
    
    print("Generating Optical Flow Maps...")
    
    for vid in tqdm(sorted(os.listdir(SOURCE_DIR))):
        vid_path = os.path.join(SOURCE_DIR, vid)
        save_path = os.path.join(DEST_DIR, vid)
        if not os.path.isdir(vid_path): continue
        os.makedirs(save_path, exist_ok=True)
        
        frames = sorted(glob.glob(os.path.join(vid_path, '*.jpg')))
        prev_frame = cv2.imread(frames[0])
        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
        
        # Save first flow as black (no motion) to keep frame count same
        h, w = prev_gray.shape
        blank_flow = np.zeros((h, w, 3), dtype=np.uint8)
        cv2.imwrite(os.path.join(save_path, os.path.basename(frames[0])), blank_flow)
        
        for i in range(1, len(frames)):
            curr_frame = cv2.imread(frames[i])
            curr_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)
            
            # Calculate Dense Optical Flow (Farneback)
            flow = cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None, 
                                                0.5, 3, 15, 3, 5, 1.2, 0)
            
            # Visualize Flow as RGB Image
            # Magnitude and Angle
            mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
            
            # HSV encoding
            hsv = np.zeros_like(prev_frame)
            hsv[..., 1] = 255
            # Hue = Angle, Value = Magnitude (Speed)
            hsv[..., 0] = ang * 180 / np.pi / 2
            hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
            
            # Convert to RGB for saving
            rgb_flow = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
            
            cv2.imwrite(os.path.join(save_path, os.path.basename(frames[i])), rgb_flow)
            
            prev_gray = curr_gray

if __name__ == "__main__":
    extract_optical_flow()

Generating Optical Flow Maps...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 21/21 [13:52<00:00, 39.65s/it]
