In [1]:
import cv2
import mediapipe as mp
import numpy as np
import open3d as o3d
import os

# Create folder to save captured images
save_path = "captured_images"
os.makedirs(save_path, exist_ok=True)

# Initialize the MediaPipe pose estimator and its landmark-drawing helper
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()
mp_drawing = mp.solutions.drawing_utils

# Paths of the frames written so far; saving stops once max_images is reached
captured_images = []
max_images = 30

# Open webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below would simply be skipped with no explanation.
    print("[ERROR] Could not access webcam.")

print("[INFO] Press 's' to start tracking and auto-capturing")

start_capture = False
frame_count = 0

# try/finally guarantees the camera, the preview window and the pose estimator are
# released even if processing raises or the cell is interrupted.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert OpenCV's BGR frame to RGB before handing it to the pose estimator
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = pose.process(rgb)

        # Frames are only annotated and saved while a pose is detected
        if results.pose_landmarks:
            mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            if start_capture and len(captured_images) < max_images:
                # NOTE(review): the image is written after draw_landmarks, so the skeleton
                # overlay is part of the saved file — confirm this is intended.
                img_name = os.path.join(save_path, f"frame_{frame_count}.jpg")
                cv2.imwrite(img_name, frame)
                captured_images.append(img_name)
                frame_count += 1
                # The progress text is drawn after saving, so it only appears in the preview
                cv2.putText(frame, f"Captured {len(captured_images)} / {max_images}", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv2.imshow("Camera Feed", frame)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('s'):
            # Ignore repeated presses so the start message is printed only once
            if not start_capture:
                start_capture = True
                print("[INFO] Tracking started. Move around the person.")
        elif key == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    pose.close()

print("[INFO] Image capture complete. Generating point cloud...")



Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
[INFO] Press 's' to start tracking and auto-capturing
[INFO] Tracking started. Move around the person.
[INFO] Tracking started. Move around the person.
[INFO] Image capture complete. Generating point cloud...


In [2]:
import cv2
import os
import subprocess
import shutil
import open3d as o3d
import time

# === CONFIG ===
image_dir = "colmap_images"
output_dir = "colmap_output"
num_images = 30
capture_duration_sec = 5
# Seconds between two consecutive captures, so num_images frames span capture_duration_sec
interval = capture_duration_sec / num_images
colmap_bin = r"C:\Users\vedhr\CODES\COLMAP\bin\colmap.exe"  # <-- Update if needed

# === CLEAN OLD FOLDERS ===
# Remove the results of any previous run so stale files cannot leak into this one
if os.path.exists(image_dir):
    shutil.rmtree(image_dir)
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(image_dir)
os.makedirs(output_dir)

# === STEP 1: WAIT FOR USER TO START AUTO-CAPTURE ===
print(f"[INFO] Webcam live. Press 's' to start auto-capture of {num_images} images in {capture_duration_sec} seconds.")
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("[ERROR] Could not access webcam.")
    # SystemExit stops the cell/script; the interactive exit() helper would shut down
    # the whole Jupyter kernel instead.
    raise SystemExit(1)

count = 0
capture_started = False
start_time = None

# try/finally releases the camera and the preview window on every exit path
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("[ERROR] Frame capture failed.")
            break

        # Draw the instructions on a copy: the untouched frame is what gets saved, so the
        # overlay text never ends up inside the images handed to COLMAP.
        preview = frame.copy()
        display_text = "Press 's' to start auto-capture" if not capture_started else f"Capturing: {count}/{num_images}"
        cv2.putText(preview, display_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        cv2.imshow("Webcam Preview", preview)

        key = cv2.waitKey(1) & 0xFF

        if not capture_started and key == ord('s'):
            print("[INFO] Starting auto-capture...")
            capture_started = True
            start_time = time.time()

        if capture_started:
            elapsed = time.time() - start_time
            if count < num_images:
                # Image number `count` is due count * interval seconds after the start
                if elapsed >= count * interval:
                    filename = os.path.join(image_dir, f"img_{count:03d}.jpg")
                    cv2.imwrite(filename, frame)
                    print(f"[INFO] Captured: {filename}")
                    count += 1
            else:
                print("[✅] Done capturing images.")
                break

        if key == ord('q'):
            print("[INFO] Capture cancelled by user.")
            raise SystemExit(0)
finally:
    cap.release()
    cv2.destroyAllWindows()

if count == 0:
    # Reached only when the camera stopped delivering frames before anything was saved
    print("[ERROR] No images were captured; nothing to reconstruct.")
    raise SystemExit(1)

# === STEP 2: RUN COLMAP (CUDA ENABLED) ===
database_path = os.path.join(output_dir, "database.db")
sparse_path = os.path.join(output_dir, "sparse")
dense_path = os.path.join(output_dir, "dense")
fused_ply = os.path.join(dense_path, "fused.ply")

def run_colmap():
    """Run the six COLMAP stages in order, ending with the fused point cloud.

    Every stage is launched as ``colmap_bin <stage> <options>`` with ``check=True``,
    so a non-zero exit status raises ``subprocess.CalledProcessError`` and the
    remaining stages are not started. Paths come from the module-level
    ``database_path``, ``image_dir``, ``sparse_path``, ``dense_path`` and
    ``fused_ply`` variables.
    """
    # The undistorter reads the "0" sub-folder of the mapper output
    # (presumably the first reconstructed model — verify against COLMAP's layout).
    first_model = os.path.join(sparse_path, "0")

    # (stage name, directory to create just before the stage or None, stage options)
    stages = (
        ("feature_extractor", None, (
            "--database_path", database_path,
            "--image_path", image_dir,
            "--ImageReader.single_camera", "1",
            "--SiftExtraction.use_gpu", "1",
        )),
        ("exhaustive_matcher", None, (
            "--database_path", database_path,
            "--SiftMatching.use_gpu", "1",
        )),
        ("mapper", sparse_path, (
            "--database_path", database_path,
            "--image_path", image_dir,
            "--output_path", sparse_path,
        )),
        ("image_undistorter", dense_path, (
            "--image_path", image_dir,
            "--input_path", first_model,
            "--output_path", dense_path,
            "--output_type", "COLMAP",
        )),
        ("patch_match_stereo", None, (
            "--workspace_path", dense_path,
            "--workspace_format", "COLMAP",
            "--PatchMatchStereo.geom_consistency", "true",
            "--PatchMatchStereo.max_image_size", "2000",
        )),
        ("stereo_fusion", None, (
            "--workspace_path", dense_path,
            "--workspace_format", "COLMAP",
            "--input_type", "geometric",
            "--output_path", fused_ply,
        )),
    )

    for stage, required_dir, options in stages:
        print(f"[INFO] Running COLMAP ({stage})...")
        if required_dir is not None:
            os.makedirs(required_dir, exist_ok=True)
        subprocess.run([colmap_bin, stage, *options], check=True)

try:
    run_colmap()
    print(f"[✅] Point cloud saved to: {fused_ply}")
except FileNotFoundError as exc:
    # Raised by subprocess when the COLMAP executable itself cannot be launched
    print(f"[❌] COLMAP executable not found: {colmap_bin}")
    raise SystemExit(1) from exc
except subprocess.CalledProcessError as exc:
    print("[❌] COLMAP failed. Check image quality and try again.")
    print(f"[❌] Details: {exc}")
    # SystemExit stops the cell/script; the interactive exit() helper would shut down
    # the whole Jupyter kernel instead.
    raise SystemExit(1) from exc

# === STEP 3: VISUALIZE POINT CLOUD ===
if os.path.exists(fused_ply):
    pcd = o3d.io.read_point_cloud(fused_ply)
    if pcd.has_points():
        print("[INFO] Opening point cloud viewer...")
        o3d.visualization.draw_geometries([pcd])
    else:
        # Avoid opening an empty viewer window when the file holds no points
        print("[❌] Point cloud file (fused.ply) contains no points.")
else:
    print("[❌] Point cloud file (fused.ply) not found.")


[INFO] Webcam live. Press 's' to start auto-capture of 30 images in 5 seconds.
[INFO] Starting auto-capture...
[INFO] Captured: colmap_images\img_000.jpg
[INFO] Captured: colmap_images\img_001.jpg
[INFO] Captured: colmap_images\img_002.jpg
[INFO] Captured: colmap_images\img_003.jpg
[INFO] Captured: colmap_images\img_004.jpg
[INFO] Captured: colmap_images\img_005.jpg
[INFO] Captured: colmap_images\img_006.jpg
[INFO] Captured: colmap_images\img_007.jpg
[INFO] Captured: colmap_images\img_008.jpg
[INFO] Captured: colmap_images\img_009.jpg
[INFO] Captured: colmap_images\img_010.jpg
[INFO] Captured: colmap_images\img_011.jpg
[INFO] Captured: colmap_images\img_012.jpg
[INFO] Captured: colmap_images\img_013.jpg
[INFO] Captured: colmap_images\img_014.jpg
[INFO] Captured: colmap_images\img_015.jpg
[INFO] Captured: colmap_images\img_016.jpg
[INFO] Captured: colmap_images\img_017.jpg
[INFO] Captured: colmap_images\img_018.jpg
[INFO] Captured: colmap_images\img_019.jpg
[INFO] Captured: colmap_image

In [1]:
import cv2
import os
import subprocess
import shutil
import open3d as o3d
import time

# === CONFIG ===
image_dir = "colmap_images"
output_dir = "colmap_output"
num_images = 30
capture_duration_sec = 10  # Increased to reduce blur
# Seconds between two consecutive captures, so num_images frames span capture_duration_sec
interval = capture_duration_sec / num_images
colmap_bin = r"C:\Users\vedhr\CODES\COLMAP\bin\colmap.exe"

# === CLEANUP ===
# Remove the results of any previous run so stale files cannot leak into this one
if os.path.exists(image_dir):
    shutil.rmtree(image_dir)
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(image_dir)
os.makedirs(output_dir)

# === WEBCAM CAPTURE WITH TRIGGER ===
print(f"[INFO] Webcam live. Press 's' to start auto-capture of {num_images} images in {capture_duration_sec} seconds.")
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("[ERROR] Webcam could not be opened.")
    # SystemExit stops the cell/script; the interactive exit() helper would shut down
    # the whole Jupyter kernel instead.
    raise SystemExit(1)

count = 0
capture_started = False
start_time = None

# try/finally releases the camera and the preview window on every exit path
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("[ERROR] Frame capture failed.")
            break

        # All overlays go on a copy: the untouched frame is what gets saved, so the
        # status text never ends up inside the images handed to COLMAP.
        preview = frame.copy()
        height, width, _ = frame.shape
        cv2.putText(preview, f"Resolution: {width}x{height}", (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,255), 1)

        if not capture_started:
            cv2.putText(preview, "Press 's' to start auto-capture", (10, 55), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)
        else:
            cv2.putText(preview, f"Capturing: {count}/{num_images}", (10, 55), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)

        cv2.imshow("Live Feed", preview)
        key = cv2.waitKey(1) & 0xFF

        if not capture_started and key == ord('s'):
            print("[INFO] Starting auto-capture...")
            capture_started = True
            start_time = time.time()

        if capture_started:
            elapsed = time.time() - start_time
            # Image number `count` is due count * interval seconds after the start
            if count < num_images and elapsed >= count * interval:
                filename = os.path.join(image_dir, f"img_{count:03d}.jpg")
                cv2.imwrite(filename, frame)
                print(f"[INFO] Captured: {filename}")
                count += 1
            elif count >= num_images:
                print("[✅] Done capturing.")
                break

        if key == ord('q'):
            print("[INFO] Quit by user.")
            raise SystemExit(0)
finally:
    cap.release()
    cv2.destroyAllWindows()

# === VERIFY IMAGE COUNT ===
# Guards against a capture loop that ended early (e.g. the camera stopped delivering frames)
actual_images = len(os.listdir(image_dir))
if actual_images < 10:
    print(f"[❌] Only {actual_images} images captured. Need at least 10.")
    raise SystemExit(1)

# === COLMAP PATHS ===
database_path = os.path.join(output_dir, "database.db")
sparse_path = os.path.join(output_dir, "sparse")
dense_path = os.path.join(output_dir, "dense")
fused_ply = os.path.join(dense_path, "fused.ply")

# === COLMAP PIPELINE ===
def run_colmap():
    """Execute the COLMAP pipeline stage by stage, up to the fused point cloud.

    Stages run in this order: feature_extractor, exhaustive_matcher, mapper,
    image_undistorter, patch_match_stereo, stereo_fusion. Each one is started with
    ``check=True``, so a failing stage raises ``subprocess.CalledProcessError`` and
    nothing after it runs. Paths are taken from the module-level ``database_path``,
    ``image_dir``, ``sparse_path``, ``dense_path`` and ``fused_ply`` variables.
    """

    def launch(banner, command, *options, ensure_dir=None):
        # Announce the stage, create its output folder if one is requested, then run it.
        print(banner)
        if ensure_dir is not None:
            os.makedirs(ensure_dir, exist_ok=True)
        subprocess.run([colmap_bin, command, *options], check=True)

    launch(
        "[INFO] Running feature extraction...", "feature_extractor",
        "--database_path", database_path,
        "--image_path", image_dir,
        "--ImageReader.single_camera", "1",
        "--SiftExtraction.use_gpu", "1",
    )
    launch(
        "[INFO] Matching features...", "exhaustive_matcher",
        "--database_path", database_path,
        "--SiftMatching.use_gpu", "1",
    )
    launch(
        "[INFO] Building sparse map...", "mapper",
        "--database_path", database_path,
        "--image_path", image_dir,
        "--output_path", sparse_path,
        ensure_dir=sparse_path,
    )
    # The undistorter reads the "0" sub-folder of the mapper output
    # (presumably the first reconstructed model — verify against COLMAP's layout).
    launch(
        "[INFO] Undistorting images...", "image_undistorter",
        "--image_path", image_dir,
        "--input_path", os.path.join(sparse_path, "0"),
        "--output_path", dense_path,
        "--output_type", "COLMAP",
        ensure_dir=dense_path,
    )
    launch(
        "[INFO] Running dense stereo...", "patch_match_stereo",
        "--workspace_path", dense_path,
        "--workspace_format", "COLMAP",
        "--PatchMatchStereo.geom_consistency", "true",
    )
    launch(
        "[INFO] Fusing depth maps...", "stereo_fusion",
        "--workspace_path", dense_path,
        "--workspace_format", "COLMAP",
        "--input_type", "geometric",
        "--output_path", fused_ply,
    )

try:
    run_colmap()
    print(f"[✅] Point cloud saved to: {fused_ply}")
except FileNotFoundError as e:
    # Raised by subprocess when the COLMAP executable itself cannot be launched
    print(f"[❌] COLMAP executable not found: {colmap_bin}")
    raise SystemExit(1) from e
except subprocess.CalledProcessError as e:
    print(f"[❌] COLMAP failed: {e}")
    # SystemExit stops the cell/script; the interactive exit() helper would shut down
    # the whole Jupyter kernel instead.
    raise SystemExit(1) from e

# === VISUALIZE RESULT ===
if os.path.exists(fused_ply):
    pcd = o3d.io.read_point_cloud(fused_ply)
    if pcd.has_points():
        print("[INFO] Visualizing fused point cloud...")
        o3d.visualization.draw_geometries([pcd])
    else:
        # Avoid opening an empty viewer window when the file holds no points
        print("[❌] Fused point cloud contains no points.")
else:
    print("[❌] Fused point cloud not found.")


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
[INFO] Webcam live. Press 's' to start auto-capture of 30 images in 10 seconds.
[INFO] Starting auto-capture...
[INFO] Captured: colmap_images\img_000.jpg
[INFO] Captured: colmap_images\img_001.jpg
[INFO] Captured: colmap_images\img_002.jpg
[INFO] Captured: colmap_images\img_003.jpg
[INFO] Captured: colmap_images\img_004.jpg
[INFO] Captured: colmap_images\img_005.jpg
[INFO] Captured: colmap_images\img_006.jpg
[INFO] Captured: colmap_images\img_007.jpg
[INFO] Captured: colmap_images\img_008.jpg
[INFO] Captured: colmap_images\img_009.jpg
[INFO] Captured: colmap_images\img_010.jpg
[INFO] Captured: colmap_images\img_011.jpg
[INFO] Captured: colmap_images\img_012.jpg
[INFO] Captured: colmap_images\img_013.jpg
[INFO] Captured: colmap_images\img_014.jpg
[INFO] Captured: colmap_images\img_015.jpg
[INFO] Captured: colmap_images\

In [None]:
# run_realtime_gpu.py
import cv2
import torch
import numpy as np
import argparse
import time
import os

# Import the model definitions from the project's files
from model import Finetunemodel

def get_args(argv=None):
    """Parses and returns command-line arguments.

    Args:
        argv: Optional list of argument strings to parse instead of the process
            command line. With the default ``None`` argparse reads ``sys.argv[1:]``,
            exactly as before. Passing an explicit list (e.g. ``[]``) lets the
            function be used where ``sys.argv`` holds unrelated arguments, such as
            inside a notebook kernel or a test.

    Returns:
        argparse.Namespace with ``model``, ``webcam_id``, ``width`` and ``height``.
    """
    parser = argparse.ArgumentParser(description="Real-Time SCI Enhancement on GPU")
    parser.add_argument('--model', type=str, default='./weights/medium.pt',
                        help='Path to the pre-trained SCI model file (.pt)')
    parser.add_argument('--webcam_id', type=int, default=0,
                        help='ID of the webcam to use (usually 0)')
    parser.add_argument('--width', type=int, default=640, help='Width of the webcam frame')
    parser.add_argument('--height', type=int, default=480, help='Height of the webcam frame')
    return parser.parse_args(argv)

def preprocess_frame(frame, device):
    """Turn an OpenCV frame into a batched float tensor placed on ``device``.

    The frame is converted from BGR to RGB, scaled from 0-255 to 0-1 as float32,
    reordered from (height, width, channel) to (channel, height, width), and given
    a leading batch axis of size one.
    """
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    channels_first = (rgb.astype(np.float32) / 255.0).transpose(2, 0, 1)
    # Indexing with None adds the batch axis in front
    batch = torch.from_numpy(channels_first)[None]
    return batch.to(device)

def postprocess_frame(tensor):
    """Turn a model output tensor into a BGR uint8 image that OpenCV can display.

    The leading batch axis is dropped, the data is brought to the CPU as a NumPy
    array, reordered from (channel, height, width) to (height, width, channel),
    scaled by 255, clipped to the 0-255 range and cast to uint8, then converted
    from RGB to BGR.
    """
    # NumPy conversion requires a CPU tensor that is detached from the autograd graph
    channels_first = tensor.squeeze(0).cpu().detach().numpy()
    channels_last = channels_first.transpose(1, 2, 0)
    pixels = np.clip(channels_last * 255.0, 0, 255.0).astype(np.uint8)
    return cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)

def main():
    """Run the live webcam demo: original feed next to the model-enhanced feed.

    Picks CUDA when available (CPU otherwise), loads the model named by the
    ``--model`` argument, then reads frames from the webcam, enhances each one and
    shows both versions side by side with an FPS counter until 'q' is pressed or a
    frame cannot be read. Returns early, after printing an error, when the model
    file is missing or the webcam cannot be opened.
    """
    args = get_args()

    # --- ADDED: Device selection logic ---
    if torch.cuda.is_available():
        device = torch.device("cuda")
        print("Using device: CUDA (NVIDIA GPU)")
    else:
        device = torch.device("cpu")
        print("Using device: CPU")

    print(f"Loading model: {args.model}")
    if not os.path.exists(args.model):
        print(f"Error: Model file not found at {args.model}")
        return

    model = Finetunemodel(args.model)
    # --- ADDED: Move the model to the GPU ---
    model.to(device)
    model.eval()
    print("Model loaded successfully.")

    cap = cv2.VideoCapture(args.webcam_id)
    if not cap.isOpened():
        print(f"Error: Could not open webcam with ID {args.webcam_id}.")
        return

    # try/finally guarantees the webcam and the display window are released even if
    # inference or display raises, or the loop is interrupted from the keyboard.
    try:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
        print("Webcam initialized. Press 'q' in the display window to quit.")

        # Inference only: no gradients are needed
        with torch.no_grad():
            while True:
                ret, frame = cap.read()
                if not ret:
                    print("Error: Failed to grab frame.")
                    break

                start_time = time.time()

                # --- MODIFIED: Pass the device to the preprocessing function ---
                input_tensor = preprocess_frame(frame, device)

                # Model and input tensor were both moved to `device` above.
                # Only the second value returned by the model is used.
                _, enhanced_tensor = model(input_tensor)

                enhanced_frame = postprocess_frame(enhanced_tensor)
                end_time = time.time()

                # Guard against a zero-length interval from a coarse clock
                duration = end_time - start_time
                fps = 1 / duration if duration > 0 else 0

                cv2.putText(enhanced_frame, f"FPS: {int(fps)}", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

                # Original on the left, enhanced on the right
                combined_frame = np.hstack((frame, enhanced_frame))
                cv2.imshow('Original vs. SCI Enhanced (GPU)', combined_frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        print("Exiting...")
        cap.release()
        cv2.destroyAllWindows()

# Start the demo only when this module is the program entry point, not when it is imported.
if __name__ == '__main__':
    main()