# 🏠 Room Video to 3D Analysis Pipeline

Analyzes a video of a full 360° room rotation to detect all doors and windows.

**Requirements:**
- Full 360° rotation video starting and ending at the same door
- Good lighting and steady camera movement

## Setup

In [None]:
# Install packages
!pip install -q opencv-python numpy matplotlib pillow tqdm scikit-image

# Imports
import cv2
import numpy as np
import os
import json
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from google.colab import files
import shutil
from skimage import feature, filters, morphology
from scipy import ndimage

print("✅ Setup complete!")

## Upload Video

In [None]:
# Ask for the room video through the Colab upload widget
print("📤 Upload your room video (full 360° rotation)")
uploaded = files.upload()

if not uploaded:
    print("❌ No video uploaded")
else:
    # Only the first uploaded file is used; it is moved to a fixed location
    filename = next(iter(uploaded))
    video_path = "/content/room_video.mp4"
    shutil.move(filename, video_path)
    print(f"✅ Video saved to: {video_path}")

## Extract Frames (More Dense Sampling)

In [None]:
def extract_frames(video_path, num_frames=60):
    """Sample frames evenly across the whole video and preview three of them.

    Sample positions are spread from the first to the last frame index, so the
    final sample is taken from the end of the video rather than from wherever
    a fixed integer stride happens to stop.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        num_frames: Maximum number of frames to sample (must be >= 1).

    Returns:
        A list of dicts with keys ``'frame_num'`` (index inside the video),
        ``'time'`` (``frame_num / fps``) and ``'image'`` (the frame converted
        from BGR to RGB). Frames that fail to decode are skipped.

    Raises:
        ValueError: If ``num_frames`` is below 1, or the video reports no
            frames or a non-positive frame rate.
        OSError: If the video cannot be opened.
        RuntimeError: If none of the sampled frames could be decoded.
    """
    if num_frames < 1:
        raise ValueError(f"num_frames must be >= 1, got {num_frames}")

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise OSError(f"Could not open video: {video_path}")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if total_frames <= 0 or fps <= 0:
            raise ValueError(
                f"Video reports {total_frames} frames at {fps} fps; cannot sample it"
            )
        duration = total_frames / fps

        # Integer positions from index 0 to the last index. Because
        # sample_count <= total_frames the spacing is >= 1, so indices are unique.
        sample_count = min(num_frames, total_frames)
        if sample_count == 1:
            indices = [0]
        else:
            indices = [k * (total_frames - 1) // (sample_count - 1)
                       for k in range(sample_count)]
        step = total_frames / sample_count

        print(f"📹 Video info: {duration:.1f}s, {total_frames} total frames")
        print(f"📸 Extracting frame every ~{step:.1f} frames ({step / fps:.1f}s)")

        frames = []
        for frame_idx in indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ret, frame = cap.read()
            if ret:
                frames.append({
                    'frame_num': frame_idx,
                    'time': frame_idx / fps,
                    'image': cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                })
    finally:
        # Release the capture even when validation or decoding raises
        cap.release()

    if not frames:
        raise RuntimeError(f"No frames could be decoded from: {video_path}")

    print(f"✅ Extracted {len(frames)} frames for analysis")

    # Show first, middle, and last frames
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    key_frames = [0, len(frames)//2, len(frames)-1]
    titles = ["First Frame (Start Door)", "Middle Frame", "Last Frame (Same Door)"]

    for idx, (frame_idx, title) in enumerate(zip(key_frames, titles)):
        axes[idx].imshow(frames[frame_idx]['image'])
        axes[idx].set_title(f"{title}\nt={frames[frame_idx]['time']:.1f}s")
        axes[idx].axis('off')

    plt.tight_layout()
    plt.show()

    return frames

# Sample up to 60 frames from the uploaded video; later cells read `frames`.
frames = extract_frames(video_path, num_frames=60)

## High-Confidence Door & Window Detection

In [None]:
def detect_door_features(image):
    """Build a candidate-region mask and collect long near-vertical lines.

    The image is converted to grayscale, denoised, contrast-enhanced with
    CLAHE, and two Canny passes are OR-ed together. Probabilistic Hough
    segments at least 30% of the image height long are kept when they lie
    within 5 degrees of vertical.

    Args:
        image: Colour image array; converted with ``cv2.COLOR_RGB2GRAY``.

    Returns:
        Tuple ``(filled, vertical_lines, edges_combined)`` where ``filled`` is
        the uint8 mask made from the closed edge map OR-ed with the drawn
        vertical lines, ``vertical_lines`` is a list of
        ``(x1, y1, x2, y2, length)`` tuples, and ``edges_combined`` is the
        OR-ed Canny edge map.
    """
    height, width = image.shape[:2]
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Enhanced preprocessing
    denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(denoised)

    # Multi-scale edge detection
    edges_fine = cv2.Canny(enhanced, 30, 90)
    edges_coarse = cv2.Canny(enhanced, 50, 150)
    edges_combined = cv2.bitwise_or(edges_fine, edges_coarse)

    # Detect long straight segments (door frame candidates)
    lines = cv2.HoughLinesP(edges_combined, 1, np.pi/180, 80,
                            minLineLength=height*0.3, maxLineGap=20)

    vertical_lines = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = (int(v) for v in line[0])
            # abs(atan2) folds the direction into [0, 180] degrees:
            # 0 and 180 are horizontal, 90 is vertical.
            angle = np.abs(np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi)
            if 85 < angle < 95:
                line_length = float(np.sqrt((x2 - x1)**2 + (y2 - y1)**2))
                vertical_lines.append((x1, y1, x2, y2, line_length))

    # Draw the vertical segments into their own map
    vertical_map = np.zeros((height, width), dtype=np.uint8)
    for x1, y1, x2, y2, _ in vertical_lines:
        cv2.line(vertical_map, (x1, y1), (x2, y2), 255, 3)

    # Wide, flat closing kernel bridges horizontal gaps in the edge map
    kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 5))
    connected = cv2.morphologyEx(edges_combined, cv2.MORPH_CLOSE, kernel_close)

    # Merge with the vertical lines and close again with a square kernel
    filled = cv2.bitwise_or(connected, vertical_map)
    kernel_fill = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 20))
    filled = cv2.morphologyEx(filled, cv2.MORPH_CLOSE, kernel_fill)

    return filled, vertical_lines, edges_combined

def refine_door_boundaries(image, initial_bbox):
    """Adjust a door box with a mask-initialised GrabCut pass.

    The box itself is marked definite foreground, a 10-pixel ring around it
    probable foreground, and everything else keeps the mask's zero value.
    After three GrabCut iterations the bounding rectangle of the largest
    foreground contour is returned if it is larger than 50x100 pixels.

    Args:
        image: Image array passed to ``cv2.grabCut``.
        initial_bbox: ``(x, y, w, h)`` box to refine.

    Returns:
        ``[x, y, w, h]`` of the refined box, or ``initial_bbox`` unchanged
        when the box is empty, GrabCut raises ``cv2.error``, or the refined
        box is too small.
    """
    x, y, w, h = initial_bbox
    height, width = image.shape[:2]

    # Nothing to segment for an empty box
    if w <= 0 or h <= 0:
        return initial_bbox

    # Expand bbox slightly for GrabCut initialization
    margin = 10
    x1 = max(0, x - margin)
    y1 = max(0, y - margin)
    x2 = min(width, x + w + margin)
    y2 = min(height, y + h + margin)

    # NOTE(review): the whole input box is definite foreground, so GrabCut
    # presumably can only grow the box into the margin, never shrink it -
    # confirm this is the intended refinement.
    mask = np.zeros((height, width), np.uint8)
    mask[y1:y2, x1:x2] = cv2.GC_PR_FGD  # Probable foreground
    mask[y:y+h, x:x+w] = cv2.GC_FGD     # Definite foreground

    # Initialize foreground and background models
    bgd_model = np.zeros((1, 65), np.float64)
    fgd_model = np.zeros((1, 65), np.float64)

    rect = (x1, y1, x2-x1, y2-y1)
    try:
        cv2.grabCut(image, mask, rect, bgd_model, fgd_model, 3, cv2.GC_INIT_WITH_MASK)
    except cv2.error:
        # Best effort: keep the unrefined box when OpenCV rejects the input.
        # Only cv2.error is caught so KeyboardInterrupt and genuine bugs
        # still propagate.
        return initial_bbox

    # Extract foreground
    mask2 = np.where((mask == cv2.GC_FGD) | (mask == cv2.GC_PR_FGD), 255, 0).astype('uint8')

    # Find precise contour
    contours, _ = cv2.findContours(mask2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(largest_contour)

        # Validate the refined bbox against the minimum door size
        if w > 50 and h > 100:
            return [x, y, w, h]

    return initial_bbox

def find_door_edges(image, bbox):
    """Narrow a door box to strong vertical edges near its left and right sides.

    A horizontal-gradient filter is applied to the grayscale crop. A column
    counts as a door-frame edge when more than ``h * 0.5`` of its pixels
    exceed 30% of the maximum gradient magnitude. The left bound is searched
    in the first quarter of the crop, the right bound in the last quarter.

    Args:
        image: Colour image array; the crop is converted with
            ``cv2.COLOR_RGB2GRAY``.
        bbox: ``(x, y, w, h)`` candidate box.

    Returns:
        ``[left, y, width, h]`` when the narrowed width exceeds 50 pixels,
        otherwise ``bbox`` unchanged (also returned for an empty crop).
    """
    x, y, w, h = bbox

    # Extract door region; an empty crop cannot be converted or filtered
    door_region = image[y:y+h, x:x+w]
    if door_region.size == 0:
        return bbox
    gray_region = cv2.cvtColor(door_region, cv2.COLOR_RGB2GRAY)

    # Use a float output depth: with ddepth=-1 the uint8 result saturates
    # negative responses to 0, which hides bright-to-dark edges.
    vertical_kernel = np.array([[-1, 0, 1]], dtype=np.float32)
    vertical_edges = np.abs(cv2.filter2D(gray_region, cv2.CV_32F, vertical_kernel))

    edge_threshold = np.max(vertical_edges) * 0.3
    strong_edges = vertical_edges > edge_threshold

    # Number of strong pixels per column, computed once for both scans
    column_hits = strong_edges.sum(axis=0)
    region_w = column_hits.shape[0]
    min_hits = h * 0.5

    left_bound = x
    right_bound = x + w

    # Scan from left to find first strong vertical edge (first quarter only)
    for i in range(region_w // 4):
        if column_hits[i] > min_hits:
            left_bound = x + i
            break

    # Scan from right to find last strong vertical edge (last quarter only)
    for i in range(region_w - 1, 3 * region_w // 4, -1):
        if column_hits[i] > min_hits:
            right_bound = x + i
            break

    refined_width = right_bound - left_bound
    if refined_width > 50:  # Minimum door width
        return [left_bound, y, refined_width, h]

    return bbox

def calculate_door_confidence(bbox, vertical_lines, image_shape, is_edge_frame=False):
    """Score how door-like a bounding box is, on a 0.0-1.0 scale.

    Four cues are added together: aspect ratio (up to 0.25), height relative
    to the image (up to 0.2), how close the bottom edge is to the image
    bottom (up to 0.25) and the number of long vertical lines fully inside
    the box (up to 0.3). Edge frames earn an extra 0.15 when the box is tall
    and reaches low in the image. The sum is capped at 1.0.

    Args:
        bbox: ``(x, y, w, h)`` box.
        vertical_lines: Iterable of ``(x1, y1, x2, y2, length)`` tuples.
        image_shape: Image shape; the first two entries are height and width.
        is_edge_frame: Whether to apply the lenient edge-frame bonus.

    Returns:
        The confidence as a float no greater than 1.0.
    """
    left, top, box_w, box_h = bbox
    img_h, img_w = image_shape[:2]
    right = left + box_w
    bottom = top + box_h

    aspect = box_h / box_w if box_w > 0 else 0
    rel_height = box_h / img_h
    rel_bottom = bottom / img_h

    # Cue 1: doors are taller than they are wide
    if 1.8 < aspect < 3.5:
        shape_score = 0.25
    elif 1.5 < aspect < 4.0:
        shape_score = 0.15
    else:
        shape_score = 0.0

    # Cue 2: doors take up a large share of the image height
    if rel_height > 0.5:
        height_score = 0.2
    elif rel_height > 0.4:
        height_score = 0.15
    else:
        height_score = 0.0

    # Cue 3: doors reach down towards the floor
    if rel_bottom > 0.9:
        floor_score = 0.25
    elif rel_bottom > 0.85:
        floor_score = 0.2
    else:
        floor_score = 0.0

    # Cue 4: long vertical lines whose two endpoints both lie inside the box
    half_height = box_h * 0.5
    enclosed_lines = sum(
        1
        for lx1, ly1, lx2, ly2, line_len in vertical_lines
        if left <= lx1 <= right and left <= lx2 <= right
        and top <= ly1 <= bottom and top <= ly2 <= bottom
        and line_len > half_height
    )
    if enclosed_lines >= 2:
        line_score = 0.3
    elif enclosed_lines >= 1:
        line_score = 0.15
    else:
        line_score = 0.0

    # Leniency for the first/last frames of the scan
    looks_like_door = aspect > 1.5 and rel_bottom > 0.8 and rel_height > 0.35
    edge_bonus = 0.15 if (is_edge_frame and looks_like_door) else 0.0

    total = shape_score + height_score + floor_score + line_score + edge_bonus
    return min(total, 1.0)

def detect_doors_windows_confident(image, confidence_threshold=0.9, is_edge_frame=False):
    """Find door and window candidates in one frame.

    Contours of the feature mask covering at least 2% of the image are scored
    with ``calculate_door_confidence``. Boxes reaching the threshold (lowered
    by 0.1 for edge frames) become doors after boundary refinement; lower
    scoring, squat boxes away from the top and bottom of the image become
    windows. Overlapping results are pruned per category.

    Args:
        image: Frame to analyse.
        confidence_threshold: Minimum door confidence.
        is_edge_frame: Whether the frame is one of the leniently treated
            first/last frames.

    Returns:
        Tuple ``(doors, windows, door_mask)``; the first two are lists of
        detection dicts, the last is the mask from ``detect_door_features``.
    """
    img_h, img_w = image.shape[:2]

    door_mask, vertical_lines, _edges = detect_door_features(image)
    candidates, _ = cv2.findContours(door_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    min_area = img_w * img_h * 0.02
    # Edge frames get a slightly lower bar
    door_cutoff = confidence_threshold - 0.1 if is_edge_frame else confidence_threshold

    doors, windows = [], []

    for candidate in candidates:
        if cv2.contourArea(candidate) < min_area:
            continue

        bx, by, bw, bh = cv2.boundingRect(candidate)
        rough_box = [bx, by, bw, bh]
        score = calculate_door_confidence(rough_box, vertical_lines, image.shape, is_edge_frame)

        if score >= door_cutoff:
            # Tighten the sides first, then run GrabCut for high scores only
            door_box = find_door_edges(image, rough_box)
            if score >= 0.85:
                door_box = refine_door_boundaries(image, door_box)

            dx, dy, dw, dh = door_box
            doors.append({
                'bbox': [dx, dy, dx + dw, dy + dh],
                'confidence': score,
                'width': dw,
                'height': dh,
                'aspect_ratio': dh / dw if dw > 0 else 0
            })
            continue

        # Window candidates: moderate score and not tall
        if score < 0.5 or bh / bw >= 1.5:
            continue

        bottom_pos = (by + bh) / img_h
        if bottom_pos < 0.8 and by > img_h * 0.1:
            windows.append({
                'bbox': [bx, by, bx + bw, by + bh],
                'confidence': score * 0.8,  # windows are reported with reduced confidence
                'width': bw,
                'height': bh
            })

    # Drop overlapping detections within each category
    doors = remove_overlapping(doors, iou_threshold=0.3)
    windows = remove_overlapping(windows, iou_threshold=0.3)

    return doors, windows, door_mask

def remove_overlapping(detections, iou_threshold=0.3):
    """Greedily drop detections that overlap a higher-confidence one.

    Detections are visited from highest to lowest ``'confidence'``; one is
    kept only if its IoU with every already kept detection is at most
    ``iou_threshold``.

    Args:
        detections: List of dicts with ``'bbox'`` and ``'confidence'`` keys.
        iou_threshold: IoU above which two boxes count as overlapping.

    Returns:
        A new list with the surviving detections, best first.
    """
    survivors = []
    if not detections:
        return survivors

    ranked = sorted(detections, key=lambda det: det['confidence'], reverse=True)
    for candidate in ranked:
        clashes = any(
            calculate_iou(candidate['bbox'], winner['bbox']) > iou_threshold
            for winner in survivors
        )
        if not clashes:
            survivors.append(candidate)

    return survivors

def calculate_iou(box1, box2):
    """Return the intersection-over-union of two ``[x1, y1, x2, y2]`` boxes.

    Returns 0 when the union has no area.
    """
    # Overlap extent along each axis, clamped at zero for disjoint boxes
    overlap_w = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    overlap_h = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    intersection = overlap_w * overlap_h

    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - intersection

    if union > 0:
        return intersection / union
    return 0

## Analyze All Frames

In [None]:
# Run the detector on every sampled frame
print("🚀 Analyzing all frames with 90% confidence threshold...")
print("📌 First and last 5 frames will be scanned more thoroughly for the main door\n")

confidence_threshold = 0.9
all_detections = []

for frame_pos, frame in enumerate(tqdm(frames, desc="Processing frames")):
    # The first and last five frames are treated as edge frames
    near_start = frame_pos < 5
    near_end = frame_pos >= len(frames) - 5

    found_doors, found_windows, _ = detect_doors_windows_confident(
        frame['image'], confidence_threshold, near_start or near_end
    )
    all_detections.append({
        'frame_num': frame['frame_num'],
        'time': frame['time'],
        'doors': found_doors,
        'windows': found_windows
    })

# Per-frame counts and overall totals
door_counts = []
window_counts = []
for entry in all_detections:
    door_counts.append(len(entry['doors']))
    window_counts.append(len(entry['windows']))

total_doors = sum(door_counts)
total_windows = sum(window_counts)
frames_with_doors = len([count for count in door_counts if count > 0])
frames_with_windows = len([count for count in window_counts if count > 0])

frame_total = len(frames)
print(f"\n📊 Detection Summary (≥90% confidence):")
print(f"  Total door detections: {total_doors}")
print(f"  Frames with doors: {frames_with_doors}/{frame_total} ({frames_with_doors/frame_total*100:.0f}%)")
print(f"  Total window detections: {total_windows}")
print(f"  Frames with windows: {frames_with_windows}/{frame_total} ({frames_with_windows/frame_total*100:.0f}%)")

# Does the scan start and end on a detected door?
first_door = bool(all_detections[0]['doors'])
last_door = bool(all_detections[-1]['doors'])

# Compare the top door of the first and last frame by box centre
same_door = False
if first_door and last_door:
    fx1, fy1, fx2, fy2 = all_detections[0]['doors'][0]['bbox']
    lx1, ly1, lx2, ly2 = all_detections[-1]['doors'][0]['bbox']

    center_dx = abs((fx1 + fx2)/2 - (lx1 + lx2)/2)
    center_dy = abs((fy1 + fy2)/2 - (ly1 + ly2)/2)

    # Centres within 20% of the image width/height count as the same door
    img_height, img_width = frames[0]['image'].shape[:2]
    same_door = (center_dx < img_width * 0.2) and (center_dy < img_height * 0.2)

if same_door:
    same_door_mark = '✅'
elif first_door and last_door:
    same_door_mark = '❌ (different positions)'
else:
    same_door_mark = '❌'

print(f"\n🚪 Door continuity check:")
print(f"  First frame has door: {'✅' if first_door else '❌'}")
print(f"  Last frame has door: {'✅' if last_door else '❌'}")
print(f"  Same door detected: {same_door_mark}")

# Show the edge frames whenever one of them lacks a door, for manual review
if not (first_door and last_door):
    print("\n⚠️ Main door not detected in edge frames! Showing frames for review...")

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    review_panels = (
        (axes[0], first_door, frames[0], "First"),
        (axes[1], last_door, frames[-1], "Last"),
    )
    for panel, has_door, edge_frame, label in review_panels:
        if has_door:
            # Nothing to review for this frame: leave a short note instead
            panel.axis('off')
            panel.text(0.5, 0.5, f"{label} frame OK", ha='center', va='center')
        else:
            panel.imshow(edge_frame['image'])
            panel.set_title(f"{label} Frame - No door detected")
            panel.axis('off')

    plt.show()

## Visualize Detection Timeline

In [None]:
# Two stacked panels: door counts on top, window counts below
def _draw_count_panel(axis, counts, line_style, fill_color, y_label, heading):
    """Plot one per-frame count series as a filled line chart."""
    axis.plot(times, counts, line_style, linewidth=2)
    axis.fill_between(times, counts, alpha=0.3, color=fill_color)
    axis.set_ylabel(y_label)
    axis.set_title(heading)
    axis.grid(True, alpha=0.3)
    axis.set_ylim(bottom=0)


fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

times = [entry['time'] for entry in frames]

_draw_count_panel(ax1, door_counts, 'r-', 'red',
                  'Number of Doors', 'Door Detections Throughout 360° Room Scan')

# Dashed markers at the first and last sampled frame
for marker_time, marker_label in ((times[0], 'Start'), (times[-1], 'End')):
    ax1.axvline(x=marker_time, color='green', linestyle='--', alpha=0.7, label=marker_label)
ax1.legend()

_draw_count_panel(ax2, window_counts, 'b-', 'blue',
                  'Number of Windows', 'Window Detections Throughout 360° Room Scan')
ax2.set_xlabel('Time (seconds)')

plt.tight_layout()
plt.show()

## Show High-Confidence Detections

In [None]:
def visualize_detection(image, doors, windows, title=""):
    """Show an image with door boxes in red and window boxes in blue.

    Args:
        image: Image passed to ``plt.imshow``.
        doors: Detections with ``'bbox'`` (``[x1, y1, x2, y2]``) and
            ``'confidence'`` keys.
        windows: Detections with the same keys.
        title: Figure title.
    """
    plt.figure(figsize=(10, 8))
    plt.imshow(image)

    # (detections, label, outline/text colour, label background)
    layers = (
        (doors, "Door", 'red', "yellow"),
        (windows, "Window", 'blue', "cyan"),
    )
    for items, label, outline, tag_fill in layers:
        for item in items:
            box = item['bbox']
            left, top = box[0], box[1]
            outline_rect = patches.Rectangle((left, top),
                                             box[2] - left, box[3] - top,
                                             linewidth=3, edgecolor=outline, facecolor='none')
            plt.gca().add_patch(outline_rect)
            # Confidence label just above the top-left corner
            plt.text(left, top - 5, f"{label} {item['confidence']:.2f}",
                     color=outline, fontweight='bold', fontsize=12,
                     bbox=dict(boxstyle="round,pad=0.3", facecolor=tag_fill, alpha=0.7))

    plt.title(title)
    plt.axis('off')
    plt.tight_layout()
    plt.show()

# Frames in which at least one door passed the confidence threshold
door_frames = [(pos, det) for pos, det in enumerate(all_detections) if det['doors']]

if not door_frames:
    print("❌ No doors detected with ≥90% confidence!")
else:
    print(f"\n🚪 Found {len(door_frames)} frames with high-confidence doors (≥90%)\n")

    # Preview at most the first five of them
    for shown, (frame_idx, detection) in enumerate(door_frames[:5], start=1):
        frame = frames[frame_idx]
        caption = f"Frame {frame_idx} (t={frame['time']:.1f}s) - Door Detection #{shown}"
        visualize_detection(frame['image'], detection['doors'], detection['windows'], caption)

# Frames with at least one window; preview at most three
window_frames = [(pos, det) for pos, det in enumerate(all_detections) if det['windows']]
if window_frames:
    print(f"\n🪟 Found {len(window_frames)} frames with windows\n")
    for frame_idx, detection in window_frames[:3]:
        frame = frames[frame_idx]
        caption = f"Frame {frame_idx} (t={frame['time']:.1f}s) - Window Detection"
        visualize_detection(frame['image'], detection['doors'], detection['windows'], caption)

## Unique Door/Window Analysis

In [None]:
# Estimate unique doors and windows (accounting for rotation)
def estimate_unique_features(detections, frames_per_rotation=None):
    """Estimate the number of distinct doors/windows seen during the scan.

    One rotation is split into 8 equal angular segments; each segment that
    contains at least one door (or window) detection counts as one feature.
    The segment is derived from the frame's position inside its rotation, so
    short scans (fewer than 8 frames) work, at most 8 segments are ever used,
    and frames from further rotations wrap back onto the same segments.

    Args:
        detections: Per-frame dicts with ``'doors'`` and ``'windows'`` lists,
            in capture order.
        frames_per_rotation: Number of frames in one full rotation. ``None``
            or a non-positive value means all of ``detections`` form a single
            rotation.

    Returns:
        Tuple ``(door_segments, window_segments)`` of segment counts.
    """
    if frames_per_rotation is None or frames_per_rotation <= 0:
        frames_per_rotation = len(detections)
    if frames_per_rotation == 0:
        return 0, 0

    rotation_segments = 8  # Divide rotation into 8 segments

    door_segments = set()
    window_segments = set()

    for i, det in enumerate(detections):
        # Scale the in-rotation position onto 0..rotation_segments-1
        segment = (i % frames_per_rotation) * rotation_segments // frames_per_rotation
        if det['doors']:
            door_segments.add(segment)
        if det['windows']:
            window_segments.add(segment)

    return len(door_segments), len(window_segments)

# Summarise the estimated room features
unique_doors, unique_windows = estimate_unique_features(all_detections)

if first_door and last_door:
    entrance_status = '✅ Detected'
else:
    entrance_status = '❌ Not detected'

print(f"\n🏠 Room Features Summary:")
print(f"  Estimated unique doors: {unique_doors}")
print(f"  Estimated unique windows: {unique_windows}")
print(f"\n📐 Room layout:")
print(f"  Main entrance/exit: {entrance_status}")
# Anything beyond one door is reported as additional
if unique_doors > 1:
    print(f"  Additional doors: {unique_doors - 1}")

## Export Results

In [None]:
# Assemble the JSON report: run settings, aggregate numbers, per-frame detections
video_info = {
    'total_frames': len(frames),
    'confidence_threshold': confidence_threshold,
    'rotation': '360_degrees'
}
summary = {
    'total_door_detections': total_doors,
    'total_window_detections': total_windows,
    'frames_with_doors': frames_with_doors,
    'frames_with_windows': frames_with_windows,
    'estimated_unique_doors': unique_doors,
    'estimated_unique_windows': unique_windows,
    'main_door_detected': first_door and last_door
}
export_data = {
    'video_info': video_info,
    'summary': summary,
    'detections': all_detections
}

output_path = '/content/door_window_detections_360.json'
with open(output_path, 'w') as report_file:
    json.dump(export_data, report_file, indent=2)

# Offer the file through the Colab download helper, then recap
files.download(output_path)

main_door_mark = '✅' if first_door and last_door else '❌'
print(f"\n✅ Results exported to: {output_path}")
print(f"\n📋 Detection summary:")
print(f"  - {unique_doors} unique door(s) detected")
print(f"  - {unique_windows} unique window(s) detected")
print(f"  - Main door (start/end): {main_door_mark}")
print(f"  - All detections use ≥90% confidence threshold")