# Thermal-RGB Super-Resolution Data Pipeline
## Part 0: Data Diagnosis & Synchronization

This notebook will:
1. Diagnose thermal data dropout issues
2. Extract AVI video properties
3. Parse time labels
4. Synchronize all data sources
5. Generate clean paired dataset for SR models

## 0. Setup & Dependencies

In [None]:
!pip install opencv-python numpy pandas matplotlib seaborn scipy tqdm Pillow -q

In [None]:
import json
import os
from pathlib import Path
from datetime import datetime, timedelta
from typing import List, Tuple, Dict, Optional
import warnings
warnings.filterwarnings('ignore')

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from scipy.interpolate import interp1d
from PIL import Image

print("All dependencies imported successfully!")

# Set matplotlib style: seaborn "darkgrid" theme plus a default figure size
# (width, height in inches) for plots that do not pass figsize themselves.
sns.set_style("darkgrid")
plt.rcParams['figure.figsize'] = (12, 6)

## 1. Configuration

In [None]:
# ==================== PATHS ====================
# All inputs live under DATA_DIR; paths are relative to the notebook's
# working directory.
DATA_DIR = Path('./data')
THERMAL_DIR = DATA_DIR / 'thermal'
CAMERA_DIR = DATA_DIR / 'camera'

AVI_FILE = CAMERA_DIR / 'output.avi'
LABELS_FILE = CAMERA_DIR / 'output.txt'

# Output paths
OUTPUT_DIR = Path('./output')
DIAGNOSIS_DIR = OUTPUT_DIR / 'diagnosis'
SYNCHRONIZED_DIR = OUTPUT_DIR / 'synchronized'
# NOTE(review): PAIRED_DIR is created below but nothing in the cells shown
# here writes to it - confirm whether a later stage uses it.
PAIRED_DIR = OUTPUT_DIR / 'paired_data'

# Create directories (parents=True also creates OUTPUT_DIR itself)
for dir_path in [DIAGNOSIS_DIR, SYNCHRONIZED_DIR, PAIRED_DIR]:
    dir_path.mkdir(parents=True, exist_ok=True)

# ==================== PARAMETERS ====================
# (width, height): the loader reshapes each frame to (height, width) = (24, 32).
THERMAL_RES = (32, 24)  # Original thermal resolution
# Frame rate of the common timeline both sources are resampled onto.
TARGET_FPS = 8
# Seconds between consecutive target frames.
FRAME_INTERVAL = 1.0 / TARGET_FPS
# Maximum time offset (seconds) accepted when matching a source frame to a
# target time: half a target frame interval.
TIMESTAMP_TOLERANCE = FRAME_INTERVAL / 2

print(f"Data directory: {DATA_DIR}")
print(f"Output directory: {OUTPUT_DIR}")
print(f"Target FPS: {TARGET_FPS}")
print(f"Timestamp tolerance: {TIMESTAMP_TOLERANCE:.4f}s")

## 2. Load Thermal Data with Diagnostic Info

In [None]:
class ThermalDiagnostics:
    """Load thermal log files and diagnose their frame timing.

    Every ``log*.txt`` file in ``thermal_dir`` is read line by line; each
    non-blank line is expected to be a JSON object with a ``time`` list and a
    ``message`` list holding one value per pixel.

    Attributes:
        thermal_dir: Directory searched for ``log*.txt`` files.
        resolution: ``(width, height)`` of one frame; ``message`` must contain
            exactly ``width * height`` values.
        data: Parsed frames sorted by timestamp. Each entry is a dict with the
            keys ``timestamp``, ``image``, ``file`` and ``line``.
        diagnostics: Results filled in by ``load_all_files`` and
            ``analyze_timestamps``.
    """

    def __init__(self, thermal_dir: Path, resolution: Tuple[int, int] = (32, 24)):
        self.thermal_dir = thermal_dir
        self.resolution = resolution
        self.data = []
        self.diagnostics = {}

    def _parse_record(self, line: str) -> Optional[Tuple[datetime, np.ndarray]]:
        """Parse one log line into ``(timestamp, image)``.

        Returns ``None`` for any line that is not a usable record (invalid
        JSON, not a JSON object, too few or non-integer time fields, invalid
        date, wrong pixel count, non-numeric pixels), so that one bad line
        never prevents the rest of its file from loading.
        """
        try:
            record = json.loads(line)
        except json.JSONDecodeError:
            return None
        if not isinstance(record, dict):
            return None

        width, height = self.resolution
        time_parts = record.get('time', [])
        message = record.get('message', [])

        try:
            if len(time_parts) < 6:
                return None
            year, month, day, hour, minute, second = time_parts[:6]
            # NOTE(review): index 6 is ignored and index 7, when present, is
            # treated as milliseconds - confirm against the logger's format.
            millisecond = time_parts[7] if len(time_parts) > 7 else 0
            timestamp = datetime(year, month, day, hour, minute, second,
                                 millisecond * 1000)

            if len(message) != width * height:
                return None
            # NOTE(review): pixels are stored as uint8; confirm the logs hold
            # integers in 0-255 (fractional values would be truncated).
            image = np.array(message, dtype=np.uint8).reshape(height, width)
        except (TypeError, ValueError, OverflowError):
            return None

        return timestamp, image

    def load_all_files(self) -> List[Dict]:
        """Load every thermal log file and return the frames sorted by time.

        Fills ``diagnostics['total_frames']`` and ``diagnostics['file_stats']``
        (one entry per readable file with ``file``, ``frames``, ``lines`` and
        ``skipped``, the number of non-blank lines that could not be used).
        Files that cannot be read are reported and left out.
        """
        log_files = sorted(self.thermal_dir.glob('log*.txt'))
        print(f"Found {len(log_files)} thermal log files")

        # Start from a clean slate so a second call does not duplicate frames.
        self.data = []
        file_stats = []

        for file_path in tqdm(log_files, desc="Loading thermal files"):
            try:
                with open(file_path, 'r') as f:
                    lines = f.readlines()
            except (OSError, UnicodeDecodeError) as e:
                print(f"Error loading {file_path}: {e}")
                continue

            file_frames = 0
            skipped = 0
            for line_idx, line in enumerate(lines):
                line = line.strip()
                if not line:
                    continue

                parsed = self._parse_record(line)
                if parsed is None:
                    skipped += 1
                    continue

                timestamp, thermal_image = parsed
                self.data.append({
                    'timestamp': timestamp,
                    'image': thermal_image,
                    'file': file_path.name,
                    'line': line_idx
                })
                file_frames += 1

            file_stats.append({
                'file': file_path.name,
                'frames': file_frames,
                'lines': len(lines),
                'skipped': skipped
            })

        # Sort by timestamp
        self.data.sort(key=lambda x: x['timestamp'])

        self.diagnostics['total_frames'] = len(self.data)
        self.diagnostics['file_stats'] = file_stats

        return self.data

    def analyze_timestamps(self, expected_fps: float = 8.0,
                           dropout_factor: float = 2.0) -> Optional[Dict]:
        """Analyze timestamp distribution and dropout patterns.

        Args:
            expected_fps: Nominal frame rate of the thermal stream.
            dropout_factor: A gap longer than ``dropout_factor`` times the
                expected frame interval counts as a dropout.

        Returns:
            The diagnostics dict, or ``None`` when fewer than two frames are
            loaded (nothing is analyzed in that case).

        Raises:
            ValueError: If ``expected_fps`` is not positive.
        """
        if len(self.data) < 2:
            print("Not enough data to analyze")
            return None
        if expected_fps <= 0:
            raise ValueError(f"expected_fps must be positive, got {expected_fps}")

        # Time gaps between consecutive frames; intervals[i] is the gap between
        # frame i and frame i+1.
        timestamps = [d['timestamp'] for d in self.data]
        intervals = [
            (later - earlier).total_seconds()
            for earlier, later in zip(timestamps, timestamps[1:])
        ]

        # Stored as a plain list: downstream cells concatenate it with lists.
        self.diagnostics['intervals'] = intervals
        self.diagnostics['time_range'] = (timestamps[0], timestamps[-1])
        self.diagnostics['duration'] = (timestamps[-1] - timestamps[0]).total_seconds()

        # Statistics
        gaps = np.array(intervals)
        self.diagnostics['interval_stats'] = {
            'mean': float(np.mean(gaps)),
            'std': float(np.std(gaps)),
            'min': float(np.min(gaps)),
            'max': float(np.max(gaps)),
            'median': float(np.median(gaps))
        }

        # Dropout detection
        expected_interval = 1.0 / expected_fps
        threshold = expected_interval * dropout_factor
        dropouts = np.where(gaps > threshold)[0]

        self.diagnostics['expected_fps'] = expected_fps
        self.diagnostics['expected_interval'] = expected_interval
        self.diagnostics['dropout_threshold'] = threshold
        self.diagnostics['dropout_count'] = len(dropouts)
        self.diagnostics['dropout_indices'] = dropouts.tolist()

        return self.diagnostics

    def print_diagnosis(self):
        """Print the diagnostic report for whatever has been computed so far."""
        print("\n" + "="*60)
        print("THERMAL DATA DIAGNOSIS REPORT")
        print("="*60)

        print("\n1. FILE STATISTICS:")
        file_df = pd.DataFrame(self.diagnostics.get('file_stats', []))
        print(file_df.to_string(index=False))

        print(f"\n2. TOTAL FRAMES LOADED: {self.diagnostics.get('total_frames', 0)}")

        # The timing sections exist only after analyze_timestamps has succeeded.
        if 'time_range' in self.diagnostics:
            start, end = self.diagnostics['time_range']
            expected_fps = self.diagnostics.get('expected_fps', 8.0)
            expected_interval = self.diagnostics.get('expected_interval', 0.125)
            threshold = self.diagnostics.get('dropout_threshold', 0.25)

            print("\n3. TIME RANGE:")
            print(f"   Start: {start}")
            print(f"   End: {end}")
            print(f"   Duration: {self.diagnostics['duration']:.2f}s")

            stats = self.diagnostics['interval_stats']
            print("\n4. FRAME INTERVAL STATISTICS (seconds):")
            print(f"   Mean: {stats['mean']:.6f}s "
                  f"(ideal: {expected_interval:.3f}s for {expected_fps:g}fps)")
            print(f"   Std: {stats['std']:.6f}s")
            print(f"   Min: {stats['min']:.6f}s")
            print(f"   Max: {stats['max']:.6f}s")
            print(f"   Median: {stats['median']:.6f}s")

            print("\n5. DROPOUT DETECTION:")
            print(f"   Dropout count (>{threshold:.2f}s gap): "
                  f"{self.diagnostics['dropout_count']}")

            if self.diagnostics['dropout_count'] > 0:
                print("   Dropout locations (frame indices):")
                dropouts = self.diagnostics['dropout_indices'][:10]  # Show first 10
                for i, idx in enumerate(dropouts):
                    gap = self.diagnostics['intervals'][idx]
                    print(f"     {i+1}. Frame {idx}->{idx+1}: {gap:.3f}s gap")

        print("\n" + "="*60)

# Load and diagnose thermal data
# thermal_data is the same list object as thermal_diag.data (frames sorted by
# timestamp); later cells rely on that ordering for their binary search.
thermal_diag = ThermalDiagnostics(THERMAL_DIR, THERMAL_RES)
thermal_data = thermal_diag.load_all_files()
thermal_diag.analyze_timestamps()
thermal_diag.print_diagnosis()

## 3. Load AVI Video & Extract Properties

In [None]:
class VideoAnalyzer:
    """Read the basic properties of an AVI file without decoding its frames.

    Attributes:
        video_path: Path of the video file.
        cap: The ``cv2.VideoCapture`` used by the last ``get_properties`` call
            (already released when that call returns), or ``None``.
        fps, frame_count, width, height: Values reported by OpenCV, or
            ``None`` until ``get_properties`` succeeds.
        duration: ``frame_count / fps`` in seconds (0 when fps is not positive).
    """

    def __init__(self, video_path: Path):
        self.video_path = video_path
        self.cap = None
        self.fps = None
        self.frame_count = None
        self.width = None
        self.height = None
        self.duration = None

    def get_properties(self) -> bool:
        """Extract video properties without loading all frames.

        Returns:
            True when the file exists and could be opened, False otherwise
            (an error message is printed in that case).
        """
        if not self.video_path.exists():
            print(f"Error: Video file not found at {self.video_path}")
            return False

        self.cap = cv2.VideoCapture(str(self.video_path))
        try:
            if not self.cap.isOpened():
                print(f"Error: Could not open video file {self.video_path}")
                return False

            # NOTE(review): these are container-reported values; presumably
            # some backends report an approximate frame count - verify if
            # exact alignment at the end of the video matters.
            self.fps = self.cap.get(cv2.CAP_PROP_FPS)
            self.frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
            self.width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            self.height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            self.duration = self.frame_count / self.fps if self.fps > 0 else 0
        finally:
            # Release on every path, including a failed open or an exception.
            self.cap.release()

        return True

    def print_info(self):
        """Print video information (call after ``get_properties`` succeeded)."""
        separator = "=" * 60
        print("\n" + separator)
        print("AVI VIDEO PROPERTIES")
        print(separator)
        print(f"File: {self.video_path.name}")
        print(f"Resolution: {self.width} x {self.height}")
        print(f"FPS: {self.fps}")
        print(f"Total frames: {self.frame_count}")
        print(f"Duration: {self.duration:.2f}s")
        print(separator)


# On failure video_analyzer keeps fps/frame_count/duration as None, and the
# synchronization cells below read those attributes.
video_analyzer = VideoAnalyzer(AVI_FILE)
if video_analyzer.get_properties():
    video_analyzer.print_info()
else:
    print("Failed to analyze video")

## 4. Parse Time Labels

In [None]:
class LabelParser:
    """Parse time labels from output.txt.

    Each non-blank line has the form ``action_YYYY-MM-DD HH:MM:SS``; the
    action itself may contain underscores (e.g. ``video_start``).

    Attributes:
        labels_file: Path of the labels file.
        events: Parsed events sorted by timestamp; each is a dict with the
            keys ``action``, ``timestamp`` and ``raw``.
        video_start_time: Timestamp of the last line (in file order) whose
            action contains "start", or ``None`` if there is none.
    """

    # Accepted timestamp layouts; the second allows fractional seconds.
    _DATETIME_FORMATS = ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M:%S.%f')

    def __init__(self, labels_file: Path):
        self.labels_file = labels_file
        self.events = []
        self.video_start_time = None

    @staticmethod
    def _parse_datetime(text: str) -> Optional[datetime]:
        """Return the datetime encoded in ``text`` or ``None`` if unrecognized."""
        for fmt in LabelParser._DATETIME_FORMATS:
            try:
                return datetime.strptime(text, fmt)
            except ValueError:
                continue
        return None

    def parse_labels(self) -> bool:
        """Parse the labels file into ``events`` and ``video_start_time``.

        Lines that cannot be parsed are reported with a warning and skipped.

        Returns:
            False when the labels file does not exist, True otherwise.
        """
        if not self.labels_file.exists():
            print(f"Warning: Labels file not found at {self.labels_file}")
            return False

        # Reset so that re-parsing does not duplicate events.
        self.events = []
        self.video_start_time = None

        with open(self.labels_file, 'r') as f:
            lines = f.readlines()

        for line in lines:
            line = line.strip()
            if not line:
                continue

            # The timestamp contains no underscore, so the LAST underscore
            # separates the action (which may contain underscores) from it.
            action_part, separator, datetime_str = line.rpartition('_')
            if not separator:
                print(f"Warning: Could not parse line '{line}': missing '_' separator")
                continue

            event_time = self._parse_datetime(datetime_str.strip())
            if event_time is None:
                print(f"Warning: Could not parse line '{line}': "
                      f"unrecognized timestamp '{datetime_str}'")
                continue

            # Identify video start (any action mentioning "start"; the last
            # such line in the file wins).
            if 'start' in action_part.lower():
                self.video_start_time = event_time

            self.events.append({
                'action': action_part,
                'timestamp': event_time,
                'raw': line
            })

        # Sort by timestamp
        self.events.sort(key=lambda x: x['timestamp'])
        return True

    def print_labels(self):
        """Print parsed labels."""
        print("\n" + "="*60)
        print("PARSED TIME LABELS")
        print("="*60)

        if self.video_start_time:
            print(f"Video start time: {self.video_start_time}")

        print(f"\nTotal events: {len(self.events)}")
        print("\nEvents:")
        for i, event in enumerate(self.events):
            print(f"  {i+1}. {event['action']:15s} - {event['timestamp']}")

        print("="*60)


# parse_labels() returns False only when the labels file is missing; in that
# case label_parser.video_start_time stays None and the synchronization step
# falls back to the first thermal timestamp.
label_parser = LabelParser(LABELS_FILE)
if label_parser.parse_labels():
    label_parser.print_labels()
else:
    print("No labels file found")

## 5. Time Synchronization

In [None]:
def establish_time_reference(thermal_data: List[Dict], 
                             video_analyzer: VideoAnalyzer,
                             label_parser: LabelParser,
                             target_fps: float = 8) -> Tuple[datetime, datetime, datetime]:
    """
    Establish the time reference for synchronization.

    The RGB recording is assumed to start at the labelled video start time
    (falling back to the first thermal timestamp) and to last
    ``video_analyzer.duration`` seconds. The overlap of that range with the
    thermal time range is the synchronization window.

    Args:
        thermal_data: Thermal frames sorted by ``'timestamp'``.
        video_analyzer: Analyzer whose ``duration`` has been populated.
        label_parser: Parser providing ``video_start_time`` (may be None).
        target_fps: Frame rate used only for the "expected frames" line of
            the printed report.

    Returns:
        (video_start, T_start, T_end) where:
        - video_start: Reference time from labels or thermal data
        - T_start: Start of overlapping window
        - T_end: End of overlapping window
        ``T_start >= T_end`` means the recordings do not overlap.

    Raises:
        ValueError: If ``thermal_data`` is empty or the video duration is
            unavailable.
    """
    if not thermal_data:
        raise ValueError("No thermal frames loaded; cannot establish a time reference")
    if video_analyzer.duration is None:
        raise ValueError("Video duration is unavailable; analyze the video first")

    # Get thermal time range (first/last entries: the list is sorted by time)
    thermal_start = thermal_data[0]['timestamp']
    thermal_end = thermal_data[-1]['timestamp']

    # Get video start time from labels
    video_start = label_parser.video_start_time
    if video_start is None:
        # Fallback: use first thermal frame
        video_start = thermal_start
        print(f"Warning: Using first thermal frame as reference: {video_start}")

    # RGB time range (starting from video_start)
    rgb_duration = video_analyzer.duration
    rgb_start = video_start
    rgb_end = rgb_start + timedelta(seconds=rgb_duration)

    # Find overlapping window
    T_start = max(thermal_start, rgb_start)
    T_end = min(thermal_end, rgb_end)

    print("\n" + "="*60)
    print("TIME SYNCHRONIZATION ANALYSIS")
    print("="*60)
    print(f"\nVideo reference time: {video_start}")
    print("\nThermal time range:")
    print(f"  Start: {thermal_start}")
    print(f"  End: {thermal_end}")
    print(f"  Duration: {(thermal_end - thermal_start).total_seconds():.2f}s")

    print("\nRGB time range (calculated):")
    print(f"  Start: {rgb_start}")
    print(f"  End: {rgb_end}")
    print(f"  Duration: {rgb_duration:.2f}s")

    if T_start < T_end:
        overlap_duration = (T_end - T_start).total_seconds()
        print("\nOverlapping window:")
        print(f"  Start: {T_start}")
        print(f"  End: {T_end}")
        print(f"  Duration: {overlap_duration:.2f}s")
        print(f"  Expected frames @ {target_fps:g}fps: {int(overlap_duration * target_fps)}")
    else:
        print("\nError: No overlapping window found!")

    print("="*60)

    return video_start, T_start, T_end


# Fails if thermal_data is empty (the function reads its first and last
# entries) or if the video properties were never loaded.
video_start, T_start, T_end = establish_time_reference(thermal_data, video_analyzer, label_parser)

## 6. Create Target Timeline & Match Frames

In [None]:
def create_target_timeline(T_start: datetime, T_end: datetime, target_fps: float = 8) -> List[datetime]:
    """Create the fixed target timeline from T_start to T_end (inclusive).

    Frame ``i`` is placed at ``T_start + i / target_fps``. Computing each
    instant from the frame number (instead of repeatedly adding a rounded
    interval) keeps microsecond rounding from accumulating at frame rates
    whose interval is not a whole number of microseconds.

    Args:
        T_start: First instant of the timeline.
        T_end: Last instant allowed on the timeline.
        target_fps: Frames per second; must be positive.

    Returns:
        The list of target instants; empty when ``T_start > T_end``.

    Raises:
        ValueError: If ``target_fps`` is not positive (a negative rate would
            otherwise never reach ``T_end``).
    """
    if target_fps <= 0:
        raise ValueError(f"target_fps must be positive, got {target_fps}")

    timeline = []
    frame_no = 0
    current = T_start

    while current <= T_end:
        timeline.append(current)
        frame_no += 1
        current = T_start + timedelta(seconds=frame_no / target_fps)

    return timeline


def find_nearest_thermal(target_time: datetime, thermal_data: List[Dict], 
                       tolerance: float = TIMESTAMP_TOLERANCE) -> Optional[Dict]:
    """Find the thermal frame nearest to ``target_time`` within ``tolerance``.

    Args:
        target_time: Instant to match.
        thermal_data: Frames as dicts with a ``'timestamp'`` key; must be
            sorted by timestamp in ascending order (binary search is used).
        tolerance: Maximum allowed time difference in seconds (exclusive).

    Returns:
        The nearest frame dict, or None when no frame is closer than
        ``tolerance``. On an exact tie the earlier frame is returned.
    """
    # Binary search for the first frame whose timestamp is >= target_time.
    # Searching the dicts directly avoids rebuilding a timestamp list on
    # every call, which made synchronization quadratic overall.
    lo, hi = 0, len(thermal_data)
    while lo < hi:
        mid = (lo + hi) // 2
        if thermal_data[mid]['timestamp'] < target_time:
            lo = mid + 1
        else:
            hi = mid

    best_frame = None
    best_diff = float('inf')

    # Only the neighbours on either side of the insertion point can be nearest.
    for cand_idx in (lo - 1, lo):
        if 0 <= cand_idx < len(thermal_data):
            candidate = thermal_data[cand_idx]
            diff = abs((candidate['timestamp'] - target_time).total_seconds())
            if diff < tolerance and diff < best_diff:
                best_frame = candidate
                best_diff = diff

    return best_frame


def find_nearest_rgb_frame(target_time: datetime, video_start: datetime,
                         video_analyzer: VideoAnalyzer,
                         tolerance: float = TIMESTAMP_TOLERANCE) -> Tuple[Optional[int], float]:
    """
    Find nearest RGB frame index based on time offset.

    Frame ``k`` is taken to occur ``k / fps`` seconds after ``video_start``.
    The index is rounded to the nearest frame (not truncated), so the
    returned frame minimizes the reported error, and it is clamped to the
    valid range so targets just outside the video can still match its first
    or last frame when that frame is within tolerance.

    Args:
        target_time: Instant to match.
        video_start: Wall-clock time of frame 0.
        video_analyzer: Provides ``fps`` and ``frame_count``.
        tolerance: Maximum allowed time difference in seconds (exclusive).

    Returns:
        (frame_index, time_offset_error), or ``(None, inf)`` when no frame is
        within tolerance or the video properties are unusable.
    """
    fps = video_analyzer.fps
    frame_count = video_analyzer.frame_count

    # `not (fps > 0)` also rejects NaN; a zero fps would divide by zero below.
    if fps is None or not (fps > 0) or not frame_count or frame_count <= 0:
        return None, float('inf')

    target_offset = (target_time - video_start).total_seconds()

    # Nearest frame, clamped to [0, frame_count - 1]
    frame_idx = int(round(target_offset * fps))
    frame_idx = min(max(frame_idx, 0), frame_count - 1)

    actual_offset = frame_idx / fps
    error = abs(actual_offset - target_offset)

    if error < tolerance:
        return frame_idx, error

    return None, float('inf')


# Create target timeline
target_timeline = create_target_timeline(T_start, T_end, TARGET_FPS)
print(f"\nTarget timeline created: {len(target_timeline)} frames @ {TARGET_FPS}fps")
# The timeline is empty when the thermal and RGB recordings do not overlap
# (T_start > T_end); indexing it would raise IndexError.
if target_timeline:
    print(f"Time range: {target_timeline[0]} to {target_timeline[-1]}")
else:
    print("Time range: (empty) - thermal and RGB recordings do not overlap")

## 7. Synchronize & Create Paired Dataset

In [None]:
def synchronize_thermal_rgb(target_timeline: List[datetime],
                            thermal_data: List[Dict],
                            video_start: datetime,
                            video_analyzer: VideoAnalyzer,
                            tolerance: float = TIMESTAMP_TOLERANCE) -> List[Dict]:
    """
    Synchronize thermal and RGB frames.

    For every instant of ``target_timeline`` the nearest thermal frame and
    the nearest RGB frame index are looked up; the instant is kept only when
    both are within ``tolerance`` seconds.

    Args:
        target_timeline: Target instants, in ascending order.
        thermal_data: Thermal frames sorted by ``'timestamp'``.
        video_start: Wall-clock time of RGB frame 0.
        video_analyzer: Provides the video's fps and frame count.
        tolerance: Maximum allowed time difference in seconds.

    Returns:
        List of synchronized pairs, each a dict with the keys ``timestamp``,
        ``thermal``, ``rgb_frame_idx``, ``thermal_error`` and ``rgb_error``
        (errors in seconds).
    """
    pairs = []
    skipped = {'no_thermal': 0, 'no_rgb': 0}

    for target_time in tqdm(target_timeline, desc="Synchronizing"):
        # Find thermal frame
        thermal_frame = find_nearest_thermal(target_time, thermal_data, tolerance)
        if thermal_frame is None:
            skipped['no_thermal'] += 1
            continue

        # Find RGB frame
        rgb_idx, error = find_nearest_rgb_frame(target_time, video_start, video_analyzer, tolerance)
        if rgb_idx is None:
            skipped['no_rgb'] += 1
            continue

        pairs.append({
            'timestamp': target_time,
            'thermal': thermal_frame,
            'rgb_frame_idx': rgb_idx,
            'thermal_error': abs((thermal_frame['timestamp'] - target_time).total_seconds()),
            'rgb_error': error
        })

    print("\nSynchronization Results:")
    print(f"  Valid pairs: {len(pairs)}")
    print(f"  Skipped (no thermal): {skipped['no_thermal']}")
    print(f"  Skipped (no RGB): {skipped['no_rgb']}")
    # An empty timeline (no overlap) has no meaningful success rate.
    if target_timeline:
        print(f"  Success rate: {len(pairs) / len(target_timeline) * 100:.1f}%")
    else:
        print("  Success rate: N/A (empty target timeline)")

    return pairs


# Uses the default tolerance, TIMESTAMP_TOLERANCE (half a target frame interval).
sync_pairs = synchronize_thermal_rgb(target_timeline, thermal_data, video_start, video_analyzer)

## 8. Extract & Save RGB Frames

In [None]:
def extract_rgb_frames(sync_pairs: List[Dict],
                       avi_path: Path,
                       output_dir: Path) -> bool:
    """
    Extract RGB frames from AVI and save paired data.

    For every pair the thermal image and the matching RGB frame (converted
    from BGR to RGB) are written as ``pair_<index>_thermal.npy`` and
    ``pair_<index>_rgb.npy`` under ``output_dir/thermal`` and
    ``output_dir/rgb``; ``<index>`` is the pair's position in ``sync_pairs``.
    A ``metadata.csv`` describing the written pairs is saved in
    ``output_dir``. Pairs whose RGB frame cannot be read are skipped.

    Args:
        sync_pairs: Pairs as produced by ``synchronize_thermal_rgb``.
        avi_path: Video file to read the RGB frames from.
        output_dir: Directory receiving the ``thermal`` and ``rgb``
            sub-directories and ``metadata.csv``.

    Returns:
        False when the video cannot be opened, True otherwise.
    """
    cap = cv2.VideoCapture(str(avi_path))
    if not cap.isOpened():
        print(f"Error: Could not open {avi_path}")
        cap.release()
        return False

    # Create subdirectories
    thermal_dir = output_dir / 'thermal'
    rgb_dir = output_dir / 'rgb'
    thermal_dir.mkdir(parents=True, exist_ok=True)
    rgb_dir.mkdir(parents=True, exist_ok=True)

    metadata_columns = ['pair_id', 'timestamp', 'rgb_frame_idx',
                        'thermal_file', 'thermal_error_ms', 'rgb_error_ms']
    metadata_list = []

    next_read_idx = 0    # index of the frame the next cap.read() will return
    stream_ok = True     # False after a failed read, until a backward seek
    # Only the most recent frame is kept: consecutive pairs may share a frame
    # index, but retaining every decoded frame would exhaust memory.
    last_idx = None
    last_rgb = None

    try:
        for pair_idx, pair in enumerate(tqdm(sync_pairs, desc="Extracting frames")):
            target_frame_idx = pair['rgb_frame_idx']

            if target_frame_idx != last_idx:
                if target_frame_idx < next_read_idx:
                    # Going backwards: reposition AND update the bookkeeping,
                    # otherwise no frame is read and a stale one is saved.
                    cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame_idx)
                    next_read_idx = target_frame_idx
                    stream_ok = True

                # Read forward until the target frame has been decoded.
                frame = None
                while stream_ok and next_read_idx <= target_frame_idx:
                    stream_ok, frame = cap.read()
                    if stream_ok:
                        next_read_idx += 1
                    else:
                        frame = None

                if frame is None:
                    continue  # frame unavailable; skip this pair

                last_idx = target_frame_idx
                last_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Save thermal and RGB
            pair_name = f"pair_{pair_idx:05d}"

            thermal_path = thermal_dir / f"{pair_name}_thermal.npy"
            np.save(thermal_path, pair['thermal']['image'].astype(np.uint8))

            rgb_path = rgb_dir / f"{pair_name}_rgb.npy"
            np.save(rgb_path, last_rgb.astype(np.uint8))

            metadata_list.append({
                'pair_id': pair_name,
                'timestamp': pair['timestamp'],
                'rgb_frame_idx': pair['rgb_frame_idx'],
                'thermal_file': pair['thermal']['file'],
                'thermal_error_ms': pair['thermal_error'] * 1000,
                'rgb_error_ms': pair['rgb_error'] * 1000
            })
    finally:
        cap.release()

    # Save metadata; explicit columns keep the CSV header even with zero rows,
    # so the file can always be read back.
    metadata_df = pd.DataFrame(metadata_list, columns=metadata_columns)
    metadata_path = output_dir / 'metadata.csv'
    metadata_df.to_csv(metadata_path, index=False)

    if metadata_list:
        print("\nFrames extracted successfully!")
    else:
        print("\nWarning: no frames were extracted")
    print(f"  Pairs written: {len(metadata_list)} / {len(sync_pairs)}")
    print(f"  Thermal frames saved to: {thermal_dir}")
    print(f"  RGB frames saved to: {rgb_dir}")
    print(f"  Metadata saved to: {metadata_path}")

    return True


# Writes thermal/, rgb/ and metadata.csv under SYNCHRONIZED_DIR. The boolean
# result (False when the video cannot be opened) is not checked here.
extract_rgb_frames(sync_pairs, AVI_FILE, SYNCHRONIZED_DIR)

## 9. Generate Visualizations

In [None]:
# Visualize thermal timestamp distribution
# 2x2 grid: interval histogram, cumulative time, dropout gaps, sync errors.
# A panel stays empty when its input data is missing.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Plot 1: Timestamp intervals histogram
# 'intervals' is absent when fewer than two thermal frames were loaded,
# hence the .get() default.
intervals = thermal_diag.diagnostics.get('intervals', [])
if intervals:
    axes[0, 0].hist(intervals, bins=50, edgecolor='black', alpha=0.7)
    axes[0, 0].axvline(FRAME_INTERVAL, color='r', linestyle='--', label=f'Target: {FRAME_INTERVAL:.4f}s')
    axes[0, 0].set_xlabel('Interval (seconds)')
    axes[0, 0].set_ylabel('Frequency')
    axes[0, 0].set_title('Thermal Frame Interval Distribution')
    axes[0, 0].legend()
    # Only gaps up to 0.5 s are shown; the 50 bins still span the full range
    # of intervals, so very long gaps make the visible bars coarse.
    axes[0, 0].set_xlim(0, 0.5)

# Plot 2: Cumulative time
if thermal_diag.diagnostics.get('intervals'):
    # Prepending 0 gives one cumulative value per frame (frame 0 at t = 0).
    cumulative = np.cumsum([0] + thermal_diag.diagnostics['intervals'])
    axes[0, 1].plot(cumulative)
    axes[0, 1].set_xlabel('Frame index')
    axes[0, 1].set_ylabel('Cumulative time (seconds)')
    axes[0, 1].set_title('Cumulative Time vs Frame Index')
    axes[0, 1].grid(True, alpha=0.3)

# Plot 3: Dropouts
dropout_indices = thermal_diag.diagnostics.get('dropout_indices', [])
if dropout_indices:
    # Each dropout index points into 'intervals'; only the first 100 are drawn.
    dropout_gaps = [thermal_diag.diagnostics['intervals'][i] for i in dropout_indices[:100]]
    axes[1, 0].scatter(range(len(dropout_gaps)), dropout_gaps, alpha=0.6, s=50)
    # FRAME_INTERVAL * 2 equals the analyzer's 2x-expected-interval dropout
    # threshold as long as TARGET_FPS is 8.
    axes[1, 0].axhline(FRAME_INTERVAL * 2, color='r', linestyle='--', label='Dropout threshold')
    axes[1, 0].set_xlabel('Dropout index')
    axes[1, 0].set_ylabel('Gap duration (seconds)')
    axes[1, 0].set_title('Frame Dropout Gaps (first 100)')
    axes[1, 0].legend()
    axes[1, 0].grid(True, alpha=0.3)

# Plot 4: Sync error
if sync_pairs:
    # Pair errors are stored in seconds; convert to milliseconds for display.
    thermal_errors = [p['thermal_error'] * 1000 for p in sync_pairs]  # ms
    rgb_errors = [p['rgb_error'] * 1000 for p in sync_pairs]  # ms
    axes[1, 1].hist(thermal_errors, bins=30, alpha=0.5, label='Thermal error', edgecolor='black')
    axes[1, 1].hist(rgb_errors, bins=30, alpha=0.5, label='RGB error', edgecolor='black')
    axes[1, 1].set_xlabel('Time error (milliseconds)')
    axes[1, 1].set_ylabel('Frequency')
    axes[1, 1].set_title('Synchronization Error Distribution')
    axes[1, 1].legend()
    axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
# Save before show(): the figure is written to DIAGNOSIS_DIR and then displayed.
plt.savefig(DIAGNOSIS_DIR / 'thermal_diagnosis.png', dpi=100, bbox_inches='tight')
plt.show()

print(f"Diagnosis plot saved to {DIAGNOSIS_DIR / 'thermal_diagnosis.png'}")

## 10. Visualize Sample Paired Frames

In [None]:
def visualize_paired_samples(sync_pairs: List[Dict],
                             output_dir: Path,
                             num_samples: int = 6):
    """
    Visualize sample thermal-RGB pairs.

    Up to ``num_samples`` pairs, spread evenly over ``sync_pairs``, are
    loaded from ``output_dir/thermal`` and ``output_dir/rgb`` and shown side
    by side, both resized to 256x256. Pairs whose files were not written are
    left out. The figure is saved as ``sample_pairs.png`` in DIAGNOSIS_DIR.

    Args:
        sync_pairs: Pairs as produced by ``synchronize_thermal_rgb``; a pair's
            position in this list determines its file name.
        output_dir: Directory containing the ``thermal`` and ``rgb``
            sub-directories.
        num_samples: Maximum number of pairs to show.
    """
    thermal_dir = output_dir / 'thermal'
    rgb_dir = output_dir / 'rgb'

    # Select samples uniformly distributed
    if len(sync_pairs) > num_samples:
        candidates = np.linspace(0, len(sync_pairs) - 1, num_samples, dtype=int)
    else:
        candidates = range(len(sync_pairs))

    # Keep only the pairs whose files exist on disk.
    samples = []
    for pair_idx in candidates:
        pair_idx = int(pair_idx)
        pair_name = f"pair_{pair_idx:05d}"
        thermal_path = thermal_dir / f"{pair_name}_thermal.npy"
        rgb_path = rgb_dir / f"{pair_name}_rgb.npy"
        if thermal_path.exists() and rgb_path.exists():
            samples.append((pair_idx, thermal_path, rgb_path))

    if not samples:
        # plt.subplots cannot create a grid with zero rows.
        print("No saved thermal/RGB pairs found to visualize")
        return

    # squeeze=False always yields a 2-D axes array, even for a single row.
    fig, axes = plt.subplots(len(samples), 2, figsize=(10, 4*len(samples)),
                             squeeze=False)

    for plot_idx, (pair_idx, thermal_path, rgb_path) in enumerate(samples):
        pair = sync_pairs[pair_idx]
        thermal_img = np.load(thermal_path)
        rgb_img = np.load(rgb_path)

        # Titles report the actual source sizes (arrays are height x width).
        thermal_h, thermal_w = thermal_img.shape[:2]
        rgb_h, rgb_w = rgb_img.shape[:2]

        # Plot thermal (upscaled for visibility)
        thermal_upscaled = cv2.resize(thermal_img, (256, 256), interpolation=cv2.INTER_CUBIC)
        axes[plot_idx][0].imshow(thermal_upscaled, cmap='hot')
        axes[plot_idx][0].set_title(
            f'Thermal ({thermal_w}×{thermal_h}→256×256)\nT_err: {pair["thermal_error"]*1000:.1f}ms')
        axes[plot_idx][0].axis('off')

        # Plot RGB (resized)
        rgb_resized = cv2.resize(rgb_img, (256, 256))
        axes[plot_idx][1].imshow(rgb_resized)
        axes[plot_idx][1].set_title(
            f'RGB ({rgb_w}×{rgb_h}→256×256)\nR_err: {pair["rgb_error"]*1000:.1f}ms')
        axes[plot_idx][1].axis('off')

    plt.tight_layout()
    plt.savefig(DIAGNOSIS_DIR / 'sample_pairs.png', dpi=100, bbox_inches='tight')
    plt.show()

    print(f"Sample visualization saved to {DIAGNOSIS_DIR / 'sample_pairs.png'}")


# Reads back the .npy files written by extract_rgb_frames into SYNCHRONIZED_DIR.
visualize_paired_samples(sync_pairs, SYNCHRONIZED_DIR, num_samples=6)

## 11. Summary Report

In [None]:
# Generate summary report

def _format_interval_stat(key: str) -> str:
    """Format one thermal interval statistic, or 'N/A' when it was not computed.

    The statistics are missing when fewer than two thermal frames were
    loaded; applying a float format to the 'N/A' fallback would raise
    ValueError.
    """
    value = thermal_diag.diagnostics.get('interval_stats', {}).get(key)
    return f"{value:.6f}s" if value is not None else 'N/A'


# An empty target timeline (no overlap) has no meaningful success rate.
if target_timeline:
    success_rate_text = f"{len(sync_pairs) / len(target_timeline) * 100:.1f}%"
else:
    success_rate_text = 'N/A'

report = f"""
{'='*70}
THERMAL-RGB SYNCHRONIZATION PIPELINE - SUMMARY REPORT
{'='*70}

1. DATA LOADING
   - Thermal frames loaded: {len(thermal_data)}
   - Thermal data dropout count: {thermal_diag.diagnostics.get('dropout_count', 'N/A')}
   - Video FPS: {video_analyzer.fps}
   - Video total frames: {video_analyzer.frame_count}
   - Video duration: {video_analyzer.duration:.2f}s

2. TIME SYNCHRONIZATION
   - Video start time: {video_start}
   - Synchronization window: {T_start} to {T_end}
   - Window duration: {(T_end - T_start).total_seconds():.2f}s
   - Target FPS: {TARGET_FPS}
   - Target timeline frames: {len(target_timeline)}

3. FRAME MATCHING
   - Successfully paired: {len(sync_pairs)}
   - Success rate: {success_rate_text}
   - Timestamp tolerance: ±{TIMESTAMP_TOLERANCE*1000:.1f}ms

4. THERMAL FRAME INTERVALS
   - Mean: {_format_interval_stat('mean')}
   - Std Dev: {_format_interval_stat('std')}
   - Min: {_format_interval_stat('min')}
   - Max: {_format_interval_stat('max')}

5. OUTPUT FILES
   - Thermal frames: {SYNCHRONIZED_DIR / 'thermal'}
   - RGB frames: {SYNCHRONIZED_DIR / 'rgb'}
   - Metadata: {SYNCHRONIZED_DIR / 'metadata.csv'}
   - Diagnostics: {DIAGNOSIS_DIR}

6. NEXT STEPS
   - Review diagnosis plots in {DIAGNOSIS_DIR}
   - Check synchronized frame pairs in {SYNCHRONIZED_DIR}
   - Use paired data for super-resolution training
   - Available methods: RGB-as-GT, Self-Supervised, Hybrid

{'='*70}
"""

print(report)

# Save report (explicit UTF-8: the text contains the non-ASCII '±' sign)
with open(OUTPUT_DIR / 'REPORT.txt', 'w', encoding='utf-8') as f:
    f.write(report)

print(f"\nReport saved to {OUTPUT_DIR / 'REPORT.txt'}")

## 12. Export Metadata Summary

In [None]:
# Load and display metadata
# Reads back the CSV written by extract_rgb_frames, so this cell reports what
# is actually on disk rather than the in-memory sync_pairs.
metadata_df = pd.read_csv(SYNCHRONIZED_DIR / 'metadata.csv')

print("\n" + "="*70)
print("PAIRED DATASET METADATA")
print("="*70)
print(f"\nTotal pairs: {len(metadata_df)}")
print(f"\nMetadata Statistics:")
# describe() summarizes the numeric columns (frame index and the two errors).
print(metadata_df.describe())

print(f"\nFirst 10 pairs:")
print(metadata_df.head(10).to_string(index=False))

# Errors are stored in milliseconds (columns *_error_ms).
print(f"\nSynchronization error statistics:")
print(f"  Thermal error - Mean: {metadata_df['thermal_error_ms'].mean():.2f}ms, "
      f"Max: {metadata_df['thermal_error_ms'].max():.2f}ms")
print(f"  RGB error - Mean: {metadata_df['rgb_error_ms'].mean():.2f}ms, "
      f"Max: {metadata_df['rgb_error_ms'].max():.2f}ms")

print("\n" + "="*70)