In [1]:
import os
import pandas as pd
from datetime import timedelta

In [9]:
import re

def extract_image_timestamps_from_folder(image_dir, date_str, extensions=('.jpg',)):
    """
    Extract (timestamp, filename) pairs from the image filenames in a folder.

    The filename stem (name without extension) is tried against these
    layouts, in order:
      1. 'YYYY-MM-DD HHMMSS'
      2. 'IMG_YYYYMMDD_HHMMSS' (matched as a prefix, so trailing characters
         after the time part are tolerated)
      3. 'HHMMSS' (exactly six digits; the date is taken from date_str)

    Parameters
    ----------
    image_dir : str
        Directory whose entries are scanned (non-recursive).
    date_str : str
        Date in 'YYYY-MM-DD' form, combined with 'HHMMSS'-only filenames.
    extensions : str or iterable of str, optional
        File suffixes to accept, compared case-insensitively.
        Defaults to ('.jpg',).

    Returns
    -------
    list of (pandas.Timestamp, str)
        Pairs sorted by timestamp; entries with equal timestamps are ordered
        by filename.

    Notes
    -----
    Files that cannot be parsed are skipped and reported once on stdout.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    image_timestamps = []
    # os.listdir returns entries in arbitrary order; sorting first makes the
    # order of equal timestamps (and of the warnings) reproducible.
    for fname in sorted(os.listdir(image_dir)):
        if not fname.lower().endswith(suffixes):
            continue
        base = os.path.splitext(fname)[0]

        # Layout 1: 'YYYY-MM-DD HHMMSS'. A failure here is expected for the
        # other layouts, so it falls through without a message.
        try:
            ts = pd.to_datetime(base, format="%Y-%m-%d %H%M%S")
            image_timestamps.append((ts, fname))
            continue
        except Exception:
            pass

        # Pick exactly one remaining layout so a file is reported at most once.
        m = re.match(r'IMG_(\d{8})_(\d{6})', base)
        if m:
            candidate, fmt = m.group(1) + m.group(2), "%Y%m%d%H%M%S"
        elif len(base) == 6 and base.isdigit():
            candidate, fmt = f"{date_str} {base}", "%Y-%m-%d %H%M%S"
        else:
            print(f"⚠️ Skipping {fname}: Unrecognized format")
            continue

        try:
            ts = pd.to_datetime(candidate, format=fmt)
        except Exception as e:
            # Shape matched but the digits are not a valid date/time.
            print(f"⚠️ Skipping {fname}: {e}")
            continue
        image_timestamps.append((ts, fname))

    # sorted() is stable, so ties keep the filename order established above.
    return sorted(image_timestamps, key=lambda x: x[0])

In [10]:
def align_image_to_tabular(image_ts_list, tabular_df, window_minutes=20):
    """
    Pair each image with the tabular rows recorded around its timestamp.

    For every (timestamp, filename) pair, rows of tabular_df whose 'time'
    value lies in [ts - window_minutes/2, ts + window_minutes/2] (both ends
    inclusive) are copied and tagged with the image's filename and timestamp.
    A row that falls inside several images' windows appears once per image.

    Parameters
    ----------
    image_ts_list : iterable of (timestamp, str)
        Image timestamps and filenames.
    tabular_df : pandas.DataFrame
        Must contain a 'time' column comparable with the image timestamps.
    window_minutes : int or float, optional
        Total width of the window centred on each image. Defaults to 20.

    Returns
    -------
    aligned_df : pandas.DataFrame
        Matched rows plus 'image_filename' and 'image_timestamp' columns.
        When nothing matches, an empty frame with those same columns.
    summary_df : pandas.DataFrame
        One row per image with columns 'image_filename', 'image_timestamp'
        and 'match_count'.
    """
    # True division: floor division would silently shrink odd windows
    # (e.g. 5 minutes -> +/-2 instead of +/-2.5).
    half_window = timedelta(minutes=window_minutes / 2)
    times = tabular_df['time']  # loop-invariant lookup
    aligned_data = []
    summary = []

    for ts, fname in image_ts_list:
        start = ts - half_window
        end = ts + half_window

        match = tabular_df[(times >= start) & (times <= end)].copy()

        summary.append({
            "image_filename": fname,
            "image_timestamp": ts,
            "match_count": len(match)
        })

        if not match.empty:
            match['image_filename'] = fname
            match['image_timestamp'] = ts
            aligned_data.append(match)

    if aligned_data:
        aligned_df = pd.concat(aligned_data, ignore_index=True)
    else:
        # Keep the schema even with zero matches, so a CSV written from the
        # result still has a header row and can be read back.
        extra = [c for c in ('image_filename', 'image_timestamp')
                 if c not in tabular_df.columns]
        aligned_df = tabular_df.iloc[:0].reindex(
            columns=[*tabular_df.columns, *extra]
        )

    # Explicit columns keep 'match_count' present when there are no images.
    summary_df = pd.DataFrame(
        summary, columns=["image_filename", "image_timestamp", "match_count"]
    )

    return aligned_df, summary_df


In [12]:
# Days to process: dataset sub-folder name and the calendar date used for
# images whose filenames carry only a time of day.
day_configs = [
    dict(folder="7_24_data", date="2019-07-24"),
    dict(folder="10_19_data", date="2019-10-19"),
    dict(folder="11_10_data", date="2019-11-10"),
]

# Input and output locations (relative to the notebook's working directory).
dataset_root = "../dataset"
merged_folder = "merged_tabular_data"
aligned_folder = "aligned_img_table"
summary_folder = "summary_img_table"

# Create the output folders up front; existing ones are left untouched.
for out_dir in (aligned_folder, summary_folder):
    os.makedirs(out_dir, exist_ok=True)

# Align images with tabular data one day at a time.
for config in day_configs:
    folder, date_str = config['folder'], config['date']

    print(f"\nProcessing {folder}...")

    tabular_path = os.path.join(merged_folder, f'merged_{folder}.csv')
    image_dir = os.path.join(dataset_root, folder, 'images', 'pictures')

    if not os.path.exists(tabular_path):
        # Nothing to align against for this day.
        print(f"Tabular file not found: {tabular_path}")
    elif not os.path.exists(image_dir):
        print(f"Image directory not found: {image_dir}")
    else:
        # Read the merged table (with 'time' parsed as datetimes) and the
        # timestamps recovered from the image filenames.
        df = pd.read_csv(tabular_path, parse_dates=['time'])
        image_ts = extract_image_timestamps_from_folder(image_dir, date_str)

        if image_ts:
            aligned_df, summary_df = align_image_to_tabular(image_ts, df, window_minutes=20)

            # Write both results next to each other, keyed by the day folder.
            aligned_csv = os.path.join(aligned_folder, f'aligned_{folder}.csv')
            summary_csv = os.path.join(summary_folder, f'alignment_summary_{folder}.csv')
            aligned_df.to_csv(aligned_csv, index=False)
            summary_df.to_csv(summary_csv, index=False)

            # Short per-day report.
            print(f"Saved aligned data to: {aligned_csv}")
            print(f"Saved summary to:     {summary_csv}")
            print(f"Total images processed: {len(summary_df)}")
            print(f"Avg matches per image: {summary_df['match_count'].mean():.2f}")
        else:
            print(f"No valid images found in {image_dir}")



Processing 7_24_data...
Saved aligned data to: aligned_img_table\aligned_7_24_data.csv
Saved summary to:     summary_img_table\alignment_summary_7_24_data.csv
Total images processed: 36
Avg matches per image: 4124.92

Processing 10_19_data...
Saved aligned data to: aligned_img_table\aligned_10_19_data.csv
Saved summary to:     summary_img_table\alignment_summary_10_19_data.csv
Total images processed: 37
Avg matches per image: 9125.27

Processing 11_10_data...
Saved aligned data to: aligned_img_table\aligned_11_10_data.csv
Saved summary to:     summary_img_table\alignment_summary_11_10_data.csv
Total images processed: 33
Avg matches per image: 9846.12
