# Test Aethalometer-Filter Matcher Module

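Smoke test of the modular aethalometer-filter matching system in `src.data.loaders`. It exercises the `quick_match` convenience function, the full `AethalometerFilterMatcher` class, and the built-in correlation summary, using the ETAD site as the example.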

In [1]:
# Import the module
import sys

# Make the parent directory importable so the `src` package used below can be
# resolved when the notebook is run from its own folder.
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
if '..' not in sys.path:
    sys.path.append('..')

from src.data.loaders import AethalometerFilterMatcher, quick_match
import pandas as pd

print("✅ Module imported successfully!")

✅ Module imported successfully!


In [2]:
# Test 1: Using the quick_match function
print("🚀 Testing quick_match function for ETAD...")

# Input pickles, relative to the notebook's folder; reused by the later cells.
aethalometer_path = "../FTIR_HIPS_Chem/df_Jacros_9am_resampled.pkl"
filter_db_path = "../FTIR_HIPS_Chem/Filter Data/unified_filter_dataset.pkl"

# Start from an empty frame: the except branch below only reports the error,
# so without this reset a failed re-run would leave the `matched_data` of a
# previous execution in the kernel and later cells would silently analyse
# stale results. An empty frame makes the `.empty` guard in Test 3 take its
# "no matched data" branch instead.
matched_data = pd.DataFrame()

try:
    matched_data = quick_match(aethalometer_path, filter_db_path, 'ETAD')
    print(f"\n✅ Quick match successful! Shape: {matched_data.shape}")
    print(f"Columns: {list(matched_data.columns)}")
except Exception as e:
    # Deliberately best-effort: report the failure and let the notebook continue.
    print(f"❌ Quick match failed: {e}")

🚀 Testing quick_match function for ETAD...
Loading complete filter dataset from ../FTIR_HIPS_Chem/Filter Data/unified_filter_dataset.pkl...
Dataset loaded successfully!
   Total measurements: 44,493
   Unique filters: 1,603
   Sites: CHTS, ETAD, INDH, USPA
   Date range: 2013-06-28 to 2024-12-08
   Data sources: ChemSpec, FTIR, HIPS
✅ Filter database loaded from: ../FTIR_HIPS_Chem/Filter Data/unified_filter_dataset.pkl
🔗 Matching aethalometer and filter data for ETAD...
✅ Aethalometer data loaded from: ../FTIR_HIPS_Chem/df_Jacros_9am_resampled.pkl
   Dataset shape: (1047, 320)
   Date range: 2022-04-13 09:00:00+03:00 to 2025-06-25 09:00:00+03:00
   No 'Site' column found - assuming single site data
📍 No 'Site' column - assuming all data is for ETAD
📂 Extracting ETAD filter measurements...
   ✅ EC_ftir: 190 measurements
      Date range: 2022-12-07 to 2024-09-21
   ✅ HIPS_Fabs: 190 measurements
      Date range: 2022-12-07 to 2024-09-21
   ✅ Iron_ChemSpec: 188 measurements
      Date ra

In [3]:
# Test 2: Using the full matcher class
print("🔧 Testing AethalometerFilterMatcher class...")

try:
    # Build the matcher from the two pickle paths defined in Test 1
    matcher = AethalometerFilterMatcher(aethalometer_path, filter_db_path)

    # Report which sites the matcher exposes
    sites = matcher.get_available_sites()
    print(f"Available sites: {sites}")

    # Custom selection: which filter parameters and aethalometer columns to match for ETAD
    etad_match_options = {
        'filter_parameters': ['EC_ftir', 'HIPS_Fabs', 'ChemSpec_Iron_PM2.5'],
        'aethalometer_columns': ['IR BCc smoothed', 'Blue BCc smoothed', 'UV BCc smoothed'],
    }
    etad_matched = matcher.match_site_data('ETAD', **etad_match_options)

    print(f"\n✅ Custom match successful! Shape: {etad_matched.shape}")

except Exception as err:
    # Best-effort test cell: report the failure and carry on
    print(f"❌ Matcher class failed: {err}")

🔧 Testing AethalometerFilterMatcher class...
Loading complete filter dataset from ../FTIR_HIPS_Chem/Filter Data/unified_filter_dataset.pkl...
Dataset loaded successfully!
   Total measurements: 44,493
   Unique filters: 1,603
   Sites: CHTS, ETAD, INDH, USPA
   Date range: 2013-06-28 to 2024-12-08
   Data sources: ChemSpec, FTIR, HIPS
✅ Filter database loaded from: ../FTIR_HIPS_Chem/Filter Data/unified_filter_dataset.pkl
Available sites: ['CHTS', 'ETAD', 'INDH', 'USPA']
🔗 Matching aethalometer and filter data for ETAD...
✅ Aethalometer data loaded from: ../FTIR_HIPS_Chem/df_Jacros_9am_resampled.pkl
   Dataset shape: (1047, 320)
   Date range: 2022-04-13 09:00:00+03:00 to 2025-06-25 09:00:00+03:00
   No 'Site' column found - assuming single site data
📍 No 'Site' column - assuming all data is for ETAD
📂 Extracting ETAD filter measurements...
   ✅ EC_ftir: 190 measurements
      Date range: 2022-12-07 to 2024-09-21
   ✅ HIPS_Fabs: 190 measurements
      Date range: 2022-12-07 to 2024-09-2

In [4]:
# Test 3: Get correlation summary
# `matched_data` (Test 1) and `matcher` (Test 2) are both assigned inside
# try/except blocks in earlier cells, so either name can be missing if that
# cell failed or was skipped. Check each one explicitly rather than letting a
# NameError surface as a misleading "Correlation summary failed" message.
if 'matched_data' not in globals() or matched_data.empty:
    print("⚠️  No matched data available for correlation analysis")
elif 'matcher' not in globals():
    print("⚠️  No matcher available for correlation analysis - run Test 2 first")
else:
    print("📊 Testing correlation summary...")

    try:
        # NOTE: the summary is computed on the quick_match result from Test 1,
        # not on `etad_matched` from Test 2.
        corr_summary = matcher.get_correlation_summary(matched_data)

        print("\n🔗 Correlation Summary:")
        # Each entry is read for its 'r', 'r2' and 'n' keys.
        for label, stats in corr_summary['correlations'].items():
            print(f"   {label}: R = {stats['r']:.3f}, R² = {stats['r2']:.3f} (n = {stats['n']})")

    except Exception as e:
        # Best-effort: report the failure and let the notebook continue.
        print(f"❌ Correlation summary failed: {e}")

📊 Testing correlation summary...

🔗 Correlation Summary:
   IR BC vs EC: R = 0.929, R² = 0.863 (n = 175)
   Blue BC vs Fabs: R = 0.866, R² = 0.750 (n = 175)
   UV BC vs Iron: R = 0.148, R² = 0.022 (n = 174)
   EC vs Fabs: R = 0.860, R² = 0.740 (n = 175)


In [6]:
# Show final summary
# Closing recap of the module's features plus two usage snippets, collected as
# a list of lines and emitted with a single print call.
summary_lines = [
    "\n🎉 Module testing complete!",
    "The AethalometerFilterMatcher module provides:",
    "   • Easy loading of aethalometer and filter data",
    "   • Automatic date matching between datasets",
    "   • Support for all sites in the database",
    "   • Flexible parameter selection",
    "   • Built-in correlation analysis",
    "\n📖 Usage examples:",
    "   # Simple usage:",
    "   matched = quick_match(aeth_path, filter_path, 'ETAD')",
    "\n   # Advanced usage:",
    "   matcher = AethalometerFilterMatcher(aeth_path, filter_path)",
    "   data = matcher.match_site_data('ETAD', filter_parameters=['EC_ftir'])",
]
print("\n".join(summary_lines))


🎉 Module testing complete!
The AethalometerFilterMatcher module provides:
   • Easy loading of aethalometer and filter data
   • Automatic date matching between datasets
   • Support for all sites in the database
   • Flexible parameter selection
   • Built-in correlation analysis

📖 Usage examples:
   # Simple usage:
   matched = quick_match(aeth_path, filter_path, 'ETAD')

   # Advanced usage:
   matcher = AethalometerFilterMatcher(aeth_path, filter_path)
   data = matcher.match_site_data('ETAD', filter_parameters=['EC_ftir'])
