# Test Aethalometer-Filter Matcher Module

Simple test of the new modular aethalometer-filter matching system.

In [1]:
# Import the module under test.
import sys

import pandas as pd

# `..` is expected to be the directory that contains the `src` package
# (NOTE(review): confirm the notebook is always launched from its own folder).
# The membership check keeps this cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
if '..' not in sys.path:
    sys.path.append('..')

from src.data.loaders import AethalometerFilterMatcher, quick_match

print("✅ Module imported successfully!")

✅ Module imported successfully!


In [2]:
# Test 1: Using the quick_match function
# One-call convenience path: hand quick_match both input files and a site code.
print("🚀 Testing quick_match function for ETAD...")

# Input files, relative to the notebook's working directory. They are defined
# outside the try block so later cells can reuse them even if the match fails.
# NOTE(review): both are pickle files - only load pickles from trusted sources.
aethalometer_path = "../FTIR_HIPS_Chem/df_Jacros_9am_resampled.pkl"
filter_db_path = "../FTIR_HIPS_Chem/Filter Data/unified_filter_dataset.pkl"

try:
    matched_data = quick_match(aethalometer_path, filter_db_path, 'ETAD')
    print(f"\n✅ Quick match successful! Shape: {matched_data.shape}")
    print(f"Columns: {list(matched_data.columns)}")
except Exception as e:
    # Keep the notebook running, but report the exception type as well: the
    # message alone is often ambiguous (e.g. a KeyError prints only the key).
    print(f"❌ Quick match failed: {type(e).__name__}: {e}")

🚀 Testing quick_match function for ETAD...
Loading complete filter dataset from ../FTIR_HIPS_Chem/Filter Data/unified_filter_dataset.pkl...
Dataset loaded successfully!
   Total measurements: 44,493
   Unique filters: 1,603
   Sites: CHTS, ETAD, INDH, USPA
   Date range: 2013-06-28 to 2024-12-08
   Data sources: ChemSpec, FTIR, HIPS
✅ Filter database loaded from: ../FTIR_HIPS_Chem/Filter Data/unified_filter_dataset.pkl
🔗 Matching aethalometer and filter data for ETAD...
✅ Aethalometer data loaded from: ../FTIR_HIPS_Chem/df_Jacros_9am_resampled.pkl
   Dataset shape: (1047, 320)
   Date range: 2022-04-13 09:00:00+03:00 to 2025-06-25 09:00:00+03:00
   No 'Site' column found - assuming single site data
📍 No 'Site' column - assuming all data is for ETAD
📂 Extracting ETAD filter measurements...
   ✅ EC_ftir: 190 measurements
      Date range: 2022-12-07 to 2024-09-21
   ✅ HIPS_Fabs: 190 measurements
      Date range: 2022-12-07 to 2024-09-21
   ✅ Iron_ChemSpec: 188 meas

In [3]:
# Test 2: Using the full matcher class
# Same inputs as the quick_match test, but going through the class API so the
# filter parameters and aethalometer columns can be chosen explicitly.
print("🔧 Testing AethalometerFilterMatcher class...")

try:
    # Reuses aethalometer_path / filter_db_path defined in the quick_match cell.
    matcher = AethalometerFilterMatcher(aethalometer_path, filter_db_path)

    # Check available sites
    sites = matcher.get_available_sites()
    print(f"Available sites: {sites}")

    # Match ETAD data with custom parameters
    etad_matched = matcher.match_site_data(
        'ETAD',
        filter_parameters=['EC_ftir', 'HIPS_Fabs', 'ChemSpec_Iron_PM2.5'],
        aethalometer_columns=['IR BCc smoothed', 'Blue BCc smoothed', 'UV BCc smoothed']
    )

    print(f"\n✅ Custom match successful! Shape: {etad_matched.shape}")

except Exception as e:
    # Keep the notebook running, but report the exception type as well: the
    # message alone is often ambiguous (e.g. a KeyError prints only the key).
    print(f"❌ Matcher class failed: {type(e).__name__}: {e}")

🔧 Testing AethalometerFilterMatcher class...
Loading complete filter dataset from ../FTIR_HIPS_Chem/Filter Data/unified_filter_dataset.pkl...
Dataset loaded successfully!
   Total measurements: 44,493
   Unique filters: 1,603
   Sites: CHTS, ETAD, INDH, USPA
   Date range: 2013-06-28 to 2024-12-08
   Data sources: ChemSpec, FTIR, HIPS
✅ Filter database loaded from: ../FTIR_HIPS_Chem/Filter Data/unified_filter_dataset.pkl
Available sites: ['CHTS', 'ETAD', 'INDH', 'USPA']
🔗 Matching aethalometer and filter data for ETAD...
✅ Aethalometer data loaded from: ../FTIR_HIPS_Chem/df_Jacros_9am_resampled.pkl
   Dataset shape: (1047, 320)
   Date range: 2022-04-13 09:00:00+03:00 to 2025-06-25 09:00:00+03:00
   No 'Site' column found - assuming single site data
📍 No 'Site' column - assuming all data is for ETAD
📂 Extracting ETAD filter measurements...
   ✅ EC_ftir: 190 measurements
      Date range: 2022-12-07 to 2024-09-21
   ✅ HIPS_Fabs: 190 measurements
      Date range: 202

In [4]:
# Test 3: Get correlation summary
# This cell needs two names created by earlier cells, each assigned inside a
# try block and therefore possibly missing:
#   * matched_data - result of the quick_match test
#   * matcher      - instance built in the AethalometerFilterMatcher test
# Both are checked up front so a missing prerequisite is reported as such
# instead of surfacing as a NameError inside the summary call.
if 'matched_data' not in globals() or matched_data.empty:
    print("⚠️  No matched data available for correlation analysis")
elif 'matcher' not in globals():
    print("⚠️  No matcher available - run the AethalometerFilterMatcher test cell first")
else:
    print("📊 Testing correlation summary...")

    try:
        corr_summary = matcher.get_correlation_summary(matched_data)

        print("\n🔗 Correlation Summary:")
        # Each entry is read via its 'r', 'r2' and 'n' keys.
        for label, stats in corr_summary['correlations'].items():
            print(f"   {label}: R = {stats['r']:.3f}, R² = {stats['r2']:.3f} (n = {stats['n']})")

    except Exception as e:
        # Keep the notebook running, but include the exception type for diagnosis.
        print(f"❌ Correlation summary failed: {type(e).__name__}: {e}")

📊 Testing correlation summary...

🔗 Correlation Summary:
   IR BC vs EC: R = 0.929, R² = 0.863 (n = 175)
   Blue BC vs Fabs: R = 0.866, R² = 0.750 (n = 175)
   UV BC vs Iron: R = 0.148, R² = 0.022 (n = 174)
   EC vs Fabs: R = 0.860, R² = 0.740 (n = 175)


In [6]:
# Show final summary
# The message is assembled once and printed with a single call; the output is
# line-for-line what one print() per entry would produce. Entries starting
# with "\n" yield the blank separator lines between sections.
summary_text = "\n".join([
    "\n🎉 Module testing complete!",
    "The AethalometerFilterMatcher module provides:",
    "   • Easy loading of aethalometer and filter data",
    "   • Automatic date matching between datasets",
    "   • Support for all sites in the database",
    "   • Flexible parameter selection",
    "   • Built-in correlation analysis",
    "\n📖 Usage examples:",
    "   # Simple usage:",
    "   matched = quick_match(aeth_path, filter_path, 'ETAD')",
    "\n   # Advanced usage:",
    "   matcher = AethalometerFilterMatcher(aeth_path, filter_path)",
    "   data = matcher.match_site_data('ETAD', filter_parameters=['EC_ftir'])",
])
print(summary_text)


🎉 Module testing complete!
The AethalometerFilterMatcher module provides:
   • Easy loading of aethalometer and filter data
   • Automatic date matching between datasets
   • Support for all sites in the database
   • Flexible parameter selection
   • Built-in correlation analysis

📖 Usage examples:
   # Simple usage:
   matched = quick_match(aeth_path, filter_path, 'ETAD')

   # Advanced usage:
   matcher = AethalometerFilterMatcher(aeth_path, filter_path)
   data = matcher.match_site_data('ETAD', filter_parameters=['EC_ftir'])
