## Testing the dataset assembly pipeline

In [1]:
# ===== SETUP CELL - Run this first =====
import sys
from pathlib import Path


def resolve_repo_root(cwd: Path) -> Path:
    """Return the repository root for a kernel whose working directory is ``cwd``.

    The notebook is assumed to live directly inside a folder named exactly
    ``notebooks``; in that case the root is that folder's parent. For any
    other working directory, ``cwd`` itself is returned.

    Only the final path component is compared. A substring test on the whole
    path would also fire for unrelated names such as ``my_notebooks_archive``
    or for a ``notebooks`` folder higher up the tree, and would then return
    the wrong directory.

    Args:
        cwd: The current working directory of the kernel.

    Returns:
        The directory that should be put on ``sys.path``.
    """
    return cwd.parent if cwd.name == 'notebooks' else cwd


# Get repo root (assuming notebook is in notebooks/ folder)
repo_root = resolve_repo_root(Path.cwd())

# Add to Python path (guarded so that re-running the cell does not add duplicates)
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

print(f"‚úì Repo root: {repo_root}")
print("‚úì Python can now import from 'src/'")

‚úì Repo root: c:\Users\Bahrs\Documents\_GitHub\gas-sensor_SWCNT_film-data-classification
‚úì Python can now import from 'src/'


In [2]:
import sys
from pathlib import Path

# Make sure Python can see src/ when running notebook.
# The project root is the parent of the working directory only when the kernel
# really runs inside a folder named "notebooks"; otherwise the working
# directory itself is used. (Unconditionally taking the parent points one
# level above the repository when the kernel is started from the repo root.)
_cwd = Path.cwd()
PROJECT_ROOT = _cwd.parent if _cwd.name == "notebooks" else _cwd
_src_dir = str(PROJECT_ROOT / "src")
if _src_dir not in sys.path:  # keep the cell idempotent across re-runs
    sys.path.append(_src_dir)

from data.loading import load_gas_data  # pyright: ignore[reportMissingImports]
from data.assemble import build_basic_dataset, full_dataset
from data.cleaning import apply_manual_trim  # pyright: ignore[reportMissingImports]
from preprocessing.smoothing import dedrift, Exp_pd
#from data.paths import GAS_FILE_MAP

# Values shared by the dedrift() and full_dataset() calls below.
# NOTE(review): 201 is passed positionally to dedrift() and as envelope_ind=[201]
# to full_dataset(); presumably the same index in both places - confirm.
ENVELOPE_IND = 201
ALPHA = 0.0217

gas = 'NO2'
df = load_gas_data(gas)                                   # loader output for one gas
df1 = build_basic_dataset(gas)                            # basic dataset for the same gas
df2 = dedrift(df1, ENVELOPE_IND, Exp_pd, alpha=ALPHA)     # df1 passed through dedrift()
df3 = full_dataset(Exp_pd, envelope_ind=[ENVELOPE_IND], alpha=ALPHA)  # used by the later cells

# Compare shapes and the trailing six column names of every stage.
print(df.shape, df1.shape, df2.shape, df3.shape)
print(df.columns[-6:].tolist(), df1.columns[-6:].tolist(), df2.columns[-6:].tolist(), df3.columns[-6:].tolist(), sep='\n')

(481998, 6) (1199, 408) (1199, 408) (3543, 408)
['V', 'I', 'MFC_target', 'flow_target_error', 'flow_carrier_error', 'meas_cycle']
['NO2', 'H2S', 'Acet', 'meas_cycle', 'gas_', 'class_']
['NO2', 'H2S', 'Acet', 'meas_cycle', 'gas_', 'class_']
['NO2', 'H2S', 'Acet', 'meas_cycle', 'gas_', 'class_']


In [3]:
from preprocessing.train_test import build_sequences_for_df
# Leftover exploration snippet: shows the three target columns for the NO2 rows only.
#df3[df3["gas_"] == 'NO2'].loc[:,['NO2', 'H2S', 'Acet']]
# Smoke-test sequence building on df3 (created in the previous cell) with look_back=100.
# NOTE(review): `data` is not read by any later cell visible here - presumably an
# exploratory check; confirm before removing.
data = build_sequences_for_df(df3, look_back=100)

In [4]:
from preprocessing.train_test import create_time_series_folds

# Section banner: rule, title, rule - one call, same three output lines.
print("=" * 60, "CATBOOST FOLDS (2D)", "=" * 60, sep="\n")

# Build the folds for the 'catboost' model type from df3 (created in an earlier cell).
cb_folds = create_time_series_folds(
    df3,
    model_type='catboost',
    feature_cols=402,
    target_cols=['NO2', 'H2S', 'Acet'],
    n_components=50,
    start_cycle=5,
)

# Text overview of the generated folds.
print(cb_folds.summary())

CATBOOST FOLDS (2D)
TimeSeriesCVSplitter: 4 folds

  Fold 0: train=2145, test=360, features=(50,)
  Fold 1: train=2505, test=360, features=(50,)
  Fold 2: train=2865, test=360, features=(50,)
  Fold 3: train=3225, test=318, features=(50,)


In [5]:
# Section banner: blank line, rule, title, rule - same output as the separate prints.
print("", "=" * 60, "LSTM FOLDS (3D)", "=" * 60, sep="\n")

# Same splitter as for CatBoost, but with model_type='lstm' and a look_back window of 30.
lstm_folds = create_time_series_folds(
    df3,
    model_type='lstm',
    feature_cols=402,
    target_cols=['NO2', 'H2S', 'Acet'],
    look_back=30,
    n_components=50,
    start_cycle=5,
)

# Text overview of the generated folds.
print(lstm_folds.summary())


LSTM FOLDS (3D)
TimeSeriesCVSplitter: 4 folds

  Fold 0: train=2058, test=273, features=(30, 50)
  Fold 1: train=2418, test=273, features=(30, 50)
  Fold 2: train=2778, test=273, features=(30, 50)
  Fold 3: train=3138, test=231, features=(30, 50)


In [6]:
# Check the conversion of the LSTM folds into TensorFlow datasets.
print("", "=" * 60, "TENSORFLOW DATASETS", "=" * 60, sep="\n")

# Element 0 of the converted folds unpacks into a (train, test) pair.
tf_datasets = lstm_folds.to_tf_datasets(batch_size=32)
train_ds, test_ds = tf_datasets[0]

# Show both dataset reprs for the first fold.
print(
    f"First fold - Train dataset: {train_ds}",
    f"First fold - Test dataset: {test_ds}",
    sep="\n",
)


TENSORFLOW DATASETS
First fold - Train dataset: <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 30, 50), dtype=tf.float64, name=None), TensorSpec(shape=(None, 3), dtype=tf.float64, name=None))>
First fold - Test dataset: <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 30, 50), dtype=tf.float64, name=None), TensorSpec(shape=(None, 3), dtype=tf.float64, name=None))>


In [7]:
from preprocessing.train_test import TimeSeriesCVSplitter

# Round-trip check: save the CatBoost folds, then load them back.
print("", "=" * 60, "SAVE/LOAD TEST", "=" * 60, sep="\n")

# NOTE(review): 'test_folds' is a relative name, so the artefact presumably lands
# in the kernel's working directory - confirm and clean up if unwanted.
cb_folds.save('test_folds')
loaded_folds = TimeSeriesCVSplitter.load('test_folds')

# Report how many folds came back from the load.
print(f"Successfully saved and loaded {len(loaded_folds)} folds")


SAVE/LOAD TEST
Successfully saved and loaded 4 folds


In [11]:
def test_imports():
    """Check that every project module used by the pipeline can be imported.

    Returns:
        bool: True when all imports succeed; False when any of them raises
        ImportError (the error text is printed).
    """
    print("Testing imports...")

    try:
        # The imported names are not used; resolving them is the whole check.
        from src.data.loading import load_gas_data
        from src.data.cleaning import apply_manual_trim
        from src.data.assemble import build_basic_dataset, full_dataset
        from src.preprocessing.smoothing import Exp_pd, Savitzky_Golay, dedrift
        from src.preprocessing.train_test import create_time_series_folds
        from src.models.catboost_model import build_catboost_classifier, build_catboost_regressor
        from src.models.lstm_model import build_lstm
        from src.models.optuna_objectives import LSTMRegressorObjective, CatBoostClassifierObjective
    except ImportError as err:
        print(f"‚úó Import failed: {err}")
        return False
    else:
        print("‚úì All imports successful")
        return True


def test_data_loading():
    """Run the full dataset assembly once and sanity-check the resulting frame.

    The assembled frame must have 408 columns and contain the ``NO2``,
    ``class_`` and ``meas_cycle`` columns. On success, its shape, the unique
    ``gas_`` values and the ``meas_cycle`` range are printed.

    Returns:
        bool: True when loading and all checks succeed; False when anything
        raises, including a failed assertion (the error is printed).
    """
    print("\nTesting data loading...")

    try:
        from src.data.assemble import full_dataset
        from src.preprocessing.smoothing import Exp_pd

        dataset = full_dataset(dedrifting_func=Exp_pd, envelope_ind=[201], alpha=0.0217)

        assert dataset.shape[1] == 408, f"Expected 408 columns, got {dataset.shape[1]}"
        # Each required column is checked in the original order with the same message text.
        for required in ('NO2', 'class_', 'meas_cycle'):
            assert required in dataset.columns, f"Missing {required} column"

        print(f"‚úì Data loaded successfully: {dataset.shape}")
        print(f"  Gases: {dataset['gas_'].unique()}")
        print(f"  Cycles: {dataset['meas_cycle'].min()} - {dataset['meas_cycle'].max()}")
    except Exception as err:
        print(f"‚úó Data loading failed: {err}")
        return False
    return True


def test_cv_splitting():
    """Build time-series CV folds for both model types on the full dataset.

    The dataset is assembled once and passed to ``create_time_series_folds``
    twice: with ``model_type='catboost'`` and with ``model_type='lstm'``
    (the latter adds ``look_back=30``). The fold count of each is printed.

    Returns:
        bool: True when both splitters are created; False when anything
        raises (the error is printed).
    """
    print("\nTesting CV splitting...")

    try:
        from src.data.assemble import full_dataset
        from src.preprocessing.smoothing import Exp_pd
        from src.preprocessing.train_test import create_time_series_folds

        dataset = full_dataset(dedrifting_func=Exp_pd, envelope_ind=[201], alpha=0.0217)

        # Keyword settings that are identical for both splitter calls.
        shared_kwargs = dict(
            task_type='regressor',
            n_components=50,
            start_cycle=7,
            test_size=1,
        )

        # CatBoost folds
        cb_splits = create_time_series_folds(dataset, model_type='catboost', **shared_kwargs)
        print(f"‚úì CatBoost folds: {len(cb_splits)} folds")

        # LSTM folds
        lstm_splits = create_time_series_folds(
            dataset, model_type='lstm', look_back=30, **shared_kwargs
        )
        print(f"‚úì LSTM folds: {len(lstm_splits)} folds")
    except Exception as err:
        print(f"‚úó CV splitting failed: {err}")
        return False
    return True


def test_model_building():
    """Construct one CatBoost regressor and one LSTM model as a smoke test.

    The LSTM is created for inputs of shape ``(30, 50)`` with ``output_shape=3``,
    then explicitly built so that its parameter count can be printed.

    Returns:
        bool: True when both models are constructed; False when anything
        raises (the error is printed).
    """
    print("\nTesting model building...")

    try:
        from src.models.catboost_model import build_catboost_regressor
        from src.models.lstm_model import build_lstm

        # CatBoost: the instance is not used further; constructing it is the check.
        _regressor = build_catboost_regressor(iterations=10, verbose=False)
        print("‚úì CatBoost model built")

        # LSTM: (look_back, n_features) input, 3 gas concentrations as output.
        look_back, n_features = 30, 50
        network = build_lstm(
            input_shape=(look_back, n_features),
            output_shape=3,
            n_layers=2,
            n_units=32,
        )
        # Build explicitly before asking for the parameter count.
        network.build(input_shape=(None, look_back, n_features))
        print("‚úì LSTM model built")
        print(f"  LSTM parameters: {network.count_params():,}")
    except Exception as err:
        print(f"‚úó Model building failed: {err}")
        return False
    return True


def test_config_loading():
    """Check that the experiment YAML configs load and contain the required sections.

    Three config files are looked up relative to the module-level
    ``PROJECT_ROOT`` (defined in an earlier cell). A file that does not exist
    only produces a warning and is skipped. A file that exists must parse to
    a mapping containing the ``experiment`` and ``data`` keys.

    Returns:
        bool: True when every existing config passed the checks; False when
        anything raised, including a failed assertion (the error is printed).
    """
    print("\nTesting config loading...")

    try:
        import yaml

        config_files = [
            'configs/config_lstm_regression.yaml',
            'configs/config_catboost_classification.yaml',
            'configs/config_catboost_regression.yaml'
        ]

        for config_file in config_files:
            config_path = PROJECT_ROOT / config_file
            if not config_path.exists():
                # Best effort: a missing file is reported but does not fail the check.
                print(f"‚ö† Config file not found: {config_file}")
                continue

            # Read as UTF-8 explicitly; the platform default encoding (for
            # example cp1252 on Windows) can mis-decode non-ASCII YAML content.
            with open(config_path, 'r', encoding='utf-8') as f:
                config = yaml.safe_load(f)

            # safe_load() yields None for an empty document and may yield a list
            # or scalar; fail with a clear message instead of an opaque TypeError
            # from the membership tests below.
            assert isinstance(config, dict), f"Config is empty or not a mapping: {config_file}"
            assert 'experiment' in config, f"Missing 'experiment' in {config_file}"
            assert 'data' in config, f"Missing 'data' in {config_file}"
            print(f"‚úì Config loaded: {config_file}")

        return True
    except Exception as e:
        print(f"‚úó Config loading failed: {e}")
        return False


def main():
    """Run every setup check in order and print a pass/fail summary.

    Each check is called inside its own try block, so one crashing check is
    recorded as a failure and does not stop the remaining ones.

    Returns:
        int: 0 when every check reported success, 1 otherwise.
    """
    divider = "=" * 60
    print(divider, "TESTING SETUP", divider, sep="\n")

    checks = (
        ("Imports", test_imports),
        ("Data Loading", test_data_loading),
        ("CV Splitting", test_cv_splitting),
        ("Model Building", test_model_building),
        ("Config Loading", test_config_loading),
    )

    outcomes = []
    for label, check in checks:
        try:
            passed = check()
        except Exception as exc:
            # A crash counts as a failure for that check.
            print(f"‚úó Test '{label}' crashed: {exc}")
            passed = False
        outcomes.append((label, passed))

    print("\n" + divider)
    print("SUMMARY")
    print(divider)

    for label, passed in outcomes:
        print(f"{'‚úì PASS' if passed else '‚úó FAIL'}: {label}")

    if not all(passed for _, passed in outcomes):
        print("\n‚ùå Some tests failed. Check the errors above.")
        return 1

    print("\nüéâ All tests passed! Your setup is ready.")
    return 0




In [12]:
# Run every setup check; the returned exit code (0 = all passed, 1 = at least
# one failure) is displayed as this cell's output.
main()

TESTING SETUP
Testing imports...
‚úì All imports successful

Testing data loading...
‚úì Data loaded successfully: (3543, 408)
  Gases: ['NO2' 'H2S' 'Acet']
  Cycles: 0 - 9

Testing CV splitting...
‚úì CatBoost folds: 2 folds
‚úì LSTM folds: 2 folds

Testing model building...
‚úì CatBoost model built
‚úì LSTM model built
  LSTM parameters: 13,811

Testing config loading...
‚úì Config loaded: configs/config_lstm_regression.yaml
‚úì Config loaded: configs/config_catboost_classification.yaml
‚úì Config loaded: configs/config_catboost_regression.yaml

SUMMARY
‚úì PASS: Imports
‚úì PASS: Data Loading
‚úì PASS: CV Splitting
‚úì PASS: Model Building
‚úì PASS: Config Loading

üéâ All tests passed! Your setup is ready.


0