# Fermentation Gas Intelligence System - Exploration Notebook

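This notebook walks through typical usage of the Fermentation Gas Intelligence System, including loading the sensor data, preprocessing, analytics, feature engineering, visualization, and anomaly detection.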


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
from pathlib import Path

# Add the project root to the import path so the `src` package resolves.
# `Path().parent` is a purely lexical operation and evaluates to '.', which
# would only re-add the current directory; resolve the working directory
# first so that its real parent is inserted.
# NOTE(review): assumes the notebook runs from a subdirectory of the project
# root (the dataset is likewise loaded from '../data') -- confirm.
sys.path.insert(0, str(Path.cwd().parent))

from src.preprocessing import PreprocessingPipeline, MissingHandler, OutlierHandler, Resampler
from src.analytics import PandasAnalytics, NumPyOperations
from src.features import FeatureEngineering
from src.models import PhasePredictor
from src.anomaly import AnomalyDetector


## Load Data


In [None]:
# Read the gas sensor CSV from the data directory one level above the
# working directory.
DATA_PATH = '../data/gas_sensors_full_scale_dataset.csv'
data = pd.read_csv(DATA_PATH)

# Report the frame's dimensions and column names, one per line.
print(
    f"Data shape: {data.shape}",
    f"Columns: {data.columns.tolist()}",
    sep="\n",
)

# Last expression of the cell: preview of the first rows.
data.head()


## Preprocessing


In [None]:
# Preprocessing steps, listed in the order they are handed to the pipeline.
preprocessing_steps = [
    MissingHandler(method='both'),
    OutlierHandler(method='iqr', action='clip'),
    # NOTE(review): if Resampler forwards `freq` to pandas, the 'T' alias is
    # deprecated in recent pandas releases in favour of 'min' -- confirm
    # before upgrading pandas.
    Resampler(freq='5T', time_col='timestamp_index'),
]
pipeline = PreprocessingPipeline(preprocessing_steps)

# Fit the pipeline on the loaded data and transform it in a single call.
processed_data = pipeline.fit_transform(data)
print(f"Processed data shape: {processed_data.shape}")


## Analytics


In [None]:
# Batch metrics computed from the preprocessed data; print the first rows.
batch_metrics = PandasAnalytics.compute_batch_metrics(processed_data)
batch_metrics_preview = batch_metrics.head()
print(batch_metrics_preview)

# Rolling statistics over the same preprocessed data; the result is kept in
# `data_with_rolling` and only a confirmation message is printed here.
data_with_rolling = PandasAnalytics.compute_rolling_statistics(processed_data)
print("Rolling statistics computed")


## Feature Engineering


In [None]:
# Run the full feature-engineering step on the preprocessed data.
feature_data = FeatureEngineering.create_all_features(processed_data)

# Compare column counts before and after feature engineering.
print(
    f"Original features: {processed_data.shape[1]}",
    f"Features after engineering: {feature_data.shape[1]}",
    sep="\n",
)


## Visualization


In [None]:
# Line plot of the 'co2_ppm' column against the 'timestamp_index' column.
time_axis = processed_data['timestamp_index']
co2_readings = processed_data['co2_ppm']

plt.figure(figsize=(12, 6))
plt.plot(time_axis, co2_readings)

# Title and axis labels.
plt.title('CO2 Over Time')
plt.xlabel('Time')
plt.ylabel('CO2 (ppm)')

# Rotate the x tick labels, then tighten the layout before rendering.
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


## Anomaly Detection


In [None]:
# Run every detection routine exposed by the detector on the preprocessed data.
detector = AnomalyDetector()
anomalies = detector.detect_all(processed_data)
print(f"Detected {len(anomalies)} anomalies")

# Break the result down by anomaly type only when something was detected.
has_anomalies = len(anomalies) > 0
if has_anomalies:
    type_counts = anomalies['anomaly_type'].value_counts()
    print(type_counts)
