In [1]:
# main.py
from gel_data_generator import GelColumnRunGenerator
import matplotlib.pyplot as plt
import seaborn as sns

def main():
    """Generate a synthetic gel-column dataset, save it, summarize it, and plot it.

    Builds a ``GelColumnRunGenerator`` from a fixed configuration, requests
    14 days of high-resolution data, writes the three result tables as CSV
    files under ``data/synthetic/``, prints a short summary, and calls
    ``plot_degradation_trend`` on the time-series table.

    Returns:
        The object returned by ``generate_complete_dataset``. This function
        reads its ``'run_schedule'``, ``'batch_data'`` and ``'time_series'``
        entries (presumably pandas DataFrames, since ``to_csv`` is called on
        them -- confirm against the generator).
    """
    # Local import so this block stays self-contained.
    from pathlib import Path

    # Initialize generator with realistic parameters
    config = {
        'run_duration_hrs': {'min': 6, 'max': 10},  # 6-10 hour runs
        'batches_per_run': {'min': 4, 'max': 6},    # 4-6 batches per run
        'batch_duration_mins': {'min': 60, 'max': 180},  # 1-3 hour batches
        'degradation_per_run': 0.015,  # 1.5% degradation per run
        'pressure_noise': 0.3,
        'run_break_hours': {'min': 2, 'max': 6}  # 2-6 hours between runs
    }

    generator = GelColumnRunGenerator(config)

    # Generate 14 days of data
    dataset = generator.generate_complete_dataset(total_days=14, resolution='high')

    # Access different data levels
    run_schedule = dataset['run_schedule']
    batch_data = dataset['batch_data']
    time_series = dataset['time_series']

    # Save to files. The output directory is created first because to_csv
    # does not create missing parent directories.
    output_dir = Path('data/synthetic')
    output_dir.mkdir(parents=True, exist_ok=True)
    run_schedule.to_csv(output_dir / 'column_runs_schedule.csv', index=False)
    batch_data.to_csv(output_dir / 'batch_level_data.csv', index=False)
    time_series.to_csv(output_dir / 'high_res_time_series.csv', index=False)

    # Analyze
    print("\n=== Dataset Summary ===")
    print(f"Time range: {time_series['timestamp'].min()} to {time_series['timestamp'].max()}")
    # Count distinct ids rather than reporting the largest id: the two only
    # agree when ids are contiguous integers starting at 1.
    # NOTE(review): assumes batch_id is unique across runs -- confirm.
    print(f"Total runs: {run_schedule['run_id'].nunique()}")
    print(f"Total batches: {batch_data['batch_id'].nunique()}")
    print(f"Anomaly rate: {time_series['anomaly'].mean():.2%}")

    # Plot degradation trend
    plot_degradation_trend(time_series)

    return dataset

def plot_degradation_trend(time_series):
    """Visualize gel degradation over time in a 2x2 grid of plots.

    The figure is saved to ``data/synthetic/degradation_analysis.png`` (the
    directory is created if needed) and then shown with ``plt.show()``.

    Panels:
        1. Mean ``gel_score`` per ``run_id``.
        2. ``elution_pressure`` against ``gel_score``, colored by
           ``column_runs``.
        3. ``elution_pressure`` over ``timestamp`` with rows where
           ``anomaly == 1`` highlighted in red.
        4. Mean ``gel_score`` per ``batch_in_run`` for the first five
           distinct ``run_id`` values.

    Args:
        time_series: Table providing the columns ``run_id``, ``gel_score``,
            ``elution_pressure``, ``column_runs``, ``anomaly``,
            ``timestamp`` and ``batch_in_run`` (presumably a pandas
            DataFrame -- it is used via ``groupby``/``agg``).
    """
    # Local import so this block stays self-contained.
    from pathlib import Path

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Plot 1: Gel score vs runs
    ax_runs = axes[0, 0]
    run_avg = time_series.groupby('run_id')['gel_score'].mean().reset_index()
    ax_runs.plot(run_avg['run_id'], run_avg['gel_score'], 'b-', marker='o')
    ax_runs.set_xlabel('Column Run Number')
    ax_runs.set_ylabel('Average Gel Score')
    ax_runs.set_title('Gel Degradation Across Runs')
    ax_runs.grid(True, alpha=0.3)

    # Plot 2: Pressure vs gel score
    ax_pressure = axes[0, 1]
    # Keep the scatter handle so the colorbar is tied to this exact artist
    # instead of whatever happens to be first in the axes' collections.
    scatter = ax_pressure.scatter(
        time_series['gel_score'], time_series['elution_pressure'],
        alpha=0.5, s=10, c=time_series['column_runs'], cmap='viridis')
    ax_pressure.set_xlabel('Gel Score')
    ax_pressure.set_ylabel('Elution Pressure')
    ax_pressure.set_title('Pressure vs Gel Degradation')
    fig.colorbar(scatter, ax=ax_pressure, label='Column Runs')

    # Plot 3: Time series of pressure with anomalies
    ax_time = axes[1, 0]
    anomaly_points = time_series[time_series['anomaly'] == 1]
    ax_time.plot(time_series['timestamp'], time_series['elution_pressure'],
                 'b-', alpha=0.7, label='Pressure')
    ax_time.scatter(anomaly_points['timestamp'], anomaly_points['elution_pressure'],
                    color='red', s=50, zorder=5, label='Anomalies')
    ax_time.set_xlabel('Time')
    ax_time.set_ylabel('Elution Pressure')
    ax_time.set_title('Pressure Time Series with Anomalies')
    ax_time.legend()
    ax_time.tick_params(axis='x', rotation=45)

    # Plot 4: Batch-level view
    ax_batch = axes[1, 1]
    batch_stats = time_series.groupby(['run_id', 'batch_in_run']).agg({
        'gel_score': 'mean',
        'elution_pressure': 'mean'
    }).reset_index()

    for run_id in batch_stats['run_id'].unique()[:5]:  # First 5 runs
        run_data = batch_stats[batch_stats['run_id'] == run_id]
        ax_batch.plot(run_data['batch_in_run'], run_data['gel_score'],
                      marker='o', label=f'Run {run_id}')

    ax_batch.set_xlabel('Batch in Run')
    ax_batch.set_ylabel('Gel Score')
    ax_batch.set_title('Gel Degradation Within Runs')
    ax_batch.legend()
    ax_batch.grid(True, alpha=0.3)

    fig.tight_layout()

    # savefig does not create missing parent directories, so make sure the
    # target directory exists before writing the image.
    output_path = Path('data/synthetic/degradation_analysis.png')
    output_path.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(output_path, dpi=150, bbox_inches='tight')
    plt.show()

# Run the pipeline only when this file is executed directly (not on import).
# The result is bound to the module-level name `dataset` so it remains
# available for inspection after the run.
if __name__ == "__main__":
    dataset = main()

ModuleNotFoundError: No module named 'gel_data_generator'