dev-wei · dev-wei · Aug 4, 2025 · Aug 3, 2025 · Aug 4, 2025 · Aug 4, 2025
diff --git a/.env.example b/.env.example
@@ -42,11 +42,6 @@ CAPTURE_PROVIDER=pyaudio
 # Default: 16000
 AUDIO_SAMPLE_RATE=16000
 
-# Number of audio channels
-# Options: 1 (mono), 2 (stereo)
-# Default: 1
-AUDIO_CHANNELS=1
-
 # Audio chunk size for processing
 # Default: 1024
 AUDIO_CHUNK_SIZE=1024
@@ -84,10 +79,12 @@ AWS_LANGUAGE_CODE=en-US
 # Default: 10
 AWS_MAX_SPEAKERS=10
 
-# AWS connection strategy
-# Options: auto, single, dual
-# Default: auto
-AWS_CONNECTION_STRATEGY=auto
+# AWS connection strategy (DEPRECATED - auto-detected based on device channels)
+# Connection strategy is now automatically determined:
+# - 1 channel device → single AWS connection
+# - 2+ channel device → dual AWS connections for optimal transcription
+# This setting is ignored and will be removed in a future version.
+# AWS_CONNECTION_STRATEGY=auto
 
 # Enable dual connection fallback
 # Default: true
@@ -102,22 +99,6 @@ AWS_CHANNEL_BALANCE_THRESHOLD=0.3
 # Default: false
 AWS_DUAL_CONNECTION_TEST_MODE=false
 
-# Save split audio files for debugging
-# Default: false
-AWS_DUAL_SAVE_SPLIT_AUDIO=false
-
-# Save raw stereo audio for debugging
-# Default: false
-AWS_DUAL_SAVE_RAW_AUDIO=false
-
-# Path to save debug audio files
-# Default: debug_audio/
-AWS_DUAL_AUDIO_SAVE_PATH=debug_audio/
-
-# Duration to save debug audio (seconds)
-# Default: 30
-AWS_DUAL_AUDIO_SAVE_DURATION=30
-
 # ======================================================================
 # AZURE SPEECH SERVICE CONFIGURATION
 # ======================================================================
@@ -200,6 +181,29 @@ PARTIAL_RESULT_TIMEOUT=2.0
 # Default: 0.0
 CONFIDENCE_THRESHOLD=0.0
 
+# ======================================================================
+# AUDIO SAVING (Provider-Agnostic)
+# ======================================================================
+
+# Enable raw audio saving to WAV file
+# Works with all transcription providers (AWS, Azure, Whisper, etc.)
+# Default: false
+SAVE_RAW_AUDIO=false
+
+# Enable split audio saving (save left/right channels separately for stereo input)
+# Works with all transcription providers; saves separate left/right channel files alongside the main file
+# Only applies when a stereo input device is selected
+# Default: false
+SAVE_SPLIT_AUDIO=false
+
+# Directory path to save audio files
+# Default: debug_audio/
+AUDIO_SAVE_PATH=debug_audio/
+
+# Maximum recording duration to save (seconds)
+# Default: 30
+AUDIO_SAVE_DURATION=30
+
 # ======================================================================
 # DATABASE CONFIGURATION (Optional)
 # ======================================================================
@@ -267,13 +271,33 @@ TESTING=false
 # AZURE_SPEECH_LANGUAGE=es-ES
 # AZURE_ENABLE_SPEAKER_DIARIZATION=true
 
-# Example 3: Development setup with file input
+# Example 3: Development setup with file input and audio saving
 # TRANSCRIPTION_PROVIDER=aws
 # CAPTURE_PROVIDER=file
+# SAVE_RAW_AUDIO=true
+# AUDIO_SAVE_PATH=./recordings/
+# AUDIO_SAVE_DURATION=60
 # LOG_LEVEL=DEBUG
 # SKIP_AWS_VALIDATION=true
 # MOCK_SERVICES=true
 
+# Example 4: Azure with stereo audio recording and channel splitting
+# TRANSCRIPTION_PROVIDER=azure
+# AZURE_SPEECH_KEY=your_azure_key_here
+# AZURE_SPEECH_REGION=eastus
+# (Select a stereo input device - the channel count is auto-detected; AUDIO_CHANNELS is no longer a setting)
+# SAVE_RAW_AUDIO=true
+# SAVE_SPLIT_AUDIO=true
+# AUDIO_SAVE_PATH=./meeting_recordings/
+
+# Example 5: AWS with stereo device and split audio saving
+# TRANSCRIPTION_PROVIDER=aws
+# AWS_ACCESS_KEY_ID=your_access_key_here
+# AWS_SECRET_ACCESS_KEY=your_secret_key_here
+# SAVE_RAW_AUDIO=true
+# SAVE_SPLIT_AUDIO=true
+# AUDIO_SAVE_PATH=./debug_audio/
+
 # ======================================================================
 # SECURITY NOTES
 # ======================================================================
@@ -297,6 +321,6 @@ TESTING=false
 # 5. Visit the project documentation: https://github.com/dev-wei/ymemo
 
 # For more detailed configuration options, see:
-# - config/audio_config.py (configuration loading)
+# - src/config/audio_config.py (configuration loading)
 # - src/config/provider_config.py (provider configurations)
 # - README.md (setup instructions)
diff --git a/.github/test-env.sh b/.github/test-env.sh
@@ -26,7 +26,6 @@ export AWS_REGION=us-east-1
 export TRANSCRIPTION_PROVIDER=aws
 export CAPTURE_PROVIDER=pyaudio
 export AUDIO_SAMPLE_RATE=16000
-export AUDIO_CHANNELS=1
 
 # Additional AWS environment variables for comprehensive mocking
 export AWS_SESSION_TOKEN=test-session-token
@@ -74,5 +73,5 @@ echo "   - AWS validation: DISABLED"
 echo "   - Mock services: ENABLED"
 echo "   - Log level: WARNING"
 echo "   - Provider: $TRANSCRIPTION_PROVIDER"
-echo "   - Audio: ${AUDIO_SAMPLE_RATE}Hz, ${AUDIO_CHANNELS} channel(s)"
+echo "   - Audio: ${AUDIO_SAMPLE_RATE}Hz, device-based channels"
 echo ""
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -39,7 +39,6 @@ env:
   TRANSCRIPTION_PROVIDER: "aws"
   CAPTURE_PROVIDER: "pyaudio"
   AUDIO_SAMPLE_RATE: "16000"
-  AUDIO_CHANNELS: "1"
   LOG_LEVEL: "WARNING"  # Reduce CI log noise
 
   # Disable real service connections

diff --git a/.github/workflows/ci.yml.backup b/.github/workflows/ci.yml.backup
@@ -39,7 +39,6 @@ env:
   TRANSCRIPTION_PROVIDER: "aws"
   CAPTURE_PROVIDER: "pyaudio"
   AUDIO_SAMPLE_RATE: "16000"
-  AUDIO_CHANNELS: "1"
   LOG_LEVEL: "WARNING"  # Reduce CI log noise
 
   # Disable real service connections

diff --git a/.github/workflows/ci.yml.bak b/.github/workflows/ci.yml.bak
@@ -39,7 +39,6 @@ env:
   TRANSCRIPTION_PROVIDER: "aws"
   CAPTURE_PROVIDER: "pyaudio"
   AUDIO_SAMPLE_RATE: "16000"
-  AUDIO_CHANNELS: "1"
   LOG_LEVEL: "WARNING"  # Reduce CI log noise
 
   # Disable real service connections

diff --git a/.gitignore b/.gitignore
@@ -266,3 +266,5 @@ config_local.py
 settings_local.py
 
 /debug_audio
+
+/.ruff_cache
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -144,7 +144,7 @@ source .venv/bin/activate && python -c "from src.utils.database import test_data
 
 **Centralized Configuration System:**
 
-- All configuration is managed through `config/audio_config.py`
+- All configuration is managed through `src/config/audio_config.py`
 - Configuration is loaded from environment variables with sensible defaults
 - Automatic validation with helpful error messages
 - Debug logging shows loaded configuration values
@@ -154,17 +154,18 @@ source .venv/bin/activate && python -c "from src.utils.database import test_data
 *Provider Selection:*
 
 - `TRANSCRIPTION_PROVIDER` - Choose transcription provider ('aws', 'azure', 'whisper', 'google', default: 'aws')
-  - `aws` provider now intelligently switches between single and dual connections automatically
+  - The `aws` provider automatically detects the device's channel count and chooses the optimal connection strategy
 - `CAPTURE_PROVIDER` - Choose audio capture provider ('pyaudio', 'file', default: 'pyaudio')
 
 *Audio Settings:*
 
 - `AUDIO_QUALITY` - Audio quality preset ('high' for 44,100 Hz CD-quality, 'average' for 16,000 Hz speech-optimized, default: not set)
 - `AUDIO_SAMPLE_RATE` - Sample rate in Hz (default: 16000, overridden by AUDIO_QUALITY if set)
-- `AUDIO_CHANNELS` - Number of audio channels (default: 1)
 - `AUDIO_CHUNK_SIZE` - Audio chunk size (default: 1024)
 - `AUDIO_FORMAT` - Audio format ('int16', 'int24', 'int32', 'float32', default: 'int16')
 
+**Note:** The number of audio channels is detected automatically from the capabilities of the selected input device.
+
 *AWS Configuration:*
 
 - `AWS_REGION` - AWS region (default: 'us-east-1')
@@ -174,7 +175,9 @@ source .venv/bin/activate && python -c "from src.utils.database import test_data
 
 *AWS Connection Strategy:*
 
-- `AWS_CONNECTION_STRATEGY` - Connection mode ('auto', 'single', 'dual', default: 'auto')
+- Connection strategy is now **automatically determined** based on device channels:
+  - 1-channel device → Single AWS Transcribe connection
+  - 2+ channel device → Dual AWS Transcribe connections for optimal transcription
 - `AWS_DUAL_FALLBACK_ENABLED` - Enable fallback to dual connections (true/false, default: true)  
 - `AWS_CHANNEL_BALANCE_THRESHOLD` - Channel imbalance threshold for fallback (0.0-1.0, default: 0.3)
 
@@ -192,7 +195,7 @@ source .venv/bin/activate && python -c "from src.utils.database import test_data
 **Configuration Debugging:**
 
 ```python
-from config.audio_config import print_config_summary
+from src.config.audio_config import print_config_summary
 print_config_summary()  # Shows current configuration
 ```
 

diff --git a/README.md b/README.md
@@ -219,14 +219,16 @@ YMemo's provider system features enterprise-grade architecture:
 <summary><b>🚀 AWS Transcribe Configuration</b></summary>
 
 ```bash
-# Advanced AWS settings
-export AWS_CONNECTION_STRATEGY=dual           # Single or dual connections
+# Advanced AWS settings (connection strategy now auto-detected)
+# export AWS_CONNECTION_STRATEGY=dual         # DEPRECATED - auto-detected based on device
 export AWS_DUAL_FALLBACK_ENABLED=true        # Automatic fallback
 export AWS_MAX_SPEAKERS=10                    # Speaker diarization limit
 export ENABLE_PARTIAL_RESULTS=true           # Real-time partial results
 ```
 
-**Dual-Channel Mode**: YMemo's unique dual-channel architecture splits stereo audio for enhanced accuracy and speaker separation.
+**Auto-Detected Connection Strategy**: YMemo automatically chooses the optimal AWS connection strategy based on your audio device:
+- **1-channel devices** → Single AWS Transcribe connection
+- **2+ channel devices** → Dual AWS Transcribe connections for enhanced accuracy and speaker separation
 
 </details>
 

diff --git a/config/__init__.py b/config/__init__.py
diff --git a/src/audio/audio_file_writer.py b/src/audio/audio_file_writer.py
@@ -286,6 +286,15 @@ def __init__(
         # Create directory if needed
         self.save_path.mkdir(parents=True, exist_ok=True)
 
+        # Log important warning about dual channel audio saving
+        logger.warning("🎵 DualChannelAudioSaver: CREATING LEFT/RIGHT CHANNEL FILES")
+        logger.warning(
+            "   ⚠️ This component should ONLY be used when processing stereo input that needs to be split!"
+        )
+        logger.warning(
+            "   💡 If you're seeing this for mono input, there's a configuration error upstream"
+        )
+
         # Generate timestamp-based filenames
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         left_file = self.save_path / f"left_channel_{timestamp}.wav"
@@ -304,19 +313,31 @@ def __init__(
         logger.info("🎵 DualChannelAudioSaver: Initialized")
         logger.info(f"   📁 Left: {left_file}")
         logger.info(f"   📁 Right: {right_file}")
+        logger.info(
+            "   🎯 Expected usage: Stereo audio input that has been split into separate left/right channels"
+        )
 
     def start_recording(self) -> bool:
         """Start recording both channels."""
         if self.is_active:
             return True
 
+        logger.info("🎵 DualChannelAudioSaver: Starting dual channel recording")
+        logger.info(
+            "   💡 This will create LEFT and RIGHT channel WAV files - ensure input is stereo!"
+        )
+
         left_started = self.left_writer.start_recording()
         right_started = self.right_writer.start_recording()
 
         if left_started and right_started:
             self.is_active = True
             logger.info("🎵 DualChannelAudioSaver: Both channels recording started")
+            logger.info(
+                "   📁 Recording to 2 separate files (left_channel_*.wav and right_channel_*.wav)"
+            )
             return True
+
         logger.error("❌ DualChannelAudioSaver: Failed to start recording")
         # Clean up any successful starts
         if left_started:
Original file line number	Diff line number	Diff line change
Expand Up		@@ -266,3 +266,5 @@ config_local.py
		settings_local.py

		/debug_audio

		/.ruff_cache