# %%
"""
Enhanced Enterprise Technical Analysis Framework
Combines fluent interface with TALib integration for comprehensive financial analysis
"""

import numpy as np
import pandas as pd
import talib
from typing import Union, Optional, Dict, List, Callable, Any, Tuple
from abc import ABC, abstractmethod
import logging
from dataclasses import dataclass, field
from enum import Enum
import re
from functools import lru_cache, wraps
import warnings
from concurrent.futures import ThreadPoolExecutor, as_completed
import multiprocessing as mp
from contextlib import contextmanager

# %%

# Location of the input CSV, relative to the notebook's working directory
# (file name suggests XAUUSD 15-minute bars — TODO confirm).
csv_path = "../src/xauusdm15.filtered.csv"

# Load the CSV into a DataFrame and preview the first rows
df = pd.read_csv(csv_path)
df.head()


# %%
"""
Enhanced Enterprise Technical Analysis Framework
Combines fluent interface with TALib integration for comprehensive financial analysis
"""

import numpy as np
import pandas as pd
import talib
from typing import Union, Optional, Dict, List, Callable, Any, Tuple
from abc import ABC, abstractmethod
import logging
from dataclasses import dataclass, field
from enum import Enum
import re
from functools import lru_cache, wraps
import warnings
from concurrent.futures import ThreadPoolExecutor, as_completed
import multiprocessing as mp
from contextlib import contextmanager

# Configure logging for the whole process: INFO level, timestamped messages.
# NOTE: DEBUG messages emitted by this module (profiling, cache hits) are
# therefore hidden unless the level is lowered.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Suppress FutureWarning messages for cleaner notebook output
warnings.filterwarnings('ignore', category=FutureWarning)

# %%
class ComparisonType(Enum):
    """Enumeration of supported comparison operations.

    The string values are what ``QueryParser`` emits as the ``operation`` of a
    parsed line and what ``ComparatorFactory`` uses as lookup keys.
    """
    ABOVE = "above"
    BELOW = "below"
    CROSSED_UP = "crossed_up"
    # NOTE: abbreviated value; CrossedDownComparator names its default output
    # column with the literal "crossed_down", not with this value.
    CROSSED_DOWN = "crossed_dn"
    # The three members below have no comparator registered in
    # ComparatorFactory in the visible part of this file.
    EQUALS = "equals"
    GREATER_EQUAL = "greater_equal"
    LESS_EQUAL = "less_equal"

class IndicatorType(Enum):
    """Enumeration of TALib indicator categories.

    NOTE(review): not referenced by any code visible in this part of the file;
    presumably intended for grouping indicators — confirm against callers.
    """
    OVERLAP = "overlap"
    MOMENTUM = "momentum"
    VOLUME = "volume"
    VOLATILITY = "volatility"
    PRICE = "price"
    CYCLE = "cycle"
    PATTERN = "pattern"

# %%
@dataclass
class AnalysisResult:
    """Record of a single analyzer operation (indicator or comparison) and its outcome."""
    column_name: str  # DataFrame column the operation wrote; may be "" for a failed comparison
    operation: str  # Human-readable description, e.g. "ADD_INDICATOR_<name>" or "<x> <op> <y>"
    success: bool  # True when the operation completed without raising
    message: str = ""  # Status text on success, the error text on failure
    data: Optional[pd.Series] = None  # Resulting series; left as None on failure
    execution_time: float = 0.0  # Seconds, measured with time.perf_counter(); 0.0 on failure
    memory_usage: int = 0  # Never set by the code visible in this file; units unspecified

@dataclass
class IndicatorConfig:
    """Configuration for technical indicators.

    Consumed by ``TALibIndicatorEngine``: the period fields are translated to
    TA-Lib keyword arguments and ``parameters`` is merged on top of them.
    """
    name: str  # TA-Lib function name; upper-cased before lookup
    period: Optional[int] = None  # Passed as ``timeperiod`` when set
    fast_period: Optional[int] = None  # Passed as ``fastperiod`` when set
    slow_period: Optional[int] = None  # Passed as ``slowperiod`` when set
    signal_period: Optional[int] = None  # Passed as ``signalperiod`` when set
    parameters: Dict[str, Any] = field(default_factory=dict)  # Extra kwargs; override the mapped ones
    source_column: str = "Close"  # Column supplied for 'close'/'real' inputs

class TAException(Exception):
    """Technical-analysis error that carries a machine-readable ``error_code``.

    Args:
        message: Human-readable description; becomes ``str(exc)``.
        error_code: Short category identifier, ``"GENERAL"`` when not given.
    """

    def __init__(self, message: str, error_code: str = "GENERAL"):
        # Let the base class store the message, then attach the category code.
        super().__init__(message)
        self.error_code = error_code

# %%
class PerformanceProfiler:
    """Performance monitoring and optimization utilities"""

    @staticmethod
    def profile_execution(func: Callable) -> Callable:
        """Decorator that logs a function's wall time and traced memory at DEBUG level.

        The measurement is only taken when the module logger has DEBUG enabled,
        because ``tracemalloc`` slows down every allocation while active and the
        result is reported through ``logger.debug`` only.

        Tracing that is already active (started by the caller or by an enclosing
        profiled call) is reused and left running; this decorator only stops
        tracing it started itself. The reported memory is the change in traced
        memory across the call.

        Args:
            func: Callable to wrap.

        Returns:
            Wrapper with the same signature and return value as ``func``.
        """
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Nothing would be logged, so avoid the tracing overhead entirely.
            if not logger.isEnabledFor(logging.DEBUG):
                return func(*args, **kwargs)

            import time
            import tracemalloc

            # Only start (and later stop) tracing when nobody else owns it;
            # stopping someone else's trace would wipe their statistics.
            owns_tracing = not tracemalloc.is_tracing()
            if owns_tracing:
                tracemalloc.start()
            baseline, _ = tracemalloc.get_traced_memory()
            start_time = time.perf_counter()

            try:
                return func(*args, **kwargs)
            finally:
                execution_time = time.perf_counter() - start_time
                current, _ = tracemalloc.get_traced_memory()
                if owns_tracing:
                    tracemalloc.stop()

                logger.debug(f"{func.__name__} executed in {execution_time:.4f}s, "
                           f"memory: {(current - baseline) / 1024 / 1024:.2f}MB")

        return wrapper

    @staticmethod
    @contextmanager
    def memory_efficient_processing():
        """Context manager that runs a garbage collection before and after the body.

        The trailing collection runs even when the body raises.
        """
        import gc
        gc.collect()
        try:
            yield
        finally:
            gc.collect()

# %%
class DataValidator:
    """Static validation helpers for DataFrame columns and OHLCV layouts."""

    @staticmethod
    @lru_cache(maxsize=128)
    def validate_column_exists(df_shape: Tuple[int, int], columns_tuple: Tuple[str, ...],
                              column: str) -> bool:
        """Return True when ``column`` is one of ``columns_tuple``.

        All arguments are hashable so that successful checks are memoised by
        ``lru_cache``; ``df_shape`` only takes part in the cache key.

        Raises:
            TAException: code ``COLUMN_NOT_FOUND`` when the column is absent.
        """
        if column in columns_tuple:
            return True
        raise TAException(f"Column '{column}' not found. Available: {list(columns_tuple)}",
                        "COLUMN_NOT_FOUND")

    @staticmethod
    def validate_numeric_column(df: pd.DataFrame, column: str) -> bool:
        """Check that ``column`` exists in ``df`` and has a numeric dtype.

        A warning is logged (but no error raised) when more than half of the
        values are NaN.

        Raises:
            TAException: ``COLUMN_NOT_FOUND`` or ``INVALID_DTYPE``.
        """
        DataValidator.validate_column_exists(df.shape, tuple(df.columns), column)

        series = df[column]
        if not pd.api.types.is_numeric_dtype(series):
            raise TAException(f"Column '{column}' must be numeric, got {series.dtype}",
                            "INVALID_DTYPE")

        # Share of missing values; only reported, never fatal
        missing_share = series.isna().sum() / len(df)
        if missing_share > 0.5:
            logger.warning(f"Column '{column}' has {missing_share:.1%} NaN values")

        return True

    @staticmethod
    def validate_ohlcv_data(df: pd.DataFrame) -> Dict[str, bool]:
        """Report, per OHLCV column, whether it is present and numeric.

        ``Open``/``High``/``Low``/``Close`` are always reported (with a warning
        when invalid); ``Volume`` is reported only when the column exists.

        Returns:
            Mapping of column name to validation outcome.
        """
        outcome: Dict[str, bool] = {}

        # Mandatory price columns: a failure is recorded and logged
        for name in ('Open', 'High', 'Low', 'Close'):
            try:
                DataValidator.validate_numeric_column(df, name)
            except TAException:
                outcome[name] = False
                logger.warning(f"Required OHLCV column '{name}' is invalid or missing")
            else:
                outcome[name] = True

        # Optional columns: checked only when present, failures are silent
        for name in ('Volume',):
            if name not in df.columns:
                continue
            try:
                DataValidator.validate_numeric_column(df, name)
            except TAException:
                outcome[name] = False
            else:
                outcome[name] = True

        return outcome

# %%
class TALibIndicatorEngine:
    """Compute TA-Lib indicators for a DataFrame, caching results per engine instance."""

    # DataFrame column that backs each TA-Lib price input name
    _PRICE_COLUMNS = {
        'open': 'Open',
        'high': 'High',
        'low': 'Low',
        'close': 'Close',
        'volume': 'Volume',
    }

    def __init__(self):
        self._indicator_cache = {}
        self._available_indicators = self._get_available_indicators()

    @staticmethod
    @lru_cache(maxsize=1)
    def _get_available_indicators() -> Dict[str, Dict[str, Any]]:
        """Collect every upper-case callable exported by ``talib`` with its metadata.

        Metadata comes from ``talib.abstract.Function(name).info``. When that
        lookup fails, a minimal single-input ("close") description is used.
        The result is computed once per process (``lru_cache``).
        """
        indicators = {}

        for func_name in dir(talib):
            if not func_name.isupper():
                continue
            func = getattr(talib, func_name, None)
            if not callable(func):
                continue

            try:
                info = talib.abstract.Function(func_name).info
                indicators[func_name] = {
                    'function': func,
                    'info': info,
                    'inputs': info.get('input_names', []),
                    'parameters': info.get('parameters', {}),
                    'outputs': info.get('output_names', [])
                }
            except Exception:
                # Fallback for indicators without abstract info
                indicators[func_name] = {
                    'function': func,
                    'info': {},
                    'inputs': ['close'],
                    'parameters': {},
                    'outputs': [func_name.lower()]
                }

        return indicators

    def is_indicator_available(self, indicator: str) -> bool:
        """Check if indicator is available in TALib (case-insensitive)."""
        return indicator.upper() in self._available_indicators

    @PerformanceProfiler.profile_execution
    def calculate_indicator(self, df: pd.DataFrame, config: IndicatorConfig) -> pd.Series:
        """Calculate one indicator and return it aligned to ``df.index``.

        For multi-output indicators a single series is returned: the middle
        band for ``BBANDS``, the first output (e.g. the MACD line) otherwise.
        Results are cached per engine instance.

        Args:
            df: Price data; ``config.source_column`` must exist.
            config: Indicator name and parameters.

        Returns:
            The indicator values as a Series indexed like ``df``.

        Raises:
            TAException: ``INDICATOR_NOT_FOUND`` for unknown names,
                ``CALCULATION_ERROR`` when TA-Lib or input preparation fails.
        """
        indicator_name = config.name.upper()

        if not self.is_indicator_available(indicator_name):
            raise TAException(f"Indicator '{indicator_name}' not available in TALib",
                            "INDICATOR_NOT_FOUND")

        cache_key = self._create_cache_key(df, config)

        cached = self._indicator_cache.get(cache_key)
        if cached is not None:
            logger.debug(f"Using cached result for {indicator_name}")
            return cached

        try:
            func_info = self._available_indicators[indicator_name]
            func = func_info['function']

            kwargs = self._prepare_parameters(config, func_info)
            input_data = self._prepare_input_data(df, config, func_info)

            # Inputs are already in the positional order TA-Lib expects, so
            # single- and multi-input indicators share one call path.
            with PerformanceProfiler.memory_efficient_processing():
                result = func(*input_data, **kwargs)

            # Reduce multi-output indicators to one representative series
            if isinstance(result, tuple):
                result = result[1] if indicator_name == 'BBANDS' else result[0]

            result_series = pd.Series(result, index=df.index)
            self._indicator_cache[cache_key] = result_series
            return result_series

        except Exception as e:
            logger.debug(f"TALib calculation error details: {str(e)}")
            raise TAException(f"Failed to calculate {indicator_name}: {str(e)}",
                            "CALCULATION_ERROR") from e

    def _create_cache_key(self, df: pd.DataFrame, config: IndicatorConfig) -> str:
        """Build a cache key from everything that can influence the result.

        The key covers the source column, any OHLCV columns present (they feed
        multi-input indicators), the index, and the full configuration
        including ``config.parameters``. Hashing is vectorised and
        order-sensitive.
        """
        import hashlib

        columns = [config.source_column]
        columns += [name for name in self._PRICE_COLUMNS.values()
                    if name in df.columns and name != config.source_column]

        digest = hashlib.blake2b(digest_size=16)
        for name in columns:
            digest.update(str(name).encode('utf-8'))
            row_hashes = pd.util.hash_pandas_object(df[name], index=True)
            digest.update(row_hashes.values.tobytes())

        config_part = repr((
            config.name.upper(), config.period, config.fast_period,
            config.slow_period, config.signal_period, config.source_column,
            sorted(config.parameters.items(), key=lambda item: str(item[0])),
        ))
        digest.update(config_part.encode('utf-8'))
        return digest.hexdigest()

    def _prepare_parameters(self, config: IndicatorConfig,
                          func_info: Dict[str, Any]) -> Dict[str, Any]:
        """Translate the config into TA-Lib keyword arguments.

        ``config.parameters`` is applied last, so it overrides the mapped
        period values on key collisions.
        """
        kwargs = {}

        if config.period is not None:
            kwargs['timeperiod'] = config.period
        if config.fast_period is not None:
            kwargs['fastperiod'] = config.fast_period
        if config.slow_period is not None:
            kwargs['slowperiod'] = config.slow_period
        if config.signal_period is not None:
            kwargs['signalperiod'] = config.signal_period

        kwargs.update(config.parameters)

        return kwargs

    @staticmethod
    def _flatten_input_names(input_names: Any) -> List[str]:
        """Flatten TA-Lib ``input_names`` metadata into an ordered list of price names.

        The abstract API reports inputs as a mapping such as
        ``{'price': 'close'}`` or ``{'prices': ['high', 'low', 'close']}``;
        the fallback metadata uses a plain list. Both shapes are accepted.
        """
        values = input_names.values() if isinstance(input_names, dict) else input_names
        flat: List[str] = []
        for value in values:
            if isinstance(value, str):
                flat.append(value)
            else:
                flat.extend(value)
        return flat

    def _prepare_input_data(self, df: pd.DataFrame, config: IndicatorConfig,
                          func_info: Dict[str, Any]) -> List[np.ndarray]:
        """Build the positional float64 input arrays for a TA-Lib function.

        Column selection:
          * single-input indicators read ``config.source_column``;
          * multi-input indicators read the matching ``Open``/``High``/``Low``/
            ``Close`` column, falling back to ``config.source_column`` when it
            is missing;
          * a missing ``Volume`` column is replaced by an array of ones;
          * unrecognised input names read ``config.source_column``.
        """
        names = self._flatten_input_names(func_info.get('inputs', ['close'])) or ['close']
        multi_input = len(names) > 1

        def column_values(column: str) -> np.ndarray:
            # TA-Lib requires float64 ("double") arrays
            return df[column].astype(np.float64).values

        input_data = []
        for raw_name in names:
            name = str(raw_name).lower()
            column = self._PRICE_COLUMNS.get(name)

            if name == 'volume' and 'Volume' not in df.columns:
                input_data.append(np.ones(len(df), dtype=np.float64))
            elif (column is not None and column in df.columns
                  and (multi_input or name != 'close')):
                input_data.append(column_values(column))
            else:
                input_data.append(column_values(config.source_column))

        return input_data

    def clear_cache(self):
        """Clear indicator calculation cache"""
        self._indicator_cache.clear()
        logger.info("Indicator cache cleared")

# %%
class QueryParser:
    """Parse line-oriented comparison queries such as ``Close above SMA_20``."""

    COMPARISON_PATTERNS = {
        r'\babove\b': ComparisonType.ABOVE.value,
        r'\bbelow\b': ComparisonType.BELOW.value,
        r'\bcrossed[\s_]?up\b': ComparisonType.CROSSED_UP.value,
        r'\bcrossed[\s_]?down\b': ComparisonType.CROSSED_DOWN.value,
        r'\bequals?\b': ComparisonType.EQUALS.value,
        r'\bgreater[\s_]?than[\s_]?or[\s_]?equal\b': ComparisonType.GREATER_EQUAL.value,
        r'\bless[\s_]?than[\s_]?or[\s_]?equal\b': ComparisonType.LESS_EQUAL.value,
    }

    # Operator words that must never be mistaken for indicator names
    _KEYWORDS = frozenset({
        'ABOVE', 'BELOW', 'CROSSED', 'UP', 'DOWN', 'EQUALS',
        'EQUAL', 'GREATER', 'LESS', 'THAN', 'OR',
    })

    @classmethod
    def parse_query(cls, query: str) -> List[Dict[str, Any]]:
        """Parse a multi-line query into a list of operation dicts.

        Blank lines and lines starting with ``#`` are ignored; lines that
        cannot be parsed are skipped (a warning is logged by ``_parse_line``).
        """
        operations = []

        for line in query.strip().splitlines():
            line = line.strip()
            if not line or line.startswith('#'):
                continue

            operation = cls._parse_line(line)
            if operation:
                operations.append(operation)

        return operations

    @classmethod
    def _operator_regex(cls):
        """Return one regex with a capture group per ``COMPARISON_PATTERNS`` entry.

        Built from the class attribute on each call so that overrides are
        honoured; ``re`` caches the compiled pattern.
        """
        return re.compile(
            '|'.join(f'({pattern})' for pattern in cls.COMPARISON_PATTERNS),
            re.IGNORECASE,
        )

    @classmethod
    def _parse_line(cls, line: str) -> Optional[Dict[str, Any]]:
        """Parse ``<left> <operator> <right>`` into an operation dict.

        The operator is the leftmost match in the line, and the same match
        both selects the comparison type and splits the operands. This keeps
        phrases like "greater than or equal" from being classified by their
        trailing "equal". The right operand ends at the next operator, if any,
        and is converted to ``float`` when it parses as a number.

        Returns:
            Dict with ``column1``, ``operation``, ``column2`` and
            ``original_line``, or ``None`` when the line has no operator or
            an operand is missing.
        """
        operator_re = cls._operator_regex()
        match = operator_re.search(line)
        if match is None:
            logger.warning(f"No valid comparison found in: {line}")
            return None

        # Group N corresponds to the N-th pattern (patterns have no groups of their own)
        comparison = list(cls.COMPARISON_PATTERNS.values())[match.lastindex - 1]

        following = operator_re.search(line, match.end())
        right_end = following.start() if following else len(line)

        column1 = line[:match.start()].strip()
        column2 = line[match.end():right_end].strip()

        if not column1 or not column2:
            logger.warning(f"Malformed query line: {line}")
            return None

        # Numeric right-hand sides are compared as constants
        try:
            column2 = float(column2)
        except ValueError:
            pass  # Keep as column name

        return {
            'column1': column1,
            'operation': comparison,
            'column2': column2,
            'original_line': line
        }

    @staticmethod
    def extract_indicators(query: str) -> List[IndicatorConfig]:
        """Extract indicator configurations referenced by a query.

        ``NAME_<digits>`` tokens become ``IndicatorConfig(NAME, period)``
        without consulting TA-Lib; bare tokens are included only when TA-Lib
        provides a function of that name. The query is upper-cased first.
        Duplicates (same name and period) are collapsed, keeping first-seen
        order.
        """
        found: Dict[Tuple[str, Optional[int]], IndicatorConfig] = {}
        engine = None  # created lazily, and only once, when a bare word needs checking

        for word in re.findall(r'\b[A-Z_]+_\d+\b|\b[A-Z_]+\b', query.upper()):
            if word in QueryParser._KEYWORDS:
                continue

            if '_' in word:
                parts = word.split('_')
                if not parts[1].isdigit():
                    continue
                config = IndicatorConfig(name=parts[0], period=int(parts[1]))
            else:
                if engine is None:
                    engine = TALibIndicatorEngine()
                if not engine.is_indicator_available(word):
                    continue
                config = IndicatorConfig(name=word)

            found[(config.name, config.period)] = config

        return list(found.values())

# %%
class BaseComparator(ABC):
    """Common contract and naming helpers shared by all comparison operations."""

    @abstractmethod
    def compare(self, df: pd.DataFrame, x: str, y: Union[str, float],
                new_col: Optional[str] = None) -> pd.DataFrame:
        """Run the comparison of column ``x`` against ``y`` and return the frame."""
        pass

    def _generate_column_name(self, x: str, y: Union[str, float], operation: str) -> str:
        """Build ``<x>_<operation>_<y>`` with ``.`` and ``-`` in ``y`` made column-friendly."""
        # "." becomes "_" and "-" becomes "neg", e.g. -1.5 -> "neg1_5"
        sanitized = str(y).translate(str.maketrans({'.': '_', '-': 'neg'}))
        return "{}_{}_{}".format(x, operation, sanitized)

    def _add_constant_column(self, df: pd.DataFrame, name: str, value: float) -> pd.DataFrame:
        """Add a float64 column filled with ``value`` unless ``name`` already exists."""
        if name in df.columns:
            return df
        df[name] = np.full(len(df), value, dtype=np.float64)
        return df

# %%
class AboveComparator(BaseComparator):
    """Element-wise ``x > y`` comparison stored as an int8 flag column."""

    @PerformanceProfiler.profile_execution
    def compare(self, df: pd.DataFrame, x: str, y: Union[str, float],
                new_col: Optional[str] = None) -> pd.DataFrame:
        """Write 1 where column ``x`` exceeds ``y`` (column name or constant), else 0.

        The result is stored in ``new_col`` or, when omitted, in a generated
        ``<x>_above_<y>`` column; ``df`` is modified in place and returned.
        """
        DataValidator.validate_numeric_column(df, x)
        left = df[x].values

        # A number is compared as a constant, anything else as a column name
        if isinstance(y, (int, float)):
            right = y
        else:
            DataValidator.validate_numeric_column(df, y)
            right = df[y].values

        flags = left > right
        target = new_col if new_col else self._generate_column_name(x, y, "above")
        df[target] = flags.astype(np.int8)  # int8 keeps the flag column small
        return df

# %%
class BelowComparator(BaseComparator):
    """Element-wise ``x < y`` comparison stored as an int8 flag column."""

    @PerformanceProfiler.profile_execution
    def compare(self, df: pd.DataFrame, x: str, y: Union[str, float],
                new_col: Optional[str] = None) -> pd.DataFrame:
        """Write 1 where column ``x`` is less than ``y`` (column name or constant), else 0.

        The result is stored in ``new_col`` or, when omitted, in a generated
        ``<x>_below_<y>`` column; ``df`` is modified in place and returned.
        """
        DataValidator.validate_numeric_column(df, x)
        left = df[x].values

        # A number is compared as a constant, anything else as a column name
        if isinstance(y, (int, float)):
            right = y
        else:
            DataValidator.validate_numeric_column(df, y)
            right = df[y].values

        flags = left < right
        target = new_col if new_col else self._generate_column_name(x, y, "below")
        df[target] = flags.astype(np.int8)
        return df

# %%
class CrossedUpComparator(BaseComparator):
    """Detect rows where ``x`` moves from at-or-below ``y`` to above ``y``."""

    @PerformanceProfiler.profile_execution
    def compare(self, df: pd.DataFrame, x: str, y: Union[str, float],
                new_col: Optional[str] = None) -> pd.DataFrame:
        """Flag upward crossings of ``x`` over ``y`` as an int8 column.

        A row is flagged when ``x - y`` is positive on that row and was zero
        or negative on the previous row. The first row is never flagged, and
        an empty frame yields an empty column instead of raising.

        Args:
            df: Frame to read from and write to (modified in place).
            x: Numeric column being tested.
            y: Numeric column name or numeric constant to cross.
            new_col: Result column name; defaults to ``<x>_crossed_up_<y>``.

        Returns:
            ``df`` with the flag column added.
        """
        DataValidator.validate_numeric_column(df, x)

        if isinstance(y, (int, float)):
            reference = y
        else:
            DataValidator.validate_numeric_column(df, y)
            reference = df[y].values

        diff = df[x].values - reference

        # Compare each row with its predecessor via slices; row 0 has no
        # predecessor and stays False (also safe for zero-length input).
        crossed_up = np.zeros(len(diff), dtype=bool)
        crossed_up[1:] = (diff[1:] > 0) & (diff[:-1] <= 0)

        new_col = new_col or self._generate_column_name(x, y, "crossed_up")
        df[new_col] = crossed_up.astype(np.int8)
        return df

# %%
class CrossedDownComparator(BaseComparator):
    """Detect rows where ``x`` moves from at-or-above ``y`` to below ``y``."""

    @PerformanceProfiler.profile_execution
    def compare(self, df: pd.DataFrame, x: str, y: Union[str, float],
                new_col: Optional[str] = None) -> pd.DataFrame:
        """Flag downward crossings of ``x`` under ``y`` as an int8 column.

        A row is flagged when ``x - y`` is negative on that row and was zero
        or positive on the previous row. The first row is never flagged, and
        an empty frame yields an empty column instead of raising.

        Args:
            df: Frame to read from and write to (modified in place).
            x: Numeric column being tested.
            y: Numeric column name or numeric constant to cross.
            new_col: Result column name; defaults to ``<x>_crossed_down_<y>``.

        Returns:
            ``df`` with the flag column added.
        """
        DataValidator.validate_numeric_column(df, x)

        if isinstance(y, (int, float)):
            reference = y
        else:
            DataValidator.validate_numeric_column(df, y)
            reference = df[y].values

        diff = df[x].values - reference

        # Compare each row with its predecessor via slices; row 0 has no
        # predecessor and stays False (also safe for zero-length input).
        crossed_down = np.zeros(len(diff), dtype=bool)
        crossed_down[1:] = (diff[1:] < 0) & (diff[:-1] >= 0)

        new_col = new_col or self._generate_column_name(x, y, "crossed_down")
        df[new_col] = crossed_down.astype(np.int8)
        return df

# %%
class ComparatorFactory:
    """Registry mapping operation names to shared comparator instances."""

    # Keys are ComparisonType values; one stateless instance per operation
    _comparators: Dict[str, BaseComparator] = {
        ComparisonType.ABOVE.value: AboveComparator(),
        ComparisonType.BELOW.value: BelowComparator(),
        ComparisonType.CROSSED_UP.value: CrossedUpComparator(),
        ComparisonType.CROSSED_DOWN.value: CrossedDownComparator(),
    }

    @classmethod
    def get_comparator(cls, operation: str) -> BaseComparator:
        """Return the comparator registered for ``operation`` (case-insensitive).

        Raises:
            TAException: code ``UNSUPPORTED_OPERATION`` when nothing is registered.
        """
        registry = cls._comparators
        found = registry.get(operation.lower())
        if found:
            return found

        available = list(registry.keys())
        raise TAException(f"Unsupported operation '{operation}'. Available: {available}",
                        "UNSUPPORTED_OPERATION")

# %%
class EnhancedTechnicalAnalyzer:
    """Main enhanced technical analyzer with TALib integration and performance optimization"""

    def __init__(self, df: pd.DataFrame, validate_ohlcv: bool = True):
        """Create an analyzer working on a private copy of ``df``.

        Args:
            df: Non-empty input data; it is copied and never modified.
            validate_ohlcv: When True, check the OHLCV columns and keep the
                per-column outcome in ``_ohlcv_validation``.

        Raises:
            TAException: ``INVALID_INPUT`` when ``df`` is not a DataFrame,
                ``EMPTY_DATAFRAME`` when it has no data.
        """
        if not isinstance(df, pd.DataFrame):
            raise TAException("Input must be a pandas DataFrame", "INVALID_INPUT")

        if df.empty:
            raise TAException("DataFrame cannot be empty", "EMPTY_DATAFRAME")

        # Pristine copy for reset(); separate working copy for all mutations
        self._original_df = df.copy()
        self._df = df.copy()
        self._operations_log: List[AnalysisResult] = []
        self._indicator_engine = TALibIndicatorEngine()

        # Always define the attribute so later reads cannot raise
        # AttributeError when validation was skipped.
        self._ohlcv_validation: Dict[str, bool] = (
            DataValidator.validate_ohlcv_data(df) if validate_ohlcv else {}
        )

        # Optimize data types
        self._optimize_datatypes()

        logger.info(f"Initialized analyzer with DataFrame shape: {self._df.shape}")
        logger.info(f"Memory usage: {self._df.memory_usage(deep=True).sum() / 1024 / 1024:.2f}MB")

    def _optimize_datatypes(self):
        """Optimize DataFrame data types for better performance - keep OHLCV as float64 for TALib"""
        # Keep OHLCV columns as float64 for TALib compatibility
        ohlcv_cols = ['Open', 'High', 'Low', 'Close', 'Volume']

        for col in self._df.select_dtypes(include=['float64']).columns:
            # Skip OHLCV columns - TALib needs float64
            if col in ohlcv_cols:
                continue

            if self._df[col].dtype == 'float64':
                # Check if values can fit in float32
                if (self._df[col].min() >= np.finfo(np.float32).min and
                    self._df[col].max() <= np.finfo(np.float32).max):
                    self._df[col] = self._df[col].astype(np.float32)

    @property
    def df(self) -> pd.DataFrame:
        """Return the working DataFrame.

        This is the analyzer's live internal object, not a copy, so changes
        made through it affect subsequent operations.
        """
        return self._df

    @property
    def operations_log(self) -> List[AnalysisResult]:
        """Return the list of ``AnalysisResult`` records, in execution order.

        Both successful and failed operations are recorded; the returned list
        is the internal one, not a copy.
        """
        return self._operations_log

    def add_indicator(self, config: IndicatorConfig, column_name: Optional[str] = None) -> 'EnhancedTechnicalAnalyzer':
        """Compute an indicator and store it as a column of the working frame.

        Args:
            config: Indicator name and parameters.
            column_name: Target column; defaults to ``<name>_<period>`` when a
                period is configured, otherwise ``<name>``.

        Returns:
            ``self``, for method chaining.

        Raises:
            TAException: when the calculation fails. The original exception is
                chained and its ``error_code`` (if any) is preserved, so callers
                can still distinguish e.g. ``INDICATOR_NOT_FOUND``.
        """
        import time
        start_time = time.perf_counter()

        try:
            result = self._indicator_engine.calculate_indicator(self._df, config)

            # Generate column name
            if column_name is None:
                column_name = f"{config.name}_{config.period}" if config.period else config.name

            self._df[column_name] = result

            execution_time = time.perf_counter() - start_time

            # Log operation
            self._operations_log.append(AnalysisResult(
                column_name=column_name,
                operation=f"ADD_INDICATOR_{config.name}",
                success=True,
                message="Indicator added successfully",
                data=result,
                execution_time=execution_time
            ))

            logger.info(f"✓ Added indicator {column_name} in {execution_time:.4f}s")

        except Exception as e:
            # Failures are recorded too, so summary() shows the full history
            self._operations_log.append(AnalysisResult(
                column_name=column_name or config.name,
                operation=f"ADD_INDICATOR_{config.name}",
                success=False,
                message=str(e),
                execution_time=0.0
            ))
            logger.error(f"✗ Failed to add indicator {config.name}: {e}")
            raise TAException(f"Failed to add indicator: {e}",
                              getattr(e, 'error_code', 'GENERAL')) from e

        return self

    def above(self, x: str, y: Union[str, float], new_col: Optional[str] = None) -> 'EnhancedTechnicalAnalyzer':
        """Run the ``ABOVE`` comparison of column ``x`` against ``y`` and return ``self``.

        ``y`` is a column name or a numeric constant; ``new_col`` optionally
        names the result column. Delegates to ``_execute_comparison``.
        """
        return self._execute_comparison(ComparisonType.ABOVE.value, x, y, new_col)

    def below(self, x: str, y: Union[str, float], new_col: Optional[str] = None) -> 'EnhancedTechnicalAnalyzer':
        """Run the ``BELOW`` comparison of column ``x`` against ``y`` and return ``self``.

        ``y`` is a column name or a numeric constant; ``new_col`` optionally
        names the result column. Delegates to ``_execute_comparison``.
        """
        return self._execute_comparison(ComparisonType.BELOW.value, x, y, new_col)

    def crossed_up(self, x: str, y: Union[str, float], new_col: Optional[str] = None) -> 'EnhancedTechnicalAnalyzer':
        """Run the ``CROSSED_UP`` detection of column ``x`` over ``y`` and return ``self``.

        ``y`` is a column name or a numeric constant; ``new_col`` optionally
        names the result column. Delegates to ``_execute_comparison``.
        """
        return self._execute_comparison(ComparisonType.CROSSED_UP.value, x, y, new_col)

    def crossed_down(self, x: str, y: Union[str, float], new_col: Optional[str] = None) -> 'EnhancedTechnicalAnalyzer':
        """Run the ``CROSSED_DOWN`` detection of column ``x`` under ``y`` and return ``self``.

        ``y`` is a column name or a numeric constant; ``new_col`` optionally
        names the result column. Delegates to ``_execute_comparison``.
        """
        return self._execute_comparison(ComparisonType.CROSSED_DOWN.value, x, y, new_col)

    @PerformanceProfiler.profile_execution
    def _execute_comparison(self, operation: str, x: str, y: Union[str, float],
                          new_col: Optional[str] = None) -> 'EnhancedTechnicalAnalyzer':
        """Run one comparison, store its flag column and log the outcome.

        The result column name is decided here and passed to the comparator
        explicitly. Relying on the comparator's own default name breaks when
        that name is not derived from ``operation`` (the crossed-down
        comparator writes ``..._crossed_down_...`` while the operation value is
        ``crossed_dn``), which made the subsequent column lookup fail.

        Args:
            operation: A ``ComparisonType`` value registered in ``ComparatorFactory``.
            x: Numeric column being tested.
            y: Column name or numeric constant to compare against.
            new_col: Result column name; defaults to ``<x>_<operation>_<y>``.

        Returns:
            ``self``, for method chaining.

        Raises:
            TAException: when the comparison fails. The original exception is
                chained and its ``error_code`` (if any) is preserved.
        """
        import time
        start_time = time.perf_counter()

        try:
            comparator = ComparatorFactory.get_comparator(operation)
            result_col = new_col or comparator._generate_column_name(x, y, operation)
            self._df = comparator.compare(self._df, x, y, result_col)

            execution_time = time.perf_counter() - start_time

            result = AnalysisResult(
                column_name=result_col,
                operation=f"{x} {operation} {y}",
                success=True,
                message="Comparison completed successfully",
                data=self._df[result_col],
                execution_time=execution_time
            )
            self._operations_log.append(result)
            logger.info(f"✓ {result.operation} -> {result.column_name} ({execution_time:.4f}s)")

        except Exception as e:
            # Failures are recorded too, so summary() shows the full history
            result = AnalysisResult(
                column_name="",
                operation=f"{x} {operation} {y}",
                success=False,
                message=str(e),
                execution_time=0.0
            )
            self._operations_log.append(result)
            logger.error(f"✗ {result.operation}: {result.message}")
            raise TAException(f"Comparison operation failed: {e}",
                              getattr(e, 'error_code', 'GENERAL')) from e

        return self

    def execute_query(self, query: str, auto_add_indicators: bool = True) -> 'EnhancedTechnicalAnalyzer':
        """Run every comparison found in a text query, best effort.

        When ``auto_add_indicators`` is set, indicators referenced by the query
        are computed first. Failures of individual indicators or comparisons
        are logged and skipped; they never abort the remaining work.

        Returns:
            ``self``, for method chaining.
        """
        # Step 1: make sure referenced indicator columns exist
        if auto_add_indicators:
            for cfg in QueryParser.extract_indicators(query):
                try:
                    self.add_indicator(cfg)
                except Exception as e:
                    logger.warning(f"Could not add indicator {cfg.name}: {e}")

        # Step 2: run each parsed comparison independently
        for op in QueryParser.parse_query(query):
            try:
                self._execute_comparison(op['operation'], op['column1'], op['column2'])
            except Exception as e:
                logger.error(f"Failed to execute query operation {op}: {e}")

        return self

    def get_signals(self, column: str) -> pd.Series:
        """Return the series stored under ``column`` in the working frame.

        Raises:
            TAException: when the column does not exist.
        """
        frame = self._df
        DataValidator.validate_column_exists(frame.shape, tuple(frame.columns), column)
        return frame[column]

    def get_active_signals(self, column: str, include_index: bool = True) -> pd.DataFrame:
        """Return the rows of the working frame where ``column`` equals 1.

        Args:
            column: Signal column to filter on.
            include_index: Keep the original index when True; otherwise the
                result is renumbered from 0.

        Raises:
            TAException: when the column does not exist.
        """
        frame = self._df
        DataValidator.validate_column_exists(frame.shape, tuple(frame.columns), column)

        active_rows = frame[frame[column] == 1]
        if not include_index:
            return active_rows.reset_index(drop=True)
        return active_rows

    def summary(self) -> pd.DataFrame:
        """Enhanced summary with performance metrics"""
        summary_data = []
        for result in self._operations_log:
            summary_data.append({
                'Operation': result.operation,
                'Column': result.column_name,
                'Success': result.success,
                'Execution_Time_ms': round(result.execution_time * 1000, 2),
                'Active_Signals': result.data.sum() if result.data is not None else 0,
                'Signal_Ratio': (result.data.sum() / len(result.data) * 100) if result.data is not None else 0,
                'Message': result.message
            })
        return pd.DataFrame(summary_data)

    def performance_report(self) -> Dict[str, Any]:
        """Collect aggregate statistics about the logged operations and the data.

        Returns:
            A dict with operation counts, the success rate in percent, total
            and mean execution time, the working DataFrame's deep memory
            footprint in MB, its shape, and the number of columns that are
            not present in the original DataFrame. Rate and mean are 0 when
            no operation has been logged.
        """
        log = self._operations_log
        op_count = len(log)
        ok_count = len([entry for entry in log if entry.success])
        elapsed = sum(entry.execution_time for entry in log)

        if op_count > 0:
            success_rate = ok_count / op_count * 100
            mean_elapsed = elapsed / op_count
        else:
            success_rate = 0
            mean_elapsed = 0

        memory_mb = self._df.memory_usage(deep=True).sum() / 1024 / 1024
        original_columns = self._original_df.columns
        added_columns = [name for name in self._df.columns if name not in original_columns]

        return {
            'total_operations': op_count,
            'successful_operations': ok_count,
            'success_rate': success_rate,
            'total_execution_time': elapsed,
            'average_execution_time': mean_elapsed,
            'memory_usage_mb': memory_mb,
            'dataframe_shape': self._df.shape,
            'generated_columns': len(added_columns)
        }

    def reset(self) -> 'EnhancedTechnicalAnalyzer':
        """Drop all derived state and start again from the original data.

        The working DataFrame is replaced by a fresh copy of the original
        one, the operations log is emptied and the indicator engine's cache
        is cleared.

        Returns:
            This analyzer, so calls can be chained.
        """
        pristine = self._original_df.copy()
        self._df = pristine
        self._operations_log.clear()
        self._indicator_engine.clear_cache()
        logger.info("Reset analyzer to original state")
        return self

    def optimize_memory(self) -> 'EnhancedTechnicalAnalyzer':
        """Downcast int64 columns to the smallest integer dtype that fits.

        Only columns whose dtype is int64 are considered; every other column
        is left untouched. For each such column the first dtype in the order
        uint8, int8, int16, int32 whose range contains both the column
        minimum and maximum is applied. Columns that fit none of them stay
        int64. The saving is logged in MB.

        Returns:
            This analyzer, so calls can be chained.
        """
        # Limits come from NumPy rather than hand-written constants.
        candidates = [np.iinfo(kind) for kind in (np.uint8, np.int8, np.int16, np.int32)]

        original_memory = self._df.memory_usage(deep=True).sum()

        for col in self._df.select_dtypes(include=['int64']).columns:
            values = self._df[col]
            # One scan each for min and max, instead of re-scanning the
            # column for every candidate dtype.
            lowest, highest = values.min(), values.max()
            for info in candidates:
                if lowest >= info.min and highest <= info.max:
                    self._df[col] = values.astype(info.dtype)
                    break

        new_memory = self._df.memory_usage(deep=True).sum()
        memory_saved = (original_memory - new_memory) / 1024 / 1024

        logger.info(f"Memory optimization saved {memory_saved:.2f}MB")
        return self

    def export_signals(self, filename: str, format: str = 'csv') -> bool:
        """Write the signal columns plus ``Close`` to ``filename``.

        A column counts as a signal column when its lower-cased name contains
        'above', 'below' or 'crossed'. The ``Close`` column is always
        appended to the selection.

        Args:
            filename: Destination path handed to the pandas writer.
            format: 'csv', 'parquet' or 'excel' (case-insensitive).

        Returns:
            True when the file was written. False when anything raised,
            including an unsupported format; the error is logged.
        """
        try:
            markers = ('above', 'below', 'crossed')
            signal_columns = []
            for name in self._df.columns:
                lowered = name.lower()
                if any(marker in lowered for marker in markers):
                    signal_columns.append(name)

            export_df = self._df[signal_columns + ['Close']].copy()

            fmt = format.lower()
            writers = {
                'csv': export_df.to_csv,
                'parquet': export_df.to_parquet,
                'excel': export_df.to_excel,
            }
            if fmt not in writers:
                raise TAException(f"Unsupported export format: {format}")
            writers[fmt](filename, index=True)

            logger.info(f"Signals exported to {filename}")
            return True

        except Exception as e:
            logger.error(f"Export failed: {e}")
            return False

    def backtest_signals(self, signal_column: str, entry_price_column: str = 'Close',
                        holding_period: int = 1) -> Dict[str, float]:
        """Measure fixed-horizon returns after each active signal.

        Every row where ``signal_column`` equals 1 is treated as an entry at
        that row's ``entry_price_column`` value. The exit is taken
        ``holding_period`` rows later, clipped to the last row of the data,
        so entries near the end are held for fewer rows. Entries with no
        later row to exit on, a zero entry price, or a non-finite entry or
        exit price are not evaluated.

        Args:
            signal_column: Column holding the signal (1 = active).
            entry_price_column: Column used for both entry and exit prices.
            holding_period: Number of rows between entry and exit.

        Returns:
            A dict with the same keys on every successful path:
            ``total_signals`` (rows where the signal is active),
            ``evaluated_signals`` (entries that produced a return),
            ``avg_return``, ``win_rate``, ``best_return``, ``worst_return``
            and ``total_return``. Returns are fractional price changes; all
            statistics are 0 when nothing could be evaluated. An empty dict
            is returned, and the error logged, if validation or the
            computation raises.
        """
        try:
            DataValidator.validate_column_exists(self._df.shape, tuple(self._df.columns), signal_column)
            DataValidator.validate_column_exists(self._df.shape, tuple(self._df.columns), entry_price_column)

            prices = self._df[entry_price_column].to_numpy(dtype=float)
            entry_points = np.flatnonzero(self._df[signal_column].to_numpy() == 1)

            report = {
                'total_signals': int(entry_points.size),
                'evaluated_signals': 0,
                'avg_return': 0.0,
                'win_rate': 0.0,
                'best_return': 0.0,
                'worst_return': 0.0,
                'total_return': 0.0
            }
            if entry_points.size == 0:
                return report

            # Clip exits to the last row; an entry whose clipped exit is not
            # strictly later (last row, or non-positive holding period) has
            # nothing to measure. Filter before indexing so out-of-range
            # exits are never looked up.
            exit_points = np.minimum(entry_points + holding_period, prices.size - 1)
            has_exit = exit_points > entry_points
            entry_prices = prices[entry_points[has_exit]]
            exit_prices = prices[exit_points[has_exit]]

            # Skip zero entry prices (division by zero) and NaN/inf prices,
            # which would otherwise turn every aggregate into NaN.
            usable = (entry_prices != 0) & np.isfinite(entry_prices) & np.isfinite(exit_prices)
            if not usable.any():
                return report

            returns = (exit_prices[usable] - entry_prices[usable]) / entry_prices[usable]

            report.update({
                'evaluated_signals': int(returns.size),
                'avg_return': float(returns.mean()),
                'win_rate': float((returns > 0).sum() / returns.size),
                'best_return': float(returns.max()),
                'worst_return': float(returns.min()),
                'total_return': float(returns.sum())
            })
            return report

        except Exception as e:
            logger.error(f"Backtesting failed: {e}")
            return {}

# %%
def create_analyzer(df: pd.DataFrame, validate_ohlcv: bool = True) -> EnhancedTechnicalAnalyzer:
    """Build an ``EnhancedTechnicalAnalyzer`` for ``df``.

    Args:
        df: DataFrame handed to the analyzer.
        validate_ohlcv: Forwarded unchanged as the constructor's second
            positional argument.
    """
    analyzer = EnhancedTechnicalAnalyzer(df, validate_ohlcv)
    return analyzer

# Alias for backward compatibility
def cabr(df: pd.DataFrame) -> EnhancedTechnicalAnalyzer:
    """Legacy entry point: build an analyzer for ``df`` with the constructor's defaults."""
    analyzer = EnhancedTechnicalAnalyzer(df)
    return analyzer

# %%
class TradingSignalGenerator:
    """Builds composite trading signals on top of an analyzer.

    Each strategy adds the indicators it needs through the analyzer, asks the
    analyzer for the individual comparison signals, and then ANDs whichever
    of those signal columns exist into one 0/1 column. Every step is
    best-effort: failures are logged as warnings and the analyzer is always
    returned.
    """

    def __init__(self, analyzer: EnhancedTechnicalAnalyzer):
        self.analyzer = analyzer

    def _try_add_indicator(self, config, column_name) -> bool:
        """Add one indicator via the analyzer; log and report failure instead of raising."""
        try:
            self.analyzer.add_indicator(config, column_name)
        except Exception as e:
            logger.warning(f"Could not add {config.name}: {e}")
            return False
        logger.info(f"Successfully added {column_name}")
        return True

    def _combine_signals(self, components, target, minimum) -> int:
        """AND the existing component columns into ``target``.

        Returns the number of components used, or 0 (and writes nothing) when
        fewer than ``minimum`` of them exist.
        """
        frame = self.analyzer.df
        present = [name for name in components if name in frame.columns]
        if len(present) < minimum:
            return 0

        combined = frame[present].astype(bool).all(axis=1)
        frame[target] = combined.astype(int)
        return len(present)

    def generate_trend_following_signals(self) -> 'EnhancedTechnicalAnalyzer':
        """Add EMA/RSI/MACD (and ADX when possible) and build ``trend_following_signal``.

        The combined column is the AND of ``bullish_trend``, ``ema_bullish``
        and ``rsi_not_overbought`` and is only written when at least two of
        them exist. ``rsi_not_oversold`` is requested as well but is not part
        of the combination.

        Returns:
            The analyzer this generator wraps.
        """
        indicators = [
            IndicatorConfig(name='EMA', period=21),
            IndicatorConfig(name='EMA', period=50),
            IndicatorConfig(name='RSI', period=14),
            IndicatorConfig(name='MACD', fast_period=12, slow_period=26, signal_period=9),
        ]

        # ADX is only attempted when High, Low and Close are all present.
        if all(col in self.analyzer.df.columns for col in ['High', 'Low', 'Close']):
            indicators.append(IndicatorConfig(name='ADX', period=14))

        for indicator in indicators:
            if indicator.period:
                column_name = f"{indicator.name}_{indicator.period}"
            else:
                column_name = indicator.name
            self._try_add_indicator(indicator, column_name)

        # Generate signals only for the indicators that were actually added.
        try:
            available_cols = self.analyzer.df.columns.tolist()

            # The third argument is presumably the output column name; the
            # combination step below looks these names up in df.columns.
            if 'EMA_21' in available_cols:
                self.analyzer.above('Close', 'EMA_21', 'bullish_trend')

            if all(col in available_cols for col in ['EMA_21', 'EMA_50']):
                self.analyzer.above('EMA_21', 'EMA_50', 'ema_bullish')

            if 'RSI_14' in available_cols:
                self.analyzer.below('RSI_14', 70, 'rsi_not_overbought')
                self.analyzer.above('RSI_14', 30, 'rsi_not_oversold')

            used = self._combine_signals(
                ['bullish_trend', 'ema_bullish', 'rsi_not_overbought'],
                'trend_following_signal',
                minimum=2,
            )
            if used:
                logger.info(f"Created trend following signal with {used} components")

        except Exception as e:
            logger.warning(f"Could not generate all trend-following signals: {e}")

        return self.analyzer

    def generate_mean_reversion_signals(self) -> 'EnhancedTechnicalAnalyzer':
        """Add RSI and Bollinger Bands and build ``mean_reversion_buy``.

        The bands are computed here as a 20-row SMA of ``Close`` plus/minus
        two standard deviations and stored as ``BB_MIDDLE``, ``BB_UPPER`` and
        ``BB_LOWER``. The combined column is the AND of ``oversold_bb``
        (Close below the lower band) and ``oversold_rsi`` (RSI_14 below 30),
        using whichever of the two exists.

        Returns:
            The analyzer this generator wraps.
        """
        # Each indicator is added independently, so one failure does not stop
        # the other component; the combination step accepts a single one.
        # The engine's BBANDS is not called because the bands are derived
        # from the SMA below and its result was never used.
        self._try_add_indicator(IndicatorConfig(name='RSI', period=14), 'RSI_14')
        self._try_add_indicator(IndicatorConfig(name='SMA', period=20), 'BB_MIDDLE')

        try:
            frame = self.analyzer.df
            if 'BB_MIDDLE' in frame.columns:
                middle = frame['BB_MIDDLE']
                # Bollinger Bands use the population standard deviation
                # (ddof=0), as TA-Lib's BBANDS does. pandas defaults to the
                # sample estimate (ddof=1), which would widen a 20-row band
                # by about 2.6 %.
                spread = 2 * frame['Close'].rolling(window=20).std(ddof=0)

                frame['BB_UPPER'] = middle + spread
                frame['BB_LOWER'] = middle - spread

            if 'BB_LOWER' in self.analyzer.df.columns:
                self.analyzer.below('Close', 'BB_LOWER', 'oversold_bb')

            if 'RSI_14' in self.analyzer.df.columns:
                self.analyzer.below('RSI_14', 30, 'oversold_rsi')

            used = self._combine_signals(
                ['oversold_bb', 'oversold_rsi'],
                'mean_reversion_buy',
                minimum=1,
            )
            if used:
                logger.info(f"Created mean reversion signal with {used} components")

        except Exception as e:
            logger.warning(f"Could not generate mean reversion signals: {e}")

        return self.analyzer

# %%
# Example usage and demonstration
if __name__ == "__main__":
    print("=== Enhanced Enterprise Technical Analysis Framework ===\n")

    # Create deterministic sample data for the demonstration
    np.random.seed(42)
    n_bars = 1000
    dates = pd.date_range('2023-01-01', periods=n_bars, freq='D')

    # The random draws are made in a fixed order (close walk, high offsets,
    # low offsets, open noise, volume) so the data is reproducible.
    close_prices = 100 + np.cumsum(np.random.randn(n_bars) * 0.02)
    high_offsets = np.abs(np.random.randn(n_bars) * 0.01)
    low_offsets = np.abs(np.random.randn(n_bars) * 0.01)
    open_prices = close_prices + np.random.randn(n_bars) * 0.005
    volumes = 1000000 + np.random.randint(-100000, 100000, n_bars)

    # High/Low must bracket BOTH Open and Close. Offsetting from Close alone
    # lets Open land outside the bar's range and produces invalid OHLC rows.
    high_prices = np.maximum(open_prices, close_prices) + high_offsets
    low_prices = np.minimum(open_prices, close_prices) - low_offsets

    sample_data = pd.DataFrame({
        'DateTime': dates,
        'Open': open_prices,
        'High': high_prices,
        'Low': low_prices,
        'Close': close_prices,
        'Volume': volumes
    })

    # Create analyzer
    analyzer = create_analyzer(sample_data)

    # Demo 1: Fluent interface with automatic indicator addition
    # NOTE: the sample volumes stay below 1,100,000, so the Volume condition
    # never fires on this data.
    print("Demo 1: Fluent Interface with TALib Integration")
    query = """
    Close above EMA_21
    RSI_14 below 70
    Volume above 1200000
    Close crossed_up EMA_50
    """

    analyzer.execute_query(query)
    print(analyzer.summary())
    print()

    # Demo 2: Performance report
    print("Demo 2: Performance Report")
    perf_report = analyzer.performance_report()
    for key, value in perf_report.items():
        print(f"{key}: {value}")
    print()

    # Demo 3: Advanced signal generation
    print("Demo 3: Advanced Signal Generation")
    signal_gen = TradingSignalGenerator(analyzer)
    signal_gen.generate_trend_following_signals()

    # Demo 4: Backtesting (only when the combined signal could be built)
    if 'trend_following_signal' in analyzer.df.columns:
        backtest_results = analyzer.backtest_signals('trend_following_signal', holding_period=5)
        print("Backtest Results:", backtest_results)

    # Demo 5: Memory optimization
    print("\nDemo 5: Memory Optimization")
    original_memory = analyzer.df.memory_usage(deep=True).sum() / 1024 / 1024
    analyzer.optimize_memory()
    optimized_memory = analyzer.df.memory_usage(deep=True).sum() / 1024 / 1024
    print(f"Memory usage: {original_memory:.2f}MB -> {optimized_memory:.2f}MB")

    print(f"\nFinal DataFrame shape: {analyzer.df.shape}")
    print(f"Generated columns: {len([col for col in analyzer.df.columns if col not in sample_data.columns])}")

    # Display available indicators
    engine = TALibIndicatorEngine()
    print(f"\nAvailable TALib indicators: {len(engine._available_indicators)}")
    print("Example indicators:", list(engine._available_indicators.keys())[:10])

=== Enhanced Enterprise Technical Analysis Framework ===



2025-09-18 12:47:58,131 - INFO - Initialized analyzer with DataFrame shape: (1000, 6)
2025-09-18 12:47:58,379 - INFO - Memory usage: 0.05MB


Demo 1: Fluent Interface with TALib Integration


2025-09-18 12:48:21,960 - INFO - ✓ Added indicator EMA_21 in 23.5711s
2025-09-18 12:48:22,888 - INFO - ✓ Added indicator RSI_14 in 0.8424s
2025-09-18 12:48:23,129 - INFO - ✓ Added indicator EMA_50 in 0.2394s
2025-09-18 12:48:23,146 - INFO - ✓ Close above EMA_21 -> Close_above_EMA_21 (0.0151s)
2025-09-18 12:48:23,150 - INFO - ✓ RSI_14 below 70.0 -> RSI_14_below_70_0 (0.0022s)
2025-09-18 12:48:23,157 - INFO - ✓ Volume above 1200000.0 -> Volume_above_1200000_0 (0.0049s)
2025-09-18 12:48:23,164 - INFO - ✓ Close crossed_up EMA_50 -> Close_crossed_up_EMA_50 (0.0060s)
2025-09-18 12:48:23,300 - INFO - ✓ Added indicator EMA_21 in 0.0048s
2025-09-18 12:48:23,301 - INFO - Successfully added EMA_21
2025-09-18 12:48:23,304 - INFO - ✓ Added indicator EMA_50 in 0.0027s
2025-09-18 12:48:23,306 - INFO - Successfully added EMA_50
2025-09-18 12:48:23,310 - INFO - ✓ Added indicator RSI_14 in 0.0029s
2025-09-18 12:48:23,311 - INFO - Successfully added RSI_14
2025-09-18 12:48:23,318 - INFO - ✓ Added indicat

                 Operation                   Column  Success  \
0        ADD_INDICATOR_EMA                   EMA_21     True   
1        ADD_INDICATOR_RSI                   RSI_14     True   
2        ADD_INDICATOR_EMA                   EMA_50     True   
3       Close above EMA_21       Close_above_EMA_21     True   
4        RSI_14 below 70.0        RSI_14_below_70_0     True   
5   Volume above 1200000.0   Volume_above_1200000_0     True   
6  Close crossed_up EMA_50  Close_crossed_up_EMA_50     True   

   Execution_Time_ms  Active_Signals  Signal_Ratio  \
0           23571.06    97988.785116   9798.878512   
1             842.44    50583.578095   5058.357809   
2             239.41    95084.546366   9508.454637   
3              15.08      496.000000     49.600000   
4               2.23      905.000000     90.500000   
5               4.86        0.000000      0.000000   
6               6.03       45.000000      4.500000   

                             Message  
0       Indicat

In [None]:
# Build an analyzer around the DataFrame loaded from the CSV
analyzer = create_analyzer(df)

# Method 1: text query, one comparison per line
query = "\n".join([
    "",
    "Close above EMA_21",
    "RSI_14 below 70",
    "Volume crossed_up 1000000",
    "",
])
analyzer.execute_query(query)

# Method 2: the same kind of comparisons through the chainable methods,
# written one call per statement
analyzer = analyzer.above('Close', 'EMA_50')
analyzer = analyzer.below('RSI_14', 30)
analyzer = analyzer.crossed_up('MACD', 0)

# Method 3: add an indicator explicitly from a config object
stoch_config = IndicatorConfig(name='STOCH', fast_period=14, slow_period=3)
analyzer.add_indicator(stoch_config)

# Show the per-operation summary and the aggregate report
print(analyzer.summary())
print(analyzer.performance_report())

# Composite trend-following signal
signal_gen = TradingSignalGenerator(analyzer)
signal_gen.generate_trend_following_signals()

# Backtest the composite signal with a 5-row holding period
results = analyzer.backtest_signals('trend_following_signal', holding_period=5)