In [1]:
import typing
import pandas as pd
import re
import functools
import numpy as np
import math


In [2]:
class Token:
    """A lexical token: a category name paired with the matched text."""

    def __init__(self, type: str, value: str):
        # ``type`` mirrors the attribute name other code reads, so the
        # parameter keeps that name even though it shadows the builtin.
        self.type, self.value = type, value

    def __repr__(self):
        return "Token({}, '{}')".format(self.type, self.value)

In [3]:
class ParseTreeNode:
    """A node of the parse tree built by ``Grammar.build_parse_tree``.

    Attributes:
        type: Grammar symbol (inner nodes) or token type (leaf nodes).
        value: Token text for leaf nodes; ``None`` for inner nodes.
        children: Child nodes in rule order; an empty list for leaves.
        start_index: Index of the first token covered by this node, or
            ``None`` until the parser assigns it.
        end_index: Index of the last token covered by this node
            (inclusive), or ``None`` until the parser assigns it.
    """

    # Binary operators understood by ``reify``:
    # operator text -> (factory function used for late binding,
    #                   implementation used to fold numeric constants).
    _BINARY_OPERATIONS = {
        "+": ("Add", lambda a, b: a + b),
        "-": ("Sub", lambda a, b: a - b),
        "*": ("Mul", lambda a, b: a * b),
        "/": ("Div", lambda a, b: a / b),
        "%": ("Mod", lambda a, b: a % b),
        "**": ("Pow", lambda a, b: a ** b),
        "<": ("Lt", lambda a, b: a < b),
        "<=": ("Le", lambda a, b: a <= b),
        ">": ("Gt", lambda a, b: a > b),
        ">=": ("Ge", lambda a, b: a >= b),
        "==": ("Eq", lambda a, b: a == b),
        "!=": ("Ne", lambda a, b: a != b),
    }

    def __init__(self, type: str, value: typing.Optional[str] = None, children: typing.Optional[typing.List["ParseTreeNode"]] = None):
        self.type = type
        self.value = value
        self.children = children or []
        self.start_index = None
        self.end_index = None

    def __repr__(self):
        return f"ParseTreeNode({self.type}, value={self.value}, children={self.children}, start_index={self.start_index}, end_index={self.end_index})"

    def reify(self, function_factory):
        """Evaluate this subtree into a constant or a function instance.

        Args:
            function_factory: Object whose ``get(name)`` returns a definition
                exposing ``parameters`` (a mapping of parameter names) and
                ``create_function(**params)``. It is only consulted for
                function calls and for binary operations whose operands are
                not both numeric constants.

        Returns:
            * a number or boolean when a binary operation on two numeric
              constants is folded immediately;
            * an ``int``/``float`` for ``number`` leaves;
            * the raw token text for every other leaf (strings, identifiers);
            * otherwise whatever ``create_function`` returns.

        Raises:
            ValueError: If the node shape is not supported, a called function
                or named parameter is unknown, or a call has too many or too
                few arguments.
        """
        children = self.children or []
        count = len(children)

        if self.type == "expression":
            if count == 1:
                return children[0].reify(function_factory)
            # NOTE: each expression holds exactly one operator, so chains of
            # non-associative operators must be parenthesised by the caller.
            if (count == 3 and children[0].type == "term"
                    and children[1].value in self._BINARY_OPERATIONS
                    and children[2].type == "term"):
                return self._reify_binary(function_factory)
        elif self.type == "term":
            if count == 1:
                return children[0].reify(function_factory)
        elif self.type == "factor":
            if count == 1:
                return children[0].reify(function_factory)
            if count == 3 and children[0].value == "(" and children[2].value == ")":
                return children[1].reify(function_factory)
            if (count == 4 and children[0].type == "identifier"
                    and children[1].value == "(" and children[2].type == "arguments"
                    and children[3].value == ")"):
                return self._reify_call(function_factory)

        if not children:
            if self.type == "number":
                try:
                    return int(self.value)
                except ValueError:
                    # Not an integer literal (e.g. "1.5"); fall back to float.
                    return float(self.value)
            # Every other childless node evaluates to its raw text. The
            # original condition only named "string", but operator precedence
            # made it match all leaves, and identifiers depend on that.
            return self.value

        raise ValueError(f"Cannot reify node of type: {self.type} with {count} children: {self}")

    def _reify_binary(self, function_factory):
        """Fold or late-bind a ``term <operator> term`` expression."""
        left_node, operator_node, right_node = self.children
        function_name, fold = self._BINARY_OPERATIONS[operator_node.value]
        left = left_node.reify(function_factory)
        right = right_node.reify(function_factory)
        if isinstance(left, (int, float)) and isinstance(right, (int, float)):
            return fold(left, right)
        # At least one operand is not a plain number, so defer the operation
        # to the registered function; its first two parameters receive the
        # operands in order.
        definition = function_factory.get(function_name)
        names = list(definition.parameters.keys())
        return definition.create_function(**{names[0]: left, names[1]: right})

    @staticmethod
    def _flatten_arguments(arguments_node):
        """Return the ``argument`` nodes of a right-nested ``arguments`` chain."""
        flattened = []
        node = arguments_node
        while node is not None and node.children and node.children[0].type == "argument":
            flattened.append(node.children[0])
            if len(node.children) == 3 and node.children[1].value == ",":
                node = node.children[2]
            else:
                node = None
        return flattened

    def _reify_call(self, function_factory):
        """Evaluate an ``identifier ( arguments )`` factor as a function call."""
        name = self.children[0].value
        try:
            definition = function_factory.get(name)
        except ValueError as error:
            raise ValueError(f"Function '{name}' not found in the factory.") from error

        arguments = self._flatten_arguments(self.children[2])
        parameter_names = list(definition.parameters.keys())
        params = {}

        # Named arguments are bound first; a repeated name keeps its first value.
        for argument in arguments:
            parts = argument.children
            if not (len(parts) == 3 and parts[1].type == "operator" and parts[1].value == "="):
                continue
            parameter_name = parts[0].value
            if parameter_name in params:
                continue
            if parameter_name not in definition.parameters:
                raise ValueError(f"Parameter '{parameter_name}' not found for indicator '{name}'.")
            params[parameter_name] = parts[2].reify(function_factory)

        # Positional arguments fill the remaining parameters in declaration
        # order. Slots already bound by name are skipped WITHOUT consuming the
        # argument (previously the argument itself was dropped).
        named = set(params)
        position = 0
        for argument in arguments:
            if len(argument.children) != 1:
                continue
            while position < len(parameter_names) and parameter_names[position] in named:
                position += 1
            if position >= len(parameter_names):
                raise ValueError(f"Incorrect number of positional parameters for '{name}'.")
            params[parameter_names[position]] = argument.children[0].reify(function_factory)
            position += 1

        missing = set(parameter_names) - set(params)
        if missing:
            raise ValueError(f"Missing required parameters for {name}: {missing}")

        return definition.create_function(**params)

In [4]:
class GrammarRule:
    """One production: a left-hand symbol and its right-hand symbol list."""

    def __init__(self, left: str, right: typing.List[str]):
        self.left, self.right = left, right

    def __repr__(self):
        right_side = " ".join(self.right)
        return "{} -> {}".format(self.left, right_side)

In [5]:
class Grammar:
    """A small ordered-choice, backtracking grammar with a built-in tokenizer.

    Rules are given one per line as ``left -> symbol symbol ...``. A symbol is
    a token type (``identifier``, ``number``, ``string``, ``operator``), a
    double-quoted literal such as ``"+"`` that must equal the token text, or
    the left-hand side of another rule.
    """

    # Operator spellings recognised by the tokenizer. "^^" is included because
    # the default grammar in this file refers to it.
    _OPERATORS = (
        "**", "*", "/", "//", "%", "+", "-", "==", "!=", "<=", ">=", "<", ">",
        "=", "!", "&&", "||", "&", "|", "^", "^^", "~", "<<", ">>",
        "(", ")", "[", "]", "{", "}", ",", ":", ".", "->", "@", ";",
        "+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", "<<=", ">>=",
    )

    # Regex alternation takes the first alternative that matches, not the
    # longest, so operators are sorted longest-first; otherwise "/" would
    # always win over "//", "<" over "<<", "-" over "->", and so on.
    _TOKEN_PATTERN = re.compile(
        "(?P<operator>" + "|".join(map(re.escape, sorted(_OPERATORS, key=len, reverse=True))) + ")"
        r"|'(?P<single_quoted>[^']+)'"
        r'|"(?P<double_quoted>[^"]+)"'
        r"|(?P<number>\d+\.?\d*)"
        r"|(?P<identifier>[a-zA-Z_]\w*)"
    )

    # Regex group name -> token type stored on the Token.
    _TOKEN_TYPES = {
        "operator": "operator",
        "single_quoted": "string",
        "double_quoted": "string",
        "number": "number",
        "identifier": "identifier",
    }

    def __init__(self, grammar_string: str):
        """Parse ``grammar_string`` into ``self.rules``.

        Blank lines are ignored. Each remaining line is split at its FIRST
        ``->`` so that a rule may itself contain a ``"->"`` terminal.

        Raises:
            ValueError: If a non-blank line contains no ``->``.
        """
        self.rules = []
        for line in grammar_string.strip().splitlines():
            if not line.strip():
                continue
            left, arrow, right = line.partition("->")
            if not arrow:
                raise ValueError(f"Invalid grammar rule: {line}")
            self.rules.append(GrammarRule(left.strip(), right.split()))

    def build_parse_tree(self, tokens: typing.List["Token"], start_symbol: str = "expression") -> typing.Optional["ParseTreeNode"]:
        """Build a parse tree for ``tokens`` starting from ``start_symbol``.

        Rules sharing a left-hand side are tried in declaration order and the
        first one that matches wins. The result is not checked against the
        token count, so the returned tree may cover only a prefix of
        ``tokens`` (see its ``end_index``).

        Returns:
            The root ``ParseTreeNode``, or ``None`` if no rule matches.
        """
        nonterminals = {rule.left for rule in self.rules}
        token_count = len(tokens)

        def _parse(index: int, nonterminal: str) -> typing.Optional["ParseTreeNode"]:
            candidates = [rule for rule in self.rules if rule.left == nonterminal]

            if index >= token_count:
                # Out of tokens: only an empty (epsilon) rule can still match.
                if any(not rule.right for rule in candidates):
                    return ParseTreeNode(nonterminal, children=[])
                return None

            for rule in candidates:
                children = []
                position = index
                for symbol in rule.right:
                    if position >= token_count:
                        break
                    token = tokens[position]
                    if symbol == token.type or symbol == f'"{token.value}"':
                        # Terminal: matches by token type or quoted literal text.
                        leaf = ParseTreeNode(token.type, value=token.value)
                        leaf.start_index = leaf.end_index = position
                        children.append(leaf)
                        position += 1
                    elif symbol in nonterminals:
                        child = _parse(position, symbol)
                        if child is None:
                            break
                        children.append(child)
                        position = child.end_index + 1
                    else:
                        break
                else:
                    # Every symbol of the rule matched.
                    node = ParseTreeNode(nonterminal, children=children)
                    # An empty rule covers no tokens: start=index, end=index-1.
                    node.start_index = children[0].start_index if children else index
                    node.end_index = children[-1].end_index if children else index - 1
                    return node

            return None

        return _parse(0, start_symbol)

    def parse(self, input_string: str, start_symbol: str = "expression"):
        """Tokenize ``input_string`` and build its parse tree."""
        return self.build_parse_tree(self.tokenize(input_string), start_symbol)

    def tokenize(self, expression: str) -> typing.List["Token"]:
        """Split ``expression`` into operator, string, number and identifier tokens.

        String tokens carry the text between the quotes. Characters that match
        no token pattern (whitespace, unknown symbols, empty quote pairs) are
        skipped without error.
        """
        tokens = []
        for match in self._TOKEN_PATTERN.finditer(expression):
            group = match.lastgroup
            tokens.append(Token(self._TOKEN_TYPES[group], match.group(group)))
        return tokens

In [6]:
class FunctionInstance:
    """A function definition bound to concrete (possibly nested) parameters."""

    def __init__(self, name: str, parameters: typing.Dict[str, typing.Any], definition):
        self.name, self.parameters, self.definition = name, parameters, definition

    def evaluate_parameters(self, data):
        """Resolve the bound parameters against ``data``.

        Parameters that are themselves ``FunctionInstance`` objects are
        replaced by the result of their ``calculate(data)``; every other
        value is passed through unchanged.
        """
        return {
            key: bound.calculate(data) if isinstance(bound, FunctionInstance) else bound
            for key, bound in self.parameters.items()
        }

    def calculate(self, data: pd.DataFrame):
        """Run the definition on ``data`` with the resolved parameters.

        Args:
            data: Input handed to the definition's ``calculate`` (annotated
                as a pandas DataFrame).

        Returns:
            Whatever ``self.definition.calculate`` returns.
        """
        resolved = self.evaluate_parameters(data)
        return self.definition.calculate(data, resolved)

    def __repr__(self):
        rendered = [f"{key}={bound}" for key, bound in self.parameters.items()]
        return f"{self.definition.name}({', '.join(rendered)})"

In [7]:
class ParameterType:
    """
    Describes one parameter of a screener or indicator: its data type,
    optional numeric bounds, default value, step size and allowed strings.
    """

    _DATA_TYPES = ("integer", "real", "boolean", "string", "any")
    _TIMEFRAMES = ("tick", "1s", "5s", "15s", "1m", "2m", "5m", "15m", "1d", "1w", "1M")

    def __init__(self,
                 data_type: typing.Literal["integer", "real", "boolean", "string", "any"],
                 min_val: typing.Union[int, float, None] = None,
                 max_val: typing.Union[int, float, None] = None,
                 default: typing.Any = None,
                 timeframe_defaults: typing.Optional[typing.Dict[typing.Literal["tick", "1s", "5s", "15s", "1m", "2m", "5m", "15m", "1d", "1w", "1M"], typing.Any]] = None,
                 increment: typing.Union[int, float, None] = None,
                 allowed_strings: typing.Optional[typing.List[str]] = None):
        """
        Args:
            data_type: One of 'integer', 'real', 'boolean', 'string', 'any'.
            min_val: Optional lower bound (numeric types).
            max_val: Optional upper bound (numeric types).
            default: Value returned by ``get_default``.
            timeframe_defaults: Optional mapping from timeframe name to a
                per-timeframe default.
            increment: Step used when enumerating values; defaults to 1 for
                'integer' and 0.01 for 'real'.
            allowed_strings: Optional whitelist, only valid for 'string'.

        Raises:
            ValueError: Unknown ``data_type`` or timeframe, or
                ``allowed_strings`` given for a non-string type.
            TypeError: A bound, ``timeframe_defaults`` or ``allowed_strings``
                has the wrong type.
        """
        if data_type not in self._DATA_TYPES:
            raise ValueError("data_type must be 'integer', 'real', 'boolean', 'string', or 'any'")

        self._check_bound("min_val", min_val, data_type)
        self._check_bound("max_val", max_val, data_type)

        if timeframe_defaults is not None:
            if not isinstance(timeframe_defaults, dict):
                raise TypeError("timeframe_defaults must be a dictionary")
            for timeframe in timeframe_defaults:
                if timeframe not in self._TIMEFRAMES:
                    raise ValueError(f"Invalid timeframe: {timeframe}")

        if increment is None:
            if data_type == "integer":
                increment = 1
            elif data_type == "real":
                increment = 0.01

        if data_type == "string" and allowed_strings is not None and not isinstance(allowed_strings, list):
            raise TypeError("allowed_strings must be a list of strings")

        if data_type != "string" and allowed_strings is not None:
            raise ValueError("allowed_strings can only be specified for string data type")

        self.data_type = data_type
        self.min_val = min_val
        self.max_val = max_val
        self.default = default
        self.timeframe_defaults = timeframe_defaults or {}
        self.increment = increment
        self.allowed_strings = allowed_strings

    @staticmethod
    def _check_bound(label, bound, data_type):
        """Raise TypeError if ``bound`` does not fit the numeric ``data_type``."""
        if bound is None:
            return
        if data_type == "integer" and not isinstance(bound, int):
            raise TypeError(f"{label} must be an integer for integer data_type")
        if data_type == "real" and not isinstance(bound, (int, float)):
            raise TypeError(f"{label} must be a number for real or integer data_type")

    @staticmethod
    def _real_values(start, stop, step):
        """Yield start, start+step, ... up to and including ``stop``."""
        index = 0
        while True:
            # Multiply instead of accumulating so rounding error does not grow
            # with the number of steps.
            value = start + index * step
            if value > stop:
                return
            yield value
            index += 1

    def get_default(self) -> typing.Any:
        """Return the default value given at construction."""
        return self.default

    def get_possible_values(self) -> typing.Iterable[typing.Any]:
        """Enumerate the values this parameter may take.

        This used to be a generator function (it contained ``yield``), so the
        ``return <values>`` branches produced nothing when iterated. It is now
        a plain function that returns an iterable for every data type.

        Returns:
            * 'integer': ``range(min_val, max_val + 1, increment)`` when both
              bounds are set;
            * 'real': a lazy iterator from ``min_val`` to ``max_val`` in steps
              of ``increment`` when both bounds are set;
            * 'boolean': ``[True, False]``;
            * 'string': a copy of ``allowed_strings``, or ``[]`` if unrestricted;
            * otherwise (unbounded numbers, 'any'): ``[]``.

        Raises:
            ValueError: If a bounded 'real' has a non-positive ``increment``
                (the enumeration would never end).
        """
        bounded = self.min_val is not None and self.max_val is not None
        if self.data_type == "integer":
            if bounded:
                return range(self.min_val, self.max_val + 1, self.increment)
        elif self.data_type == "real":
            if bounded:
                if self.increment <= 0:
                    raise ValueError("increment must be positive to enumerate real values")
                return self._real_values(self.min_val, self.max_val, self.increment)
        elif self.data_type == "boolean":
            return [True, False]
        elif self.data_type == "string":
            if self.allowed_strings is not None:
                return list(self.allowed_strings)
        return []

    def __repr__(self):
        return f"ParameterType(data_type='{self.data_type}', min_val={self.min_val}, max_val={self.max_val}, default={self.default}, allowed_strings={self.allowed_strings})"

In [8]:
class FunctionDefinition:
    """A named calculation together with the schema of its parameters."""

    # data_type -> (types accepted by isinstance, wording used in the error).
    # The 'any' type has no entry and is therefore never type-checked.
    _TYPE_CHECKS = {
        "integer": (int, "an integer"),
        "real": ((int, float), "a number"),
        "boolean": (bool, "a boolean"),
        "string": (str, "a string"),
    }

    def __init__(self, name: str, parameters: typing.Dict[str, "ParameterType"], calculation_function, factory=None):
        """Validate and store the definition.

        Raises:
            TypeError: If ``name`` is not a string, ``parameters`` is not a
                dict of ``ParameterType`` values, or ``calculation_function``
                is not callable.
            ValueError: If parameter names are not unique.
        """
        if not isinstance(name, str):
            raise TypeError("name must be a string")
        if not isinstance(parameters, dict):
            raise TypeError("parameters must be a dictionary")
        for candidate in parameters.values():
            if not isinstance(candidate, ParameterType):
                raise TypeError("All values in parameters must be ParameterType objects")
        if len(set(parameters)) != len(parameters):
            raise ValueError("Parameter names must be unique.")
        if not callable(calculation_function):
            raise TypeError("calculation_function must be callable")

        self.name, self.parameters = name, parameters
        self.calculation_function, self.factory = calculation_function, factory

    def create_function(self, **kwargs: typing.Any) -> "FunctionInstance":
        """Bind keyword arguments to this definition's parameters.

        A missing or ``None`` argument falls back to the parameter's default.
        Each value is type-checked against its ``data_type``; numeric values
        are range-checked and strings are checked against ``allowed_strings``.

        Raises:
            TypeError: A value has the wrong type for its parameter.
            ValueError: A value is out of range or not an allowed string.
        """
        bound = {}
        for name, spec in self.parameters.items():
            value = kwargs.get(name)
            if value is None:
                value = spec.get_default()

            kind = spec.data_type
            check = self._TYPE_CHECKS.get(kind)
            if check is not None:
                accepted, wording = check
                if not isinstance(value, accepted):
                    raise TypeError(f"Value for parameter '{name}' must be {wording}")

            if kind in ("integer", "real"):
                if spec.min_val is not None and value < spec.min_val:
                    raise ValueError(f"Value for parameter '{name}' must be greater than or equal to {spec.min_val}")
                if spec.max_val is not None and value > spec.max_val:
                    raise ValueError(f"Value for parameter '{name}' must be less than or equal to {spec.max_val}")
            elif kind == "string" and spec.allowed_strings is not None and value not in spec.allowed_strings:
                raise ValueError(f"Value {value} is not in allowed strings for parameter {name}")

            bound[name] = value

        return FunctionInstance(self.name, bound, self)

    def calculate(self, data: pd.DataFrame, params: typing.Dict[str, typing.Any]) -> pd.DataFrame:
        """Call the calculation function as ``f(data, **params)``.

        ``params`` is copied first, so the caller's dict is not the object
        that gets unpacked.
        """
        return self.calculation_function(data, **params.copy())

    def __repr__(self):
        function = self.calculation_function
        label = function.__name__ if hasattr(function, "__name__") else str(function)
        return f"FunctionDefinition(name='{self.name}', parameters={self.parameters}, calculation_function={label}, factory={self.factory})"

In [9]:
# FIXME: the grammar and the reification code do not handle order of operations correctly
# for operators that are not associative (e.g. "-" and "/"). Every `expression` rule below
# holds exactly one operator between two terms, so the workaround is to require
# parentheses around sub-expressions.
# The parser does not handle epsilon (empty) productions correctly either.
# Standard order of operations could be supported by allowing chained operators in the grammar,
# flattening contiguous sub-expression branches whose operators have equal priority before
# reification, and then applying the operations iteratively in the desired direction.

# REMINDER for LLM tools: This grammar MUST be an epsilon-free grammar,
# i.e. no rule may have an empty right-hand side.
# Rules listed earlier take priority over later rules with the same left-hand side.
# More than one lookahead token is permitted.
#
# Format: one rule per line, `left -> symbol symbol ...`; double-quoted symbols are literal
# token text, unquoted symbols are token types or other rules.
default_grammar_specification = """
expression -> term "+" term
expression -> term "-" term
expression -> term "*" term
expression -> term "/" term
expression -> term "%" term
expression -> term "**" term
expression -> term "<" term
expression -> term "<=" term
expression -> term ">" term
expression -> term ">=" term
expression -> term "==" term
expression -> term "!=" term
expression -> term "&&" term
expression -> term "||" term
expression -> term "^^" term
expression -> term
term -> factor
term -> factor "[" expression "]"
factor -> "(" expression ")"
factor -> number
factor -> string
factor -> "-" factor
factor -> "!" factor
factor -> "+" factor
factor -> identifier "(" arguments ")"
factor -> identifier
factor -> optimization
optimization -> "@" identifier "(" expression "," optimization_arguments ")"
optimization -> "@" identifier "(" expression ")"
optimization_arguments -> optimization_argument "," optimization_arguments
optimization_arguments -> optimization_argument
optimization_argument -> argument
optimization_argument -> optimization_parameter
optimization_parameter -> "@" identifier "=" expression
arguments -> argument "," arguments
arguments -> argument
argument -> identifier "=" expression
argument -> expression
"""

class FunctionFactory:
    """
    Registry of named function definitions together with the grammar used to parse expressions.

    Definitions are stored in ``function_definitions`` keyed by name. ``parse`` builds a
    parse tree with ``self.grammar`` and reifies it against this factory, which looks
    definitions up through ``get``.
    """

    def __init__(self, grammar_specification=default_grammar_specification, should_register_basic_operations=True, should_register_basic_indicators=True, should_register_basic_screeners=True, should_register_basic_portfolio_calculators=True):
        """
        Creates the grammar and registers the requested built-in suites.

        Args:
            grammar_specification: Grammar text passed to ``Grammar``.
            should_register_basic_operations: Register Add, Sub, ... Ne when True.
            should_register_basic_indicators: Register the technical indicators when True.
            should_register_basic_screeners: Register TopN and Percentile when True.
            should_register_basic_portfolio_calculators: Register Normalize and the
                threshold/cutoff calculators when True.
        """
        self.function_definitions: typing.Dict[str, Definition] = {}
        # Build from the argument (not the module-level default) so a custom grammar takes effect.
        self.grammar = Grammar(grammar_specification)
        if should_register_basic_operations:
            self.register_basic_operations()
        if should_register_basic_indicators:
            self.register_basic_indicators()
        if should_register_basic_screeners:
            self.register_basic_screeners()
        if should_register_basic_portfolio_calculators:
            self.register_basic_portfolio_calculators()

    def register(self, function_definition):
        """
        Registers a definition under ``function_definition.name``.

        A definition already registered under the same name is replaced; no
        duplicate check is performed.

        Args:
            function_definition: The Definition to register.
        """
        self.function_definitions[function_definition.name] = function_definition
        # NOTE(review): FunctionDefinition.__repr__ reads `factory`, while this sets
        # `ffactory` -- confirm which attribute name consumers expect.
        function_definition.ffactory = self

    def register_basic_operations(self):
        """Registers the binary arithmetic and comparison operations (Add through Ne)."""
        binary_operations = (
            ("Add", calculate_add),
            ("Sub", calculate_sub),
            ("Mul", calculate_mul),
            ("Div", calculate_div),
            ("Mod", calculate_mod),
            ("Pow", calculate_pow),
            ("Lt", calculate_lt),
            ("Le", calculate_le),
            ("Gt", calculate_gt),
            ("Ge", calculate_ge),
            ("Eq", calculate_eq),
            ("Ne", calculate_ne),
        )
        for name, calculation in binary_operations:
            # Fresh ParameterType objects per operation so that no two definitions
            # share a parameter instance. Key order (a0, a1) matters: reification
            # assigns operands by position in this dict.
            parameters = {"a0": ParameterType("any"), "a1": ParameterType("any")}
            self.register(FunctionDefinition(name, parameters, calculation))

    def register_basic_screeners(self):
        """Registers the TopN and Percentile screeners."""
        # Top N screener
        top_n_field_param = ParameterType("any", default="return")
        top_n_n_param = ParameterType("integer", min_val=1, default=5)
        self.register(FunctionDefinition("TopN", {"field": top_n_field_param, "top_n": top_n_n_param}, top_n_screener_function))

        # Percentile screener
        percentile_field_param = ParameterType("any", default="return")
        percentile_percentile_param = ParameterType("real", min_val=0.0, max_val=1.0, default=.1)
        self.register(FunctionDefinition("Percentile", {"field": percentile_field_param, "percentile": percentile_percentile_param}, percentile_screener_function))

    def register_basic_portfolio_calculators(self):
        """Registers Normalize, AbsThreshold, AbsMinCutoff and AbsMaxCutoff, each applied per date."""
        calculate_normalize = functools.partial(apply_daily_function, f=do_normalize)
        normalize_weights = ParameterType("any", default="weights")
        self.register(FunctionDefinition("Normalize", {"weights": normalize_weights}, calculate_normalize))

        calculate_abs_threshold = functools.partial(apply_daily_function, f=do_abs_threshold)
        abs_threshold_weights = ParameterType("any", default="weights")
        abs_threshold_threshold = ParameterType("real", default=1e-6)
        self.register(FunctionDefinition("AbsThreshold", {"weights": abs_threshold_weights, "threshold": abs_threshold_threshold}, calculate_abs_threshold))

        calculate_abs_min_cutoff = functools.partial(apply_daily_function, f=do_abs_min_cutoff)
        abs_min_cutoff_weights = ParameterType("any", default="weights")
        abs_min_cutoff_cutoff = ParameterType("real", default=1e-6)
        self.register(FunctionDefinition("AbsMinCutoff", {"weights": abs_min_cutoff_weights, "minimum": abs_min_cutoff_cutoff}, calculate_abs_min_cutoff))

        calculate_abs_max_cutoff = functools.partial(apply_daily_function, f=do_abs_max_cutoff)
        abs_max_cutoff_weights = ParameterType("any", default="weights")
        # NOTE(review): a default maximum of 1e-6 caps every weight at 1e-6; it looks
        # copied from the minimum cutoff -- confirm the intended default.
        abs_max_cutoff_cutoff = ParameterType("real", default=1e-6)
        self.register(FunctionDefinition("AbsMaxCutoff", {"weights": abs_max_cutoff_weights, "maximum": abs_max_cutoff_cutoff}, calculate_abs_max_cutoff))

    def _register_symbol_indicator(self, name, parameters, indicator_function):
        """Registers ``indicator_function`` wrapped so that it is evaluated through calculate_indicator_by with field="symbol"."""
        calculation = functools.partial(calculate_indicator_by, field="symbol", indicator_function=indicator_function)
        self.register(FunctionDefinition(name, parameters, calculation))

    def register_basic_indicators(self):
        """Registers basic technical indicator functions with this factory instance."""

        def length_param(default):
            # Every window-length parameter shares the same 1..200 integer range.
            return ParameterType("integer", min_val=1, max_val=200, default=default)

        # Indicators whose only parameter is a window length: (name, default length, function).
        single_length_indicators = (
            ("SMA", 20, do_calculate_sma),
            ("RSI", 14, do_calculate_rsi),
        )
        for name, default_length, indicator_function in single_length_indicators:
            self._register_symbol_indicator(name, {"length": length_param(default_length)}, indicator_function)

        # MACD
        macd_parameters = {
            "fast_length": ParameterType("integer", min_val=1, max_val=100, default=12),
            "slow_length": ParameterType("integer", min_val=1, max_val=200, default=26),
            "signal_length": ParameterType("integer", min_val=5, max_val=50, default=9),
        }
        self._register_symbol_indicator("MACD", macd_parameters, do_calculate_macd)

        # Bollinger Bands
        bb_parameters = {
            "length": length_param(20),
            "std_dev": ParameterType("real", min_val=0.1, max_val=5.0, default=2.0, increment=0.1),
        }
        self._register_symbol_indicator("BB", bb_parameters, do_calculate_bollinger_bands)

        # Remaining single-length indicators, kept in their original registration order.
        more_single_length_indicators = (
            ("RVWAP", 20, do_calculate_rvwap),    # Rolling VWAP
            ("ATR", 14, do_calculate_atr),        # Average True Range
            ("ADX", 14, do_calculate_adx),
            ("CCI", 14, do_calculate_cci),        # Commodity Channel Index
            ("CMF", 14, do_calculate_cmf),        # Chaikin Money Flow
            ("Aroon", 14, do_calculate_aroon),
            ("MFI", 14, do_calculate_mfi),
            ("PCT", 14, do_calculate_pct_rank),   # Percent Rank
            ("PRP", 14, do_calculate_prp),        # Price Range Percentage
            ("LRET", 1, do_calculate_lret),       # Log Return
        )
        for name, default_length, indicator_function in more_single_length_indicators:
            self._register_symbol_indicator(name, {"length": length_param(default_length)}, indicator_function)

        # Shift ("series" is listed before "n", matching do_calculate_shift's signature)
        shift_parameters = {
            "series": ParameterType("any", default="close"),
            "n": ParameterType("integer", min_val=1, max_val=200, default=1),
        }
        self._register_symbol_indicator("Shift", shift_parameters, do_calculate_shift)

    def get(self, name: str):
        """
        Retrieves a function definition by name.

        Args:
            name: The name the definition was registered under.

        Returns:
            The registered definition object.

        Raises:
            ValueError: If no definition with the given name is registered.
        """
        if name not in self.function_definitions:
            raise ValueError(f"No function found with the name '{name}'.")
        return self.function_definitions[name]

    def parse(self, expression):
        """Parses ``expression`` with this factory's grammar and returns the reified result."""
        parse_tree = self.grammar.parse(expression)
        reified_expression = parse_tree.reify(self)
        return reified_expression

    def __repr__(self):
        return f"FunctionFactory(functions={self.function_definitions})"



In [10]:
# Functions to carry out basic mathematical operations

#expression -> term "+" term
def calculate_add(df, a0, a1):
    """Return ``a0 + a1``; ``df`` is not used."""
    total = a0 + a1
    return total

#expression -> term "-" term
def calculate_sub(df, a0, a1):
    """Return ``a0 - a1``; ``df`` is not used."""
    difference = a0 - a1
    return difference

#expression -> term "*" term
def calculate_mul(df, a0, a1):
    """Return ``a0 * a1``; ``df`` is not used."""
    product = a0 * a1
    return product

#expression -> term "/" term
def calculate_div(df, a0, a1):
    """Return the true division ``a0 / a1``; ``df`` is not used."""
    quotient = a0 / a1
    return quotient

#expression -> term "%" term
def calculate_mod(df, a0, a1):
    """Return ``a0 % a1``; ``df`` is not used."""
    remainder = a0 % a1
    return remainder

#expression -> term "**" term
def calculate_pow(df, a0, a1):
    """Return ``a0 ** a1``; ``df`` is not used."""
    power = a0 ** a1
    return power

#expression -> term "<" term
def calculate_lt(df, a0, a1):
    """Return ``a0 < a1``; ``df`` is not used."""
    is_less = a0 < a1
    return is_less

#expression -> term "<=" term
def calculate_le(df, a0, a1):
    """Return ``a0 <= a1``; ``df`` is not used."""
    is_less_or_equal = a0 <= a1
    return is_less_or_equal

#expression -> term ">" term
def calculate_gt(df, a0, a1):
    """Return ``a0 > a1``; ``df`` is not used."""
    is_greater = a0 > a1
    return is_greater

#expression -> term ">=" term
def calculate_ge(df, a0, a1):
    """Return ``a0 >= a1``; ``df`` is not used."""
    is_greater_or_equal = a0 >= a1
    return is_greater_or_equal

#expression -> term "==" term
def calculate_eq(df, a0, a1):
    """Return ``a0 == a1``; ``df`` is not used."""
    is_equal = a0 == a1
    return is_equal

#expression -> term "!=" term
def calculate_ne(df, a0, a1):
    """Return ``a0 != a1``; ``df`` is not used."""
    is_different = a0 != a1
    return is_different

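# TODO: the logical operators below appear in the grammar but have no calculate_* function in this cell yet.
#expression -> term "&&" term
#expression -> term "||" term
#expression -> term "^^" term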

In [11]:
# Functions for basic portfolio calculations

def do_normalize(context, weights):
    """
    Rescales weights so that their absolute values sum to 1, keeping each sign.

    Args:
        context: Not used by this function.
        weights: A DataFrame (its first column is used), a Series, or any value
            accepted by ``pd.Series``.

    Returns:
        A one-column DataFrame named "weight" on the weights' index. All zeros when
        the absolute weights sum to zero.
    """
    # Reduce the input to a single Series.
    if not isinstance(weights, (pd.DataFrame, pd.Series)):
        weights = pd.Series(weights)
    elif isinstance(weights, pd.DataFrame):
        weights = weights.iloc[:, 0]

    as_float = weights.astype("float")
    magnitudes = as_float.abs()
    gross = magnitudes.sum()

    if gross == 0:
        zeros = [0.0] * len(weights)
        return pd.DataFrame({"weight": zeros}, index=weights.index)

    # w / |w| is +1 or -1; the 0/0 case becomes NaN and is mapped to 0.
    direction = as_float.div(magnitudes).fillna(0)
    rescaled = (magnitudes / gross) * direction

    return pd.DataFrame({"weight": rescaled}, index=weights.index)


def do_abs_threshold(context, weights, threshold):
    """
    Zeroes every weight whose absolute value is below ``threshold``.

    Args:
        context: Not used by this function.
        weights: A DataFrame (its first column is used), a Series, or any value
            accepted by ``pd.Series``.
        threshold: Absolute values strictly below this become 0.

    Returns:
        A one-column DataFrame named "weight" on the weights' index.
    """
    # Reduce the input to a single Series.
    if not isinstance(weights, (pd.DataFrame, pd.Series)):
        weights = pd.Series(weights)
    elif isinstance(weights, pd.DataFrame):
        weights = weights.iloc[:, 0]

    as_float = weights.astype("float")
    too_small = as_float.abs() < threshold
    as_float[too_small] = 0

    return pd.DataFrame({"weight": as_float}, index=weights.index)

def do_abs_min_cutoff(context, weights, minimum):
    """
    Raises small non-zero weights to ``minimum`` in absolute value, keeping their sign.

    Args:
        context: Not used by this function.
        weights: A DataFrame (its first column is used), a Series, or any value
            accepted by ``pd.Series``.
        minimum: Weights with 0 < |w| < minimum become sign(w) * minimum.

    Returns:
        A one-column DataFrame named "weight" on the weights' index. Exact zeros
        are left untouched.
    """
    # Reduce the input to a single Series.
    if not isinstance(weights, (pd.DataFrame, pd.Series)):
        weights = pd.Series(weights)
    elif isinstance(weights, pd.DataFrame):
        weights = weights.iloc[:, 0]

    as_float = weights.astype("float")
    magnitudes = as_float.abs()

    is_nonzero = magnitudes > 0
    is_below_minimum = magnitudes < minimum
    needs_raise = is_nonzero & is_below_minimum
    as_float[needs_raise] = np.sign(as_float[needs_raise]) * minimum

    return pd.DataFrame({"weight": as_float}, index=weights.index)

def do_abs_max_cutoff(context, weights, maximum):
    """
    Caps weights at ``maximum`` in absolute value, keeping their sign.

    Args:
        context: Not used by this function.
        weights: A DataFrame (its first column is used), a Series, or any value
            accepted by ``pd.Series``.
        maximum: Weights with |w| > maximum become sign(w) * maximum.

    Returns:
        A one-column DataFrame named "weight" on the weights' index.
    """
    # Reduce the input to a single Series.
    if not isinstance(weights, (pd.DataFrame, pd.Series)):
        weights = pd.Series(weights)
    elif isinstance(weights, pd.DataFrame):
        weights = weights.iloc[:, 0]

    as_float = weights.astype("float")
    is_too_large = as_float.abs() > maximum
    as_float[is_too_large] = np.sign(as_float[is_too_large]) * maximum

    return pd.DataFrame({"weight": as_float}, index=weights.index)

In [12]:
# Functions to carry out screener operations
def top_n_screener_function(context, field, top_n):
    """
    Produces a boolean mask selecting the ``top_n`` highest readings per date.

    Readings are ranked in descending order within each value of
    ``context["date"]``; ties are broken by order of appearance.

    Args:
        context: DataFrame with a "date" column (and the ``field`` column when
            ``field`` is given as a string).
        field: Column name in ``context``, or a Series/DataFrame of readings
            aligned with ``context``. For a DataFrame the first column is used.
        top_n: Number of readings to keep per date.

    Returns:
        A boolean pandas Series, True for rows ranked within the top ``top_n``
        of their date.
    """
    if isinstance(field, str):
        ranks = context.groupby("date")[field].rank(ascending=False, method="first")
    else:
        ranks = field.groupby(by=context["date"]).rank(ascending=False, method="first")
    mask = ranks <= top_n
    # Ranking a column name or a Series yields a Series; only a DataFrame input
    # yields a DataFrame that must be reduced to its first column.
    if isinstance(mask, pd.DataFrame):
        return mask.iloc[:, 0]
    return mask


def percentile_screener_function(context, field, percentile):
    """
    Produces a boolean mask from per-date percentile ranks.

    Readings are ranked in descending order within each value of
    ``context["date"]`` and expressed as a fraction of the group size; a row is
    selected when that fraction is at least ``percentile``.

    Args:
        context: DataFrame with a "date" column (and the ``field`` column when
            ``field`` is given as a string).
        field: Column name in ``context``, or a Series/DataFrame of readings
            aligned with ``context``. For a DataFrame the first column is used.
        percentile: Threshold compared against the fractional rank.

    Returns:
        A boolean pandas Series aligned with the ranked readings.
    """
    if isinstance(field, str):
        ranks = context.groupby(by="date")[field].rank(ascending=False, method="first", pct=True)
    else:
        ranks = field.groupby(by=context["date"]).rank(ascending=False, method="first", pct=True)
    # NOTE(review): with a descending rank the largest reading has the smallest
    # fraction, so `>=` keeps the lower readings -- confirm this is the intended side.
    mask = ranks >= percentile
    # Ranking a column name or a Series yields a Series; only a DataFrame input
    # yields a DataFrame that must be reduced to its first column.
    if isinstance(mask, pd.DataFrame):
        return mask.iloc[:, 0]
    return mask


In [13]:
# Utility function for applying generic indicator functions to dataframes with multiple symbols
def calculate_indicator_by(df, field, indicator_function, *args, **kwargs):
    """
    Applies an indicator function separately to each group of rows sharing a ``field`` value.

    Args:
        df (pd.DataFrame): The DataFrame containing the data.
        field (str): The column to group by (e.g., 'symbol', 'date').
        indicator_function (callable): Called as
            ``indicator_function(group_copy, *args, **kwargs)``; must return a DataFrame.
        *args: Positional arguments to pass to the indicator function.
        **kwargs: Keyword arguments to pass to the indicator function.

    Returns:
        pd.DataFrame: The concatenated per-group indicator frames (indicator columns
        only, without ``field``). Rows are ordered group by group, not in the
        original row order. When ``df`` has no ``field`` column the indicator is
        applied once to a copy of the whole frame.
    """
    if field not in df.columns:
        return indicator_function(df.copy(), *args, **kwargs)

    per_group = [
        indicator_function(group_df.copy(), *args, **kwargs)
        for _, group_df in df.groupby(field)
    ]
    if per_group:
        combined = pd.concat(per_group)
    else:
        # No groups (e.g. an empty frame): pd.concat([]) would raise, so let the
        # indicator produce its own empty result instead.
        combined = indicator_function(df.copy(), *args, **kwargs)
    # The grouping column is never part of the returned indicator frame.
    return combined.drop(columns=[field], errors="ignore")


# Note: I am not completely happy with how this works; ideally, this would be merged with calculate_indicator_by
def apply_daily_function(context, weights, f, *args, **kwargs):
    """
    Applies ``f`` to the weights of each date and returns the first result column.

    Args:
        context: DataFrame with a "date" column (and the ``weights`` column when
            ``weights`` is given as a string).
        weights: Column name in ``context``, or a pandas object grouped by
            ``context["date"]``.
        f: Called as ``f(context, group, *args, **kwargs)`` for each date; note that
            it receives the full ``context``, not only that date's rows.
        *args: Extra positional arguments for ``f``.
        **kwargs: Extra keyword arguments for ``f``.

    Returns:
        The first column of the combined per-date results, with the "date" index
        level removed.
    """
    def run_for_day(day_weights):
        return f(context, day_weights, *args, **kwargs)

    if isinstance(weights, str):
        grouped = context.groupby("date")[weights]
    else:
        grouped = weights.groupby(by=context["date"])
    combined = grouped.apply(run_for_day)
    # The apply result carries "date" as an extra index level; remove it.
    combined.index = combined.index.droplevel("date")
    return combined.iloc[:, 0]

In [14]:
# Functions to calculate basic technical indicators

# SMA
def do_calculate_sma(df: pd.DataFrame, length: int) -> pd.DataFrame:
    """
    Calculates the simple moving average of the "close" column.

    Args:
        df: DataFrame with a "close" column.
        length: Rolling window size in rows.

    Returns:
        A one-column DataFrame named ``SMA(<length>)`` on ``df``'s index; the first
        ``length - 1`` rows are NaN.
    """
    # No console output here: this runs once per symbol group on every evaluation.
    sma_values = df['close'].rolling(window=length).mean().values
    return pd.DataFrame({f"SMA({length})": sma_values}, index=df.index)

# RSI
def do_calculate_rsi(df: pd.DataFrame, length: int) -> pd.DataFrame:
    """
    Calculates the Relative Strength Index of the "close" column.

    Average gains and losses are simple rolling means over ``length`` rows.

    Args:
        df: DataFrame with a "close" column.
        length: Rolling window size in rows (converted with ``int``).

    Returns:
        A one-column DataFrame named ``RSI(<length>)`` on ``df``'s index. Windows
        with gains and no losses give 100; windows with neither gains nor losses
        give 0; rows without a full window are NaN.
    """
    length = int(length)
    delta = df['close'].diff()
    gains = delta.clip(lower=0)
    losses = -delta.clip(upper=0)
    avg_gains = gains.rolling(window=length).mean()
    avg_losses = losses.rolling(window=length).mean()

    # A zero average loss must not be swapped for infinity: that drives RS to 0 and
    # reports RSI 0 for an uninterrupted rally. Handle those windows explicitly.
    no_losses = avg_losses == 0
    rs = avg_gains / avg_losses.mask(no_losses)
    rsi = 100 - (100 / (1 + rs))
    rsi = rsi.mask(no_losses & (avg_gains > 0), 100.0)
    # Flat window (no gains, no losses): keep reporting 0.
    rsi = rsi.mask(no_losses & (avg_gains == 0), 0.0)

    return pd.DataFrame({f"RSI({length})": rsi.values}, index=df.index)

# MACD
def do_calculate_macd(df: pd.DataFrame, fast_length: int, slow_length: int, signal_length: int) -> pd.DataFrame:
    """
    Calculates MACD, its signal line and the histogram from the "close" column.

    Args:
        df: DataFrame with a "close" column.
        fast_length: Span of the fast exponential moving average.
        slow_length: Span of the slow exponential moving average.
        signal_length: Span of the exponential moving average applied to the MACD line.

    Returns:
        A DataFrame on ``df``'s index with three columns named
        ``MACD(<fast>,<slow>,<signal>)["macd"]``, ``...["signal"]`` and ``...["histogram"]``.
    """
    close = df['close']
    fast_ema = close.ewm(span=fast_length, adjust=False).mean()
    slow_ema = close.ewm(span=slow_length, adjust=False).mean()
    macd_line = fast_ema - slow_ema
    signal_line = macd_line.ewm(span=signal_length, adjust=False).mean()
    histogram = macd_line - signal_line

    prefix = f'MACD({fast_length},{slow_length},{signal_length})'
    columns = {
        f'{prefix}["macd"]': macd_line.values,
        f'{prefix}["signal"]': signal_line.values,
        f'{prefix}["histogram"]': histogram.values,
    }
    return pd.DataFrame(columns, index=df.index)

# Bollinger Bands
def do_calculate_bollinger_bands(df: pd.DataFrame, length: int, std_dev: float) -> pd.DataFrame:
    """
    Calculates Bollinger Bands from the "close" column.

    Args:
        df: DataFrame with a "close" column.
        length: Rolling window size in rows.
        std_dev: Number of rolling standard deviations between the middle band
            and each outer band.

    Returns:
        A DataFrame on ``df``'s index with columns ``BB(<length>,<std_dev>)["middle"]``,
        ``...["upper"]`` and ``...["lower"]``.
    """
    window = df['close'].rolling(window=length)
    middle = window.mean()
    band_width = window.std() * std_dev

    prefix = f'BB({length},{std_dev})'
    columns = {
        f'{prefix}["middle"]': middle.values,
        f'{prefix}["upper"]': (middle + band_width).values,
        f'{prefix}["lower"]': (middle - band_width).values,
    }
    return pd.DataFrame(columns, index=df.index)

# Rolling VWAP
def do_calculate_rvwap(df: pd.DataFrame, length: int) -> pd.DataFrame:
    """
    Calculates the rolling volume-weighted average of the typical price.

    Args:
        df: DataFrame with "high", "low", "close" and "volume" columns.
        length: Rolling window size in rows.

    Returns:
        A one-column DataFrame named ``RVWAP(<length>)`` on ``df``'s index.
    """
    volume = df["volume"]
    typical_price = (df['high'] + df['low'] + df['close']) / 3
    weighted_price = volume * typical_price

    volume_total = volume.rolling(length).sum()
    weighted_total = weighted_price.rolling(length).sum()
    rolling_vwap = weighted_total / volume_total

    return pd.DataFrame({f"RVWAP({length})": rolling_vwap.values}, index=df.index)

# Average True Range
def do_calculate_atr(df, length):
    """
    Calculates Average True Range (ATR) as a simple rolling mean of the true range.

    The true range of a row is the largest of high - low, |high - previous close|
    and |low - previous close|.

    Args:
        df: DataFrame with "high", "low" and "close" columns.
        length: Rolling window size in rows.

    Returns:
        A one-column DataFrame named ``ATR(<length>)`` on ``df``'s index.
    """
    previous_close = df["close"].shift(1)
    candidates = [
        df["high"] - df["low"],
        (df["high"] - previous_close).abs(),
        (df["low"] - previous_close).abs(),
    ]
    true_range = pd.concat(candidates, axis=1).max(axis=1)
    average_true_range = true_range.rolling(window=length).mean()
    return pd.DataFrame({f"ATR({length})": average_true_range}, index=df.index)

# ADX
def do_calculate_adx(df, length):
    """
    Calculates Average Directional Index (ADX) and Directional Movement Indicators.

    Directional movement, true range and DX are smoothed with an exponentially
    weighted mean using ``alpha = 1 / length``.

    Args:
        df: DataFrame with "high", "low" and "close" columns.
        length: Smoothing length.

    Returns:
        A DataFrame on ``df``'s index with columns ``ADX(<length>):adx``,
        ``ADX(<length>):pdi`` and ``ADX(<length>)mdi``.
    """
    high = df["high"]
    low = df["low"]
    previous_close = df["close"].shift(1)

    upmove = high - high.shift(1)
    downmove = low.shift(1) - low
    # np.where returns a bare array: re-attach df's index so that the divisions
    # below align row by row even when the index is not 0..n-1 (e.g. one symbol's
    # rows taken from a larger frame).
    plus_dm = pd.Series(np.where((upmove > downmove) & (upmove > 0), upmove, 0), index=df.index)
    minus_dm = pd.Series(np.where((downmove > upmove) & (downmove > 0), downmove, 0), index=df.index)

    tr1 = high - low
    tr2 = abs(high - previous_close)
    tr3 = abs(low - previous_close)
    true_range = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
    smoothed_true_range = true_range.ewm(alpha=1 / length).mean()

    # Calculate +DI and -DI
    plus_di = 100 * (plus_dm.ewm(alpha=1 / length).mean() / smoothed_true_range)
    minus_di = 100 * (minus_dm.ewm(alpha=1 / length).mean() / smoothed_true_range)

    # Calculate DX; a zero denominator is replaced by infinity so DX becomes 0.
    dx = 100 * np.abs(plus_di - minus_di) / (plus_di + minus_di).replace(0, np.inf)

    # Calculate ADX
    adx = dx.ewm(alpha=1 / length).mean().fillna(0)

    # NOTE(review): the "mdi" column name lacks the ':' used by the other two
    # columns; left unchanged in case callers rely on the existing name.
    return pd.DataFrame({f"ADX({length}):adx": adx, f"ADX({length}):pdi": plus_di, f"ADX({length})mdi": minus_di}, index=df.index)

# Commodity Channel Index
def do_calculate_cci(df, length):
    """
    Calculates Commodity Channel Index (CCI).

    The deviation term is the rolling mean of the absolute distance between the
    typical price and its own rolling mean.

    Args:
        df: DataFrame with "high", "low" and "close" columns.
        length: Rolling window size in rows, used for both rolling means.

    Returns:
        A one-column DataFrame named ``CCI(<length>)``.
    """
    typical_price = (df["high"] + df["low"] + df["close"]) / 3
    distance_from_mean = typical_price - typical_price.rolling(window=length).mean()
    mean_deviation = distance_from_mean.abs().rolling(window=length).mean()
    scaled_deviation = 0.015 * mean_deviation
    cci = distance_from_mean / scaled_deviation
    return pd.DataFrame({f"CCI({length})": cci})

# Chaikin Money Flow
def do_calculate_cmf(df, length):
    """
    Calculates Chaikin Money Flow (CMF).

    Args:
        df: DataFrame with "high", "low", "close" and "volume" columns.
        length: Rolling window size in rows.

    Returns:
        A one-column DataFrame named ``CMF(<length>)`` on ``df``'s index. Bars
        whose high equals their low contribute zero money flow.
    """
    price_range = df["high"] - df["low"]
    multiplier = ((df["close"] - df["low"]) - (df["high"] - df["close"])) / price_range
    # A zero-range bar gives 0/0 = NaN, which would blank the rolling sum for the
    # next `length` rows; count it as no money flow instead.
    multiplier = multiplier.where(price_range != 0, 0.0)
    money_flow = multiplier * df["volume"]
    money_flow_volume = money_flow.rolling(window=length).sum()
    volume_sum = df["volume"].rolling(window=length).sum()
    cmf = money_flow_volume / volume_sum
    return pd.DataFrame({f"CMF({length})": cmf}, index=df.index)

# Aroon
def do_calculate_aroon(df, length):
    """
    Calculates Aroon Up and Aroon Down.

    For row ``i`` the window is the ``length`` rows before it (``i - length`` to
    ``i - 1``; the current row is not included). Aroon Up is
    ``(length - bars since the window's highest high) * 100 / length``, counted
    back from row ``i - 1``; Aroon Down does the same with the lowest low. When
    the extreme occurs more than once, the most recent occurrence is used.

    Args:
        df: DataFrame with "high" and "low" columns.
        length: Window size in rows.

    Returns:
        A DataFrame on ``df``'s index with columns ``AROON(<length>):up`` and
        ``AROON(<length>):down``. The first ``length`` rows are 0.
    """
    high = df["high"].values
    low = df["low"].values
    row_count = len(df)
    aroon_up = np.zeros(row_count)
    aroon_down = np.zeros(row_count)

    # Scan each full window directly. Tracking the extreme incrementally skipped
    # the interior of the first window and broke ties inconsistently.
    for i in range(length, row_count):
        window_high = high[i - length:i]
        window_low = low[i - length:i]
        # On the reversed window, argmax/argmin is the distance from the newest
        # row to the most recent extreme.
        bars_since_high = int(np.argmax(window_high[::-1]))
        bars_since_low = int(np.argmin(window_low[::-1]))
        aroon_up[i] = (length - bars_since_high) * 100.0 / length
        aroon_down[i] = (length - bars_since_low) * 100.0 / length

    return pd.DataFrame({f"AROON({length}):up": aroon_up, f"AROON({length}):down": aroon_down}, index=df.index)

# MFI
def do_calculate_mfi(df, length):
    """
    Calculates Money Flow Index (MFI).

    Money flow is typical price times volume. A row counts as positive flow when
    its close is above the previous close and as negative flow otherwise.

    Args:
        df: DataFrame with "high", "low", "close" and "volume" columns.
        length: Rolling window size in rows.

    Returns:
        A one-column DataFrame named ``MFI(<length>)`` on ``df``'s index. Windows
        with no negative flow give 100; rows without a full window of
        close-to-close changes are NaN.
    """
    typical_price = (df["high"] + df["low"] + df["close"]) / 3
    money_flow = typical_price * df["volume"]

    previous_close = df["close"].shift(1)
    has_previous = previous_close.notna()

    # Keep every row and zero out the flow that does not apply. Filtering rows out
    # would make the rolling window span a varying number of bars and leave the
    # two series with disjoint indexes, so their ratio would always be NaN.
    positive_money_flow = money_flow.where(df["close"] > previous_close, 0.0).where(has_previous)
    negative_money_flow = money_flow.where(df["close"] <= previous_close, 0.0).where(has_previous)

    positive_sum = positive_money_flow.rolling(window=length).sum()
    negative_sum = negative_money_flow.rolling(window=length).sum()

    # A zero negative sum yields an infinite ratio, which maps to MFI = 100.
    money_ratio = positive_sum / negative_sum
    mfi = 100 - (100 / (1 + money_ratio))
    return pd.DataFrame({f"MFI({length})": mfi}, index=df.index)

# Percent Rank
def do_calculate_pct_rank(df, length):
    """
    Calculates the rolling percent rank of the latest close within its window.

    For each full window, the result is the count of values strictly below the
    window's last value, divided by ``window size - 1`` (0 for a one-row window).

    Args:
        df: DataFrame with a "close" column.
        length: Rolling window size in rows.

    Returns:
        A one-column DataFrame named ``PCT(<length>)`` on ``df``'s index.
    """
    def rank_of_latest(window):
        # `window` is a raw numpy array; its last element is the current row.
        if len(window) <= 1:
            return 0
        below_latest = (window < window[-1]).sum()
        return below_latest / (len(window) - 1)

    ranks = df['close'].rolling(window=length).apply(rank_of_latest, raw=True)
    return pd.DataFrame({f"PCT({length})": ranks}, index=df.index)

# Price Range Percentage
def do_calculate_prp(df, length):
    """
    Calculates the Price Range Percentage.

    This is the position of the close inside the rolling high/low range, as a
    percentage: 0 at the rolling low, 100 at the rolling high.

    Args:
        df: DataFrame with "high", "low" and "close" columns.
        length: Rolling window size in rows.

    Returns:
        A one-column DataFrame named ``PRP(<length>)`` on ``df``'s index.
    """
    ceiling = df["high"].rolling(window=length).max()
    floor = df["low"].rolling(window=length).min()
    height_above_floor = df["close"] - floor
    position_pct = height_above_floor / (ceiling - floor) * 100
    return pd.DataFrame({f"PRP({length})": position_pct}, index=df.index)

# Log Return
def do_calculate_lret(df, length):
    """
    Calculates the log return of the "close" column over ``length`` rows.

    Args:
        df: DataFrame with a "close" column.
        length: Number of rows between the two closes being compared.

    Returns:
        A one-column DataFrame named ``LRET(<length>)`` on ``df``'s index; the
        first ``length`` rows are NaN.
    """
    price_ratio = df["close"] / df["close"].shift(length)
    # np.log already propagates NaN, so no per-element guard is needed (a check
    # of the form `a != np.nan` is always True and filters nothing).
    log_return = np.log(price_ratio)
    return pd.DataFrame({f"LRET({length})": log_return}, index=df.index)

# Shift
def do_calculate_shift(df, series="close", n=1):
    """
    Shifts a series by ``n`` rows.

    Args:
        df: DataFrame the column is read from when ``series`` is a column name.
        series: Column name in ``df``, a single-column DataFrame, or a Series.
        n: Number of rows passed to ``shift``.

    Returns:
        A one-column DataFrame named ``Shift(<name>,<n>)`` on the shifted
        series' index, where ``<name>`` is the column name or the Series name.

    Raises:
        ValueError: If ``series`` is a DataFrame with more than one column.
    """
    if isinstance(series, str):
        label, values = series, df[series]
    elif isinstance(series, pd.DataFrame):
        if series.shape[1] != 1:
            raise ValueError("Shift() currently will operate on a single column only")
        label, values = series.columns[0], series.iloc[:, 0]
    elif isinstance(series, pd.Series):
        label, values = series.name, series
    else:
        # Unrecognised input type: used as-is under a placeholder name.
        label, values = "!unknown", series
    return pd.DataFrame({f"Shift({label},{n})": values.shift(n)}, index=values.index)


In [15]:
# Basic candlestick pattern detection
def do_calculate_hammer(df: pd.DataFrame) -> pd.Series:
    """
    Detects Hammer candlestick pattern.

    A row matches when the close is below the open, the lower wick is at least
    twice the body size and the upper wick is at most half the body size.

    Args:
        df: DataFrame with "open", "high", "low" and "close" columns.

    Returns:
        A boolean Series aligned with ``df``.
    """
    body = df['close'] - df['open']
    body_size = abs(body)
    # Element-wise min/max are required: the builtin min()/max() truth-test a
    # whole Series comparison and raise ValueError.
    body_bottom = np.minimum(df['open'], df['close'])
    body_top = np.maximum(df['open'], df['close'])
    lower_wick = abs(df['low'] - body_bottom)
    upper_wick = abs(df['high'] - body_top)
    return (body < 0) & (lower_wick >= 2 * body_size) & (upper_wick <= body_size / 2)

def do_calculate_inverted_hammer(df: pd.DataFrame) -> pd.Series:
    """
    Detects Inverted Hammer candlestick pattern.

    A row matches when the close is above the open, the upper wick is at least
    twice the body and the lower wick is at most half the body.

    Args:
        df: DataFrame with "open", "high", "low" and "close" columns.

    Returns:
        A boolean Series aligned with ``df``.
    """
    body = df['close'] - df['open']
    # Element-wise min/max are required: the builtin min()/max() truth-test a
    # whole Series comparison and raise ValueError.
    body_bottom = np.minimum(df['open'], df['close'])
    body_top = np.maximum(df['open'], df['close'])
    lower_wick = abs(df['low'] - body_bottom)
    upper_wick = abs(df['high'] - body_top)
    return (body > 0) & (upper_wick >= 2 * body) & (lower_wick <= body / 2)

def do_calculate_hanging_man(df: pd.DataFrame) -> pd.Series:
    """
    Detects Hanging Man candlestick pattern.

    A row matches when the close is below the open, the lower wick is at least
    twice the body size, the upper wick is at most half the body size, and the
    high is above the previous row's high.

    Args:
        df: DataFrame with "open", "high", "low" and "close" columns.

    Returns:
        A boolean Series aligned with ``df``; the first row is always False
        because it has no previous high.
    """
    body = df['close'] - df['open']
    body_size = abs(body)
    # Element-wise min/max are required: the builtin min()/max() truth-test a
    # whole Series comparison and raise ValueError.
    body_bottom = np.minimum(df['open'], df['close'])
    body_top = np.maximum(df['open'], df['close'])
    lower_wick = abs(df['low'] - body_bottom)
    upper_wick = abs(df['high'] - body_top)
    return (body < 0) & (lower_wick >= 2 * body_size) & (upper_wick <= body_size / 2) & (df['high'] > df['high'].shift(1))

def do_calculate_shooting_star(df: pd.DataFrame) -> pd.Series:
    """
    Detects Shooting Star candlestick pattern.

    A row matches when the close is above the open, the upper wick is at least
    twice the body, the lower wick is at most half the body, and the low is
    below the previous row's low.

    Args:
        df: DataFrame with "open", "high", "low" and "close" columns.

    Returns:
        A boolean Series aligned with ``df``; the first row is always False
        because it has no previous low.
    """
    body = df['close'] - df['open']
    # Element-wise min/max are required: the builtin min()/max() truth-test a
    # whole Series comparison and raise ValueError.
    body_bottom = np.minimum(df['open'], df['close'])
    body_top = np.maximum(df['open'], df['close'])
    lower_wick = abs(df['low'] - body_bottom)
    upper_wick = abs(df['high'] - body_top)
    return (body > 0) & (upper_wick >= 2 * body) & (lower_wick <= body / 2) & (df['low'] < df['low'].shift(1))

def do_calculate_engulfing_bullish(df: pd.DataFrame) -> pd.Series:
    """
    Detects Bullish Engulfing candlestick pattern.

    A row matches when the previous row closed below its open, the current open
    is below the previous close, the current close is above the previous open,
    and the current body is larger than the previous body.

    Returns:
        A boolean Series aligned with ``df``.
    """
    previous_open = df['open'].shift(1)
    previous_close = df['close'].shift(1)
    previous_body = abs(previous_close - previous_open)
    body = abs(df['close'] - df['open'])

    previous_was_down = previous_open > previous_close
    opens_below_previous_close = df['open'] < previous_close
    closes_above_previous_open = df['close'] > previous_open
    body_is_larger = body > previous_body
    return previous_was_down & opens_below_previous_close & closes_above_previous_open & body_is_larger

def do_calculate_engulfing_bearish(df: pd.DataFrame) -> pd.Series:
    """
    Detects Bearish Engulfing candlestick pattern.

    A row matches when the previous row closed above its open, the current open
    is above the previous close, the current close is below the previous open,
    and the current body is larger than the previous body.

    Returns:
        A boolean Series aligned with ``df``.
    """
    previous_open = df['open'].shift(1)
    previous_close = df['close'].shift(1)
    previous_body = abs(previous_close - previous_open)
    body = abs(df['close'] - df['open'])

    previous_was_up = previous_open < previous_close
    opens_above_previous_close = df['open'] > previous_close
    closes_below_previous_open = df['close'] < previous_open
    body_is_larger = body > previous_body
    return previous_was_up & opens_above_previous_close & closes_below_previous_open & body_is_larger

def do_calculate_doji(df: pd.DataFrame) -> pd.Series:
    """
    Detects Doji candlestick pattern.

    A row matches when its body (|close - open|) is smaller than 1% of its
    high-low range.

    Returns:
        A boolean Series aligned with ``df``.
    """
    body_size = abs(df['close'] - df['open'])
    # 0.01 sets the sensitivity: the largest body, as a fraction of the range, that still counts.
    largest_doji_body = 0.01 * (df['high'] - df['low'])
    return body_size < largest_doji_body

def do_calculate_morning_star(df: pd.DataFrame) -> pd.Series:
    """
    Detects Morning Star candlestick pattern (simplified).

    A row matches when the two previous rows each closed below their open and
    the current row closes above its open. Accurate detection would need more
    sophisticated logic.

    Returns:
        A boolean Series aligned with ``df``.
    """
    two_back_down = df['close'].shift(2) < df['open'].shift(2)
    one_back_down = df['close'].shift(1) < df['open'].shift(1)
    current_up = df['close'] > df['open']
    return two_back_down & one_back_down & current_up
def do_calculate_evening_star(df: pd.DataFrame) -> pd.Series:
    """
    Detects Evening Star candlestick pattern (simplified).

    A row matches when the two previous rows each closed above their open and
    the current row closes below its open. Accurate detection would need more
    sophisticated logic.

    Returns:
        A boolean Series aligned with ``df``.
    """
    two_back_up = df['close'].shift(2) > df['open'].shift(2)
    one_back_up = df['close'].shift(1) > df['open'].shift(1)
    current_down = df['close'] < df['open']
    return two_back_up & one_back_up & current_down


def do_calculate_three_white_soldiers(df: pd.DataFrame) -> pd.Series:
    """
    Detects Three White Soldiers candlestick pattern.

    A row matches when all five comparisons between the current row and the two
    rows before it, listed in the body, hold.

    Returns:
        A boolean Series aligned with ``df``.
    """
    open_now, close_now = df['open'], df['close']
    open_1, close_1 = open_now.shift(1), close_now.shift(1)
    open_2, close_2 = open_now.shift(2), close_now.shift(2)

    matched = open_now > close_1      # opens above the previous close
    matched &= close_now > open_now   # closes above its own open
    matched &= open_1 > close_2       # previous row opened above the close two rows back
    matched &= close_1 > open_2       # previous row closed above the open two rows back
    matched &= close_now > open_1     # closes above the previous open
    return matched


def do_calculate_three_black_crows(df: pd.DataFrame) -> pd.Series:
    """Flag rows matching this module's first Three Black Crows conditions.

    All of the following must hold: the open is below the previous close, the
    candle closes below its open, the previous open is below the close two
    bars back, the previous close is below the open two bars back, and the
    close is below the previous open.

    NOTE: a function with this name is defined again later in this file; when
    the file runs top to bottom the later definition replaces this one.

    Args:
        df: Frame with 'open' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    opens, closes = df['open'], df['close']
    open_1, close_1 = opens.shift(1), closes.shift(1)
    open_2, close_2 = opens.shift(2), closes.shift(2)

    checks = (
        opens < close_1,
        closes < opens,
        open_1 < close_2,
        close_1 < open_2,
        closes < open_1,
    )
    signal = checks[0]
    for check in checks[1:]:
        signal = signal & check
    return signal


def do_calculate_piercing_line(df: pd.DataFrame) -> pd.Series:
    """Flag Piercing Line candles.

    The previous candle must be a down candle; the current candle must open
    below the previous close, close above its own open, and close above the
    midpoint of the previous body.

    NOTE: a function with this name is defined again later in this file; when
    the file runs top to bottom the later definition replaces this one.

    Args:
        df: Frame with 'open' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    opens, closes = df['open'], df['close']
    prior_open, prior_close = opens.shift(1), closes.shift(1)
    prior_mid = (prior_open + prior_close) / 2

    prior_down = prior_close < prior_open
    opens_lower = opens < prior_close
    current_up = closes > opens
    recovers_half = closes > prior_mid
    return prior_down & opens_lower & current_up & recovers_half


def do_calculate_dark_cloud_cover(df: pd.DataFrame) -> pd.Series:
    """Flag Dark Cloud Cover candles.

    The previous candle must be an up candle; the current candle must open
    above the previous close, close below its own open, and close below the
    midpoint of the previous body.

    NOTE: a function with this name is defined again later in this file; when
    the file runs top to bottom the later definition replaces this one.

    Args:
        df: Frame with 'open' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    opens, closes = df['open'], df['close']
    prior_open, prior_close = opens.shift(1), closes.shift(1)
    prior_mid = (prior_open + prior_close) / 2

    prior_up = prior_close > prior_open
    opens_higher = opens > prior_close
    current_down = closes < opens
    gives_back_half = closes < prior_mid
    return prior_up & opens_higher & current_down & gives_back_half


def do_calculate_harami(df: pd.DataFrame) -> pd.Series:
    """Flag rows matching either the bullish or bearish Harami rule used here.

    Bullish leg: previous candle down, open below the previous close, candle
    closes up, and the high is below the previous high.
    Bearish leg: previous candle up, open above the previous close, candle
    closes down, and the low is above the previous low.

    NOTE(review): a textbook harami opens inside the previous body, whereas
    these conditions place the open beyond the previous close; confirm intent.

    Args:
        df: Frame with 'open', 'high', 'low' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``, True when either leg matches.
    """
    opens, closes = df['open'], df['close']
    highs, lows = df['high'], df['low']
    prior_open, prior_close = opens.shift(1), closes.shift(1)

    bullish_leg = (
        (prior_open > prior_close)
        & (opens < prior_close)
        & (closes > opens)
        & (highs < highs.shift(1))
    )
    bearish_leg = (
        (prior_open < prior_close)
        & (opens > prior_close)
        & (closes < opens)
        & (lows > lows.shift(1))
    )
    return bullish_leg | bearish_leg


def do_calculate_bullish_abandoned_baby(df: pd.DataFrame) -> pd.Series:
    """Flag rows matching this module's Bullish Abandoned Baby conditions.

    The previous candle must be a down candle whose high-low range strictly
    contains the current candle's range; the current candle must open below
    the previous close and close above its own open.

    Args:
        df: Frame with 'open', 'high', 'low' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    opens, closes = df['open'], df['close']
    prior_close = closes.shift(1)

    prior_down = prior_close < opens.shift(1)
    lower_high = df['high'].shift(1) > df['high']
    higher_low = df['low'].shift(1) < df['low']
    opens_below_prior_close = opens < prior_close
    current_up = closes > opens
    return prior_down & lower_high & higher_low & opens_below_prior_close & current_up

def do_calculate_bearish_abandoned_baby(df: pd.DataFrame) -> pd.Series:
    """Flag rows matching this module's Bearish Abandoned Baby conditions.

    The previous candle must be an up candle whose high-low range strictly
    contains the current candle's range; the current candle must open above
    the previous close and close below its own open.

    Args:
        df: Frame with 'open', 'high', 'low' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    opens, closes = df['open'], df['close']
    prior_close = closes.shift(1)

    prior_up = prior_close > opens.shift(1)
    lower_high = df['high'].shift(1) > df['high']
    higher_low = df['low'].shift(1) < df['low']
    opens_above_prior_close = opens > prior_close
    current_down = closes < opens
    return prior_up & lower_high & higher_low & opens_above_prior_close & current_down

"""
Important Considerations:

Simplified Logic: Many candlestick patterns have nuanced definitions. The functions above provide a basic detection, and more robust implementations might require more complex logic and potentially incorporating additional criteria (e.g., body size relative to the entire candle range, specific relationships between shadows and bodies).
Parameterization: You could enhance these functions by adding parameters to control sensitivity (e.g., minimum body size, maximum wick length).
Data Quality: The accuracy of pattern detection relies heavily on clean and reliable OHLCV data. Outliers or errors in your data can lead to false positives or negatives.
Context: Candlestick patterns are most meaningful when considered within a broader technical analysis context. Using these functions alone shouldn't be the sole basis for trading decisions.
Remember to thoroughly test these functions with your data and refine them based on your specific requirements and trading strategy. You may need to adjust thresholds or add more conditions to achieve optimal performance.
"""

def do_calculate_morning_doji_star(df: pd.DataFrame) -> pd.Series:
    """Simplified Morning Doji Star detector.

    A row matches when the previous candle is a down candle, the current
    candle's body is under 1% of its high-low range, and the current close is
    above the previous open. Body sizes and wick lengths of the earlier
    candle are not checked.

    Args:
        df: Frame with 'open', 'high', 'low' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    opens, closes = df['open'], df['close']
    prior_open = opens.shift(1)

    prior_down = closes.shift(1) < prior_open
    tiny_body = (closes - opens).abs() < 0.01 * (df['high'] - df['low'])
    closes_above_prior_open = closes > prior_open
    return prior_down & tiny_body & closes_above_prior_open


def do_calculate_evening_doji_star(df: pd.DataFrame) -> pd.Series:
    """Simplified Evening Doji Star detector.

    A row matches when the previous candle is an up candle, the current
    candle's body is under 1% of its high-low range, and the current close is
    below the previous open. Body sizes and wick lengths of the earlier
    candle are not checked.

    Args:
        df: Frame with 'open', 'high', 'low' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    opens, closes = df['open'], df['close']
    prior_open = opens.shift(1)

    prior_up = closes.shift(1) > prior_open
    tiny_body = (closes - opens).abs() < 0.01 * (df['high'] - df['low'])
    closes_below_prior_open = closes < prior_open
    return prior_up & tiny_body & closes_below_prior_open


def do_calculate_spinning_top(df: pd.DataFrame) -> pd.Series:
    """Flag candles whose body is under 10% of their high-low range.

    Direction is ignored, so both up and down candles can match.

    NOTE: a function with this name is defined again later in this file; when
    the file runs top to bottom the later definition replaces this one.

    Args:
        df: Frame with 'open', 'high', 'low' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    candle_span = df['high'] - df['low']
    body_size = (df['close'] - df['open']).abs()
    # 0.1 is the sensitivity knob: raise it to accept thicker bodies.
    return body_size < 0.1 * candle_span


def do_calculate_long_legged_doji(df: pd.DataFrame) -> pd.Series:
    """Flag Long Legged Doji candles: a tiny body with long wicks on both sides.

    A row matches when the body is under 10% of the combined wick length and
    each wick is individually longer than the body.

    NOTE: a function with this name is defined again later in this file; when
    the file runs top to bottom the later definition replaces this one.

    Args:
        df: Frame with 'open', 'high', 'low' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    body = (df['close'] - df['open']).abs()
    # Row-wise max/min: the builtin max()/min() cannot compare two Series and
    # raise "truth value of a Series is ambiguous".
    body_top = df[['open', 'close']].max(axis=1)
    body_bottom = df[['open', 'close']].min(axis=1)
    upper_wick = df['high'] - body_top
    lower_wick = body_bottom - df['low']
    # 0.1 is the sensitivity knob for how small the body must be.
    return (body < 0.1 * (upper_wick + lower_wick)) & (upper_wick > body) & (lower_wick > body)


def do_calculate_abandoned_baby(df: pd.DataFrame) -> pd.Series:
    """Simplified Abandoned Baby detector covering both directions.

    A row matches when the previous candle opened above its close, the
    current open is above the previous close, and the current candle has a
    non-zero body in either direction. Gap sizes and wick relationships are
    not checked.

    NOTE: a function with this name is defined again later in this file; when
    the file runs top to bottom the later definition replaces this one.

    Args:
        df: Frame with 'open' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    opens, closes = df['open'], df['close']
    prior_close = closes.shift(1)

    # Both directions share the same precondition on the previous candle.
    setup = (opens.shift(1) > prior_close) & (opens > prior_close)
    closes_up = closes > opens
    closes_down = closes < opens
    return (setup & closes_up) | (setup & closes_down)


def do_calculate_bullish_kicking_by(df: pd.DataFrame) -> pd.Series:
    """Simplified Bullish Kicking-By detector.

    A row matches when the previous candle is a down candle and the current
    candle opens below the previous close, closes above its own open, and
    closes above the previous high.

    NOTE: a function with this name is defined again later in this file; when
    the file runs top to bottom the later definition replaces this one.

    Args:
        df: Frame with 'open', 'high' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    opens, closes = df['open'], df['close']
    prior_close = closes.shift(1)

    prior_down = opens.shift(1) > prior_close
    opens_lower = opens < prior_close
    current_up = closes > opens
    clears_prior_high = closes > df['high'].shift(1)
    return prior_down & opens_lower & current_up & clears_prior_high


def do_calculate_bearish_kicking_by(df: pd.DataFrame) -> pd.Series:
    """Simplified Bearish Kicking-By detector.

    A row matches when the previous candle is an up candle and the current
    candle opens above the previous close, closes below its own open, and
    closes below the previous low.

    NOTE: a function with this name is defined again later in this file; when
    the file runs top to bottom the later definition replaces this one.

    Args:
        df: Frame with 'open', 'low' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    opens, closes = df['open'], df['close']
    prior_close = closes.shift(1)

    prior_up = opens.shift(1) < prior_close
    opens_higher = opens > prior_close
    current_down = closes < opens
    breaks_prior_low = closes < df['low'].shift(1)
    return prior_up & opens_higher & current_down & breaks_prior_low


"""
Important Notes:

Simplification: These functions represent simplified versions of the patterns. Real-world detection often needs more sophisticated criteria (e.g., precise relationships between body and wick lengths, consideration of volume, context within a trend).
Threshold Tuning: The numerical thresholds (like 0.1 or 0.01) are arbitrary. You'll likely need to fine-tune them based on your data and the desired sensitivity. Experiment to find optimal values.
Completeness: Candlestick patterns are complex, and many variations exist. These functions cover some common patterns, but not all.
False Positives/Negatives: Be aware that even with refined criteria, some false positives and negatives are possible. Always validate the pattern identification visually and within the context of your overall trading strategy.
Remember to carefully test these functions and adjust parameters to suit your needs before using them in any trading system. Always use caution and consider other factors when making investment decisions.
"""


def do_calculate_engulfing_pattern(df: pd.DataFrame) -> pd.Series:
    """Flag rows that form either a bullish or a bearish engulfing pattern.

    Bullish: previous candle down, open below the previous close, close above
    the previous open. Bearish: the mirror image. In both cases the current
    body must be strictly larger than the previous body.

    Args:
        df: Frame with 'open' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``, True for either direction.
    """
    opens, closes = df['open'], df['close']
    prior_open, prior_close = opens.shift(1), closes.shift(1)

    body_grew = (closes - opens).abs() > (prior_close - prior_open).abs()
    bullish_shape = (prior_open > prior_close) & (opens < prior_close) & (closes > prior_open)
    bearish_shape = (prior_open < prior_close) & (opens > prior_close) & (closes < prior_open)
    # The body-size requirement is common to both directions.
    return (bullish_shape | bearish_shape) & body_grew


def do_calculate_inside_day(df: pd.DataFrame) -> pd.Series:
    """Flag bars whose range sits strictly inside the previous bar's range.

    Args:
        df: Frame with 'high' and 'low' columns.

    Returns:
        Boolean Series aligned to ``df.index``; the first row is False.
    """
    highs, lows = df['high'], df['low']
    lower_high = highs.lt(highs.shift(1))
    higher_low = lows.gt(lows.shift(1))
    return lower_high & higher_low


def do_calculate_outside_day(df: pd.DataFrame) -> pd.Series:
    """Flag bars whose range strictly exceeds the previous bar's on both sides.

    Args:
        df: Frame with 'high' and 'low' columns.

    Returns:
        Boolean Series aligned to ``df.index``; the first row is False.
    """
    highs, lows = df['high'], df['low']
    higher_high = highs.gt(highs.shift(1))
    lower_low = lows.lt(lows.shift(1))
    return higher_high & lower_low


def do_calculate_three_inside_up(df: pd.DataFrame) -> pd.Series:
    """Simplified Three Inside Up: two nested inside bars ending on an up candle.

    The current bar must be strictly inside the previous bar, the previous
    bar strictly inside the one before it, and the current close above the
    current open.

    Args:
        df: Frame with 'open', 'high', 'low' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    highs, lows = df['high'], df['low']
    high_1, low_1 = highs.shift(1), lows.shift(1)

    inside_now = (highs < high_1) & (lows > low_1)
    inside_before = (high_1 < highs.shift(2)) & (low_1 > lows.shift(2))
    closes_up = df['close'] > df['open']
    return inside_now & inside_before & closes_up


def do_calculate_three_inside_down(df: pd.DataFrame) -> pd.Series:
    """Simplified Three Inside Down: two nested inside bars ending on a down candle.

    The current bar must be strictly inside the previous bar, the previous
    bar strictly inside the one before it, and the current close below the
    current open.

    Args:
        df: Frame with 'open', 'high', 'low' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    highs, lows = df['high'], df['low']
    high_1, low_1 = highs.shift(1), lows.shift(1)

    inside_now = (highs < high_1) & (lows > low_1)
    inside_before = (high_1 < highs.shift(2)) & (low_1 > lows.shift(2))
    closes_down = df['close'] < df['open']
    return inside_now & inside_before & closes_down


def do_calculate_two_crows(df: pd.DataFrame) -> pd.Series:
    """Flag rows matching this module's simplified Two Crows conditions.

    All of the following must hold: open above the previous close, close
    below the open, previous open above the close two bars back, previous
    close below the open two bars back, open below the previous open, and
    close below the previous close.

    Args:
        df: Frame with 'open' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    opens, closes = df['open'], df['close']
    open_1, close_1 = opens.shift(1), closes.shift(1)
    open_2, close_2 = opens.shift(2), closes.shift(2)

    checks = (
        opens > close_1,
        closes < opens,
        open_1 > close_2,
        close_1 < open_2,
        opens < open_1,
        closes < close_1,
    )
    signal = checks[0]
    for check in checks[1:]:
        signal = signal & check
    return signal


def do_calculate_on_neckline(df: pd.DataFrame, neckline: float = 150) -> pd.Series:
    """Flag rows whose close is strictly above a neckline price level.

    This is still a simplified detector: the neckline is not derived from the
    data, it must be supplied by the caller.

    Args:
        df: Frame with a 'close' column.
        neckline: Price level to compare against. Defaults to 150, the value
            that used to be hard-coded, so existing callers are unaffected.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    return df['close'] > neckline


def do_calculate_breakaway(df: pd.DataFrame, lookback: int = 20) -> pd.Series:
    """Flag upside breakouts: closes above the highest high of the prior bars.

    The earlier version compared each close with the maximum high of the
    whole frame. That reference includes the bar itself and future bars, so
    it could never be exceeded on well-formed OHLC data. The reference is now
    the rolling maximum of the highs of the preceding ``lookback`` bars only.

    Args:
        df: Frame with 'high' and 'close' columns.
        lookback: Number of preceding bars that define the breakout range.
            Must be at least 1.

    Returns:
        Boolean Series aligned to ``df.index``; the first row is False because
        it has no preceding bar.

    Raises:
        ValueError: If ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        raise ValueError("lookback must be at least 1")
    # shift(1) keeps the current bar out of its own reference range.
    prior_high = df['high'].shift(1).rolling(window=lookback, min_periods=1).max()
    return df['close'] > prior_high


"""
Crucial Considerations:

Pattern Complexity: Many of these patterns (especially those involving multiple candles) require more intricate logic than what's shown here for truly reliable detection. The simplified versions presented above might produce false positives or negatives.
Contextual Analysis: Candlestick patterns gain significance when considered within the broader market context (trends, volume, support/resistance levels). Don't rely solely on pattern identification for trading decisions.
Parameterization: Consider adding parameters to tune the sensitivity of the detection (e.g., minimum body size, percentage changes, etc.).
Visual Verification: Always visually inspect the charts to confirm the patterns detected by your functions. Automated detection alone isn't enough for robust trading strategies.
These enhanced functions provide a more comprehensive, though still simplified, approach to candlestick pattern recognition. Remember to thoroughly test and validate them before using them in any trading system. Always prioritize risk management and diversify your investment strategies.
"""

def do_calculate_bullish_engulfing(df: pd.DataFrame) -> pd.Series:
    """Flag up candles whose body spans the previous down candle's body.

    The previous candle must close below its open; the current candle must
    open below the previous close, close above the previous open, and close
    above its own open.

    Args:
        df: Frame with 'open' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    prior = df[['open', 'close']].shift(1)
    return (
        prior['close'].lt(prior['open'])
        & df['open'].lt(prior['close'])
        & df['close'].gt(prior['open'])
        & df['close'].gt(df['open'])
    )


def do_calculate_bearish_engulfing(df: pd.DataFrame) -> pd.Series:
    """Flag down candles whose body spans the previous up candle's body.

    The previous candle must close above its open; the current candle must
    open above the previous close, close below the previous open, and close
    below its own open.

    Args:
        df: Frame with 'open' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    prior = df[['open', 'close']].shift(1)
    return (
        prior['close'].gt(prior['open'])
        & df['open'].gt(prior['close'])
        & df['close'].lt(prior['open'])
        & df['close'].lt(df['open'])
    )


def do_calculate_morning_star(df: pd.DataFrame) -> pd.Series:
    """Simplified Morning Star detector.

    A row matches when the two preceding candles both closed below their
    opens, the current candle closes above its open, and the current open is
    below the previous close.

    Args:
        df: Frame with 'open' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``; the first two rows are False.
    """
    bars = df[['open', 'close']]
    one_back = bars.shift(1)
    two_back = bars.shift(2)
    return (
        two_back['close'].lt(two_back['open'])
        & one_back['close'].lt(one_back['open'])
        & bars['close'].gt(bars['open'])
        & bars['open'].lt(one_back['close'])
    )


def do_calculate_evening_star(df: pd.DataFrame) -> pd.Series:
    """Simplified Evening Star detector.

    A row matches when the two preceding candles both closed above their
    opens, the current candle closes below its open, and the current open is
    above the previous close.

    Args:
        df: Frame with 'open' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``; the first two rows are False.
    """
    bars = df[['open', 'close']]
    one_back = bars.shift(1)
    two_back = bars.shift(2)
    return (
        two_back['close'].gt(two_back['open'])
        & one_back['close'].gt(one_back['open'])
        & bars['close'].lt(bars['open'])
        & bars['open'].gt(one_back['close'])
    )


def do_calculate_piercing_line(df: pd.DataFrame) -> pd.Series:
    """Flag Piercing Line candles.

    The previous candle must close below its open; the current candle must
    open below the previous close, close above the midpoint of the previous
    body, and close above its own open.

    Args:
        df: Frame with 'open' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    prior = df[['open', 'close']].shift(1)
    prior_body_mid = (prior['open'] + prior['close']) / 2
    return (
        prior['close'].lt(prior['open'])
        & df['open'].lt(prior['close'])
        & df['close'].gt(prior_body_mid)
        & df['close'].gt(df['open'])
    )



def do_calculate_dark_cloud_cover(df: pd.DataFrame) -> pd.Series:
    """Flag Dark Cloud Cover candles.

    The previous candle must close above its open; the current candle must
    open above the previous close, close below the midpoint of the previous
    body, and close below its own open.

    Args:
        df: Frame with 'open' and 'close' columns.

    Returns:
        Boolean Series aligned to ``df.index``.
    """
    prior = df[['open', 'close']].shift(1)
    prior_body_mid = (prior['open'] + prior['close']) / 2
    return (
        prior['close'].gt(prior['open'])
        & df['open'].gt(prior['close'])
        & df['close'].lt(prior_body_mid)
        & df['close'].lt(df['open'])
    )

"""
Enhancements:

More Precise Conditions: These functions incorporate stricter criteria to reduce false positives. For example, the engulfing patterns now explicitly check that the current candle's body completely engulfs the previous candle's body (though this could be further refined). The Morning/Evening Star patterns now also require the third candle to open beyond the second candle's close, and the Piercing Line/Dark Cloud Cover patterns keep their midpoint checks to ensure a stronger signal.
Clarity and Readability: The code is restructured for improved readability, using more descriptive variable names.
Important Reminder: Even with these improvements, candlestick pattern recognition is not foolproof. Always cross-reference your results with other technical indicators and fundamental analysis before making any investment decisions. These functions should be part of a comprehensive trading strategy, not the sole basis for your trades.
"""

def do_calculate_hammer(df: pd.DataFrame, hammer_body_factor: float = 0.2, hammer_wick_factor: float = 2.0) -> pd.Series:
    """
    Detects Hammer candlestick pattern with customizable parameters.

    A row matches when the candle closes above its open, has a non-zero body,
    a lower wick of at least ``hammer_wick_factor`` bodies and an upper wick
    of at most ``hammer_body_factor`` bodies.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns.
        hammer_body_factor: Maximum upper wick length as a multiple of the body size.
        hammer_wick_factor: Minimum lower wick length as a multiple of the body size.

    Returns:
        pandas.Series: Boolean Series indicating hammer patterns.
    """
    body = (df['close'] - df['open']).abs()
    # Row-wise min/max: a Python conditional expression or the builtin max()
    # on two Series raises "truth value of a Series is ambiguous".
    body_bottom = df[['open', 'close']].min(axis=1)
    body_top = df[['open', 'close']].max(axis=1)
    lower_wick = body_bottom - df['low']
    upper_wick = df['high'] - body_top

    # Check conditions for the hammer pattern
    is_hammer = (
        (body > 0)
        & (lower_wick >= hammer_wick_factor * body)
        & (upper_wick <= hammer_body_factor * body)
        & (df['close'] > df['open'])
    )

    return is_hammer

def do_calculate_inverted_hammer(df: pd.DataFrame, inverted_hammer_body_factor: float = 0.2, inverted_hammer_wick_factor: float = 2.0) -> pd.Series:
    """
    Detects Inverted Hammer candlestick pattern with customizable parameters.

    A row matches when the candle closes below its open, has a non-zero body,
    an upper wick of at least ``inverted_hammer_wick_factor`` bodies and a
    lower wick of at most ``inverted_hammer_body_factor`` bodies.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns.
        inverted_hammer_body_factor: Maximum lower wick length as a multiple of the body size.
        inverted_hammer_wick_factor: Minimum upper wick length as a multiple of the body size.

    Returns:
        pandas.Series: Boolean Series indicating inverted hammer patterns.
    """
    body = (df['close'] - df['open']).abs()
    # Row-wise min/max: the builtin min()/max() cannot compare two Series.
    body_bottom = df[['open', 'close']].min(axis=1)
    body_top = df[['open', 'close']].max(axis=1)
    lower_wick = body_bottom - df['low']
    # The upper wick starts at the top of the body; measuring from the bottom
    # would add the body length to the wick.
    upper_wick = df['high'] - body_top

    # Check conditions for the inverted hammer pattern
    is_inverted_hammer = (
        (body > 0)
        & (upper_wick >= inverted_hammer_wick_factor * body)
        & (lower_wick <= inverted_hammer_body_factor * body)
        & (df['close'] < df['open'])
    )

    return is_inverted_hammer



def do_calculate_three_line_strike(df: pd.DataFrame, gap_threshold: float = 0.01) -> pd.Series:
    """
    Detects this module's bullish Three Line Strike: three gapping up candles.

    For the current bar and each of the two bars before it, the bar must open
    above the preceding close scaled by ``1 + gap_threshold`` and must close
    above its own open.

    Args:
        df: DataFrame with 'open' and 'close' columns.
        gap_threshold: Minimum fractional gap between a bar's open and the
            preceding bar's close.

    Returns:
        pandas.Series: Boolean Series indicating Three Line Strike patterns.
        The first three rows are False because a lagged value is NaN there.
    """
    opens, closes = df['open'], df['close']
    gap_factor = 1 + gap_threshold

    checks = []
    for lag in range(3):
        lagged_open = opens.shift(lag)
        # Gap up relative to the bar before the lagged one.
        checks.append(lagged_open > closes.shift(lag + 1) * gap_factor)
        # The lagged bar itself must be an up candle.
        checks.append(closes.shift(lag) > lagged_open)

    signal = checks[0]
    for check in checks[1:]:
        signal = signal & check
    return signal


def do_calculate_three_black_crows(df: pd.DataFrame, body_threshold: float = 0.2) -> pd.Series:
    """
    Detects Three Black Crows: three consecutive down candles with real bodies.

    The current bar and the two bars before it must each close below their
    open and have a body strictly larger than ``body_threshold`` times their
    high-low range.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns.
        body_threshold: Minimum body size as a fraction of the candle range.

    Returns:
        pandas.Series: Boolean Series indicating Three Black Crows patterns.
    """
    direction_checks = []
    size_checks = []
    for lag in range(3):
        bar_open = df['open'].shift(lag)
        bar_close = df['close'].shift(lag)
        bar_span = df['high'].shift(lag) - df['low'].shift(lag)
        direction_checks.append(bar_close < bar_open)
        size_checks.append((bar_close - bar_open).abs() > body_threshold * bar_span)

    signal = direction_checks[0]
    for check in direction_checks[1:] + size_checks:
        signal = signal & check
    return signal

"""
Important Improvements:

Parameterization: The functions now accept parameters to adjust sensitivity (e.g., hammer_body_factor, gap_threshold). This allows you to fine-tune the pattern recognition to your specific needs and data characteristics.
More Robust Logic: The pattern detection logic has been made more robust by including additional checks and conditions, making the pattern identification more reliable.
Clearer Variable Names: Variable names have been made more descriptive for enhanced readability and maintainability.
Remember to carefully test these functions with your data and adjust the parameters to achieve optimal results. Visual inspection of the charts remains crucial for confirming the patterns identified by the functions. Do not rely solely on automated pattern recognition for making trading decisions.
"""

def do_calculate_gravestone_doji(df: pd.DataFrame, upper_wick_threshold: float = 0.7) -> pd.Series:
    """
    Detects Gravestone Doji pattern.

    A row matches when the body is under 10% of the high-low range and the
    upper wick is strictly longer than ``upper_wick_threshold`` times that
    range.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns.
        upper_wick_threshold: Minimum ratio of upper wick length to total candle range.

    Returns:
        pandas.Series: Boolean Series indicating Gravestone Doji patterns.
    """
    body = (df['close'] - df['open']).abs()
    total_range = df['high'] - df['low']
    # Row-wise max: the builtin max() cannot compare two Series and raises
    # "truth value of a Series is ambiguous".
    body_top = df[['open', 'close']].max(axis=1)
    upper_wick = df['high'] - body_top

    is_gravestone_doji = (body < 0.1 * total_range) & (upper_wick > upper_wick_threshold * total_range)

    return is_gravestone_doji


def do_calculate_dragonfly_doji(df: pd.DataFrame, lower_wick_threshold: float = 0.7) -> pd.Series:
    """
    Detects Dragonfly Doji pattern.

    A row matches when the body is under 10% of the high-low range and the
    lower wick is strictly longer than ``lower_wick_threshold`` times that
    range.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns.
        lower_wick_threshold: Minimum ratio of lower wick length to total candle range.

    Returns:
        pandas.Series: Boolean Series indicating Dragonfly Doji patterns.
    """
    body = (df['close'] - df['open']).abs()
    total_range = df['high'] - df['low']
    # Row-wise min: the builtin min() cannot compare two Series and raises
    # "truth value of a Series is ambiguous".
    body_bottom = df[['open', 'close']].min(axis=1)
    lower_wick = body_bottom - df['low']

    is_dragonfly_doji = (body < 0.1 * total_range) & (lower_wick > lower_wick_threshold * total_range)

    return is_dragonfly_doji



def do_calculate_bullish_harami(df: pd.DataFrame, body_ratio_threshold:float = 0.5) -> pd.Series:
    """
    Detects this module's Bullish Harami: a small up candle after a down candle.

    The previous candle must be a down candle; the current candle must open
    below the previous close, close above its own open, and have a body
    strictly smaller than ``body_ratio_threshold`` times the previous body.

    NOTE(review): a textbook harami opens inside the previous body, whereas
    this requires the open below the previous close; confirm intent.

    Args:
        df: DataFrame with 'open' and 'close' columns.
        body_ratio_threshold: Upper bound on the current body size, expressed
            as a fraction of the previous candle's body size.

    Returns:
        pandas.Series: Boolean Series indicating Bullish Harami patterns.
    """
    opens, closes = df['open'], df['close']
    prior_open, prior_close = opens.shift(1), closes.shift(1)

    prior_body = (prior_close - prior_open).abs()
    this_body = (closes - opens).abs()

    prior_down = prior_open > prior_close
    opens_lower = opens < prior_close
    current_up = closes > opens
    body_is_small = this_body < body_ratio_threshold * prior_body
    return prior_down & opens_lower & current_up & body_is_small


def do_calculate_bearish_harami(df: pd.DataFrame, body_ratio_threshold: float = 0.5) -> pd.Series:
    """
    Detects this module's Bearish Harami: a small down candle after an up candle.

    The previous candle must be an up candle; the current candle must open
    above the previous close, close below its own open, and have a body
    strictly smaller than ``body_ratio_threshold`` times the previous body.

    NOTE(review): a textbook harami opens inside the previous body, whereas
    this requires the open above the previous close; confirm intent.

    Args:
        df: DataFrame with 'open' and 'close' columns.
        body_ratio_threshold: Upper bound on the current body size, expressed
            as a fraction of the previous candle's body size.

    Returns:
        pandas.Series: Boolean Series indicating Bearish Harami patterns.
    """
    opens, closes = df['open'], df['close']
    prior_open, prior_close = opens.shift(1), closes.shift(1)

    prior_body = (prior_close - prior_open).abs()
    this_body = (closes - opens).abs()

    prior_up = prior_open < prior_close
    opens_higher = opens > prior_close
    current_down = closes < opens
    body_is_small = this_body < body_ratio_threshold * prior_body
    return prior_up & opens_higher & current_down & body_is_small

"""
Key Improvements:

Parameterization: The functions now include parameters to control the sensitivity of the pattern detection (e.g., upper_wick_threshold, body_ratio_threshold). This allows for customization based on your specific needs and data characteristics.
Clearer Logic: The code is structured for better readability and maintainability.
More Robust Checks: The conditions for pattern detection are more refined, reducing the likelihood of false positives.
Remember to always visually inspect the charts to confirm the patterns detected by these functions and use caution in interpreting the results. These functions provide an automated method, but should be used within a wider context of technical analysis. Do not use them solely as the basis of your trading decisions.
"""

def do_calculate_long_legged_doji(df: pd.DataFrame, wick_body_ratio: float = 2.0) -> pd.Series:
    """
    Detects Long Legged Doji pattern.

    A row matches when the body is non-zero and the combined length of the
    upper and lower wicks is at least ``wick_body_ratio`` times the body.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns.
        wick_body_ratio: Minimum ratio of the sum of upper and lower wicks to the body size.

    Returns:
        pandas.Series: Boolean Series indicating Long Legged Doji patterns.
    """
    body = (df['close'] - df['open']).abs()
    # Row-wise max/min: the builtin max()/min() cannot compare two Series and
    # raise "truth value of a Series is ambiguous".
    body_top = df[['open', 'close']].max(axis=1)
    body_bottom = df[['open', 'close']].min(axis=1)
    upper_wick = df['high'] - body_top
    lower_wick = body_bottom - df['low']

    is_long_legged_doji = (body > 0) & ((upper_wick + lower_wick) >= wick_body_ratio * body)

    return is_long_legged_doji


def do_calculate_spinning_top(df: pd.DataFrame, body_range_ratio: float = 0.1) -> pd.Series:
    """
    Detects Spinning Top pattern.

    A row matches when the body is non-zero and no larger than
    ``body_range_ratio`` times the candle's high-low range.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns.
        body_range_ratio: Maximum ratio of body size to the total candle range (high - low).

    Returns:
        pandas.Series: Boolean Series indicating Spinning Top patterns.
    """
    candle_span = df['high'] - df['low']
    body_size = (df['close'] - df['open']).abs()

    has_body = body_size > 0
    body_is_small = body_size <= body_range_ratio * candle_span
    return has_body & body_is_small


def do_calculate_abandoned_baby(df: pd.DataFrame, gap_threshold: float = 0.01) -> pd.Series:
    """
    Detects Abandoned Baby pattern (both bullish and bearish), simplified.

    Bullish leg: open below the previous close, close above the open, low
    above the previous high, and high below the previous low plus the
    previous range scaled by ``1 + gap_threshold``.
    Bearish leg: open above the previous close, close below the open, high
    below the previous low, and low above the previous high minus the
    previous range scaled by ``1 + gap_threshold``.

    NOTE(review): each leg requires the open to sit on the opposite side of
    the previous close from the gap, which well-formed OHLC bars are unlikely
    to satisfy; confirm the intended conditions.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns.
        gap_threshold: Extra allowance, as a fraction of the previous candle's
            range, applied to the far-side bound of each leg.

    Returns:
        pandas.Series: Boolean Series, True when either leg matches.
    """
    opens, closes = df['open'], df['close']
    highs, lows = df['high'], df['low']

    prior_close = closes.shift(1)
    prior_top = highs.shift(1)
    prior_bottom = lows.shift(1)
    # Previous range stretched by the allowance; shared by both legs.
    stretched_range = (prior_top - prior_bottom) * (1 + gap_threshold)

    bullish_leg = (
        (opens < prior_close)
        & (closes > opens)
        & (lows > prior_top)
        & (highs < prior_bottom + stretched_range)
    )
    bearish_leg = (
        (opens > prior_close)
        & (closes < opens)
        & (highs < prior_bottom)
        & (lows > prior_top - stretched_range)
    )
    return bullish_leg | bearish_leg


def do_calculate_bullish_kicking_by(df: pd.DataFrame, gap_threshold: float = 0.02) -> pd.Series:
    """
    Detects Bullish Kicking-By pattern. This is a simplified version.

    A row matches when the candle opens below the previous close, closes
    above the previous close, and closes above its own open.

    Args:
        df: DataFrame with 'open' and 'close' columns.
        gap_threshold: Accepted for interface compatibility; the current
            logic does not use it.

    Returns:
        pandas.Series: Boolean Series indicating Bullish Kicking-By patterns.
    """
    opens, closes = df['open'], df['close']
    prior_close = closes.shift(1)
    return opens.lt(prior_close) & closes.gt(prior_close) & closes.gt(opens)


def do_calculate_bearish_kicking_by(df: pd.DataFrame, gap_threshold: float = 0.02) -> pd.Series:
    """
    Detects Bearish Kicking-By pattern. This is a simplified version.

    A row matches when the candle opens above the previous close, closes
    below the previous close, and closes below its own open.

    Args:
        df: DataFrame with 'open' and 'close' columns.
        gap_threshold: Accepted for interface compatibility; the current
            logic does not use it.

    Returns:
        pandas.Series: Boolean Series indicating Bearish Kicking-By patterns.
    """
    opens, closes = df['open'], df['close']
    prior_close = closes.shift(1)
    return opens.gt(prior_close) & closes.lt(prior_close) & closes.lt(opens)


"""
Enhancements:

More Parameters: Added parameters for better tunability and control over the pattern detection sensitivity.
Improved Logic: Refined the logic for certain patterns (e.g., abandoned_baby) to be more precise. However, even these improved versions are simplifications of complex patterns.
Readability: Improved variable names and code structuring for better clarity.
Remember to carefully evaluate and test these functions with your data. Adjust parameters as needed to fit your strategy and risk tolerance. Always visually inspect charts to confirm pattern identification and use these functions as one component of a more comprehensive technical analysis approach, not the sole determinant of trading actions.
"""

def do_calculate_above_the_stomach(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for the "Above the Stomach" pattern; it never signals.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once the pattern definition and its parameters are settled.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal


def do_calculate_advance_block(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for the "Advance Block" pattern; it never signals.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once the pattern definition and its parameters are settled.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal



def do_calculate_below_the_stomach(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for the "Below the Stomach" pattern; it never signals.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once the pattern definition and its parameters are settled.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal



def do_calculate_concealing_baby_swallow(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for the Concealing Baby Swallow pattern; it never signals.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once the pattern definition and its parameters are settled.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal



def do_calculate_deliberation(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for the Deliberation pattern; it never signals.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once the pattern definition and its parameters are settled.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal



def do_calculate_downside_gap_three_methods(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for the Downside Gap Three Methods pattern; it never signals.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once the pattern definition and its parameters are settled.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal



def do_calculate_downside_tasuki_gap(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for the Downside Tasuki Gap pattern; it never signals.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once the pattern definition and its parameters are settled.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal



def do_calculate_event_patterns(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for Event Patterns; it never signals.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once a definition exists; this would likely involve external data.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal


def do_calculate_falling_3_methods(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for the Falling 3 Methods pattern; it never signals.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once the pattern definition and its parameters are settled.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal



def do_calculate_falling_window(df: pd.DataFrame) -> pd.Series:
    """
    Detects Falling Window pattern: a gap down between consecutive bars.

    A row matches when its high is strictly below the previous bar's low, so
    the two ranges do not overlap. This replaces the earlier stub that always
    returned False.

    Args:
        df: DataFrame with 'high' and 'low' columns.

    Returns:
        pandas.Series: Boolean Series indicating Falling Window patterns; the
        first row is False because it has no previous bar.
    """
    return df['high'] < df['low'].shift(1)


def do_calculate_hikkake(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for the Hikkake pattern; it never signals.

    A real implementation needs a robust definition for both the bullish and
    the bearish variant.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once the pattern definition and its parameters are settled.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal


def do_calculate_homing_pigeon(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for the Homing Pigeon pattern; it never signals.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once the pattern definition and its parameters are settled.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal



def do_calculate_identical_three_crows(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for the Identical Three Crows pattern; it never signals.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once the pattern definition and its parameters are settled.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal



def do_calculate_in_neck(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for the In Neck pattern; it never signals.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once the pattern definition and its parameters are settled.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal



def do_calculate_ladder_bottom(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for the Ladder Bottom pattern; it never signals.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once the pattern definition and its parameters are settled.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal


def do_calculate_last_engulfing_bottom(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for the Last Engulfing Bottom pattern; it never signals.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once the pattern definition and its parameters are settled.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal


def do_calculate_last_engulfing_top(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder for the Last Engulfing Top pattern; it never signals.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series indexed like ``df``.
    """
    # TODO: implement once the pattern definition and its parameters are settled.
    no_signal = pd.Series(data=False, index=df.index)
    return no_signal


def do_calculate_matching_low(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Matching Low pattern.

    No detection rule is implemented here, so every row is reported as not matching.

    NOTE: a later definition of ``do_calculate_matching_low`` in this file rebinds
    the name, so this placeholder is shadowed once the whole file has run.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: remove or merge with the later implementation of the same name.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_mat_hold(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Mat Hold pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_on_neck(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the On Neck pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_rickshaw_man(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Rickshaw Man pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_rising_3_methods(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Rising 3 Methods pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_rising_window(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Rising Window pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_stick_sandwich(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Stick Sandwich pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_takuri_line(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Takuri Line pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_three_outside_down(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Three Outside Down pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_three_outside_up(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Three Outside Up pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_three_stars_in_the_south(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Three Stars in the South pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_thrusting(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Thrusting pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_tweezers_bottom(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Tweezers Bottom pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_tweezers_top(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Tweezers Top pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_two_black_gapping(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Two Black Gapping pattern.

    No detection rule is implemented here, so every row is reported as not matching.

    NOTE: a later definition of ``do_calculate_two_black_gapping`` in this file
    rebinds the name, so this placeholder is shadowed once the whole file has run.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: remove or merge with the later implementation of the same name.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_upside_gap_three_methods(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Upside Gap Three Methods pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_upside_gap_two_crows(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Upside Gap Two Crows pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_upside_tasuki_gap(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Upside Tasuki Gap pattern.

    No detection rule is implemented here, so every row is reported as not matching.

    NOTE: a later definition of ``do_calculate_upside_tasuki_gap`` in this file
    rebinds the name, so this placeholder is shadowed once the whole file has run.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: remove or merge with the later implementation of the same name.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches


def do_calculate_unique_three_river_bottom(df: pd.DataFrame) -> pd.Series:
    """
    Placeholder detector for the Unique Three River Bottom pattern.

    No detection rule is implemented yet, so every row is reported as not matching.

    Args:
        df: DataFrame whose index is reused for the result.

    Returns:
        pandas.Series: All-False Series aligned to ``df.index``.
    """
    # TODO: replace with real detection logic once the pattern rules and parameters are defined.
    no_matches = pd.Series(data=False, index=df.index)
    return no_matches



"""
Explanation:

Many of the candlestick patterns listed require more complex logic and often rely on subjective interpretation. The placeholders above highlight this. To create functional implementations, you would need to research the precise definition of each pattern and then translate that definition into robust Python code that accurately identifies the patterns within your OHLCV data. This would likely involve many additional parameters to control sensitivity and handle variations in the pattern formations. Furthermore, some patterns may require context beyond a single candle or two and thus need to incorporate analysis of prior candle formations.
"""

def do_calculate_bearish_three_line_strike(df: pd.DataFrame, gap_threshold: float = 0.02) -> pd.Series:
    """
    Flags rows that complete three consecutive bearish candles that each gap down.

    A row is flagged when it and the two rows before it each (a) open below the
    preceding row's close scaled by ``1 - gap_threshold`` and (b) close below
    their own open. Rows without three prior rows compare against NaN and are
    therefore never flagged.

    Args:
        df: DataFrame with 'open' and 'close' columns.
        gap_threshold: Fractional gap required between an open and the previous close.

    Returns:
        pandas.Series: Boolean Series, True where all six conditions hold.
    """
    opens = df['open']
    closes = df['close']
    gap_multiplier = 1 - gap_threshold

    def gapped_down_bearish(lag: int) -> pd.Series:
        # Candle `lag` rows back: opens under the scaled prior close and closes under its own open.
        candle_open = opens.shift(lag) if lag else opens
        candle_close = closes.shift(lag) if lag else closes
        return (candle_open < closes.shift(lag + 1) * gap_multiplier) & (candle_close < candle_open)

    return gapped_down_bearish(0) & gapped_down_bearish(1) & gapped_down_bearish(2)


def do_calculate_bullish_three_line_strike(df: pd.DataFrame, gap_threshold: float = 0.02) -> pd.Series:
    """
    Flags rows that complete three consecutive bullish candles that each gap up.

    A row is flagged when it and the two rows before it each (a) open above the
    preceding row's close scaled by ``1 + gap_threshold`` and (b) close above
    their own open. Rows without three prior rows compare against NaN and are
    therefore never flagged.

    Args:
        df: DataFrame with 'open' and 'close' columns.
        gap_threshold: Fractional gap required between an open and the previous close.

    Returns:
        pandas.Series: Boolean Series, True where all six conditions hold.
    """
    opens = df['open']
    closes = df['close']
    gap_multiplier = 1 + gap_threshold

    def gapped_up_bullish(lag: int) -> pd.Series:
        # Candle `lag` rows back: opens over the scaled prior close and closes over its own open.
        candle_open = opens.shift(lag) if lag else opens
        candle_close = closes.shift(lag) if lag else closes
        return (candle_open > closes.shift(lag + 1) * gap_multiplier) & (candle_close > candle_open)

    return gapped_up_bullish(0) & gapped_up_bullish(1) & gapped_up_bullish(2)


def do_calculate_three_black_crows(df: pd.DataFrame, body_threshold: float = 0.2) -> pd.Series:
    """
    Flags rows that complete three consecutive bearish candles with sizeable bodies.

    Each of the three candles (the current row and the two before it) must close
    below its open and have a body strictly larger than ``body_threshold`` times
    its own high-low range.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns.
        body_threshold: Minimum ratio of body size to candle range for each candle.

    Returns:
        pandas.Series: Boolean Series, True where all three candles qualify.
    """
    def solid_bearish_candle(lag: int) -> pd.Series:
        # Pull the candle `lag` rows back; lag 0 is the current row, unshifted.
        def column(label: str) -> pd.Series:
            return df[label].shift(lag) if lag else df[label]

        candle_open = column('open')
        candle_close = column('close')
        candle_range = column('high') - column('low')
        body = abs(candle_close - candle_open)
        return (candle_close < candle_open) & (body > body_threshold * candle_range)

    return solid_bearish_candle(0) & solid_bearish_candle(1) & solid_bearish_candle(2)



def do_calculate_evening_star(df: pd.DataFrame, body_threshold: float = 0.1) -> pd.Series:
    """
    Detects a simplified Evening Star: two bullish candles followed by a bearish one.

    The current (bearish) candle's body must be strictly larger than
    ``body_threshold`` times the body of the candle immediately before it.

    Args:
        df: DataFrame with 'open' and 'close' columns.
        body_threshold: Minimum ratio of the current body to the previous candle's body.

    Returns:
        pandas.Series: Boolean Series, True on the row that completes the pattern.
    """
    opens = df['open']
    closes = df['close']
    prev_open = opens.shift(1)
    prev_close = closes.shift(1)

    first_is_bullish = closes.shift(2) > opens.shift(2)
    second_is_bullish = prev_close > prev_open
    third_is_bearish = closes < opens

    current_body = abs(closes - opens)
    previous_body = abs(prev_close - prev_open)
    body_is_large_enough = current_body > body_threshold * previous_body

    return first_is_bullish & second_is_bullish & third_is_bearish & body_is_large_enough


def do_calculate_upside_tasuki_gap(df: pd.DataFrame, gap_threshold: float = 0.01) -> pd.Series:
    """
    Detects a simplified Upside Tasuki Gap: a bearish candle that opens above the prior close.

    The distance between the current open and the previous close must be
    strictly larger than ``gap_threshold`` times the previous candle's
    high-low range.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns.
        gap_threshold: Minimum gap size as a fraction of the previous candle's range.

    Returns:
        pandas.Series: Boolean Series, True where the conditions hold.
    """
    current_open = df['open']
    previous_close = df['close'].shift(1)
    previous_range = df['high'].shift(1) - df['low'].shift(1)

    opens_above_prior_close = current_open > previous_close
    closes_below_open = df['close'] < current_open
    gap_is_large_enough = current_open - previous_close > gap_threshold * previous_range

    return opens_above_prior_close & closes_below_open & gap_is_large_enough


def do_calculate_inverted_hammer(df: pd.DataFrame, wick_body_ratio: float = 2.0) -> pd.Series:
    """
    Detects Inverted Hammer pattern.

    A row is flagged when its body is non-zero, its upper wick is at least
    ``wick_body_ratio`` times the body, and its lower wick is at most half the body.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns.
        wick_body_ratio: Minimum ratio of upper wick length to body size.

    Returns:
        pandas.Series: Boolean Series indicating Inverted Hammer patterns.
    """
    body = (df['close'] - df['open']).abs()

    # Row-wise max/min of open and close. The builtin max()/min() cannot be used
    # here: they evaluate the truth value of a Series comparison and raise ValueError.
    body_edges = df[['open', 'close']]
    upper_wick = df['high'] - body_edges.max(axis=1)
    lower_wick = body_edges.min(axis=1) - df['low']

    is_inverted_hammer = (body > 0) & (upper_wick >= wick_body_ratio * body) & (lower_wick <= body / 2)

    return is_inverted_hammer



def do_calculate_matching_low(df: pd.DataFrame, low_diff_threshold: float = 0.001) -> pd.Series:
    """
    Detects Matching Low pattern: the current low is nearly equal to the previous low.

    Args:
      df: DataFrame with a 'low' column.
      low_diff_threshold: Maximum absolute difference between the current and
        previous lows, expressed as a fraction of the current low.

    Returns:
      pandas.Series: Boolean Series, True where the two lows match within tolerance.
        The first row has no previous low and is never flagged.
    """
    lows = df['low']
    distance_to_previous_low = abs(lows - lows.shift(1))
    tolerance = low_diff_threshold * lows

    return distance_to_previous_low <= tolerance



def do_calculate_bullish_abandoned_baby(df: pd.DataFrame, gap_threshold: float = 0.01) -> pd.Series:
    """
    Detects Bullish Abandoned Baby pattern (simplified, two-candle form).

    A row is flagged when all of the following hold:
      * it opens below the previous close,
      * it closes above its own open,
      * its low is above the previous high,
      * its high is below ``prev_low + prev_range * (1 + gap_threshold)``.

    NOTE(review): for consistent OHLC rows (low <= open, prev close <= prev high)
    the first and third conditions cannot both be true, so this looks like it can
    never fire on real data. Confirm the intended rule before relying on it.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns.
        gap_threshold: Fraction of the previous candle's range added to the upper bound on the high.

    Returns:
        pandas.Series: Boolean Series indicating Bullish Abandoned Baby patterns.
    """
    previous_high = df['high'].shift(1)
    previous_low = df['low'].shift(1)
    previous_close = df['close'].shift(1)
    high_ceiling = previous_low + (previous_high - previous_low) * (1 + gap_threshold)

    checks = (
        df['open'] < previous_close,
        df['close'] > df['open'],
        df['low'] > previous_high,
        df['high'] < high_ceiling,
    )

    combined = checks[0]
    for check in checks[1:]:
        combined = combined & check
    return combined


def do_calculate_two_black_gapping(df: pd.DataFrame, gap_threshold: float = 0.01) -> pd.Series:
    """
    Detects Two Black Gapping pattern (simplified).

    A row is flagged when it and the previous row are both bearish candles that
    open below the preceding close, and the current gap down (previous close
    minus current open) is strictly larger than ``gap_threshold`` times the
    previous candle's high-low range.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns.
        gap_threshold: Minimum gap size as a fraction of the previous candle's range.

    Returns:
        pandas.Series: Boolean Series indicating Two Black Gapping patterns.
    """
    previous_open = df['open'].shift(1)
    previous_close = df['close'].shift(1)
    previous_range = df['high'].shift(1) - df['low'].shift(1)

    current_gaps_down = df['open'] < previous_close
    current_is_bearish = df['close'] < df['open']
    previous_gaps_down = previous_open < df['close'].shift(2)
    previous_is_bearish = previous_close < previous_open

    # The gap is measured downward (previous close - open). Measuring it as
    # open - previous close is always negative for a gap down, which made the
    # size check contradict `current_gaps_down` and the pattern impossible.
    gap_size = previous_close - df['open']
    gap_is_large_enough = gap_size > gap_threshold * previous_range

    is_two_black_gapping = (
        current_gaps_down
        & current_is_bearish
        & previous_gaps_down
        & previous_is_bearish
        & gap_is_large_enough
    )

    return is_two_black_gapping



def do_calculate_bearish_breakaway(df: pd.DataFrame, range_threshold: float = 0.03) -> pd.Series:
    """
    Detects Bearish Breakaway pattern (simplified): a close well below the previous low.

    A row is flagged when its close is strictly below the previous low minus
    ``range_threshold`` times the previous candle's high-low range.

    Args:
        df: DataFrame with 'high', 'low' and 'close' columns.
        range_threshold: Fraction of the previous range the close must fall beyond the previous low.

    Returns:
        pandas.Series: Boolean Series indicating Bearish Breakaway patterns.
    """
    previous_low = df['low'].shift(1)
    previous_span = df['high'].shift(1) - previous_low
    breakdown_level = previous_low - range_threshold * previous_span

    return df['close'] < breakdown_level

"""
Important Notes:

Simplifications: These functions are simplified representations of the patterns. Real-world identification often needs more complex logic and potentially additional conditions. Fine-tuning parameters will be critical for accurate pattern detection in your specific data.
Parameter Tuning: Experiment with different values for parameters (gap_threshold, body_threshold, wick_body_ratio, etc.) to optimize performance for your data.
Visual Verification: Always visually confirm the patterns identified by the functions. Automated detection can produce false positives or negatives.
Context Matters: Candlestick patterns are most useful in conjunction with other technical indicators and analysis. Do not base trading decisions solely on automated pattern recognition.
Remember to thoroughly test these functions with your own data and refine them as needed. These functions are provided as a starting point; you will likely need to further adapt them for optimal performance within your trading strategy.
"""





'\nImportant Notes:\n\nSimplifications: These functions are simplified representations of the patterns. Real-world identification often needs more complex logic and potentially additional conditions. Fine-tuning parameters will be critical for accurate pattern detection in your specific data.\nParameter Tuning: Experiment with different values for parameters (gap_threshold, body_threshold, wick_body_ratio, etc.) to optimize performance for your data.\nVisual Verification: Always visually confirm the patterns identified by the functions. Automated detection can produce false positives or negatives.\nContext Matters: Candlestick patterns are most useful in conjunction with other technical indicators and analysis. Do not base trading decisions solely on automated pattern recognition.\nRemember to thoroughly test these functions with your own data and refine them as needed. These functions are provided as a starting point; you will likely need to further adapt them for optimal performance 

In [16]:
class OptimizerDefinition:
    def __init__(self, name: str, parameters: typing.Dict[str, "ParameterType"], optimization_function, factory=None): 
        if not isinstance(name, str):
            raise TypeError("name must be a string")

        if not isinstance(parameters, dict):
            raise TypeError("parameters must be a dictionary")

        if not all(isinstance(param, ParameterType) for param in parameters.values()):
            raise TypeError("All values in parameters must be ParameterType objects")

        if len(set(parameters.keys())) != len(parameters.keys()): # Check for duplicate keys
            raise ValueError("Parameter names must be unique.")

        if not callable(calculation_function):
            raise TypeError("calculation_function must be callable")

        self.name = name
        self.parameters = parameters
        self.calculation_function = calculation_function
        self.factory = factory

    def create_optimizer(self, **kwargs: typing.Any):
        """
        Builds an OptimizerInstance from keyword arguments, validating every declared parameter.

        For each entry in ``self.parameters`` the value is taken from ``kwargs``;
        when it is missing or None, ``param_def.get_default()`` is used instead.
        Keyword arguments that are not declared parameters are ignored.

        Args:
            **kwargs: Parameter values keyed by parameter name.

        Returns:
            OptimizerInstance constructed from this definition's name, the
            resolved parameter values, and this definition.

        Raises:
            TypeError: If a value does not match the declared data type
                ("integer", "real", "boolean" or "string").
            ValueError: If a numeric value falls outside ``min_val``/``max_val``
                or a string value is not in ``allowed_strings``.
        """
        resolved = {}
        for name, param_def in self.parameters.items():
            value = kwargs.get(name)
            if value is None:
                value = param_def.get_default()

            declared_type = param_def.data_type

            # Type validation. bool is a subclass of int, so booleans pass the
            # "integer" and "real" checks.
            if declared_type == "integer":
                if not isinstance(value, int):
                    raise TypeError(f"Value for parameter '{name}' must be an integer")
            elif declared_type == "real":
                if not isinstance(value, (int, float)):
                    raise TypeError(f"Value for parameter '{name}' must be a number")
            elif declared_type == "boolean":
                if not isinstance(value, bool):
                    raise TypeError(f"Value for parameter '{name}' must be a boolean")
            elif declared_type == "string":
                if not isinstance(value, str):
                    raise TypeError(f"Value for parameter '{name}' must be a string")

            # Range validation applies to numeric parameters only; a bound of None means unbounded.
            if declared_type in ("integer", "real"):
                if param_def.min_val is not None and value < param_def.min_val:
                    raise ValueError(f"Value for parameter '{name}' must be greater than or equal to {param_def.min_val}")
                if param_def.max_val is not None and value > param_def.max_val:
                    raise ValueError(f"Value for parameter '{name}' must be less than or equal to {param_def.max_val}")

            # Whitelist validation applies to string parameters that declare allowed values.
            if declared_type == "string":
                if param_def.allowed_strings is not None and value not in param_def.allowed_strings:
                    raise ValueError(f"Value {value} is not in allowed strings for parameter {name}")

            resolved[name] = value

        return OptimizerInstance(self.name, resolved, self)

    def calculate(self, data: pd.DataFrame, params: typing.Dict[str, typing.Any]) -> pd.DataFrame:
        """
        Calculates the optimization using the provided data and parameters.
        """
        kwargs = params.copy() 
        return self.optimization_function(data, **kwargs)

    def __repr__(self):
        return f"OptimizerDefinition(name='{self.name}', parameters={self.parameters}, calculation_function={self.calculation_function.__name__ if hasattr(self.calculation_function, '__name__') else str(self.calculation_function)}, factory={self.factory})"