# http://pyparsing.wikispaces.com/Examples

In [1]:
#
# simpleArith.py
#
# Example of defining an arithmetic expression parser using
# the operatorGrammar helper method in pyparsing.
#
# Copyright 2006, by Paul McGuire
#

from pyparsing import *

integer = Word(nums).setParseAction(lambda t:int(t[0]))
variable = Word(alphas,exact=1)
operand = integer | variable

expop = Literal('^')
signop = oneOf('+ -')
multop = oneOf('* /')
plusop = oneOf('+ -')
factop = Literal('!')

# To use the operatorGrammar helper:
#   1.  Define the "atom" operand term of the grammar.
#       For this simple grammar, the smallest operand is either
#       and integer or a variable.  This will be the first argument
#       to the operatorGrammar method.
#   2.  Define a list of tuples for each level of operator
#       precendence.  Each tuple is of the form
#       (opExpr, numTerms, rightLeftAssoc, parseAction), where
#       - opExpr is the pyparsing expression for the operator;
#          may also be a string, which will be converted to a Literal
#       - numTerms is the number of terms for this operator (must
#          be 1 or 2)
#       - rightLeftAssoc is the indicator whether the operator is
#          right or left associative, using the pyparsing-defined
#          constants opAssoc.RIGHT and opAssoc.LEFT.
#       - parseAction is the parse action to be associated with 
#          expressions matching this operator expression (the
#          parse action tuple member may be omitted)
#   3.  Call operatorGrammar passing the operand expression and
#       the operator precedence list, and save the returned value
#       as the generated pyparsing expression.  You can then use
#       this expression to parse input strings, or incorporate it
#       into a larger, more complex grammar.
#       
expr = operatorPrecedence( operand,
    [("!", 1, opAssoc.LEFT),
     ("^", 2, opAssoc.RIGHT),
     (signop, 1, opAssoc.RIGHT),
     (multop, 2, opAssoc.LEFT),
     (plusop, 2, opAssoc.LEFT),]
    )

test = ["9 + 2 + 3",
        "9 + 2 * 3",
        "(9 + 2) * 3",
        "(9 + -2) * 3",
        "(9 + -2) * 3^2^2",
        "(9! + -2) * 3^2^2",
        "M*X + B",
        "M*(X + B)",
        "1+2*-3^4*5+-+-6",]
for t in test:
    print t
    print expr.parseString(t)
    print 



9 + 2 + 3
[[9, '+', 2, '+', 3]]

9 + 2 * 3
[[9, '+', [2, '*', 3]]]

(9 + 2) * 3
[[[9, '+', 2], '*', 3]]

(9 + -2) * 3
[[[9, '+', ['-', 2]], '*', 3]]

(9 + -2) * 3^2^2
[[[9, '+', ['-', 2]], '*', [3, '^', [2, '^', 2]]]]

(9! + -2) * 3^2^2
[[[[9, '!'], '+', ['-', 2]], '*', [3, '^', [2, '^', 2]]]]

M*X + B
[[['M', '*', 'X'], '+', 'B']]

M*(X + B)
[['M', '*', ['X', '+', 'B']]]

1+2*-3^4*5+-+-6
[[1, '+', [2, '*', ['-', [3, '^', 4]], '*', 5], '+', ['-', ['+', ['-', 6]]]]]



http://pyparsing.wikispaces.com/Examples?responseToken=9c8af14fabf3112cda9f3a483b425183

In [4]:
# excelExpr.py
#
# Copyright 2010, Paul McGuire
# 
# A partial implementation of a parser of Excel formula expressions.
#
from pyparsing import (CaselessKeyword, Suppress, Word, alphas, 
    alphanums, nums, Optional, Group, oneOf, Forward, Regex, 
    infixNotation, opAssoc, dblQuotedString, delimitedList, 
    Combine, Literal, QuotedString, ParserElement, pyparsing_common)
ParserElement.enablePackrat()

EQ, LPAR, RPAR, COLON, COMMA = map(Suppress, '=():,')
name = Word(alphas, alphanums)

cellRange = (Group(cellRef("start") + COLON + cellRef("end"))("range") 
                | cellRef | Word(alphas,alphanums))

expr = Forward()

COMPARISON_OP = oneOf("< = > >= <= != <>")
condExpr = expr + COMPARISON_OP + expr

ifFunc = (CaselessKeyword("if") - 
          LPAR + 
          Group(condExpr)("condition") + 
          COMMA + Group(expr)("if_true") + 
          COMMA + Group(expr)("if_false") + RPAR)

statFunc = lambda name : Group(CaselessKeyword(name) + Group(LPAR + delimitedList(expr) + RPAR))
sumFunc = statFunc("sum")
minFunc = statFunc("min")
maxFunc = statFunc("max")
aveFunc = statFunc("ave")
funcCall = ifFunc | sumFunc | minFunc | maxFunc | aveFunc

multOp = oneOf("* /")
addOp = oneOf("+ -")
numericLiteral = pyparsing_common.number
operand = numericLiteral | funcCall | cellRange | cellRef 
arithExpr = infixNotation(operand,
    [
    (multOp, 2, opAssoc.LEFT),
    (addOp, 2, opAssoc.LEFT),
    ])

textOperand = dblQuotedString | cellRef
textExpr = infixNotation(textOperand,
    [
    ('&', 2, opAssoc.LEFT),
    ])

expr << (arithExpr | textExpr)


(expr).runTests("""\
Ts_Skewness(Rank(volume),4)
    3*A7+5
    3*Sheet1!$A$7+5
    3*'Sheet 1'!$A$7+5"
    3*'O''Reilly''s sheet'!$A$7+5
    if(Sum(A1:A25)>42,Min(B1:B25),if(Sum(C1:C25)>3.14, (Min(C1:C25)+3)*18,Max(B1:B25)))
    sum(a1:a25,10,min(b1,c2,d3))
    if("T"&a2="TTime", "Ready", "Not ready")
""")


3*A7+5
[[[3, '*', 'A7'], '+', 5]]
[0]:
  [[3, '*', 'A7'], '+', 5]
  [0]:
    [3, '*', 'A7']
  [1]:
    +
  [2]:
    5


3*Sheet1!$A$7+5
[[[3, '*', 'Sheet1!$A$7'], '+', 5]]
[0]:
  [[3, '*', 'Sheet1!$A$7'], '+', 5]
  [0]:
    [3, '*', 'Sheet1!$A$7']
  [1]:
    +
  [2]:
    5


3*'Sheet 1'!$A$7+5"
                  ^
FAIL: Expected end of text (at char 18), (line:1, col:19)


3*'O''Reilly''s sheet'!$A$7+5
[[[3, '*', "O'Reilly's sheet!$A$7"], '+', 5]]
[0]:
  [[3, '*', "O'Reilly's sheet!$A$7"], '+', 5]
  [0]:
    [3, '*', "O'Reilly's sheet!$A$7"]
  [1]:
    +
  [2]:
    5


if(Sum(A1:A25)>42,Min(B1:B25),if(Sum(C1:C25)>3.14, (Min(C1:C25)+3)*18,Max(B1:B25)))
['if', [['sum', [['A1', 'A25']]], '>', 42], [['min', [['B1', 'B25']]]], ['if', [['sum', [['C1', 'C25']]], '>', 3.14], [[[['min', [['C1', 'C25']]], '+', 3], '*', 18]], [['max', [['B1', 'B25']]]]]]
- condition: [['sum', [['A1', 'A25']]], '>', 42]
  [0]:
    ['sum', [['A1', 'A25']]]
    [0]:
      sum
    [1]:
      [['A1', 'A25']]
      - 

(False,
 [('3*A7+5', ([([([3, '*', 'A7'], {}), '+', 5], {})], {})),
  ('3*Sheet1!$A$7+5', ([([([3, '*', 'Sheet1!$A$7'], {}), '+', 5], {})], {})),
  ('3*\'Sheet 1\'!$A$7+5"',
   pyparsing.ParseException('3*\'Sheet 1\'!$A$7+5"',
                            18,
                            'Expected end of text')),
  ("3*'O''Reilly''s sheet'!$A$7+5",
   ([([([3, '*', "O'Reilly's sheet!$A$7"], {}), '+', 5], {})], {})),
  ('if(Sum(A1:A25)>42,Min(B1:B25),if(Sum(C1:C25)>3.14, (Min(C1:C25)+3)*18,Max(B1:B25)))',
   (['if', ([(['sum', ([(['A1', 'A25'], {'start': ['A1'], 'end': ['A25']})], {'range': [(['A1', 'A25'], {'start': ['A1'], 'end': ['A25']})]})], {}), '>', 42], {}), ([(['min', ([(['B1', 'B25'], {'start': ['B1'], 'end': ['B25']})], {'range': [(['B1', 'B25'], {'start': ['B1'], 'end': ['B25']})]})], {})], {}), (['if', ([(['sum', ([(['C1', 'C25'], {'start': ['C1'], 'end': ['C25']})], {'range': [(['C1', 'C25'], {'start': ['C1'], 'end': ['C25']})]})], {}), '>', 3.14], {}), ([([([(['min', ([(['C

In [16]:
# fourFn.py
#
# Demonstration of the pyparsing module, implementing a simple 4-function expression parser,
# with support for scientific notation, and symbols for e and pi.
# Extended to add exponentiation and simple built-in functions.
# Extended test cases, simplified pushFirst method.
#
# Copyright 2003-2006 by Paul McGuire
#
from pyparsing import Literal,CaselessLiteral,Word,Combine,Group,Optional,\
    ZeroOrMore,Forward,nums,alphas
import math
import operator

exprStack = []

def pushFirst( strg, loc, toks ):
    exprStack.append( toks[0] )
def pushUMinus( strg, loc, toks ):
    if toks and toks[0]=='-': 
        exprStack.append( 'unary -' )
        #~ exprStack.append( '-1' )
        #~ exprStack.append( '*' )

def BNF():
    """
    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    point = Literal( "." )
    e     = CaselessLiteral( "E" )
    fnumber = Combine( Word( "+-"+nums, nums ) + 
                       Optional( point + Optional( Word( nums ) ) ) +
                       Optional( e + Word( "+-"+nums, nums ) ) )
    ident = Word(alphas, alphas+nums+"_$")

    plus  = Literal( "+" )
    minus = Literal( "-" )
    mult  = Literal( "*" )
    div   = Literal( "/" )
    lpar  = Literal( "(" ).suppress()
    rpar  = Literal( ")" ).suppress()
    addop  = plus | minus
    multop = mult | div
    expop = Literal( "^" )
    pi    = CaselessLiteral( "PI" )

    expr = Forward()
    atom = (Optional("-") + ( pi | e | fnumber | Optional(ident) + lpar + expr + rpar ) | ( lpar + expr.suppress() + rpar ))

    factor = Forward()
    factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) )

    term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) )
    expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) )
    bnf = expr
    return bnf

# map operator symbols to corresponding arithmetic operations
epsilon = 1e-12
opn = { "+" : operator.add,
        "-" : operator.sub,
        "*" : operator.mul,
        "/" : operator.truediv,
        "^" : operator.pow }
fn  = { "sin" : math.sin,
        "cos" : math.cos,
        "tan" : math.tan,
        "abs" : abs,
        "trunc" : lambda a: int(a),
        "round" : round,
        "sgn" : lambda a: abs(a)>epsilon and cmp(a,0) or 0}


if __name__ == "__main__":
    
    def test( s, expVal ):
        results = BNF().parseString( s )
        print s, results
        
    test( "9", 9 )
    test( "-9", -9 )
    test( "--9", 9 )
    test( "-E", -math.e )
    test( "9 + 3 + 6", 9 + 3 + 6 )
    test( "9 + 3 / 11", 9 + 3.0 / 11 )
    test( "(9 + 3)", (9 + 3) )
    test( "(9+3) / 11", (9+3.0) / 11 )
    test( "9 - 12 - 6", 9 - 12 - 6 )
    test( "9 - (12 - 6)", 9 - (12 - 6) )
    test( "2*3.14159", 2*3.14159 )
    test( "3.1415926535*3.1415926535 / 10", 3.1415926535*3.1415926535 / 10 )
    test( "PI * PI / 10", math.pi * math.pi / 10 )
    test( "PI*PI/10", math.pi*math.pi/10 )
    test( "PI^2", math.pi**2 )
    test( "round(PI^2)", round(math.pi**2) )
    test( "6.02E23 * 8.048", 6.02E23 * 8.048 )
    test( "e / 3", math.e / 3 )
    test( "sin(PI/2)", math.sin(math.pi/2) )
    test( "trunc(E)", int(math.e) )
    test( "trunc(-E)", int(-math.e) )
    test( "round(E)", round(math.e) )
    test( "round(-E)", round(-math.e) )
    test( "E^PI", math.e**math.pi )
    test( "2^3^2", 2**3**2 )
    test( "2^3+2", 2**3+2 )
    test( "2^9", 2**9 )
    test( "sgn(-2)", -1 )
    test( "sgn(0)", 0 )
    test( "sgn(0.1)", 1 )

9 ['9']
-9 ['-', '9']
--9 ['-', '-9']
-E ['-', 'E']
9 + 3 + 6 ['9', '+', '3', '+', '6']
9 + 3 / 11 ['9', '+', '3', '/', '11']
(9 + 3) ['9', '+', '3']
(9+3) / 11 ['9', '+', '3', '/', '11']
9 - 12 - 6 ['9', '-', '12', '-', '6']
9 - (12 - 6) ['9', '-', '12', '-', '6']
2*3.14159 ['2', '*', '3.14159']
3.1415926535*3.1415926535 / 10 ['3.1415926535', '*', '3.1415926535', '/', '10']
PI * PI / 10 ['PI', '*', 'PI', '/', '10']
PI*PI/10 ['PI', '*', 'PI', '/', '10']
PI^2 ['PI', '^', '2']
round(PI^2) ['round', 'PI', '^', '2']
6.02E23 * 8.048 ['6.02E23', '*', '8.048']
e / 3 ['E', '/', '3']
sin(PI/2) ['sin', 'PI', '/', '2']
trunc(E) ['trunc', 'E']
trunc(-E) ['trunc', '-', 'E']
round(E) ['round', 'E']
round(-E) ['round', '-', 'E']
E^PI ['E', '^', 'PI']
2^3^2 ['2', '^', '3', '^', '2']
2^3+2 ['2', '^', '3', '+', '2']
2^9 ['2', '^', '9']
sgn(-2) ['sgn', '-', '2']
sgn(0) ['sgn', '0']
sgn(0.1) ['sgn', '0.1']


# My parser

In [76]:
# excelExpr.py
#
# Copyright 2010, Paul McGuire
# 
# A partial implementation of a parser of Excel formula expressions.
#
from pyparsing import (CaselessKeyword, Suppress, Word, alphas, 
    alphanums, nums, Optional, Group, oneOf, Forward, Regex, 
    infixNotation, opAssoc, dblQuotedString, delimitedList, 
    Combine, Literal, QuotedString, ParserElement, pyparsing_common)
ParserElement.enablePackrat()

EQ, LPAR, RPAR, COLON, COMMA, QUESTION = map(Suppress, '=():,?')
name = Word(alphas, alphanums + '_')

expr = Forward()

COMPARISON_OP = oneOf("< = > >= <= != <>")
condExpr = expr + COMPARISON_OP + expr

ifFunc = (LPAR + Group(condExpr)("condition") + 
          QUESTION + Group(expr)("if_true") + 
          COLON+ Group(expr)("if_false") + RPAR)('if_func')

anyFunc = Group(name('name') + Group(LPAR + delimitedList(expr) + RPAR)('args'))('func_call')
funcCall = ifFunc | anyFunc

unminusOp = oneOf('-')
expOp = oneOf('^')
multOp = oneOf("* /")
addOp = oneOf("+ -")
numericLiteral = pyparsing_common.number
operand = Optional(unminusOp) + (numericLiteral | funcCall | name)
arithExpr = infixNotation(operand,
    [
    (expOp, 2, opAssoc.RIGHT),
    (multOp, 2, opAssoc.LEFT),
    (addOp, 2, opAssoc.LEFT),
    ])


expr << arithExpr


#(expr).runTests("""\
#(a < r ? b : c)
#a * b + c * d
#-1
#-f(x)
#Rank(4)
#Rank(voulme)
#TsSkewness(Rank(volume),4)
#0.389729901479*scale(0.773807704921*eps+0.106782237546*scale(0.462155132918*rank(0.109882077612*0.366904800709*scale(ts_rank(volume, 10))+0.0921731335025*volume/adv20+1.20468126512*rank(ts_rank(volume/adv20, 10)))+0.335781326658*scale((-1*correlation(open,volume,10)))))+0.610270098521*rank(-rank(capex/revenue))
#""")
#res = expr.parseString('f(g(c), j(v))')
#res = expr.parseString('a * g(h, k)')
res1 = expr.parseString('scale(0.462155132918*rank(0.109882077612*0.366904800709*scale(ts_rank(volume, 10))+0.335781326658*scale((0-1*correlation(open,volume,10)))))+0.610270098521*rank(0-rank(capex/revenue))')
res2 = expr.parseString('((((rank((1/close))*volume)/adv20)*((high*rank((high-close)))/(sum(high,5)/5)))-rank((vwap-delay(vwap,5))))')
#res = expr.parseString('0.389729901479*scale(4/capex)')
#res = expr.parseString('2 * 3 *7 * (1 * 2 + 2)')

In [77]:
res

([([(['scale', ([([0.462155132918, '*', (['rank', ([([([0.109882077612, '*', 0.366904800709, '*', (['scale', ([(['ts_rank', (['volume', 10.0], {})], {'args': [((['volume', 10.0], {}), 1)], 'name': [('ts_rank', 0)]})], {'func_call': [((['ts_rank', (['volume', 10.0], {})], {'args': [((['volume', 10.0], {}), 1)], 'name': [('ts_rank', 0)]}), 0)]})], {'args': [(([(['ts_rank', (['volume', 10.0], {})], {'args': [((['volume', 10.0], {}), 1)], 'name': [('ts_rank', 0)]})], {'func_call': [((['ts_rank', (['volume', 10.0], {})], {'args': [((['volume', 10.0], {}), 1)], 'name': [('ts_rank', 0)]}), 0)]}), 1)], 'name': [('scale', 0)]})], {'func_call': [((['scale', ([(['ts_rank', (['volume', 10.0], {})], {'args': [((['volume', 10.0], {}), 1)], 'name': [('ts_rank', 0)]})], {'func_call': [((['ts_rank', (['volume', 10.0], {})], {'args': [((['volume', 10.0], {}), 1)], 'name': [('ts_rank', 0)]}), 0)]})], {'args': [(([(['ts_rank', (['volume', 10.0], {})], {'args': [((['volume', 10.0], {}), 1)], 'name': [('ts_

In [95]:
def count_vertex(res):
    c = 0
    for r in res:
        if r == res:
            c = 1
            break
        c += count_vertex(r)
    print res, c
    return c
def dict_tree(res):
    if isinstance(res, float) or isinstance(res, int) or isinstance(res, str):
        return {'type': 'const', 'value': res}
    if len(res) == 1:
        n = res[0]
        if n == res:
            return {'type': 'variable', 'value': str(res)}
        else:
            return dict_tree(n)
    if len(res) == 2:
        print res[1]
        return { 'type': 'call', 'name': res[0],  'args': [dict_tree(r) for r in res[1]]}
    if len(res) % 2 == 1:
        return {'type': 'infix', 'name': res[1], 'args': [dict_tree(res[2 * k]) for k in range((len(res) + 1) / 2)]}
    raise ValueError('WTF')
    
def compile_alpha(d):
    if not d:
        return ''
    if d['type'] == 'infix':
        return d['name'].join(['(' + compile_alpha(copy.deepcopy(arg)) + ')' for arg in d['args']])
    elif d['type'] == 'call':
        return d['name'] + '(' + ','.join([compile_alpha(copy.deepcopy(arg)) for arg in d['args']]) + ')'
    elif d['type'] == 'const' or d['type'] == 'variable':
        return str(d['value'])

In [96]:
d1 = dict_tree(res1)
d2 = dict_tree(res2)

[[0.462155132918, '*', ['rank', [[[0.109882077612, '*', 0.366904800709, '*', ['scale', [['ts_rank', ['volume', 10.0]]]]], '+', [0.335781326658, '*', ['scale', [[0.0, '-', [1.0, '*', ['correlation', ['open', 'volume', 10.0]]]]]]]]]]]]
[[[0.109882077612, '*', 0.366904800709, '*', ['scale', [['ts_rank', ['volume', 10.0]]]]], '+', [0.335781326658, '*', ['scale', [[0.0, '-', [1.0, '*', ['correlation', ['open', 'volume', 10.0]]]]]]]]]
[['ts_rank', ['volume', 10.0]]]
['volume', 10.0]
[[0.0, '-', [1.0, '*', ['correlation', ['open', 'volume', 10.0]]]]]
['open', 'volume', 10.0]
[[0.0, '-', ['rank', [['capex', '/', 'revenue']]]]]
[['capex', '/', 'revenue']]
[[1.0, '/', 'close']]
[['high', '-', 'close']]
['high', 5.0]
[['vwap', '-', ['delay', ['vwap', 5.0]]]]
['vwap', 5.0]


In [97]:
d2

{'args': [{'args': [{'args': [{'args': [{'args': [{'args': [{'type': 'const',
             'value': 1.0},
            {'type': 'const', 'value': 'close'}],
           'name': '/',
           'type': 'infix'}],
         'name': 'rank',
         'type': 'call'},
        {'type': 'const', 'value': 'volume'}],
       'name': '*',
       'type': 'infix'},
      {'type': 'const', 'value': 'adv20'}],
     'name': '/',
     'type': 'infix'},
    {'args': [{'args': [{'type': 'const', 'value': 'high'},
        {'args': [{'args': [{'type': 'const', 'value': 'high'},
            {'type': 'const', 'value': 'close'}],
           'name': '-',
           'type': 'infix'}],
         'name': 'rank',
         'type': 'call'}],
       'name': '*',
       'type': 'infix'},
      {'args': [{'args': [{'type': 'const', 'value': 'high'},
          {'type': 'const', 'value': 5.0}],
         'name': 'sum',
         'type': 'call'},
        {'type': 'const', 'value': 5.0}],
       'name': '/',
       'type': 'inf

In [98]:
print compile_alpha(d1)
print compile_alpha(d2)

(scale((0.462155132918)*(rank(((0.109882077612)*(0.366904800709)*(scale(ts_rank(volume,10.0))))+((0.335781326658)*(scale((0.0)-((1.0)*(correlation(open,volume,10.0))))))))))+((0.610270098521)*(rank((0.0)-(rank((capex)/(revenue))))))
((((rank((1.0)/(close)))*(volume))/(adv20))*(((high)*(rank((high)-(close))))/((sum(high,5.0))/(5.0))))-(rank((vwap)-(delay(vwap,5.0))))


In [99]:
from numpy import random
import copy
def mix_trees_(node1, node2, path1, path2, node_pairs, prob=0.1):
    if (path1, path2) in node_pairs or node1['type'] == 'const' or node2['type'] == 'const':
        return False
    node_pairs.add((path1, path2))
    if node1['name'] == node2['name']:
        args_num = len(node1['args'])
        node1['args'][random.randint(0,args_num)] = node2['args'][random.randint(0,args_num)]
        return True
    else:
        for child1 in node1['args']:
            if mix_trees_(child1, node2, path1+str(id(child1)), path2, node_pairs):
                    return True
        for child2 in node2['args']:
            if mix_trees_(node1, child2, path1, path2+str(id(child2)), node_pairs):
                    return True
                
        return False
    
def mix_trees(node1, node2):
    for i in range(100):
        path1, path2 = "", ""
        node_pairs = set({})
        if mix_trees_(node1, node2, path1, path2, node_pairs):
            break
    node1 = {
        'args': [copy.deepcopy(node1), node2],
        'name': '+',
        'type': 'infix',
    }

In [100]:
mix_trees(d1, d2)

In [105]:
d1

{'args': [{'args': [{'args': [{'type': 'const', 'value': 0.462155132918},
      {'args': [{'args': [{'args': [{'args': [{'args': [{'type': 'const',
                 'value': 'high'},
                {'args': [{'args': [{'type': 'const', 'value': 'high'},
                    {'type': 'const', 'value': 'close'}],
                   'name': '-',
                   'type': 'infix'}],
                 'name': 'rank',
                 'type': 'call'}],
               'name': '*',
               'type': 'infix'},
              {'args': [{'args': [{'type': 'const', 'value': 'high'},
                  {'type': 'const', 'value': 5.0}],
                 'name': 'sum',
                 'type': 'call'},
                {'type': 'const', 'value': 5.0}],
               'name': '/',
               'type': 'infix'}],
             'name': '/',
             'type': 'infix'},
            {'type': 'const', 'value': 0.366904800709},
            {'args': [{'args': [{'type': 'const', 'value': 'volume'},
     

In [106]:
compile_alpha(d1)

'(scale((0.462155132918)*(rank(((((high)*(rank((high)-(close))))/((sum(high,5.0))/(5.0)))*(0.366904800709)*(scale(ts_rank(volume,10.0))))+((0.335781326658)*(scale((0.0)-((1.0)*(correlation(open,volume,10.0))))))))))+((0.610270098521)*(rank((0.0)-(rank((capex)/(revenue))))))'

In [6]:
import numpy as np
import copy
def get_random_node(d, p):
    d_ = copy.deepcopy(d)
    def run(d_arg):
        if np.random.binomial(1, p):
            return d_arg
        if 'args' not in d_arg:
            return None
        return get_random_node(d_arg['args'][np.random.randint(len(d_arg['args']))], p)
    n = 0
    while True:
        print d_
        print d
        n += 1
        if n > 30:
            raise ValueError('WTF')
        r = 0
        run(copy.deepcopy(d_))
        if r:
            return r

In [7]:
compile_alpha(get_random_node(d, 0.2))

{'args': [{'args': [{'args': [{'type': 'const', 'value': 0.462155132918}, {'args': [{'args': [{'args': [{'type': 'const', 'value': 0.109882077612}, {'type': 'const', 'value': 0.366904800709}, {'args': [{'args': [{'type': 'const', 'value': 'volume'}, {'type': 'const', 'value': 10.0}], 'type': 'call', 'name': 'ts_rank'}], 'type': 'call', 'name': 'scale'}], 'type': 'infix', 'name': '*'}, {'args': [{'type': 'const', 'value': 0.335781326658}, {'args': [{'args': [{'type': 'const', 'value': 0.0}, {'args': [{'type': 'const', 'value': 1.0}, {'args': [{'type': 'const', 'value': 'open'}, {'type': 'const', 'value': 'volume'}, {'type': 'const', 'value': 10.0}], 'type': 'call', 'name': 'correlation'}], 'type': 'infix', 'name': '*'}], 'type': 'infix', 'name': '-'}], 'type': 'call', 'name': 'scale'}], 'type': 'infix', 'name': '*'}], 'type': 'infix', 'name': '+'}], 'type': 'call', 'name': 'rank'}], 'type': 'infix', 'name': '*'}], 'type': 'call', 'name': 'scale'}, {'args': [{'type': 'const', 'value': 0.

ValueError: WTF

In [107]:
help(random.randint)

Help on built-in function randint:

randint(...)
    randint(low, high=None, size=None, dtype='l')
    
    Return random integers from `low` (inclusive) to `high` (exclusive).
    
    Return random integers from the "discrete uniform" distribution of
    the specified dtype in the "half-open" interval [`low`, `high`). If
    `high` is None (the default), then results are from [0, `low`).
    
    Parameters
    ----------
    low : int
        Lowest (signed) integer to be drawn from the distribution (unless
        ``high=None``, in which case this parameter is the *highest* such
        integer).
    high : int, optional
        If provided, one above the largest (signed) integer to be drawn
        from the distribution (see above for behavior if ``high=None``).
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.

In [109]:
help(str.replace)

Help on method_descriptor:

replace(...)
    S.replace(old, new[, count]) -> string
    
    Return a copy of string S with all occurrences of substring
    old replaced by new.  If the optional argument count is
    given, only the first count occurrences are replaced.

