In [1]:
# TODO: Add special treatment for 0 and 1 in formulas.
# TODO: Compare terms to ensure each term appears only once in ModelFormula

In [2]:
import formulae as fm
import pandas as pd
import numpy as np

def parse(code):
    """Scan ``code`` into tokens and parse them into an expression tree.

    Parameters
    ----------
    code : str
        Model formula, e.g. ``"x + y * z"``.

    Returns
    -------
    Whatever ``fm.Parser(...).parse()`` returns for the scanned tokens.
    """
    tokens = fm.Scanner(code).scan_tokens()
    parser = fm.Parser(tokens)
    return parser.parse()

def resolve(code):
    """Parse ``code`` and resolve the resulting expression tree into model terms.

    Parameters
    ----------
    code : str
        Model formula, e.g. ``"y ~ a + b"``.

    Returns
    -------
    Whatever ``fm.Resolver(...).resolve()`` returns for the parsed formula.
    """
    expression = parse(code)
    resolver = fm.Resolver(expression)
    return resolver.resolve()

## Check precedence

### Sum and product

In [3]:
# `*` binds tighter than `+`, whichever side of the sum it appears on.
for formula in ("x + y * z", "x * y + z"):
    print(parse(formula))

Binary(
  left=Variable(x),
  op=+,
  right=Binary(
    left=Variable(y),
    op=*,
    right=Variable(z)
  )
)
Binary(
  left=Binary(
    left=Variable(x),
    op=*,
    right=Variable(y)
  ),
  op=+,
  right=Variable(z)
)


### Sum, product, interaction

In [4]:
# `:` binds tighter than `*`, which in turn binds tighter than `+`.
for formula in ("x + y : z * y", "x : y + z * y"):
    print(parse(formula))

Binary(
  left=Variable(x),
  op=+,
  right=Binary(
    left=Binary(
      left=Variable(y),
      op=:,
      right=Variable(z)
    ),
    op=*,
    right=Variable(y)
  )
)
Binary(
  left=Binary(
    left=Variable(x),
    op=:,
    right=Variable(y)
  ),
  op=+,
  right=Binary(
    left=Variable(z),
    op=*,
    right=Variable(y)
  )
)


### Power interaction

In [5]:
# `**` binds tighter than `:` on either side of the interaction.
for formula in ("x ** y : z", "y : z ** 5"):
    print(parse(formula))

Binary(
  left=Binary(
    left=Variable(x),
    op=**,
    right=Variable(y)
  ),
  op=:,
  right=Variable(z)
)
Binary(
  left=Variable(y),
  op=:,
  right=Binary(
    left=Variable(z),
    op=**,
    right=Literal(5)
  )
)


In [6]:
# A lone variable resolves to a single Term.
resolve("x")

Term(
  name= x
  variable= x
  kind= None
  data= None
)

In [7]:
# A sum resolves to ModelTerms holding one Term per variable.
resolve("x + y")

ModelTerms(
  Term(
    name= x
    variable= x
    kind= None
    data= None
  ),
  Term(
    name= y
    variable= y
    kind= None
    data= None
  )
)

In [8]:
# Subtracting a term that was previously added removes it: only x remains.
resolve("x + z - z")

ModelTerms(
  Term(
    name= x
    variable= x
    kind= None
    data= None
  )
)

In [9]:
# Subtracting y, which was never added, leaves the other terms (x, z, m) in place.
resolve("x + z - y + m")

ModelTerms(
  Term(
    name= m
    variable= m
    kind= None
    data= None
  ),
  Term(
    name= x
    variable= x
    kind= None
    data= None
  ),
  Term(
    name= z
    variable= z
    kind= None
    data= None
  )
)

In [10]:
# The left-hand side of `~` becomes a ResponseTerm; the right-hand side becomes plain Terms.
resolve("y ~ a + b")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  )
)

In [11]:
# a:b is written twice in the formula but appears only once in the resolved terms.
resolve("y ~ a + b + a:b + c + a:b + a:b:c")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  InteractionTerm(
    name= a:b:c
    variables= {'b', 'c', 'a'}
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  ),
  Term(
    name= c
    variable= c
    kind= None
    data= None
  ),
  InteractionTerm(
    name= a:b
    variables= {'b', 'a'}
  )
)

In [12]:
# a ** 3 contributes only the plain Term a to the resolved model.
resolve("y ~ a ** 3 + x:y + b + c")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  InteractionTerm(
    name= x:y
    variables= {'y', 'x'}
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  Term(
    name= c
    variable= c
    kind= None
    data= None
  )
)

In [13]:
# a*b expands to both main effects plus their interaction: a, b and a:b.
resolve("y ~ a*b")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  InteractionTerm(
    name= a:b
    variables= {'b', 'a'}
  )
)

In [14]:
# The result holds a, b, c and a:b; the self-interaction c:c:c collapses to the single Term c.
resolve("y ~ a/b + b**3 + c:c:c")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  Term(
    name= c
    variable= c
    kind= None
    data= None
  ),
  InteractionTerm(
    name= a:b
    variables= {'b', 'a'}
  )
)

In [15]:
# Unary plus parses to a Unary node and resolves to the plain Term a.
print(parse("+a"))
resolve("+a")

Unary(
  op=+, right=Variable(a)
)


Term(
  name= a
  variable= a
  kind= None
  data= None
)

In [16]:
# Unary minus parses to a Unary node; applied to 0 it resolves to a negated intercept.
print(parse("-a"))
resolve("-0")
# resolve("-a") is deliberately not run here: it raises an error, which is expected.

Unary(
  op=-, right=Variable(a)
)


NegatedTerm(what=intercept)

## Associativity and distributivity of interaction

In [17]:
# Interacting with a parenthesised sum yields one interaction term per combination of summands.
for formula in ("(x + y):(u + v)", "(x + y):(u)", "(x + y):u:v"):
    print(resolve(formula))

ModelTerms(
  InteractionTerm(
    name= y:v
    variables= {'y', 'v'}
  ),
  InteractionTerm(
    name= x:u
    variables= {'x', 'u'}
  ),
  InteractionTerm(
    name= y:u
    variables= {'y', 'u'}
  ),
  InteractionTerm(
    name= x:v
    variables= {'x', 'v'}
  )
)
ModelTerms(
  InteractionTerm(
    name= x:u
    variables= {'x', 'u'}
  ),
  InteractionTerm(
    name= y:u
    variables= {'y', 'u'}
  )
)
ModelTerms(
  InteractionTerm(
    name= y:u:v
    variables= {'y', 'u', 'v'}
  ),
  InteractionTerm(
    name= x:u:v
    variables= {'x', 'u', 'v'}
  )
)


In [18]:
# Seed the global NumPy RNG so the random columns are reproducible.
np.random.seed(1234)
# Columns are drawn in x, y, z order, ten normal samples each.
data = pd.DataFrame({col: np.random.normal(size=10) for col in ('x', 'y', 'z')})
# NOTE(review): the meaning of n=1 is not visible in this notebook — confirm
# against the eval_in_data_mask implementation.
fm.eval_in_data_mask('x / y', data=data, n=1)

0      0.409931
1     -1.200646
2      1.502854
3      0.154682
4      2.156952
5    418.796134
6      2.120067
7     -2.201803
8      0.011881
9      1.449788
dtype: float64