In [1]:
# TODO: Add support for the 'y ~ .' notation. This implies that 'y ~ . - a', 'y ~ . - 1' and 'y ~ -1 + .' must also work.

In [2]:
import formulae as fm
import pandas as pd
import numpy as np

def parse(code):
    """Scan ``code`` into tokens and parse them into an expression tree.

    The token list produced by ``fm.Scanner(code).scan_tokens()`` is handed
    to ``fm.Parser``; the value of its ``parse()`` call is returned as is.
    """
    tokens = fm.Scanner(code).scan_tokens()
    parser = fm.Parser(tokens)
    return parser.parse()
    
def resolve(code):
    """Parse ``code`` with ``parse`` and resolve the tree via ``fm.Resolver``."""
    tree = parse(code)
    return fm.Resolver(tree).resolve()

## Check precedence

### Sum and product

In [3]:
# `*` binds tighter than `+`, whichever of the two comes first in the formula.
for formula in ("x + y * z", "x * y + z"):
    print(parse(formula))

Binary(
  left=Variable(x),
  op=+,
  right=Binary(
    left=Variable(y),
    op=*,
    right=Variable(z)
  )
)
Binary(
  left=Binary(
    left=Variable(x),
    op=*,
    right=Variable(y)
  ),
  op=+,
  right=Variable(z)
)


### Sum, product, interaction

In [4]:
# `:` binds tighter than `*`, which in turn binds tighter than `+`.
for formula in ("x + y : z * y", "x : y + z * y"):
    print(parse(formula))

Binary(
  left=Variable(x),
  op=+,
  right=Binary(
    left=Binary(
      left=Variable(y),
      op=:,
      right=Variable(z)
    ),
    op=*,
    right=Variable(y)
  )
)
Binary(
  left=Binary(
    left=Variable(x),
    op=:,
    right=Variable(y)
  ),
  op=+,
  right=Binary(
    left=Variable(z),
    op=*,
    right=Variable(y)
  )
)


### Power interaction

In [5]:
# `**` binds tighter than `:` on either side of the interaction.
for formula in ("x ** y : z", "y : z ** 5"):
    print(parse(formula))

Binary(
  left=Binary(
    left=Variable(x),
    op=**,
    right=Variable(y)
  ),
  op=:,
  right=Variable(z)
)
Binary(
  left=Variable(y),
  op=:,
  right=Binary(
    left=Variable(z),
    op=**,
    right=Literal(5)
  )
)


In [6]:
# A leading `+` in front of a literal parses to a Unary node wrapping that literal.
parse("+1")

Unary(
  op=+, right=Literal(1)
)

In [7]:
# Tokenizing alone does not check parenthesis balance: the stray ')' simply
# becomes a RIGHT_PAREN token in the list shown below.
fm.Scanner('+1)').scan_tokens()

[{'type': PLUS, 'lexeme': +, 'literal': None},
 {'type': NUMBER, 'lexeme': 1, 'literal': 1},
 {'type': RIGHT_PAREN, 'lexeme': ), 'literal': None},
 {'type': EOF, 'lexeme': , 'literal': None}]

In [8]:
# A lone variable resolves to a single Term.
resolve("x")

Term(
  name= x
  variable= x
  kind= None
  data= None
)

In [9]:
# A sum resolves to a ModelTerms collection holding one Term per variable.
resolve("x + y")

ModelTerms(
  Term(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= x
    variable= x
    kind= None
    data= None
  )
)

In [10]:
# Subtracting a term that was previously added removes it: only x remains.
resolve("x + z - z")

ModelTerms(
  Term(
    name= x
    variable= x
    kind= None
    data= None
  )
)

In [11]:
# Subtracting y, which was never added, leaves the other terms (x, z, m) in place.
resolve("x + z - y + m")

ModelTerms(
  Term(
    name= m
    variable= m
    kind= None
    data= None
  ),
  Term(
    name= z
    variable= z
    kind= None
    data= None
  ),
  Term(
    name= x
    variable= x
    kind= None
    data= None
  )
)

In [12]:
# The left-hand side of `~` becomes a ResponseTerm; the right-hand side
# resolves to ordinary Terms.
resolve("y ~ a + b")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  )
)

In [13]:
# `a:b` is written three times but shows up only once in the resolved terms.
resolve("y ~ a + b + a:b + c + a:b + a:b:c")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  InteractionTerm(
    name= a:b:c
    variables= {'a', 'b', 'c'}
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  ),
  InteractionTerm(
    name= a:b
    variables= {'a', 'b'}
  ),
  Term(
    name= c
    variable= c
    kind= None
    data= None
  )
)

In [14]:
# `**` binds tighter than `~`: the power expression is the right operand of the `~` node.
parse("y ~ a ** 5")

Binary(
  left=Variable(y),
  op=~,
  right=Binary(
    left=Variable(a),
    op=**,
    right=Literal(5)
  )
)

In [15]:
# In the output, `a ** 3` contributes only the plain term `a`.
resolve("y ~ a ** 3 + x:y + b + c")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= c
    variable= c
    kind= None
    data= None
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  ),
  InteractionTerm(
    name= x:y
    variables= {'x', 'y'}
  )
)

In [16]:
# `a*b` expands to both main effects plus their interaction `a:b`.
resolve("y ~ a*b")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  ),
  InteractionTerm(
    name= a:b
    variables= {'a', 'b'}
  )
)

In [17]:
# The output holds c, a, b and a:b, so `c:c:c` collapses to the single term c.
# Presumably `a/b` contributes a and a:b while `b**3` contributes b (TODO confirm).
resolve("y ~ a/b + b**3 + c:c:c")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= c
    variable= c
    kind= None
    data= None
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  ),
  InteractionTerm(
    name= a:b
    variables= {'a', 'b'}
  )
)

In [18]:
# Unary plus: the parser keeps it as a Unary node, while resolving yields the bare Term.
unary_plus_formula = "+a"
print(parse(unary_plus_formula))
resolve(unary_plus_formula)

Unary(
  op=+, right=Variable(a)
)


Term(
  name= a
  variable= a
  kind= None
  data= None
)

In [19]:
# Parsing a negated variable works and yields a Unary node.
print(parse("-a"))
# Only "-0" is resolved here (it becomes a NegatedTerm for the intercept).
resolve("-0")
# resolve("-a") is deliberately not run: it raises an error, which is the expected behavior.

Unary(
  op=-, right=Variable(a)
)


NegatedTerm(
  what=intercept
)

## Associativity of interaction

In [20]:
# Interaction distributes over parenthesised sums: every term on the left is
# combined with every term on the right.
for formula in ("(x + y):(u + v)", "(x + y):(u)", "(x + y):u:v"):
    print(resolve(formula))

ModelTerms(
  InteractionTerm(
    name= y:u
    variables= {'u', 'y'}
  ),
  InteractionTerm(
    name= x:u
    variables= {'u', 'x'}
  ),
  InteractionTerm(
    name= y:v
    variables= {'v', 'y'}
  ),
  InteractionTerm(
    name= x:v
    variables= {'v', 'x'}
  )
)
ModelTerms(
  InteractionTerm(
    name= y:u
    variables= {'u', 'y'}
  ),
  InteractionTerm(
    name= x:u
    variables= {'u', 'x'}
  )
)
ModelTerms(
  InteractionTerm(
    name= x:u:v
    variables= {'v', 'u', 'x'}
  ),
  InteractionTerm(
    name= y:u:v
    variables= {'v', 'u', 'y'}
  )
)


In [23]:
# Both "-1" and "- 0" resolve to a NegatedTerm for the intercept, whether the
# removal is written before or after the variable.
# NOTE(review): this cell's execution count (23) is higher than the next cell's (22);
# re-run the notebook top to bottom to confirm the outputs.
print(resolve("-1 + a"))
print(resolve("a - 0"))

ModelTerms(
  NegatedTerm(
    what=intercept
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  )
)
ModelTerms(
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  NegatedTerm(
    what=intercept
  )
)


In [22]:
# Seed the legacy global NumPy generator so the frame below is reproducible.
np.random.seed(1234)
# Ten standard-normal draws per column, generated in the order x, y, z.
data = pd.DataFrame(
    {column: np.random.normal(size=10) for column in ("x", "y", "z")}
)
# Evaluate the expression 'x / y' against `data`; the output below is a float64
# Series with one value per row.
# NOTE(review): the meaning of `n=1` is not visible in this notebook — confirm
# against the definition of fm.eval_in_data_mask.
fm.eval_in_data_mask('x / y', data=data, n=1)

0      0.409931
1     -1.200646
2      1.502854
3      0.154682
4      2.156952
5    418.796134
6      2.120067
7     -2.201803
8      0.011881
9      1.449788
dtype: float64