In [1]:
# TODO: Add support for the 'y ~ .' notation; this implies that 'y ~ . - a', 'y ~ . - 1' and 'y ~ -1 + .' must also work.
# TODO: Raise error when there's more than one '~'

In [2]:
import formulae as fm
import pandas as pd
import numpy as np

def scan(code):
    """Tokenize the formula string `code` with `fm.Scanner` and return its tokens."""
    scanner = fm.Scanner(code)
    tokens = scanner.scan_tokens()
    return tokens
    
def parse(code):
    """Scan the formula string `code`, feed the tokens to `fm.Parser` and return the parse result."""
    tokens = scan(code)
    parser = fm.Parser(tokens)
    return parser.parse()
    
def resolve(code):
    """Parse the formula string `code`, pass the result to `fm.Resolver` and return what it resolves to."""
    tree = parse(code)
    resolver = fm.Resolver(tree)
    return resolver.resolve()

In [3]:
# Tokenize a formula containing function calls. In the output below, each call's
# argument list is kept as a single CALLARGS token (e.g. `(~x + 1)`), so the `~`
# and `+` inside the call are not emitted as separate tokens.
scan("x + fun(~x + 1) + b + fun2(a=1)")

[{'type': IDENTIFIER, 'lexeme': x, 'literal': None},
 {'type': PLUS, 'lexeme': +, 'literal': None},
 {'type': IDENTIFIER, 'lexeme': fun, 'literal': None},
 {'type': CALLARGS, 'lexeme': (~x + 1), 'literal': None},
 {'type': PLUS, 'lexeme': +, 'literal': None},
 {'type': IDENTIFIER, 'lexeme': b, 'literal': None},
 {'type': PLUS, 'lexeme': +, 'literal': None},
 {'type': IDENTIFIER, 'lexeme': fun2, 'literal': None},
 {'type': CALLARGS, 'lexeme': (a=1), 'literal': None},
 {'type': EOF, 'lexeme': , 'literal': None}]

In [4]:
# Parse the same formula. The `+` chain nests to the left, and each Call node
# shows its arguments as the raw text of the argument list in `args`.
parse("x + fun(~x + 1) + b + fun2(a=1)")

Binary(
  left=Binary(
    left=Binary(
      left=Variable(x),
      op=+,
      right=Call(
        callee=Variable(fun),
        args=(~x + 1),
        special=False
      )
    ),
    op=+,
    right=Variable(b)
  ),
  op=+,
  right=Call(
    callee=Variable(fun2),
    args=(a=1),
    special=False
  )
)

## Check precedence

### Sum and product

In [5]:
# `*` binds tighter than `+`: y * z is grouped first and becomes the right operand of `+`.
parse("x + y * z")

Binary(
  left=Variable(x),
  op=+,
  right=Binary(
    left=Variable(y),
    op=*,
    right=Variable(z)
  )
)

In [6]:
# Same precedence with the product on the left: x * y is grouped first and becomes the left operand of `+`.
parse("x * y + z")

Binary(
  left=Binary(
    left=Variable(x),
    op=*,
    right=Variable(y)
  ),
  op=+,
  right=Variable(z)
)

### Sum, product, interaction

In [7]:
# `:` binds tighter than `*`, which binds tighter than `+`: the tree is x + ((y : z) * y).
parse("x + y : z * y")

Binary(
  left=Variable(x),
  op=+,
  right=Binary(
    left=Binary(
      left=Variable(y),
      op=:,
      right=Variable(z)
    ),
    op=*,
    right=Variable(y)
  )
)

In [8]:
# Both `:` and `*` bind tighter than `+`: the tree is (x : y) + (z * y).
parse("x : y + z * y")

Binary(
  left=Binary(
    left=Variable(x),
    op=:,
    right=Variable(y)
  ),
  op=+,
  right=Binary(
    left=Variable(z),
    op=*,
    right=Variable(y)
  )
)

### Power interaction

In [9]:
# `**` binds tighter than `:`: the tree is (x ** y) : z.
parse("x ** y : z")

Binary(
  left=Binary(
    left=Variable(x),
    op=**,
    right=Variable(y)
  ),
  op=:,
  right=Variable(z)
)

In [10]:
# Same with `**` on the right: the tree is y : (z ** 5), and 5 is parsed as a Literal.
parse("y : z ** 5")

Binary(
  left=Variable(y),
  op=:,
  right=Binary(
    left=Variable(z),
    op=**,
    right=Literal(5)
  )
)

# Resolving formulas into model terms

In [11]:
# A single variable resolves to a bare Term (it is not wrapped in ModelTerms).
resolve("x")

Term(
  name= x
  variable= x
  kind= None
  data= None
)

In [12]:
# A sum of two variables resolves to a ModelTerms holding one Term per variable.
resolve("x + y")

ModelTerms(
  Term(
    name= x
    variable= x
    kind= None
    data= None
  ),
  Term(
    name= y
    variable= y
    kind= None
    data= None
  )
)

In [13]:
# Subtracting a term that was added earlier removes it: only x is left.
resolve("x + z - z")

ModelTerms(
  Term(
    name= x
    variable= x
    kind= None
    data= None
  )
)

In [14]:
# Subtracting a term that is not in the model (y) has no effect: x, z and m are kept.
resolve("x + z - y + m")

ModelTerms(
  Term(
    name= x
    variable= x
    kind= None
    data= None
  ),
  Term(
    name= z
    variable= z
    kind= None
    data= None
  ),
  Term(
    name= m
    variable= m
    kind= None
    data= None
  )
)

In [15]:
# The left-hand side of `~` resolves to a ResponseTerm; the right-hand side resolves to regular Terms.
resolve("y ~ a + b")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  )
)

In [16]:
# The repeated a:b appears only once in the result; a:b:c is kept as a separate three-way interaction.
resolve("y ~ a + b + a:b + c + a:b + a:b:c")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  ),
  InteractionTerm(
    name= a:b
    variables= {'a', 'b'}
  ),
  Term(
    name= c
    variable= c
    kind= None
    data= None
  ),
  InteractionTerm(
    name= a:b:c
    variables= {'a', 'b', 'c'}
  )
)

In [17]:
# `a ** 3` on a single variable resolves to just the term a.
resolve("y ~ a ** 3 + x:y + b + c")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  InteractionTerm(
    name= x:y
    variables= {'x', 'y'}
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  ),
  Term(
    name= c
    variable= c
    kind= None
    data= None
  )
)

In [18]:
# `a*b` expands to both main effects plus their interaction: a + b + a:b.
resolve("y ~ a*b")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  ),
  InteractionTerm(
    name= a:b
    variables= {'a', 'b'}
  )
)

In [19]:
# `a/b` expands to a + a:b, `b**3` resolves to b, and the self-interaction `c:c:c`
# resolves to the plain term c.
resolve("y ~ a/b + b**3 + c:c:c")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  InteractionTerm(
    name= a:b
    variables= {'a', 'b'}
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  ),
  Term(
    name= c
    variable= c
    kind= None
    data= None
  )
)

In [20]:
# A leading unary plus is accepted and leaves the term unchanged.
resolve("+a")

Term(
  name= a
  variable= a
  kind= None
  data= None
)

In [21]:
# `-0` resolves to a NegatedTerm for the intercept (see the output below).
resolve("-0")
# resolve("-a") intentionally raises an error; this is fine for now.

NegatedTerm(
  what=intercept
)

## Distributivity and associativity of interaction

In [22]:
# `:` distributes over `+` on both sides, giving every pairwise interaction: x:u, x:v, y:u, y:v.
resolve("(x + y):(u + v)")

ModelTerms(
  InteractionTerm(
    name= x:u
    variables= {'x', 'u'}
  ),
  InteractionTerm(
    name= x:v
    variables= {'x', 'v'}
  ),
  InteractionTerm(
    name= y:u
    variables= {'u', 'y'}
  ),
  InteractionTerm(
    name= y:v
    variables= {'v', 'y'}
  )
)

In [23]:
# Parentheses around a single variable change nothing: the result is x:u and y:u.
resolve("(x + y):(u)")

ModelTerms(
  InteractionTerm(
    name= x:u
    variables= {'x', 'u'}
  ),
  InteractionTerm(
    name= y:u
    variables= {'u', 'y'}
  )
)

In [24]:
# Chained `:` after a parenthesized sum: each term of the sum interacts with u and v, giving x:u:v and y:u:v.
resolve("(x + y):u:v")

ModelTerms(
  InteractionTerm(
    name= x:u:v
    variables= {'x', 'u', 'v'}
  ),
  InteractionTerm(
    name= y:u:v
    variables= {'u', 'v', 'y'}
  )
)

## Distributivity and associativity of product interaction

In [25]:
# `*` between two sums gives all main effects (x, y, u, v) plus every pairwise interaction across the two sums.
resolve("(x + y)*(u + v)")

ModelTerms(
  Term(
    name= x
    variable= x
    kind= None
    data= None
  ),
  Term(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= u
    variable= u
    kind= None
    data= None
  ),
  Term(
    name= v
    variable= v
    kind= None
    data= None
  ),
  InteractionTerm(
    name= x:u
    variables= {'x', 'u'}
  ),
  InteractionTerm(
    name= x:v
    variables= {'x', 'v'}
  ),
  InteractionTerm(
    name= y:u
    variables= {'u', 'y'}
  ),
  InteractionTerm(
    name= y:v
    variables= {'v', 'y'}
  )
)

In [26]:
# With a single variable on the right: main effects x, y, u plus the interactions x:u and y:u.
resolve("(x + y)*(u)")

ModelTerms(
  Term(
    name= x
    variable= x
    kind= None
    data= None
  ),
  Term(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= u
    variable= u
    kind= None
    data= None
  ),
  InteractionTerm(
    name= x:u
    variables= {'x', 'u'}
  ),
  InteractionTerm(
    name= y:u
    variables= {'u', 'y'}
  )
)

In [27]:
# Chained `*`: the output below is consistent with evaluating ((x + y)*u)*v, i.e. the terms
# of (x + y)*u come first, then v, then the interaction of each earlier term with v.
resolve("(x + y)*u*v")
# (x:u) * v -> x:u:v + x:u + v
# (y:u) * v -> y:u:v + y:u + v

ModelTerms(
  Term(
    name= x
    variable= x
    kind= None
    data= None
  ),
  Term(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= u
    variable= u
    kind= None
    data= None
  ),
  InteractionTerm(
    name= x:u
    variables= {'x', 'u'}
  ),
  InteractionTerm(
    name= y:u
    variables= {'u', 'y'}
  ),
  Term(
    name= v
    variable= v
    kind= None
    data= None
  ),
  InteractionTerm(
    name= x:v
    variables= {'x', 'v'}
  ),
  InteractionTerm(
    name= y:v
    variables= {'v', 'y'}
  ),
  InteractionTerm(
    name= u:v
    variables= {'u', 'v'}
  ),
  InteractionTerm(
    name= x:u:v
    variables= {'x', 'u', 'v'}
  ),
  InteractionTerm(
    name= y:u:v
    variables= {'u', 'v', 'y'}
  )
)

## NegatedTerm for intercept

In [28]:
# Both `-1` and `- 0` resolve to a NegatedTerm for the intercept; in each output it
# sits at the position where it appears in the formula (first vs. last).
print(resolve("-1 + a"))
print(resolve("a - 0"))

ModelTerms(
  NegatedTerm(
    what=intercept
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  )
)
ModelTerms(
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  NegatedTerm(
    what=intercept
  )
)


## CallTerm

In [29]:
# A function call can take part in an interaction; the whole call `fun(x + y)` is
# listed as a single variable of the InteractionTerm.
resolve("fun(x + y):y")

InteractionTerm(
  name= fun(x + y):y
  variables= {'y', 'fun(x + y)'}
)

In [30]:
# `*` with a call: the call becomes a CallTerm, followed by y and the interaction fun(x + y):y.
resolve("fun(x + y)*y")

ModelTerms(
  CallTerm(
    call=fun(x + y),
    special=False
  ),
  Term(
    name= y
    variable= y
    kind= None
    data= None
  ),
  InteractionTerm(
    name= fun(x + y):y
    variables= {'y', 'fun(x + y)'}
  )
)

In [31]:
# NOTE(review): in the output below the terms coming from `fun(x + y)*y` are listed
# before a and b, so the term order does not follow the formula order — confirm
# whether this ordering is intended.
resolve("a + fun(x + y)*y + b")

ModelTerms(
  CallTerm(
    call=fun(x + y),
    special=False
  ),
  Term(
    name= y
    variable= y
    kind= None
    data= None
  ),
  InteractionTerm(
    name= fun(x + y):y
    variables= {'y', 'fun(x + y)'}
  ),
  Term(
    name= a
    variable= a
    kind= None
    data= None
  ),
  Term(
    name= b
    variable= b
    kind= None
    data= None
  )
)

## Random effects

In [32]:
# `(1|b)` resolves to a RandomTerm whose expr is the literal 1 and whose factor is the term b.
resolve("y ~ x + (1|b)")

ModelTerms(
  ResponseTerm(
    name= y
    variable= y
    kind= None
    data= None
  ),
  Term(
    name= x
    variable= x
    kind= None
    data= None
  ),
  RandomTerm(
    expr= LiteralTerm(value=1),
    factor= Term(
      name= b
      variable= b
      kind= None
      data= None
    )
  )
)

In [33]:
# Two `|` terms in one formula: `(a|x)` has the term a as expr, `(1|z)` has the literal 1.
resolve("(a|x) + (1|z)")

ModelTerms(
  RandomTerm(
    expr= Term(
      name= a
      variable= a
      kind= None
      data= None
    ),
    factor= Term(
      name= x
      variable= x
      kind= None
      data= None
    )
  ),
  RandomTerm(
    expr= LiteralTerm(value=1),
    factor= Term(
      name= z
      variable= z
      kind= None
      data= None
    )
  )
)

## Computing using a data frame as a data mask

In [34]:
# Seed NumPy's global random state so the draws below are reproducible.
np.random.seed(1234)
# Three columns (x, y, z) of 10 draws each from np.random.normal with its default arguments.
data = pd.DataFrame({
    'x': np.random.normal(size=10),
    'y': np.random.normal(size=10),
    'z': np.random.normal(size=10)
})
# Evaluate the expression string with `data` passed as the data mask; the output
# below has one float per row of `data`.
# NOTE(review): the meaning of `n=1` is not visible from this notebook — confirm
# against formulae's `eval_in_data_mask`.
fm.eval_in_data_mask('x * y + z', data=data, n=1)

0    0.339521
1   -1.837353
2    1.559255
3    1.185388
4    1.558884
5   -0.467426
6    1.024077
7   -2.001041
8   -0.162371
9    4.528191
dtype: float64