# Probabilistic Language of Thought

[Piantadosi, S. T., Tenenbaum, J. B., & Goodman, N. D. (2016). The logical primitives of thought: Empirical foundations for compositional cognitive models. Psychological Review, 123(4), 392.](https://doi.org/10.1037/a0039980)

In [1]:
from flippy import infer, condition, flip, draw_from, keep_deterministic, mem, \
    Categorical
from flippy.distributions import Dirichlet
from math import exp, log as log_
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from frozendict import frozendict

def log(x):
    """Return the natural log of `x`, or negative infinity when `x <= 0`.

    Unlike `math.log`, this never raises for zero or negative inputs; any
    value that is not strictly positive maps to ``float('-inf')``.
    """
    if x > 0:
        return log_(x)
    return float('-inf')

In [2]:
# Inverse temperature applied to the Dirichlet weights of each rule's
# right-hand-side expansions: `expansion_dist` raises every weight to this
# power before sampling the expansion probabilities, so it controls how
# sharply those probabilities are separated. When rules that branch have lower
# Dirichlet weights than rules that do not, a higher inverse temperature leads
# to a bias towards shorter rules.
inv_temp = 1.0

@mem
def expansion_dist(rhs_probs: dict) -> Categorical:
    """Build a categorical distribution over the right-hand sides of a rule.

    Args:
        rhs_probs: Mapping from each right-hand-side symbol to its Dirichlet
            weight.

    Returns:
        A `Categorical` over the keys of `rhs_probs` whose probabilities are a
        single draw from a Dirichlet with weights ``weight ** inv_temp``.

    NOTE(review): decorated with flippy's `mem`; presumably this makes repeated
    calls with the same weights reuse one Dirichlet draw -- confirm against
    the flippy documentation.
    """
    symbols = list(rhs_probs.keys())
    # Temper each weight with the module-level inverse temperature.
    concentrations = [weight**inv_temp for weight in rhs_probs.values()]
    sampled_probs = Dirichlet(concentrations).sample()
    return Categorical(symbols, probabilities=sampled_probs)

def simple_boolean():
    """Sample the source text of a boolean concept over a single object.

    The grammar has three nonterminals:

    * ``START``     -> ``lambda x: BOOL``
    * ``BOOL``      -> ``(BOOL or BOOL)`` | ``(BOOL and BOOL)`` | ``not BOOL``
      | ``PREDICATE(x)``
    * ``PREDICATE`` -> an equality test on ``obj['color']``, ``obj['shape']``
      or ``obj['size']``

    Expansion probabilities for every choice come from `expansion_dist`.
    Each ``BOOL`` expansion is conditioned on not containing the substring
    ``"not not"`` and on being shorter than 100 characters.

    Returns:
        The sampled expression as a string of Python source.
    """
    def PREDICATE():
        # First choose which feature to test, then which value it must equal.
        attribute_choice = expansion_dist({
            "SIZE": 2.0,
            "SHAPE": 1.4,
            "COLOR": 1.9,
        })
        feature = attribute_choice.sample()
        if feature == 'SIZE':
            size_choice = expansion_dist({
                "large": 1.0,
                "medium": 0.3,
                "small": 1,
            })
            size = size_choice.sample()
            return f"(lambda obj: obj['size'] == '{size}')"
        if feature == 'SHAPE':
            shape_choice = expansion_dist({
                "triangle": 1.0,
                "rectangle": 0.4,
                "circle": 1.9,
            })
            shape = shape_choice.sample()
            return f"(lambda obj: obj['shape'] == '{shape}')"
        if feature == 'COLOR':
            color_choice = expansion_dist({
                "yellow": .6,
                "green": .5,
                "blue": 1.5,
            })
            color = color_choice.sample()
            return f"(lambda obj: obj['color'] == '{color}')"

    def BOOL():
        operator_choice = expansion_dist({
            "or": 1.,
            "and": 0.25,
            "not": 2.0,
            "PREDICATE": 1.6,
        })
        operator = operator_choice.sample()
        if operator == "PREDICATE":
            # Leaf: apply a sampled feature test to the concept's argument.
            test = PREDICATE()
            body = f"{test}(x)"
        elif operator == "not":
            operand = BOOL()
            body = f"not {operand}"
        elif operator == "or" or operator == "and":
            # Left operand is expanded before the right one.
            left = BOOL()
            right = BOOL()
            body = f"({left} {operator} {right})"
        # Rule out double negation and overly long expressions.
        has_double_negation = "not not" in body
        condition(not has_double_negation)
        condition(100 > len(body))
        return body

    def START():
        body = BOOL()
        return f"lambda x: {body}"

    return START()
        
@infer(method="LikelihoodWeighting", samples=3000)
def prior(pcfg):
    """Run the grammar program `pcfg` with no data and return its result.

    Args:
        pcfg: A zero-argument callable that samples an expression (for
            example `simple_boolean`).

    Returns:
        Whatever `pcfg()` returns. The function is wrapped by `infer` with
        the LikelihoodWeighting method and 3000 samples, so callers
        presumably receive a distribution over these results -- confirm
        against the flippy documentation.
    """
    expression = pcfg()
    return expression

@keep_deterministic
def convert_to_executable(func):
    """Turn the source text of an expression into a callable.

    Args:
        func: A string of Python source (despite the parameter name), such as
            the ``lambda x: ...`` text produced by a grammar.

    Returns:
        The evaluated object wrapped in `keep_deterministic`.

    NOTE: the string is passed to `eval`, which executes arbitrary code; only
    call this with trusted, program-generated source.
    """
    evaluated = eval(func)
    return keep_deterministic(evaluated)


In [12]:
# TODO: fix how seed=None is cached by infer
@infer(method="MetropolisHastings", samples=1000, seed=None, burn_in=1000)
def posterior(pcfg, data):
    """Sample a concept from `pcfg` conditioned on agreeing with `data`.

    Args:
        pcfg: A zero-argument callable returning the source text of a
            one-argument concept.
        data: An iterable of ``(example, value)`` pairs; the concept applied
            to each ``example`` is conditioned to equal ``value``.

    Returns:
        The sampled concept's source text. The function is wrapped by `infer`
        with the MetropolisHastings method (1000 samples, 1000 burn-in).
    """
    concept = pcfg()
    classify = convert_to_executable(concept)
    for instance, label in data:
        prediction = classify(instance)
        condition(prediction == label)
    return concept

In [15]:
# Labeled training objects as (object, label) pairs, written compactly as
# (color, shape, size, label) rows. Both yellow objects are labeled True and
# the other two are labeled False.
examples = tuple(
    (frozendict({'color': color, 'shape': shape, 'size': size}), label)
    for color, shape, size, label in (
        ('yellow', 'circle', 'small', True),
        ('blue', 'rectangle', 'medium', False),
        ('yellow', 'triangle', 'large', True),
        ('green', 'circle', 'small', False),
    )
)
# Infer which concepts from the boolean grammar are consistent with the labels.
dist = posterior(simple_boolean, examples)

In [14]:
dist

Unnamed: 0,Element,Probability
0,lambda x: (lambda obj: obj['color'] == 'yellow')(x),1.0
