In [1]:
import json
import collections
import subprocess

def prettify(atom):
    """Render a parsed atom back into its textual form.

    ``atom`` is a dict with a ``'predicate'`` name and, optionally, a
    ``'terms'`` list of nested atoms.  An atom without a ``'terms'`` key
    renders as the bare predicate name; otherwise the rendered terms are
    joined with commas and wrapped in parentheses after the name.
    """
    name = atom['predicate']
    if 'terms' not in atom:
        return name
    rendered_terms = ','.join(prettify(sub) for sub in atom['terms'])
    return name + '(' + rendered_terms + ')'

  
def parse_json_result(out):
    """Parse clingo's JSON output and group the first witness's atoms by predicate.

    Only the first entry of ``'Call'`` and the first entry of its
    ``'Witnesses'`` are inspected.

    Parameters:
        out: JSON text (as produced with ``--outf=2``).

    Returns:
        An empty list when the first call has no ``'Witnesses'`` key or an
        empty witness list.  Otherwise a ``collections.defaultdict(list)``
        mapping each top-level predicate name to a list of parse results,
        one per atom of the witness; each parse result is the term list
        returned by ``parse_terms`` for that atom.

    Raises:
        AssertionError: if the ``'Call'`` list is empty.
    """
    result = json.loads(out)
    assert len(result['Call']) > 0

    witnesses = result['Call'][0].get('Witnesses')
    if not witnesses:
        # Kept as a list (not a dict) because that is what callers have
        # always received for "no answer"; both iterate as empty.
        return []

    preds = collections.defaultdict(list)
    for atom in witnesses[0]['Value']:
        # parse_terms returns (terms, unparsed remainder); only terms matter.
        parsed, _ = parse_terms(atom)
        preds[parsed[0]['predicate']].append(parsed)
    return preds

def solve(args):
    """Run clingo with the provided argument list and return the parsed JSON result.

    Parameters:
        args: sequence of command-line arguments appended after
            ``clingo --outf=2``.  Each element is passed to clingo as exactly
            one argument.

    Returns:
        Whatever ``parse_json_result`` returns for clingo's standard output.

    Raises:
        OSError: if the ``clingo`` executable cannot be started.

    clingo's exit status is not checked and its standard error is discarded.
    """
    command = ['clingo', '--outf=2'] + list(args)
    # Hand the argument vector to the OS directly rather than joining it into
    # a shell command line: file names containing spaces or shell
    # metacharacters then reach clingo unchanged and are never interpreted
    # by a shell.
    clingo = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
        )
    out, _ = clingo.communicate()

    return parse_json_result(out)

def parse_terms(arguments):
    """Parse a comma-separated term string into nested predicate dicts.

    The string is consumed left to right.  ``name(`` opens a compound atom
    whose sub-terms are parsed recursively, ``,`` separates sibling terms and
    ``)`` closes the current level.  Text with no delimiter left is taken as
    one plain atom.

    Returns:
        A pair ``(terms, remainder)``.  ``terms`` is a list of dicts, each
        with a ``'predicate'`` key and, for compound atoms, a ``'terms'``
        list.  ``remainder`` is the text following the ``)`` that closed this
        level, or ``''`` when the input was consumed without meeting one.
    """
    parsed = []
    while arguments:
        # Position of the first structural character, if there is one.
        cut = None
        for position, char in enumerate(arguments):
            if char in '(),':
                cut = position
                break

        if cut is None:
            # No delimiter left: everything that remains is a single atom.
            parsed.append({'predicate': arguments})
            arguments = ''
            continue

        delimiter = arguments[cut]
        name = arguments[:cut]
        rest = arguments[cut + 1:]

        if delimiter == '(':
            children, arguments = parse_terms(rest)
            parsed.append({'predicate': name, 'terms': children})
            continue

        # Both ',' and ')' terminate the plain atom preceding them (if any);
        # an empty name occurs right after a closed compound atom.
        if name != '':
            parsed.append({'predicate': name})
        if delimiter == ')':
            return parsed, rest
        arguments = rest
    return parsed, ''
   


In [2]:
filenames = ['pong.lp','kaboom.lp']

# games: for each input file, the prettified text of every non-``type`` rule.
# types: maps the second argument of each ``type(T, X)`` rule to its first (X -> T).
# facts: the parsed non-``type`` rules of all files, in file order.
games = []
types = {}
facts = []
for filename in filenames:
    # Read through a context manager so the handle is closed deterministically
    # instead of being leaked.
    with open(filename,'rb') as infile:
        text = infile.read()
    # All spaces and newlines are stripped, then the text is split into
    # statements on '.'; whatever follows the last '.' is discarded.
    # NOTE(review): any '.' inside a statement would also split it -- confirm
    # the inputs never contain one.
    rules = text.replace(' ','').replace('\n','').split('.')[:-1]
    rules = [parse_terms(rule)[0][0] for rule in rules]
    per_game_facts = []
    for rule in rules:
        if rule['predicate'] == 'type':
            types[rule['terms'][1]['predicate']] = rule['terms'][0]['predicate']
        else:
            facts.append(rule)
            per_game_facts.append(rule)
    games.append([prettify(rule) for rule in per_game_facts])

In [3]:

def has_term(rule,term):
    """Return True when some leaf atom inside ``rule`` is named ``term``.

    Compound atoms (those with a ``'terms'`` key) are only descended into;
    their own predicate name is never compared against ``term``.
    """
    if 'terms' not in rule:
        return rule['predicate'] == term
    return any(has_term(child,term) for child in rule['terms'])
def get_terms(rule):
    """List the leaf predicate names of ``rule``, left to right.

    Duplicates are kept; names of compound atoms are not included.
    """
    if 'terms' not in rule:
        return [rule['predicate']]
    leaves = []
    for child in rule['terms']:
        leaves.extend(get_terms(child))
    return leaves
def get_higher_level(rule):
    """List the prettified text of every compound atom in ``rule``.

    The atom itself comes first (when it has a ``'terms'`` key), followed by
    the compound atoms nested inside it in depth-first order.  Leaf atoms
    contribute nothing.
    """
    if 'terms' not in rule:
        return []
    rendered = [prettify(rule)]
    for child in rule['terms']:
        rendered.extend(get_higher_level(child))
    return rendered

In [4]:
import random
import sys
import numpy as np
import hashlib
import os


# Upper limit for the number of rules drawn per population member
# (``random.randint(1,max_rules)``); also read by the mutation step.
max_rules = 2
# Divisor applied to a rule set's score before exponentiation in ``curried``.
temperature = 5

In [5]:
def generalize(fact,probability=0.5):
    """Return a rebuilt copy of ``fact`` with names randomly upper-cased.

    * A leaf atom has its predicate (converted with ``str``) upper-cased when
      ``random.random() < probability``.
    * A compound atom with exactly one term is, under the same test, replaced
      by ``predicate(PREDICATE)``: its single term becomes the upper-cased
      predicate name.
    * Otherwise every term is generalised recursively.

    The input is never modified.  With a ``probability`` below zero the
    result is a plain structural copy.
    """
    name = fact['predicate']
    if 'terms' not in fact:
        leaf = str(name)
        return {'predicate': leaf.upper() if random.random() < probability else leaf}

    children = fact['terms']
    # The random draw only happens for single-term atoms (short-circuit).
    if len(children) == 1 and random.random() < probability:
        return {'predicate': name, 'terms': [{'predicate': name.upper()}]}
    return {'predicate': name,
            'terms': [generalize(child,probability) for child in children]}
    
def replace(fact,source,target):
    """Return a rebuilt copy of ``fact`` with leaf ``source`` renamed to ``target``.

    Only leaf atoms are compared against ``source``; predicate names of
    compound atoms are copied unchanged.  The input is not modified.
    """
    if 'terms' in fact:
        return {'predicate': fact['predicate'],
                'terms': [replace(child,source,target) for child in fact['terms']]}
    name = fact['predicate']
    return {'predicate': target if name == source else name}
    
def create_rule_graph(game,positives):
    """Build a graph linking atoms that share at least one leaf term.

    Nodes are integer ids: positive example number ``i`` gets id ``-i-1``
    and rule number ``j`` of ``game`` gets id ``j``.  Two nodes are connected
    when ``get_terms`` yields a common term for both.

    Parameters:
        game: list of parsed rules.
        positives: list of parsed positive examples.

    Returns:
        ``(connections, all_rules)``.  ``connections`` maps each node id that
        is reachable from the smallest id to the set of its neighbours;
        nodes that are not reachable from it are absent.  ``all_rules`` maps
        every node id (reachable or not) to its parsed atom.  Both are empty
        when ``game`` and ``positives`` are empty.
    """
    terms_to_fact = {}
    all_terms = {}
    all_rules = {}

    numbered = [(-positive_id-1, positive) for positive_id,positive in enumerate(positives)]
    numbered.extend(enumerate(game))
    for node_id,rule in numbered:
        terms = get_terms(rule)
        all_terms[node_id] = terms
        all_rules[node_id] = rule
        for term in terms:
            # Accumulate users of each term.  Rebinding the whole mapping per
            # positive would forget every positive but the last one and leave
            # the others unreachable.
            terms_to_fact.setdefault(term, []).append(node_id)

    connections = {}
    if not all_terms:
        return connections,all_rules

    visited = set()
    stack = [min(all_terms)]
    while stack:
        current = stack.pop()
        if current in visited:
            # A node can be pushed several times before it is first expanded;
            # expanding it again would only repeat work.
            continue
        visited.add(current)
        connections[current] = set()
        for term in all_terms[current]:
            for connection in terms_to_fact[term]:
                if connection not in visited:
                    stack.append(connection)
                elif connection != current:
                    connections[connection].add(current)
                    connections[current].add(connection)

    return connections,all_rules
  
def do_walk(rules,connections,min_to_add,max_to_add,visited = None):
    """Randomly grow a connected set of node ids over ``connections``.

    Parameters:
        rules: not used by this function.
        connections: mapping node id -> set of neighbouring node ids.
        min_to_add, max_to_add: inclusive bounds for the number of nodes the
            walk tries to add (drawn with ``random.randint``).
        visited: optional non-empty collection of node ids to extend.  When
            omitted or empty, the walk starts from one randomly chosen
            negative id of ``connections``.

    Returns:
        The sorted list of visited node ids.  Only non-negative ids are ever
        added during the walk.  The walk stops early when no visited node has
        an unvisited non-negative neighbour left.  An empty list is returned
        when no start node is available.
    """
    if visited:
        # Work on a private set: list inputs are accepted and the caller's
        # collection is left untouched.
        visited = set(visited)
        can_add = set(visited)
    else:
        starting_points = [node for node in sorted(connections) if node < 0]
        if not starting_points:
            return []
        start = random.choice(starting_points)
        visited = set([start])
        can_add = set([start])

    number_of_facts = random.randint(min_to_add,max_to_add)

    while number_of_facts > 0 and len(can_add) > 0:
        branch = random.choice(list(can_add))
        can_visit = set(v for v in connections[branch] - visited if v >= 0)
        if not can_visit:
            # Dead end: nothing left to reach from this node.  Retire it
            # instead of calling random.choice on an empty list.
            can_add.discard(branch)
            continue
        visiting = random.choice(list(can_visit))
        if len(can_visit) == 1:
            # That was the branch's last candidate.
            can_add.remove(branch)
        visited.add(visiting)
        can_add.add(visiting)
        number_of_facts -= 1
    return sorted(visited)

In [36]:
def coarsenings(rules):
    """For each ``(head, body)`` rule, list the compound atoms that may be coarsened.

    A compound atom (as prettified text from ``get_higher_level`` over the
    body) is kept unless its text contains, as a substring, some leaf term
    that is used more than once across the head and body.

    Returns:
        One list of prettified compound atoms per input rule, in input order.
    """
    possible_coarsenings = []
    for head,body in rules:
        # term -> list of users; -1 stands for the head, otherwise the index
        # of the body atom.  A term used twice in one atom is listed twice.
        term_usage = {}
        for term in get_terms(head):
            term_usage.setdefault(term, []).append(-1)

        all_high_level_terms = set()
        for pred_id,predicate in enumerate(body):
            all_high_level_terms |= set(get_higher_level(predicate))
            for term in get_terms(predicate):
                term_usage.setdefault(term, []).append(pred_id)

        shared_terms = [term for term in term_usage if len(term_usage[term]) > 1]

        safe_terms = set(all_high_level_terms)
        for high_level in all_high_level_terms:
            if any(term in high_level for term in shared_terms):
                safe_terms.remove(high_level)

        possible_coarsenings.append(list(safe_terms))
    return possible_coarsenings
 
def coarsen(per_rule_coarsenings,rules):
    """Replace selected compound atoms in rule bodies by placeholder variables.

    ``per_rule_coarsenings`` and ``rules`` are paired positionally.  For one
    rule, coarsening number ``i`` (a prettified atom text) is replaced by the
    text ``'V<i>'`` wherever it occurs in the prettified form of a body atom;
    the result is parsed back with ``parse_terms``.  Placeholders are applied
    in sorted (string) order of their names.  Heads are passed through
    unchanged.

    Returns:
        A list of ``(head, new_body)`` tuples, one per paired rule.
    """
    new_rules = []
    for rule_coarsenings,(head,body) in zip(per_rule_coarsenings,rules):
        # placeholder name -> the text it stands for
        placeholders = dict(('V{}'.format(i), text)
                            for i,text in enumerate(rule_coarsenings))
        ordered_names = sorted(placeholders)

        new_body = []
        for b in body:
            text = prettify(b)
            for placeholder in ordered_names:
                text = text.replace(placeholders[placeholder], placeholder)
            new_body.append(parse_terms(text)[0][0])
        new_rules.append((head,new_body))
    return new_rules
   

In [37]:


def generate_rule(target_form,rules,connections,number_of_rules,predecessor=None):
    """Generate one random candidate rule ``(head, body)`` for ``target_form``.

    A random walk (``do_walk``) over ``connections`` picks ``number_of_rules``
    facts from ``rules``.  Every distinct leaf term of those facts is renamed
    to a variable ``'V<type><n>'``, the type being looked up in the
    module-level ``types`` mapping under the lower-cased term.  Each leaf of
    ``target_form`` is treated as a type name and replaced by a randomly
    chosen variable of that type.  With probability 0.5, and if any safe
    coarsening exists (``coarsenings``), one randomly chosen coarsening is
    applied to the body (``coarsen``).

    Parameters:
        target_form: parsed atom whose leaves are type names.
        rules: mapping node id -> parsed atom, as returned by
            ``create_rule_graph``.
        connections: graph returned by ``create_rule_graph``.
        number_of_rules: number of facts the walk should try to collect.
        predecessor: reserved; any truthy value yields ``(None, None)``.

    Returns:
        ``(head, body)`` with ``body`` a list of parsed atoms, or
        ``(None, None)`` when no variable of a required type is available.

    Raises:
        KeyError: if a selected term has no entry in ``types``.
    """
    if predecessor:
        # Deriving a rule from a predecessor is not implemented.  Report
        # failure in the form callers already handle: they unpack a pair and
        # test the head.
        return (None,None)

    walked = do_walk(rules,connections,number_of_rules,number_of_rules)
    facts_to_use = [rules[fact] for fact in walked if fact >= 0]

    uniques = set()
    for fact in facts_to_use:
        uniques |= set(get_terms(fact))

    # term -> variable name, and type -> variable names available for the head
    unique_mapping = {}
    can_be_used_by_type = {}
    for unique_id,u in enumerate(uniques):
        t = types[u.lower()]
        unique_mapping[u] = 'V{}{}'.format(t,unique_id)
        can_be_used_by_type.setdefault(t, []).append(unique_mapping[u])

    final_facts = []
    for fact in facts_to_use:
        # Look the variable up directly by term.  The former intermediate
        # table was guarded by the term's first character, so a term such as
        # '50' following '5' raised KeyError.
        for term in set(get_terms(fact)):
            fact = replace(fact,term,unique_mapping[term])
        final_facts.append(fact)

    for term in list(set(get_terms(target_form))):
        if term not in can_be_used_by_type:
            return (None,None)
        target_form = replace(target_form,term,random.choice(can_be_used_by_type[term]))

    coarsening = coarsenings([(target_form,final_facts)])
    output = (target_form,final_facts)

    if random.random() < 0.5 and len(coarsening[0]) > 0:
        output = coarsen([[random.choice(coarsening[0])]],[(target_form,final_facts)])[0]

    return output
        
        
        

In [38]:
target_rule  = {'predicate':'player_controls','terms':[{'predicate':'entity'}]}


all_positives = []
all_raw_positives = []

positives = [{'predicate':'player_controls','terms':[{'predicate':'paddle_player'}]}]
all_raw_positives.append(positives[-1])
positives = [prettify(f) for f in positives]
all_positives.append(positives)


positives = [{'predicate':'player_controls','terms':[{'predicate':'basket'}]}]
all_raw_positives.append(positives[-1])
positives = [prettify(f) for f in positives]
all_positives.append(positives)

connections,rules = create_rule_graph(facts,all_raw_positives)

target_head, target_body = generate_rule(target_rule,rules,connections,number_of_rules = random.randint(1,3))
for r in target_body:
    print '\t',prettify(r)

	entity(Ventity0)


In [39]:
def score_rule(games,per_game_positives,generated_rules):
    """Score a set of generated rules against every game; lower is better.

    For each game, the game's facts plus the generated rules (each followed
    by a ``#show`` directive for its head predicate) are written to a file
    named ``'temp' + <sha224 hex digest of the program text>`` in the current
    directory and solved with ``solve``.  The files are left on disk.

    Per game, the log-likelihood contribution is
      * ``log(1e-20)`` if any derived atom is not among the game's positives,
        or if nothing was derived;
      * ``log(found / len(positives))`` otherwise, where ``found`` is the
        number of distinct positives derived.

    Parameters:
        games: list of games, each a list of fact strings (without the
            trailing '.').
        per_game_positives: list, paired positionally with ``games``, of
            lists of prettified positive atoms.
        generated_rules: list of ``(head, body)`` pairs of parsed atoms.

    Returns:
        ``-2 * log_likelihood + log(len(games)+1) * size`` where ``size`` is
        the total number of leaf terms over all rule bodies.
    """
    # The rule text is identical for every game, so render it once.
    rule_lines = []
    for target_head,target_body in generated_rules:
        rule_lines.append(prettify(target_head) + ':-' + ','.join([prettify(body) for body in target_body]) + '.\n')
        rule_lines.append('#show {}/{}.\n'.format(target_head['predicate'],len(target_head['terms'])))
    rule_text = ''.join(rule_lines)

    log_likelihood = 0
    for game,positives in zip(games,per_game_positives):
        program = '.\n'.join(game) + '.\n' + rule_text
        # Name the file after exactly what is written to it.
        hashed_name = 'temp' + hashlib.sha224(program).hexdigest()
        with open(hashed_name,'wb') as outfile:
            outfile.write(program)

        solved = solve([hashed_name])

        # solved maps predicate -> list of parse results -> list of atoms
        # (or is an empty list when clingo reported no witness).
        derived = [prettify(atom)
                   for predicate in solved
                   for parsed in solved[predicate]
                   for atom in parsed]

        is_good = all(atom in positives for atom in derived)
        found = set(derived)
        if is_good and found:
            # Count the distinct positives actually derived.  Counting
            # predicate names instead would cap the numerator at one per
            # shown predicate however many positives were recovered.
            log_likelihood += np.log(float(len(found))/float(len(positives)))
        else:
            log_likelihood += np.log(1e-20)

    size = np.sum([np.sum([len(get_terms(atom)) for atom in body]) for _,body in generated_rules])
    return -2*log_likelihood + np.log(len(games)+1)*size
population_size = 300

# Seed the population: every member is a list of between 1 and ``max_rules``
# generated (head, body) rules.  Failed generations (falsy head) are simply
# retried until the member has the drawn number of rules.
population = []
for ii in range(population_size):
    number_of_rules = random.randint(1,max_rules)

    generated_rules = []
    while len(generated_rules) < number_of_rules:
        target_head, target_body = generate_rule(target_rule,rules,connections,number_of_rules = random.randint(1,3))
        if target_head:
            generated_rules.append((target_head,target_body))
    population.append(generated_rules)

In [40]:

from multiprocessing import Pool
# Number of worker processes; the generation loop also uses it as batch size.
poolsize = 7

def curried(generated_rules):
    """Fitness of one population member.

    Scores ``generated_rules`` against the module-level ``games`` and
    ``all_positives`` and returns ``exp(-score / temperature)``.
    """
    score = score_rule(games,all_positives,generated_rules)
    return np.exp(-score/temperature)

pool = Pool(poolsize)



In [41]:
def copy(generated_rule):
    """Copy a list of ``(head, body)`` rules.

    Body atoms are rebuilt through ``generalize`` with probability ``-1``,
    which never upper-cases anything and therefore yields structural copies.
    Head atoms are reused as-is, not copied.
    """
    return [(pair[0],[generalize(atom,-1) for atom in pair[1]])
            for pair in generated_rule]

# Genetic search over rule sets.
# crossover_probability / mutation_probability are multiplied by
# population_size below to obtain the number of operations per generation.
crossover_probability = 0.15
mutation_probability = 0.35
generation_number = 5
for generation in range(generation_number):
    print 'GENERATION ', generation
    # Fitness of every member, evaluated in batches of ``poolsize``.
    probs = []
    for ii in range(0,len(population),poolsize):
        probs += pool.map(curried,population[ii:(ii+poolsize)])
        
    # Normalise the fitness values so they sum to one.
    # NOTE(review): if every fitness were 0 this division would produce NaNs.
    probs = np.array(probs)
    probs /= np.sum(probs)
    
    # First index holding the maximum probability; print that member.
    index = np.argmax(probs == np.max(probs))
    print index, probs[index]
    for rule in population[index]:
        print prettify(rule[0]), ':-'
        for pred in rule[1]:
               print '\t', prettify(pred),','
    # Selection: draw population_size members with replacement, each draw
    # picking index i with probability probs[i]; every pick is copied.
    new_population = []
    # chosen_indices is filled here but not read anywhere in this cell.
    chosen_indices = []
    for p in range(population_size):
        chosen = np.argmax(np.random.multinomial(1,probs,1))
        chosen_indices.append(chosen)
        new_population.append(copy(population[chosen]))
        
        
    # The last generation is selected only: no crossover or mutation.
    if generation < generation_number-1:
        # Crossover: swap rule-list tails between two randomly chosen members.
        # Only pairs where both members have more than one rule count; at
        # most population_size attempts are made.
        crossovers = crossover_probability*population_size
        iters = 0
        while crossovers > 0 and iters < population_size:
            iters +=1
            p1 = random.randint(0,len(new_population)-1)

            p2 = random.randint(0,len(new_population)-1)
            parent1 = new_population[p1]
            parent2 = new_population[p2]
            if len(parent1) > 1 and len(parent2) > 1:
                pt1 = random.randint(0,len(parent1)-1)
                pt2 = random.randint(0,len(parent2)-1)

                # NOTE(review): p1 and p2 may be equal, in which case c2
                # overwrites c1 and may contain the same rule tuple twice.
                c1 = parent1[:pt1] + parent2[pt2:]
                c2 = parent2[:pt2] + parent1[pt1:]

                new_population[p1] = c1
                new_population[p2] = c2
                crossovers -= 1

        # Mutation: each step picks a random member and edits it in place.
        mutations = mutation_probability*population_size
        iters = 0
        while mutations > 0 and iters < population_size:
            mutations -= 1
            iters +=1
            p = random.randint(0,len(new_population)-1)
            member = new_population[p]
            # Decide how many whole rules to drop (always keeping at least
            # one) and how many fresh rules to add without exceeding
            # max_rules.
            to_delete = 0
            if len(member) != 1:
                to_delete = random.randint(0,len(member)-1)
            to_add = max_rules-len(member)+to_delete
            if to_add > 0:
                to_add = random.randint(0,to_add)
            else:
                to_add = 0

            # Drop random rules: shuffle, then remove the last one.
            while to_delete > 0:
                random.shuffle(member)
                member.pop()
                to_delete -= 1

            # NOTE(review): this loop only ends once generate_rule has
            # succeeded to_add times; it spins if generation keeps failing.
            while to_add > 0:
                target_head, target_body = generate_rule(target_rule,rules,connections,number_of_rules = random.randint(1,3))
                if target_head:
                    member.append((target_head,target_body))
                    to_add -= 1
            # Edit the bodies of between 1 and len(member) rules.  A rule is
            # picked by shuffling the member and taking position 0, so the
            # same rule can be picked more than once.
            to_modify = random.randint(1,len(member))
            while to_modify > 0:
                random.shuffle(member)
                # list(...) copies the (head, body) pair, not the body list:
                # rule[1] is still the member's own body and is edited in place.
                rule = list(member[0])
                modified = False
                to_delete = 0
                # NOTE(review): an empty body would make randint(0,-1) raise
                # ValueError here -- confirm bodies are never empty.
                if len(rule[1]) != 1:
                    to_delete = random.randint(0,len(rule[1])-1)
                # Atoms may be added up to a body length of 4.
                to_add = 4-len(rule[1])+to_delete
                if to_add > 0:
                    to_add = random.randint(0,to_add)
                else:
                    to_add = 0
                if to_add > 0 or to_delete > 0:
                    modified = True
                
                while to_delete > 0:
                    random.shuffle(rule[1])
                    rule[1].pop()
                    to_delete -= 1

                # Each addition appends the whole body of a freshly generated
                # rule; that rule's head is discarded.
                while to_add > 0:
                    target_head, target_body = generate_rule(target_rule,rules,connections,number_of_rules = 1)
                    if target_head:
                        rule[1] += target_body
                        to_add -= 1
                        
                # Draws that changed nothing do not count towards to_modify.
                if modified:
                    to_modify -= 1
                member[0] = tuple(rule)
                
    population = new_population
# Fitness of the final population (raw values, not normalised).
probs = np.array(pool.map(curried,population))

# Remove the temporary programs written by score_rule.  This is done after
# the final scoring (which writes such files itself) and without a shell, so
# it also works where ``rm`` is unavailable.  Only names of the exact form
# score_rule produces -- 'temp' followed by the 56 hex digits of a SHA-224
# digest -- are deleted, so unrelated files that merely start with 'temp'
# are left alone.
for temp_name in os.listdir('.'):
    digest = temp_name[len('temp'):]
    if (temp_name.startswith('temp') and len(digest) == 56
            and all(ch in '0123456789abcdef' for ch in digest)
            and os.path.isfile(temp_name)):
        os.remove(temp_name)
            
        
    
#rules_ = solve(['temp.lp'])

GENERATION  0
220 0.823180513304
player_controls(Ventity6) :-
	result(Voutcome8,moves(Ventity1,Vdirection0,Vscalar4)) ,
	precondition(control_event(V0),Voutcome2) ,
	result(Voutcome2,moves(Ventity6,Vdirection7,Vscalar4)) ,
GENERATION  1
14 0.0125243316063
player_controls(Ventity3) :-
	precondition(control_event(V0),Voutcome5) ,
	result(Voutcome5,moves(Ventity3,Vdirection4,Vscalar1)) ,
GENERATION  2
57 0.0113185141611
player_controls(Ventity3) :-
	precondition(control_event(V0),Voutcome5) ,
	result(Voutcome5,moves(Ventity3,Vdirection4,Vscalar1)) ,
GENERATION  3
13 0.0113425089721
player_controls(Ventity3) :-
	precondition(control_event(V0),Voutcome5) ,
	result(Voutcome5,moves(Ventity3,Vdirection4,Vscalar1)) ,
GENERATION  4
2 0.009119857791
player_controls(Ventity3) :-
	precondition(control_event(V0),Voutcome5) ,
	result(Voutcome5,moves(Ventity3,Vdirection4,Vscalar1)) ,


In [42]:
index = np.argmax(probs == np.max(probs))
print index, probs[index]
for rule in population[index]:
    print prettify(rule[0]), ':-'
    for pred in rule[1]:
           print '\t', prettify(pred),','

0 0.267580520587
player_controls(Ventity3) :-
	precondition(control_event(V0),Voutcome5) ,
	result(Voutcome5,moves(Ventity3,Vdirection4,Vscalar1)) ,


In [None]:
# Raw score of the member selected above (lower scores map to higher
# fitness in ``curried``).
score_rule(games,all_positives,population[index])

In [None]:

import itertools

# For every rule of the selected member, enumerate the sets of coarsenings
# that can be applied together.
per_rule_coarsenings = coarsenings(population[index])
all_combos = []

# The loop variable is deliberately not called ``coarsenings``: rebinding
# that name would replace the function of the same name and break every
# later call to it (generate_rule calls it).
for rule_coarsenings in per_rule_coarsenings:
    rule_coarsenings = list(rule_coarsenings)

    good_combos = []
    # Every subset size from 1 up to all coarsenings; the number of subsets
    # grows exponentially with the number of coarsenings.
    for size in range(1,len(rule_coarsenings)+1):
        for combo in itertools.combinations(rule_coarsenings,size):
            # Two coarsenings clash when the text of one contains the other
            # (one atom nested in the other); such combos are skipped.
            clashes = any(c1 in c2 or c2 in c1
                          for c1,c2 in itertools.combinations(combo,2))
            if not clashes:
                good_combos.append(list(combo))
    all_combos.append(good_combos)

all_best = []
for combos, rule in zip(all_combos,population[index]):
    coarsened = coarsen([[]], [rule])
    best = score_rule(games,all_positives,coarsened)
    print best,coarsened
    best_coarsening = coarsened
    for combo in combos:
        coarsened = coarsen([combo], [rule])
        score = score_rule(games,all_positives,coarsened)
        if score < best:
            best = score
            best_coarsening = coarsened
            print best,coarsened
    all_best.append(best_coarsening)
    
    
for rule in all_best:
    
    print prettify(rule[0][0]) , ':- '
    rule_facts = [prettify(fact) for fact in rule[0][1]]
    print '\t'+',\n\t'.join(rule_facts)+'.'