In [4]:
from problog.tasks import sample
from problog.program import PrologString
import pandas as pd
from random import random

# ProbLog program describing the load/supply/failure model that is sampled
# below.  It declares annotated probabilistic facts (a1, a3, a4, ll2, ll3,
# pl2, pl3), probabilistic rules deriving the remaining atoms, and one
# query per atom so that every atom appears in each sample.
#
# A raw string is required: the ProbLog negation operator is written `\+`,
# which is not a valid Python escape sequence and triggers a warning in a
# regular string literal.  The resulting text is unchanged.
modeltext = r"""
0.6::high_load:-ll2,ll3,pl2,pl3.
0.95::low_load:-high_load.
0.8::low_load:-ll1,pl1.
0.8::low_load:-ll2,pl2.
0.8::low_load:-ll3,pl3.
0.95::high_supply:-a2,a3.
0.95::high_supply:-a2,a4.
0.95::high_supply:-a3,a4.
0.95::low_supply:-a1.
1.0::low_supply:-high_supply.
0.95::failure:-high_load,\+high_supply.
0.95::failure:-low_load,\+low_supply.
0.98::emergency:-\+a3,\+a4.
0.7::ll1:-emergency.
0.7::pl1:-emergency.
0.75::a2:-a3.
0.75::a2:-a4.
0.85::a1.
0.95::a3.
0.95::a4.
0.8::ll2.
0.8::pl2.
0.8::ll3.
0.8::pl3.
query(low_load).
query(low_supply).
query(high_load).
query(high_supply).
query(emergency).
query(failure).
query(a1).
query(a2).
query(a3).
query(a4).
query(ll1).
query(ll2).
query(ll3).
query(pl1).
query(pl2).
query(pl3).
"""

# Wrap the program text in a PrologString; this module-level object is the
# model handed to sample.sample() by the generator functions below.
model = PrologString(modeltext)


def _noisy_frequencies(n_samples):
    """Return jittered per-variable truth frequencies over ``n_samples`` draws.

    Draws ``n_samples`` samples from the module-level ``model`` and, for each
    key appearing in the sample dicts, computes the fraction of draws in
    which its value compares equal to ``True``.  Uniform noise in
    ``[-0.1, 0.1)`` is then added to each fraction and the result is clamped
    to ``[0, 1]``.

    Returns:
        dict mapping each sampled key to its noisy, clamped frequency.  The
        dict is empty when the sampler yields nothing.
    """
    draws = sample.sample(model, n=n_samples, format='dict')
    hits = {}
    n_draws = 0
    for draw in draws:
        n_draws += 1
        for var in draw:
            # Equality with True (not truthiness) decides a hit, so a value
            # that is merely truthy but != True is counted as a miss.
            hits[var] = hits.get(var, 0) + (1 if draw[var] == True else 0)  # noqa: E712

    frequencies = {}
    for var, n_hits in hits.items():
        # (random() - 0.5) / 5 is uniform in [-0.1, 0.1).
        noisy = n_hits / float(n_draws) + (random() - 0.5) / 5
        # Clamp back into the valid probability range.
        frequencies[var] = min(max(noisy, 0), 1)
    return frequencies


def generate_prob_line(randm=False):
    """Build one "probabilistic" data row from 5 samples of the model.

    Each value is the fraction of the 5 samples in which the variable was
    true, perturbed by uniform noise in ``[-0.1, 0.1)`` and clamped to
    ``[0, 1]``.

    Args:
        randm: Accepted for interface compatibility but currently ignored;
            the noise is applied regardless of its value.
            NOTE(review): confirm whether this flag was meant to toggle the
            noise.

    Returns:
        dict mapping each queried variable to its noisy frequency.
    """
    return _noisy_frequencies(5)


def generate_det_line():
    """Build one data row from a single sample of the model.

    With one sample the raw frequency of each variable is 0 or 1; the same
    uniform noise in ``[-0.1, 0.1)`` is still added and the result clamped
    to ``[0, 1]``, so values end up in ``[0, 0.1)`` or ``[0.9, 1]``.

    Returns:
        dict mapping each queried variable to its noisy value.
    """
    return _noisy_frequencies(1)


def f(x):
    """Map a cell value to 1 if it compares equal to ``True``, otherwise 0."""
    # Equality rather than truthiness: values such as 2 or "yes" map to 0,
    # while True, 1 and 1.0 map to 1.
    return 1 if x == True else 0  # noqa: E712

def generate_dataset(size, probabilistic=False, randm=False):
    """Generate a dataset of ``size`` rows sampled from the module-level model.

    Args:
        size: Number of rows to generate.
        probabilistic: If true, each row comes from ``generate_prob_line``
            and holds noisy frequencies.  Otherwise ``size`` samples are
            drawn directly and every cell is converted to 0/1 with ``f``.
        randm: Forwarded to ``generate_prob_line``; only relevant when
            ``probabilistic`` is true.

    Returns:
        pandas.DataFrame with one row per generated line and one column per
        sampled variable.
    """
    if probabilistic:
        rows = [generate_prob_line(randm=randm) for _ in range(size)]
        return pd.DataFrame(rows)

    rows = list(sample.sample(model, n=size, format='dict'))
    frame = pd.DataFrame(rows)
    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the
    # old name is deprecated; prefer the new name when the installed pandas
    # has it.  The class is probed (not the instance) so a column label can
    # never be mistaken for the method.
    elementwise = frame.map if hasattr(pd.DataFrame, "map") else frame.applymap
    return elementwise(f)


def generate_and_write_datasets(start_size, end_size, delta=100):
    """Write one CSV dataset per multiple of ``delta`` in the requested range.

    Sizes are ``delta * k`` for every integer ``k`` from
    ``int(start_size / delta)`` up to ``int(end_size / delta)`` inclusive
    (for non-negative arguments).  Bounds are therefore rounded down to a
    multiple of ``delta``: a ``start_size`` smaller than ``delta`` yields a
    first dataset of size 0.

    Each dataset is produced by ``generate_dataset(size)`` with its default
    options and saved as ``dataset_<size>.csv`` (relative path, no index
    column).

    Args:
        start_size: Lower bound of the dataset sizes.
        end_size: Upper bound of the dataset sizes.
        delta: Step between consecutive dataset sizes.
    """
    first_step = int(start_size / delta)
    stop_step = int(end_size / delta + 1)
    for step in range(first_step, stop_step):
        n_rows = delta * step
        target = 'dataset_{}.csv'.format(str(n_rows))
        generate_dataset(n_rows).to_csv(target, index=False)

# Write Propositional Dataset

In [5]:
# With delta=50 this produces sizes 50 and 100, i.e. the files
# dataset_50.csv and dataset_100.csv.
generate_and_write_datasets(50, 100, delta=50)