In [25]:
import numpy as np
import pandas as pd

### Load positive and negative data

In [26]:
from scar import load_scar
# load_scar() returns three splits, unpacked here as train / validation / test.
train, valid, test = load_scar()

In [27]:
# Unpack the training split into its three components.
# NOTE(review): presumably x = features, y = true class, s = observed label
# indicator — confirm against scar.load_scar.
x, y, s = train

### Labeling

In [28]:
from labeling import LabelingMechanism, label_frequency

def get_sample(data, lm, is_train=False, rng=None):
    """Draw observed labels ``s`` for one data split using a labeling mechanism.

    Parameters
    ----------
    data : tuple
        ``(x, y, _)``. The third element is ignored; it is replaced by the
        newly drawn labels in the returned tuple. ``x`` must expose
        ``shape[0]`` (the number of examples).
    lm : object
        Labeling mechanism providing ``fit(x)`` and ``propensity_score(x)``.
    is_train : bool, default False
        When true, ``lm.fit(x)`` is called before the propensity scores are
        computed, so the mechanism is fitted on this split.
    rng : object, optional
        Random source exposing ``uniform(size=...)``, e.g. a
        ``numpy.random.Generator`` or ``numpy.random.RandomState``. When
        ``None`` (the default) the global ``np.random`` state is used.

    Returns
    -------
    tuple
        ``(x, y, s)`` where ``s`` is an integer array equal to
        ``y * (u < score)`` with ``u`` uniform on [0, 1): an example can only
        receive ``s != 0`` when its ``y`` is non-zero.
    """
    x, y, _ = data
    if is_train:
        lm.fit(x)
    score = lm.propensity_score(x)

    # One uniform draw per example; an explicit rng makes the draw reproducible
    # without touching the global NumPy random state.
    n_examples = x.shape[0]
    if rng is None:
        draws = np.random.uniform(size=n_examples)
    else:
        draws = rng.uniform(size=n_examples)

    s = (y * (draws < score)).astype(int)
    return x, y, s

In [29]:
# Build the labeling mechanism whose propensity scores drive the label draws
# in get_sample().
# NOTE(review): the meaning of the two list arguments and of min_prob/max_prob
# is defined in labeling.LabelingMechanism; presumably they select the
# attributes / their strengths and bound the propensity score — confirm there.
lm = LabelingMechanism([0,1],[1,1], min_prob=0, max_prob=1)

In [30]:
# Seed the global NumPy RNG so the random label draws made inside get_sample()
# are identical on every run and on every re-execution of this cell.
SEED = 0
np.random.seed(SEED)

# The mechanism is fitted on the training split only (is_train=True); the
# fitted mechanism is then reused to draw labels for validation and test.
train = get_sample(train, lm, is_train=True)
valid = get_sample(valid, lm)
test = get_sample(test, lm)

In [31]:
# Re-unpack so x, y, s refer to the training split returned by get_sample()
# (s now holds the freshly drawn labels).
x, y, s = train

In [32]:
# Compute the label frequency for the training split with the fitted mechanism.
# NOTE(review): presumably c estimates P(s=1 | y=1) — confirm against
# labeling.label_frequency.
c = label_frequency(x, y, lm)
print(c)

0.17378894009288337


### Save parameters

In [33]:
import json
# Collect the mechanism's minx / maxx attributes and the label frequency c
# into a plain dict so they can be serialized as JSON.
param = {"minx" : list(lm.minx),
         "maxx" : list(lm.maxx),
         "c" : c}
# Use a context manager so the file is closed (and therefore flushed to disk)
# as soon as the dump finishes, instead of staying open for the kernel's
# lifetime.
with open("pg/param.json", "w") as json_file:
    json.dump(param, json_file)
print(param)

{'minx': [-3.607771873474121, -3.41719388961792], 'maxx': [6.421125411987305, 6.955304145812988], 'c': 0.17378894009288337}


### Visualize created data

In [34]:
from utils import plot_x_y
# Plot the training inputs x together with y.
# NOTE(review): presumably points are colored by the true class — confirm in
# utils.plot_x_y.
plot_x_y(x, y)

In [35]:
from utils import plot_x_s
# Plot the training inputs x together with the drawn labels s.
# NOTE(review): presumably points are colored by labeled / unlabeled status —
# confirm in utils.plot_x_s.
plot_x_s(x, s)

### Save data

In [36]:
# Write each split to its own CSV file: a header line "x_0,x_1,y,s" followed by
# one row per example. Only the first two entries of each x row are written.
# Note: x, y, s are rebound at notebook scope on every pass, so after this cell
# they hold the test split.
for path, split in (("pg/train.csv", train),
                    ("pg/valid.csv", valid),
                    ("pg/test.csv", test)):
    with open(path, "wt") as f:
        x, y, s = split
        f.write("x_0,x_1,y,s\n")
        for i in range(len(x)):
            f.write(f"{x[i][0]},{x[i][1]},{y[i]},{s[i]}\n")