In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import networkx
import matplotlib.pyplot as plt
import numpy as np

from ws_crl_lite.datasets.dataset import build_manual_datasets

In [3]:
def save(data_file, dataset, format="normal", order=1):
    """Write every sample of ``dataset`` to ``data_file`` with ``torch.save``.

    Args:
        data_file: Destination handed unchanged to ``torch.save`` (a path or a
            writable binary file object).
        dataset: Object whose ``dataset[:]`` yields an indexable with at least
            four entries. Entries 0 and 1 are indexed as ``[:, 0]`` / ``[:, 1]``
            and entry 2 supplies the labels in the ``"og"`` layout; entry 3 is
            bit-packed along axis 2 in the ``"normal"`` layout.
        format: ``"normal"`` saves ``(data[0], data[1], data[3], packed_labels)``.
            ``"og"`` saves ``(data[0][:, 0], data[0][:, 1], data[1][:, 0],
            data[1][:, 1], labels.long(), data[3].squeeze())``.
        order: Only used by ``"og"``. ``1`` relabels 5 -> 2 and 7 -> 3;
            ``2`` relabels 9 -> 2, 13 -> 3 and 15 -> 4; any other value leaves
            the labels untouched.

    Raises:
        NotImplementedError: If ``format`` is neither ``"normal"`` nor ``"og"``.
    """
    data = dataset[:]
    if format == "normal":
        # packbits pads each packed byte with zeros on the right, so shifting
        # by (8 - width) right-aligns the bits into a small integer label.
        # NOTE(review): this only works while data[3].shape[2] <= 8.
        mask_width = data[3].shape[2]
        intervention_labels = np.packbits(data[3], bitorder='big', axis=2) >> (8 - mask_width)
        torch.save((*data[:2], data[3], intervention_labels.squeeze()), data_file)
    elif format == "og":
        # squeeze() returns a view; clone first so the relabelling below never
        # writes through to the tensor owned by ``dataset``.
        intervention_labels = data[2].squeeze().clone()

        # (old label, new label) pairs applied in order for each ``order``.
        remap_by_order = {
            1: ((5, 2), (7, 3)),
            # labels observed for this case: [0, 1, 9, 13, 15]
            2: ((9, 2), (13, 3), (15, 4)),
        }
        for old_label, new_label in remap_by_order.get(order, ()):
            intervention_labels[intervention_labels == old_label] = new_label

        torch.save(
            (
                data[0][:, 0],
                data[0][:, 1],
                data[1][:, 0],
                data[1][:, 1],
                intervention_labels.long(),
                data[3].squeeze(),
            ),
            data_file,
        )
    else:
        raise NotImplementedError(f"unsupported format: {format!r}")


In [3]:
# Build the four splits of the "atomic_markov1" graph; the list gives one size
# per returned split, in the same order as the names on the left.
# The trailing 42 is presumably the seed (a later call in this notebook passes
# seed=42 by keyword) -- TODO confirm against build_manual_datasets.
train, dcitrain, val, test = build_manual_datasets(
    [100000, 100000, 10000, 10000],
    "atomic_markov1",
    42,
)

100%|██████████| 100000/100000 [01:52<00:00, 891.19it/s]
100%|██████████| 100000/100000 [01:49<00:00, 917.21it/s]
100%|██████████| 10000/10000 [00:22<00:00, 445.84it/s]
100%|██████████| 10000/10000 [00:10<00:00, 943.92it/s]


In [5]:
# Create the dim_3 output directory (and any missing parents) that the
# format="og" save calls write into; -p makes this a no-op if it already exists.
# NOTE(review): the ../data/ directory used by the default-format save calls is
# not created anywhere in this notebook -- it is assumed to exist already.
!mkdir -p ../results_scaling_markov1/data/scaling_custom/dim_3_mode_random_seed_42/

In [6]:
# Each split paired with the file stem it is stored under.
_splits_3d = [
    ("train", train),
    ("dci_train", dcitrain),
    ("val", val),
    ("test", test),
]

# Pass 1: default ("normal") layout, written under ../data/.
for _stem, _split in _splits_3d:
    save("../data/3d_markov1_" + _stem + ".pt", _split)

# Pass 2: "og" layout, written into the dim_3 results directory.
_og_dir_3d = "../results_scaling_markov1/data/scaling_custom/dim_3_mode_random_seed_42/"
for _stem, _split in _splits_3d:
    save(_og_dir_3d + _stem + ".pt", _split, format="og")



## 4-node graph

In [43]:
# Build the four splits of the "atomic_4d_markov1" graph; the list gives one
# size per returned split, in the same order as the names on the left.
train_4d, dcitrain_4d, val_4d, test_4d = build_manual_datasets(
    [100000, 100000, 10000, 10000],
    "atomic_4d_markov1",
    42,
)

100%|██████████| 100000/100000 [02:00<00:00, 832.75it/s]
100%|██████████| 100000/100000 [01:53<00:00, 880.50it/s]
100%|██████████| 10000/10000 [00:11<00:00, 897.89it/s]
100%|██████████| 10000/10000 [00:11<00:00, 880.54it/s]


In [44]:
# Create the dim_4 output directory (and any missing parents) for the 4-node
# splits; -p makes this a no-op if the directory already exists.
!mkdir -p ../results_scaling_markov1/data/scaling_custom_linear/dim_4_mode_random_seed_42/

In [45]:
# Write every 4-node split in the "og" layout, using the order=2 label remap.
_og_dir_4d = "../results_scaling_markov1/data/scaling_custom_linear/dim_4_mode_random_seed_42/"
_splits_4d = [
    ("train", train_4d),
    ("dci_train", dcitrain_4d),
    ("val", val_4d),
    ("test", test_4d),
]
for _stem, _split in _splits_4d:
    save(_og_dir_4d + _stem + ".pt", _split, format="og", order=2)



In [4]:
# Reload a saved training split to inspect its contents.
# NOTE(review): this reads from scaling_custom/dim_4_..., whereas the 4-node
# cell in this notebook writes to scaling_custom_linear/dim_4_... -- confirm
# this is the file that was meant to be checked.
x = torch.load("../results_scaling_markov1/data/scaling_custom/dim_4_mode_random_seed_42/train.pt")

In [5]:
# Distinct values stored at index 4 of the loaded tuple. If the file was
# written by save(..., format="og"), index 4 is the remapped intervention
# labels -- TODO confirm the file's origin.
np.unique(x[4])

array([0, 1, 2, 3, 4])

# Load their dataset

Convert it into the format our visualization tool expects.

In [46]:
# Load the reference test split; the conversion cell that follows indexes
# entries 0 through 5 of the loaded object.
theirs = torch.load("../results_scaling_markov1/data/scaling/dim_4_mode_random_seed_42/test.pt")

In [13]:
# Stack entries (0, 1) and (2, 3) pairwise, then swap the first two axes so the
# pair index sits on axis 1 instead of axis 0.
_pair_01 = torch.stack([theirs[0], theirs[1]]).permute(1, 0, 2)
_pair_23 = torch.stack([theirs[2], theirs[3]]).permute(1, 0, 2)

# Entry 5 gains a singleton middle axis; entry 4 is passed through unchanged.
_entry_5 = theirs[5][:, None, :]

# NOTE(review): the output name says "3d" but the source file above is the
# dim_4 split -- confirm the file name is intended.
torch.save(
    (_pair_01, _pair_23, _entry_5, theirs[4]),
    "../data/3d_markov1_theirs.pt",
)

In [46]:
# Define new datasets

[autoreload of ws_crl_lite.datasets.dataset failed: Traceback (most recent call last):
  File "/network/scratch/d/david-a.dobre/.conda/envs/weakly-supervised-causal/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 273, in check
    superreload(m, reload, self.old_objects)
  File "/network/scratch/d/david-a.dobre/.conda/envs/weakly-supervised-causal/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 496, in superreload
    update_generic(old_obj, new_obj)
  File "/network/scratch/d/david-a.dobre/.conda/envs/weakly-supervised-causal/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 393, in update_generic
    update(a, b)
  File "/network/scratch/d/david-a.dobre/.conda/envs/weakly-supervised-causal/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 345, in update_class
    if update_generic(old_obj, new_obj):
  File "/network/scratch/d/david-a.dobre/.conda/envs/weakly-supervised-causal/lib/python3.8/site-packages/IPyth

In [4]:
import numpy as np
import networkx as nx
from torch.distributions import Normal
from ws_crl_lite.datasets.intervset import IntervSet, IntervTable
from ws_crl_lite.datasets.dataset import GraphObjBase

In [5]:
class ATOMIC_4D_MARKOV1(GraphObjBase):
    """Graph definition with sampling rules for nodes 'A', 'B', 'C' and 'D'.

    The constructor builds a directed graph with edges A->C, A->D and C->D,
    wraps it in an ``IntervSet`` (``markov=1``), attaches an ``IntervTable``,
    and registers per-node sampling functions in ``self.links`` (used when a
    node follows its parents) and ``self.unlinks`` (used when a node is
    intervened on).

    NOTE(review): 'B' has entries in ``links``/``unlinks`` but appears in no
    edge, so ``add_edges_from`` never adds it to ``self.G``. Its link also
    reads ``parents[0]``, which suggests an incoming edge is missing -- confirm
    the intended edge list.
    """
    def __init__(self, seed: int = None):
        # The seed is forwarded to GraphObjBase; what the base class does with
        # it is not visible from this notebook.
        super().__init__(seed)

        # FIRST, CREATE A GRAPH
        self.G = nx.DiGraph()

        # Add edges to the graph (nodes are created implicitly from the edge endpoints)
        self.edges = [('A', 'C'), ('A', 'D'), ('C', 'D')]
        self.G.add_edges_from(self.edges)

        self.x = IntervSet(self.G, markov=1)  #, set_of_all_intervs=[(), (0,), (1,), (2,)])
        # A single table (key 0) holding one weight of 1.0 per intervention id.
        self.dict_of_tables = {
            0: np.ones(self.x.num_interv_ids), 
        }

        # Two values drawn uniformly from [0.1, 1) using NumPy's global RNG.
        # NOTE(review): reproducible only if something seeds np.random first
        # (possibly GraphObjBase.__init__) -- confirm.
        self.alpha_vec = np.random.uniform(0.1,1, size=(2,))

        # PASS THE TABLE AND ALPHAS TO THE INTERVSET CALCULATOR
        self.switch_case = IntervTable(self.dict_of_tables, self.alpha_vec)

        self.x.set_tables(self.switch_case)
        # Presumably disables every intervention touching 2, 3 or 4 nodes, so
        # only empty/single-node interventions remain -- TODO confirm kill() semantics.
        self.x.kill(intervs_of_size=2)
        self.x.kill(intervs_of_size=3)
        self.x.kill(intervs_of_size=4) 

        # DEFINE THE RELATIONSHIP OF EACH NODE TO ITS PARENT
        # (to automate this, just an affine transform given the parents)
        # Each entry samples from Normal(loc, scale); only parents[0] is used.
        # NOTE(review): 'D' has two incoming edges (from A and C) but reads
        # only parents[0]; which parent that is depends on the ordering the
        # caller supplies -- confirm.
        self.links = {
            'A': lambda parents: Normal(0.0, 1.0).sample(),
            'B': lambda parents: Normal(0.3 * parents[0], 0.16).sample(),
            'C': lambda parents: Normal(0.2 * parents[0], 0.2).sample(),
            'D': lambda parents: Normal(- 0.3 * parents[0], 0.4).sample(),
        }

        # DEFINE HOW THE NODES BEHAVE WHEN THEY GET INTERVENED ON
        # (to automate this, just sample from a normal or something of the sort)
        # 'A' ignores its parents, so its intervened form reuses its link with None.
        self.unlinks = {
            'A': lambda: self.links['A'](None),
            'B': lambda: Normal(0.4, 0.2).sample(),
            'C': lambda: Normal(0.1, 0.4).sample(),
            'D': lambda: Normal(-0.3, 0.3).sample()
        }

In [6]:
# Build the four splits from the graph class defined in this notebook.
# NOTE(review): ATOMIC_4D_MARKOV1() is constructed with its default seed (None)
# while the builder receives seed=42 -- confirm the graph parameters are
# reproducible. This also rebinds train/dcitrain/val/test, replacing the
# 3-node splits built earlier in the notebook.
_graph_kwargs_4d = ATOMIC_4D_MARKOV1().dataset_kwargs()
train, dcitrain, val, test = build_manual_datasets(
    [100000, 100000, 10000, 10000],
    graph_def_obj=_graph_kwargs_4d,
    seed=42,
)

  9%|▉         | 9327/100000 [00:12<01:57, 773.84it/s]