# Example usage of the Environment class to load custom (static) datasets.

This notebook illustrates how to use the Environment class to load custom (static) datasets from .csv files. Both observational and interventional data are supported.

In [None]:

import os

import networkx as nx
import pandas as pd

from src.config import EnvironmentConfig
from src.environments.environment import Environment
from src.environments.generic_environments import ErdosRenyi
from src.utils.graphs import graph_from_csv


First, we generate a ground truth environment/SCM to generate some dummy data.


In [None]:
# Start from the default configuration and override the dataset-related options.
env_cfg = EnvironmentConfig()
cfg_overrides = {
    # observational data
    'generate_static_obs_dataset': True,
    'num_observational_train_samples': 30,
    'num_observational_test_samples': 20,
    # interventional data
    'generate_static_intr_dataset': True,
    'num_train_interventions': 5,
    'num_interventional_train_samples': 10,
    'num_test_interventions': 1,
    'num_interventional_test_samples': 10,
    # keep the generated data un-normalised
    'normalise_data': False,
}
for option, value in cfg_overrides.items():
    setattr(env_cfg, option, value)

# Build the ground-truth environment on five nodes.
env = ErdosRenyi(num_nodes=5, cfg=env_cfg)

# Draw the true graph on a circular layout, labelling every node with itself.
node_positions = nx.circular_layout(env.graph)
node_labels = {node: node for node in env.graph.nodes}
nx.draw(env.graph, node_positions, labels=node_labels)


Now we export the observational and interventional data as .csv files to some test directory. You can explore the generated .csv files to see the data format.

In [None]:

# Target directory for the exported .csv files; it is created on demand and an
# already existing directory is not treated as an error.
testdir = '../test/'
os.makedirs(name=testdir, exist_ok=True)

# Write the environment's data to the target directory.
env.export_to_csv(testdir)


In the following, we use the previously exported data to illustrate how to load a static dataset. In the simplest case, we only load the graph and the observational training data.

In [None]:

# Locate the exported adjacency matrix and observational training data.
adj_mat_path = os.path.join(testdir, f'{env.name}-adj-mat.csv')
obs_train_path = os.path.join(testdir, f'{env.name}-obs-train.csv')

graph = graph_from_csv(adj_mat_path)
obs_train = pd.read_csv(obs_train_path)

# Minimal case: only the graph and the observational training data are passed.
loaded_env = Environment.load_static_dataset(graph, obs_train)
print(loaded_env.get_adj_mat())

# Inspect the first entry of the loaded observational training data.
obs_experiment = loaded_env.observational_train_data[0]
print(obs_experiment.interventions)
print(obs_experiment.num_batches)
print(obs_experiment.batch_size)


We can also load observational test data and interventional data like so. It's also possible to only load interventional (training) data, etc.

In [None]:
# Graph and observational data (train and test split).
adj_mat_path = os.path.join(testdir, f'{env.name}-adj-mat.csv')
obs_train_path = os.path.join(testdir, f'{env.name}-obs-train.csv')
obs_test_path = os.path.join(testdir, f'{env.name}-obs-test.csv')

graph = graph_from_csv(adj_mat_path)
obs_train = pd.read_csv(obs_train_path)
obs_test = pd.read_csv(obs_test_path)

# Interventional data: one .csv file per experiment, passed as a list of data frames.
intr_train_paths = [os.path.join(testdir, f'{env.name}-intr-train-1.csv'),
                    os.path.join(testdir, f'{env.name}-intr-train-2.csv')]
intr_train = [pd.read_csv(path) for path in intr_train_paths]

# NOTE(review): the configuration cell sets num_test_interventions = 1, yet two
# test files are read here -- confirm that export_to_csv writes an intr-test-2 file.
intr_test_paths = [os.path.join(testdir, f'{env.name}-intr-test-1.csv'),
                   os.path.join(testdir, f'{env.name}-intr-test-2.csv')]
intr_test = [pd.read_csv(path) for path in intr_test_paths]

# Load everything at once, with normalisation switched off.
loaded_env = Environment.load_static_dataset(graph, obs_train, obs_test, intr_train, intr_test, normalise=False)

# Inspect the first entry of the loaded interventional training data.
intr_experiment = loaded_env.interventional_train_data[0]
print(intr_experiment.interventions)
print(intr_experiment.num_batches)
print(intr_experiment.batch_size)

Finally, it's also possible to normalise the data to zero mean and unit variance by using the `normalise` option. The normalisation constants are computed from the observational training data in this case.

In [None]:
# Graph and observational training data.
adj_mat_path = os.path.join(testdir, f'{env.name}-adj-mat.csv')
obs_train_path = os.path.join(testdir, f'{env.name}-obs-train.csv')

graph = graph_from_csv(adj_mat_path)
obs_train = pd.read_csv(obs_train_path)

# Two interventional training experiments, one data frame per exported file.
intr_train_paths = [os.path.join(testdir, f'{env.name}-intr-train-1.csv'),
                    os.path.join(testdir, f'{env.name}-intr-train-2.csv')]
intr_train = [pd.read_csv(path) for path in intr_train_paths]

# Same loader as before, this time with the `normalise` option enabled.
loaded_env = Environment.load_static_dataset(graph, obs_train, intr_train_data=intr_train, normalise=True)

# Inspect the first entry of the loaded interventional training data.
intr_experiment = loaded_env.interventional_train_data[0]
print(intr_experiment.interventions)
print(intr_experiment.num_batches)
print(intr_experiment.batch_size)