In [1]:
import os
# NOTE(review): CUDA documents CUDA_LAUNCH_BLOCKING=1 as forcing synchronous kernel
# launches (a debugging aid) — confirm it is still wanted for regular runs.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_blobs

import pandas as pd
from utils.configuration import *

## Graph & Model Specifics

In [2]:
# Name of the experiment; it is joined onto LOG_DIR to form the experiment folder.
# NOTE(review): "configration" looks like a typo, but renaming it changes the folder name.
experiment_name = "create_configration_test"  # <--- set experiment name

# Root folder for experiment logs.
# NOTE(review): absolute, user-specific path — adjust when running on another machine.
LOG_DIR = "/home/bule/TramDag/dev_experiment_logs"  # <--- set log directory

In [3]:
# Folder that belongs to this experiment: <LOG_DIR>/<experiment_name>.
EXPERIMENT_DIR = os.path.join(LOG_DIR, experiment_name)

# Data location; defaults to the experiment folder itself.
DATA_PATH = EXPERIMENT_DIR  # <----------- change to different source if needed

# Location of the configuration JSON inside the experiment folder.
# The file name is a constant, so a plain string literal is used (no f-string needed).
CONF_DICT_PATH = os.path.join(EXPERIMENT_DIR, "configuration.json")

In [4]:
# Make sure the experiment folder is present before looking for the configuration file.
os.makedirs(EXPERIMENT_DIR, exist_ok=True)

# Reuse the configuration file if it is already on disk; otherwise create a new one.
if not os.path.exists(CONF_DICT_PATH):
    configuration_dict = create_and_write_new_configuration_dict(
        experiment_name, CONF_DICT_PATH, EXPERIMENT_DIR, DATA_PATH, LOG_DIR
    )
    print(f"Created new configuration file at {CONF_DICT_PATH}")
else:
    configuration_dict = load_configuration_dict(CONF_DICT_PATH)
    print(f"Loaded existing configuration from {CONF_DICT_PATH}")

Created new configuration file at /home/bule/TramDag/dev_experiment_logs/create_configration_test/configuration.json


In [5]:
# Declare every variable of the graph together with its type (continuous or ordinal);
# the model builder needs this information.
# NOTE(review): the value is spelled 'continous' on purpose here — it is passed on as-is,
# so do not "fix" the spelling without checking what the configuration helpers expect.
data_type = {
    'x1': 'continous',
    'x2': 'continous',
    'x3': 'continous',
}

write_data_type_to_configuration(data_type, CONF_DICT_PATH)

Variable 'x1' is modeled as a continuous variable. for target and predictor.
Variable 'x2' is modeled as a continuous variable. for target and predictor.
Variable 'x3' is modeled as a continuous variable. for target and predictor.
Configuration updated successfully.


In [6]:
# Opens the widget for filling in the adjacency matrix of the DAG (upper triangle only).
# NOTE(review): later cells depend on what is entered here; the role of `seed` is not visible in this notebook.
interactive_adj_matrix(CONF_DICT_PATH, seed=13)

No matrix found. Please fill out the DAG and click 'Generate'.


VBox(children=(Label(value="Fill in the adjacency matrix (upper triangle only). Use 'ls', 'cs', etc. row:FROM …

In [None]:
# Opens the widget for editing the model names of the existing (non-zero) matrix entries.
# NOTE(review): presumably requires the adjacency matrix from the previous cell — confirm.
interactive_nn_names_matrix(CONF_DICT_PATH, seed=5)

VBox(children=(Label(value='Edit only the existing model names (non-zero entries).'), GridBox(children=(Label(…

## Data Specifics

In [9]:
## Use data here to compute quantiles for normalization and write them to the config file
# 1. Generate synthetic blob data.
#    random_state is fixed so that re-running the notebook yields the same data and
#    therefore the same quantiles (the splits below are already seeded).
X, _ = make_blobs(n_samples=1000, centers=3, n_features=3, random_state=42)
df = pd.DataFrame(X, columns=["x1", "x2", "x3"])

# 2. Split the data: 80% train, then the remaining 20% half/half into validation and test.
train_df, temp_df = train_test_split(df, test_size=0.2, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# 3. Compute the 5% and 95% quantiles per column from the training data only;
#    they serve as the min/max values used for normalization.
quantiles = train_df.quantile([0.05, 0.95])
min_vals = quantiles.loc[0.05]
max_vals = quantiles.loc[0.95]

In [10]:
# Build the levels dictionary from the full data frame and the declared variable types.
# NOTE(review): presumably only ordinal variables contribute entries — confirm in create_levels_dict.
levels_dict = create_levels_dict(df, data_type)

In [11]:
# Display the levels dictionary; the recorded output is an empty dict
# (presumably because all variables are declared as continuous — confirm).
levels_dict

{}

In [12]:



# Pass the training-data 5%/95% quantiles (as min/max) and the levels on to the configuration file.
write_nodes_information_to_configuration(
    CONF_DICT_PATH, min_vals, max_vals, levels_dict
)


{'x1': {'Modelnr': 0, 'data_type': 'continous', 'node_type': 'source', 'parents': [], 'parents_datatype': {}, 'transformation_terms_in_h()': {}, 'min': -0.8380432818906335, 'max': 9.267086792414801, 'batch_size': 512, 'epochs': 100, 'learning_rate': 0.01, 'use_scheduler': False, 'transformation_term_nn_models_in_h()': {}}, 'x2': {'Modelnr': 1, 'data_type': 'continous', 'node_type': 'sink', 'parents': ['x1'], 'parents_datatype': {'x1': 'continous'}, 'transformation_terms_in_h()': {'x1': np.str_('ls')}, 'min': -9.67055777020922, 'max': 10.329009019906549, 'batch_size': 512, 'epochs': 100, 'learning_rate': 0.01, 'use_scheduler': False, 'transformation_term_nn_models_in_h()': {'x1': np.str_('LinearShift')}}, 'x3': {'Modelnr': 2, 'data_type': 'continous', 'node_type': 'source', 'parents': [], 'parents_datatype': {}, 'transformation_terms_in_h()': {}, 'min': 1.0632587352833298, 'max': 9.533947605560408, 'batch_size': 512, 'epochs': 100, 'learning_rate': 0.01, 'use_scheduler': False, 'transfo