# setPath

In [None]:
from sys import path

# Put the parent directory on the import path before the cells below
# import `utils` and `module` (presumably located there - confirm).
path.append("../")

# `storage_path` is the root handed to every generator; `dataDir` is the
# 'Causal/' sub-folder of that root, passed to the IHDP/Twins generators.
storage_path = '../Data/'
dataDir = storage_path + 'Causal/'

# dataGenerator

In [None]:
from utils import Syn_Generator, IHDP_Generator, Twins_Generator

# Naming convention: the numeric suffix of each generator spells out its
# (mV, mX, mU) setting, e.g. Syn_244 -> mV=2, mX=4, mU=4.

# Keyword arguments shared by every generator that is built from files
# under `dataDir`.
_real_data_kwargs = dict(details=1, dataDir=dataDir, storage_path=storage_path)

# Synthetic benchmark.
Syn_244 = Syn_Generator(
    n=10000,
    ate=0,
    sc=1,
    sh=0,
    one=1,
    depX=0.05,
    depU=0.05,
    VX=1,
    mV=2,
    mX=4,
    mU=4,
    init_seed=7,
    seed_coef=10,
    details=1,
    storage_path=storage_path,
)
Syn_244.run(n=10000, num_reps=10)

# IHDP benchmark, two settings.
# The positional argument of run() is presumably the number of
# replications - confirm against utils.
IHDP_242 = IHDP_Generator(mV=2, mX=4, mU=2, **_real_data_kwargs)
IHDP_242.run(100)

IHDP_260 = IHDP_Generator(mV=2, mX=6, mU=0, **_real_data_kwargs)
IHDP_260.run(100)

# Twins benchmark, two settings.
Twins_553 = Twins_Generator(sc=1, sh=-2, mV=5, mX=5, mU=3, **_real_data_kwargs)
Twins_553.run(10)

Twins_580 = Twins_Generator(sc=1, sh=-2, mV=5, mX=8, mU=0, **_real_data_kwargs)
Twins_580.run(10)

In [None]:
# Every generated benchmark, in the order the later cells iterate over them.
# Keep Syn_244 first: the Stage1 cell selects it with Datasets[0].
Datasets = [
    Syn_244,
    IHDP_242,
    IHDP_260,
    Twins_553,
    Twins_580,
]

# Pretrain: Stage1

In [None]:
import os
import argparse
import pandas as pd
import numpy as np
import torch

from utils import log, CausalDataset
from module.Regression import run as run_Reg

# Process-wide environment settings for the training cells:
#   CUDA_VISIBLE_DEVICES - limit CUDA to the GPU with index 2.
#   TF_CPP_MIN_LOG_LEVEL - reduce TensorFlow's native log output.
os.environ.update({
    "CUDA_VISIBLE_DEVICES": '2',
    'TF_CPP_MIN_LOG_LEVEL': '2',
})

def get_args(argv=None):
    """Build the argument namespace used by the Stage1 regression runs.

    Args:
        argv: Optional sequence of command-line style tokens, e.g.
            ``['--mode', 'xx', '--use_gpu', 'False']``. When ``None``
            (the default) an empty list is parsed, so the notebook
            kernel's own ``sys.argv`` is ignored and every option takes
            its default value.

    Returns:
        argparse.Namespace holding the run, data and regression settings.
    """

    def str2bool(value):
        """Parse a boolean flag value.

        ``type=bool`` must not be used with argparse: ``bool('False')`` is
        ``True`` because any non-empty string is truthy.
        """
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        if lowered in ('false', 'f', 'no', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError(f'Expected a boolean value, got {value!r}')

    argparser = argparse.ArgumentParser(description=__doc__)
    # About run setting
    argparser.add_argument('--seed',default=2021,type=int,help='The random seed')
    argparser.add_argument('--mode',default='vx',type=str,help='The choice of v/x/vx/xx')
    argparser.add_argument('--rewrite_log',default=False,type=str2bool,help='Whether rewrite log file')
    argparser.add_argument('--use_gpu',default=True,type=str2bool,help='The use of GPU')
    # About data setting
    # The help text uses '/' separators: the former 'train\val\test' literal
    # embedded a vertical tab (\v) and a tab (\t) in the message.
    argparser.add_argument('--num',default=10000,type=int,help='The num of train/val/test dataset')
    argparser.add_argument('--num_reps',default=10,type=int,help='The num of replications (experiments) per dataset')
    argparser.add_argument('--ate',default=0,type=float,help='The ate of constant')
    argparser.add_argument('--mV',default=2,type=int,help='The dim of Instrumental variables V')
    argparser.add_argument('--mX',default=4,type=int,help='The dim of Confounding variables X')
    argparser.add_argument('--mU',default=4,type=int,help='The dim of Unobserved confounding variables U')
    argparser.add_argument('--storage_path',default='../Data/',type=str,help='The dir of data storage')
    # About Regression_t
    argparser.add_argument('--regt_batch_size',default=500,type=int,help='The size of one batch')
    argparser.add_argument('--regt_lr',default=0.05,type=float,help='The learning rate')
    argparser.add_argument('--regt_num_epoch',default=3,type=int,help='The num of total epoch')
    # Parse an explicit list and never sys.argv, which inside a notebook
    # holds the kernel's launch arguments.
    args = argparser.parse_args(args=[] if argv is None else list(argv))
    return args

args = get_args()

# Always bind `device`: previously it was only assigned when args.use_gpu was
# true, so a CPU-only configuration failed with NameError at the first
# run_Reg(..., device) call. CUDA is used only when requested and available.
device = torch.device('cuda' if args.use_gpu and torch.cuda.is_available() else 'cpu')

In [None]:
# Run the Stage1 regression on Syn_244 (Datasets[0]) for modes 'xx', 'v', 'x'.
#
# NOTE(review): `dataDir` is rebound here and no longer holds the value the
# generator cell passes to IHDP_Generator/Twins_Generator; re-run the setPath
# cell before re-running the generators.

# Column groups handed to every CausalDataset (kept as a tuple so each
# dataset still receives its own fresh list).
variable_names = ('u','x','v','z','p','s','m','t','g','y','f','c')

data = Datasets[0]
which_benchmark = data.which_benchmark
which_dataset = data.which_dataset

# Paths depend only on the dataset, not on the mode, so build them once.
resultDir = args.storage_path + f'/results/{which_benchmark}_{which_dataset}/'
dataDir = f'{args.storage_path}/data/{which_benchmark}/{which_dataset}/'
os.makedirs(os.path.dirname(resultDir), exist_ok=True)
logfile = f'{resultDir}/log.txt'

# Truncate the log once per dataset, not once per mode, so a rewrite does
# not discard what earlier modes logged (assuming run_Reg writes to this
# file - confirm).
if args.rewrite_log:
    with open(logfile, 'w'):
        pass

for mode in ['xx','v','x']:
    # (Re)apply the dataset's settings to the shared args namespace for each
    # mode, exactly as before.
    args.num_reps = data.num_reps
    args.mV = data.mV
    args.mX = data.mX
    args.mU = data.mU
    args.mode = mode

    for exp in range(args.num_reps):
        # Each replication lives in its own numbered sub-folder of dataDir.
        train_df = pd.read_csv(dataDir + f'{exp}/train.csv')
        val_df = pd.read_csv(dataDir + f'{exp}/val.csv')
        test_df = pd.read_csv(dataDir + f'{exp}/test.csv')

        train = CausalDataset(train_df, variables=list(variable_names))
        val = CausalDataset(val_df, variables=list(variable_names))
        test = CausalDataset(test_df, variables=list(variable_names))

        train,val,test = run_Reg(exp, args, dataDir, resultDir, train, val, test, device)

In [None]:
# Run the Stage1 regression in mode 'vx' on every benchmark in Datasets.
#
# NOTE(review): `dataDir` is rebound here and no longer holds the value the
# generator cell passes to IHDP_Generator/Twins_Generator; re-run the setPath
# cell before re-running the generators.

# Column groups handed to every CausalDataset (kept as a tuple so each
# dataset still receives its own fresh list).
variable_names = ('u','x','v','z','p','s','m','t','g','y','f','c')

for data in Datasets:
    which_benchmark = data.which_benchmark
    which_dataset = data.which_dataset

    # Copy the dataset's settings onto the shared args namespace.
    args.num_reps = data.num_reps
    args.mV = data.mV
    args.mX = data.mX
    args.mU = data.mU
    args.mode = 'vx'

    resultDir = args.storage_path + f'/results/{which_benchmark}_{which_dataset}/'
    dataDir = f'{args.storage_path}/data/{which_benchmark}/{which_dataset}/'
    os.makedirs(os.path.dirname(resultDir), exist_ok=True)
    logfile = f'{resultDir}/log.txt'

    # Opening in 'w' mode truncates (or creates) the log; the context
    # manager closes the handle even if an error occurs.
    if args.rewrite_log:
        with open(logfile, 'w'):
            pass

    for exp in range(args.num_reps):
        # Each replication lives in its own numbered sub-folder of dataDir.
        train_df = pd.read_csv(dataDir + f'{exp}/train.csv')
        val_df = pd.read_csv(dataDir + f'{exp}/val.csv')
        test_df = pd.read_csv(dataDir + f'{exp}/test.csv')

        train = CausalDataset(train_df, variables=list(variable_names))
        val = CausalDataset(val_df, variables=list(variable_names))
        test = CausalDataset(test_df, variables=list(variable_names))

        train,val,test = run_Reg(exp, args, dataDir, resultDir, train, val, test, device)