# 1. Generate IV and Plug VIV into IV Regression Methods for estimating effects

In [None]:
from utils import trainEnv, trainParams, Log, cat, evaluation, set_seed
from TwinsGenerator import Generator
from VIV.viv_twins import get_IV, generate_IV
import time
import numpy as np
import pandas as pd
import os
import argparse

def _str2bool(value):
    """Parse a command-line boolean.

    ``type=bool`` cannot be used with argparse because ``bool("False")`` is
    True (any non-empty string is truthy), so the flag could never be
    switched off from the command line.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean, got {value!r}')


def _int_list(value):
    """Parse ``128,64,32`` (brackets and spaces optional) into a list of ints.

    ``type=list`` would split the raw string into single characters.
    """
    if isinstance(value, (list, tuple)):
        return [int(item) for item in value]
    tokens = str(value).strip().strip('[]()').replace(',', ' ').split()
    if not tokens:
        raise argparse.ArgumentTypeError('expected at least one layer size')
    try:
        return [int(token) for token in tokens]
    except ValueError:
        raise argparse.ArgumentTypeError(
            f'expected comma-separated integers, got {value!r}') from None


def set_args():
    """Build the experiment argument parser and return the parsed namespace.

    Returns:
        argparse.Namespace: the parsed options. When the process command line
        cannot be parsed (argparse exits, e.g. inside a notebook kernel whose
        ``sys.argv`` carries kernel flags), the defaults are returned instead.
    """
    argparser = argparse.ArgumentParser(description=__doc__)
    #### Environment
    argparser.add_argument('--seed',default=2023,type=int,help='The random seed')
    argparser.add_argument('--clear',default=False,type=_str2bool,help='Weather clear the txt of the dir')
    #### Data
    argparser.add_argument('--data',default='Twins',type=str,help='The data dir')
    # The original help strings contained '\v' and '\t', which Python turns into
    # vertical-tab / tab control characters; '/' is used instead.
    # NOTE(review): the three help texts below look copy-pasted and probably do
    # not describe rho/alpha/beta -- confirm their meaning and reword.
    argparser.add_argument('--rho',default=0.5,type=float,help='The num of train/val/test dataset')
    argparser.add_argument('--alpha',default=0.,type=float,help='The num of train/val/test dataset')
    argparser.add_argument('--beta',default=1.,type=float,help='The num of train/val/test dataset')
    argparser.add_argument('--num',default=10000,type=int,help='The num of sample (PM25:1343;IHDP:470)')
    argparser.add_argument('--reps',default=10,type=int,help='The num of reps')
    # NOTE(review): help text duplicates --reps; confirm what mX controls.
    argparser.add_argument('--mX',default=2,type=int,help='The num of reps')
    #### Model
    argparser.add_argument('--loss_x',default=1,type=float,help='loss x')
    argparser.add_argument('--loss_y',default=0.1,type=float,help='loss y')
    argparser.add_argument('--loss_t',default=10,type=float,help='loss t')
    argparser.add_argument('--kl_loss',default=0.1,type=float,help='loss kl')
    argparser.add_argument('--ad_loss',default=10,type=float,help='loss tc')

    #### Train
    argparser.add_argument('--epochs',default=100,type=int,help='The num of epochs')
    argparser.add_argument('--batch_size',default=100,type=int,help='The size of one batch')
    argparser.add_argument('--rep_dim',default=100,type=int,help='The dims of embeddings')
    argparser.add_argument('--dropout',default=0.5,type=float,help='The dropout for networks')
    argparser.add_argument('--layers',default=[128, 64, 32],type=_int_list,help='The per layers')
    argparser.add_argument('--activation',default="relu",type=str,help='activation')
    argparser.add_argument('--type',default='tensor',type=str,help='The type of data')
    argparser.add_argument('--GPU',default=True,type=_str2bool,help='The type of data')
    try:
        args = argparser.parse_args()
    except SystemExit:
        # argparse reports bad/unknown arguments by exiting; fall back to the
        # defaults so the notebook keeps running. Only SystemExit is caught so
        # KeyboardInterrupt and genuine errors still propagate.
        args = argparser.parse_args(args=[])

    return args
def run_single(run, exp, data, train_dict, log, device, resultDir, others, method):
    """Run one estimator on one experiment and evaluate it on both splits.

    The random seed is reset from ``train_dict['seed']`` before the run, the
    call to ``run`` is timed with wall-clock time, and the resulting
    estimation is passed to ``evaluation`` for ``data.train`` and then
    ``data.test``.

    Returns:
        tuple: ``(results, seconds)`` where ``results`` is
        ``cat([train_res, test_res], 1)`` and ``seconds`` is the elapsed
        wall-clock time of the ``run`` call only (evaluation is not timed).
    """
    set_seed(train_dict['seed'])

    started_at = time.time()
    estimation = run(exp, data, train_dict, log, device, resultDir, others)
    elapsed = time.time() - started_at

    # Evaluate the train split first, then the test split.
    split_results = []
    for split_name in ('train', 'test'):
        split = getattr(data, split_name)
        split_results.append(
            evaluation(split, estimation, resultDir, method, split_name, exp)
        )

    print("exp {}: {:.2f}s".format(exp, elapsed))

    return cat(split_results, 1), elapsed

def run_reps(run, reps, log, device, resultDir, others, key, method,args):
    """Run one IV regression method over several experiments and save a summary.

    For every experiment the instrument is generated with ``generate_IV`` when
    ``z_{exp}.npz`` is missing from the IV directory, otherwise it is loaded
    with ``get_IV``; the estimator is then run through ``run_single``.
    Per-experiment results plus their mean and standard deviation are written
    to ``{iv_dir}{method}-{key}.csv``.

    Reads the module-level ``Params`` (for ``Params.train_dict[key]``) and
    ``Gen`` (for ``Gen.get_exp``), which must exist before this is called.

    Args:
        run: estimator entry point, forwarded to ``run_single``.
        reps: number of experiments; values that are non-positive or larger
            than ``train_dict['reps']`` are replaced by ``train_dict['reps']``.
        log, device, resultDir, others: forwarded to ``run_single``;
            ``resultDir`` is also the prefix of the IV directory.
        key: selects the training configuration in ``Params.train_dict``.
        method: format string with one ``{}`` placeholder that receives the
            loss-weight tag.
        args: namespace providing ``loss_x``, ``loss_y``, ``loss_t``,
            ``kl_loss`` and ``ad_loss``.

    Returns:
        tuple: ``(Results, Plots, Times, Results_df)``. ``Plots`` is always an
        empty list; ``Results`` includes the appended mean and std rows.
    """
    Results, Plots, Times = [], [], []
    train_dict = Params.train_dict[key]

    # A single tag encodes the loss weights. It names both the method and the
    # IV directory, so it is built once instead of being formatted twice.
    K = f'{args.loss_x}_{args.loss_y}_{args.loss_t}_{args.kl_loss}_{args.ad_loss}'
    method = method.format(K)
    # Loop-invariant and needed after the loop for the CSV path, so it is
    # defined here rather than inside the loop body.
    iv_dir = f'{resultDir}{K}/'

    if reps > train_dict['reps'] or reps <= 0:
        reps = train_dict['reps']

    for exp in range(reps):
        data = Gen.get_exp(exp)

        # Reuse a previously generated instrument when its file is present.
        if not os.path.exists(iv_dir+f'z_{exp}.npz'):
            generate_IV(data,resultDir,exp,args,iv_dir)
        else:
            get_IV(data, exp,iv_dir)
        single_result, single_time = run_single(run, exp, data, train_dict, log, device, resultDir, others, method)
        Results.append(single_result)
        Times.append(single_time)

    Results = cat(Results,0)
    Times = np.array(Times)

    # Append the summary rows after the per-experiment rows.
    mean = np.mean(Results,axis=0, keepdims=True)
    std = np.std(Results,axis=0,keepdims=True)
    Results = cat([Results, mean, std], 0)

    losses = ['g(0)','f(0,x)','f(0,x)+u','g(s)','f(s,x)','f(s,x)+u','g(t)','f(t,x)','f(t,x)+u']
    columns = [f'{mode}-{loss}' for mode in ['train','test'] for loss in losses]
    # The last two rows are the mean/std appended above.
    index = list(range(len(Results)-2))+['mean','std']
    Results_df = pd.DataFrame(Results, index=index, columns=columns).round(4)
    Results_df.to_csv(f'{iv_dir}{method}-{key}.csv')
    return Results, Plots, Times, Results_df

# Module-level setup shared by the experiment runs; run_reps reads the
# `Params` and `Gen` globals defined here, so this must execute first.

# Training environment built with a fixed seed.
# NOTE(review): CUDA=6 presumably selects a GPU index -- confirm it exists on the target machine.
Env = trainEnv(CUDA=6,seed=2023)
device = Env.device
# Parse command-line options (defaults are used when parsing fails).
args = set_args()
# Derive the parameter object from the parsed options and persist it via save_json().
Params = trainParams(args)
Params.save_json()
resultDir = Params.resultDir


log = Log(Params.log_dict)
# NOTE(review): meaning of G=False is not visible here -- confirm against Generator.
Gen = Generator(Params.gens_dict, G=False)
# Extra options forwarded unchanged to every estimator's run(); empty here.
others = {}


In [None]:
from Module.Instruments.TwoSLS.Poly import run as run0
from Module.Instruments.NN.DirectNN import run as run1
from Module.Instruments.DeepIV.DeepIV import run as run2
from Module.Instruments.KernelIV.KernelIV import run as run3
from Module.Instruments.OneSIV.OneSIV import run as run4
from Module.Instruments.DeepGMM.DeepGMM import run as run5
from Module.Instruments.AGMM.AGMM import run as run6

# Run every IV regression method over args.reps experiments. Each call returns
# (Results, Plots, Times, Results_df) and writes a CSV summary. The third-to-last
# argument is the key into Params.train_dict; the next one is the method-name
# template whose '{}' receives the loss-weight tag.
# NOTE(review): Poly and DirectNN share the 'nn' key, KernelIV uses 'dfiv', and
# DeepIV's template is 'VIVper{}' while the others use 'VIV{}' -- confirm these
# are intentional.
re0 = run_reps(run0, args.reps, log, device, resultDir, others, 'nn', 'VIV{}-Poly2Stage',args)
re1 = run_reps(run1, args.reps, log, device, resultDir, others, 'nn', 'VIV{}-NN2Stage',args)
re2 = run_reps(run2, args.reps, log, device, resultDir, others, 'deepiv', 'VIVper{}-DeepIV',args)
re3 = run_reps(run3, args.reps, log, device, resultDir, others, 'dfiv', 'VIV{}-KernelIV',args)
re4 = run_reps(run4, args.reps, log, device, resultDir, others, 'onesiv', 'VIV{}-OneSIV',args)
re5 = run_reps(run5, args.reps, log, device, resultDir, others, 'deepgmm', 'VIV{}-DeepGMM',args)
re6 = run_reps(run6, args.reps, log, device, resultDir, others, 'agmm', 'VIV{}-AGMM',args)





