# Setting things up

In [1]:
from general import *
from exp1 import *
import time
import pandas as pd
import multiprocessing as mp

random_state=42
np.random.seed(random_state)
cpu=mp.cpu_count() 
cpu

8

In [2]:
####
alpha=.05 #Significance level
iterations=100 #MC simulations to calculate type-1 error and power
ps=[10] #Z dim
p=ps[0]
m=400 #Training set size 1200
n=100 #Test set size
B=100 #Number of permutations/samplings (RESIT,CRT,CPT)
npar = 1 #Number of different values for parameters like gamma, theta, c etc.
nbas = 25 #Number of different generative configurations (different pairs (a,b))

####
skews = [0]
losses = ['mse']

In [3]:
bas = [[np.random.normal(0, 1, p).reshape(p,1), np.random.normal(0, 1, p).reshape(p,1)] for j in range(nbas)]

In [13]:
def exp1(it, theta, gamma, c, a, b, skew, m, n, p, loss, alpha, B, 
         tests={'stfr':True, 'resit':True, 'gcm':True, 'crt':True, 'cpt':True,'rbpt':True, 'rbpt2':True}):
    print("it",it,"theta", theta,"gamma", gamma, "c",c, "a",a,"b", b, skew, m, n, p, loss, alpha, B)
    #Gen. training data
    Z_train=sample_z(m,p)
    X_train=sample_x(Z_train, b)
    Y_train=sample_y(X_train, Z_train, a, b, c, gamma, skew)

    #Gen. test data
    Z_test=sample_z(n,p)
    X_test=sample_x(Z_test, b)
    Y_test=sample_y(X_test, Z_test, a, b, c, gamma, skew)   
            
    #Fitting models
    g1 = g()
    g1.fit(X_train, Z_train, Y_train)
    g2 = g()
    g2.fit(None, Z_train, Y_train)
    g3 = g()
    g3.fit(None, Z_train, X_train)
                
    #STFR
    if tests['stfr']: 
        start_time = time.time()
        reject_stfr = (get_pval_stfr(X_test, Z_test, Y_test, g1, g2, loss=loss) <= alpha)
        time_stfr = time.time() - start_time
    else: 
        reject_stfr = np.nan
        time_stfr = np.nan
    
        
    #RESIT
    if tests['resit']: 
        start_time = time.time()
        reject_resit = (get_pval_resit(X_test, Z_test, Y_test, g2, g3, B=B) <= alpha)
        time_resit = time.time() - start_time
    else: 
        reject_resit = np.nan
        time_resit = np.nan
    
    
    #GCM
    if tests['gcm']: 
        start_time = time.time()
        reject_gcm = (get_pval_gcm(X_test, Z_test, Y_test, g2, g3) <= alpha)
        time_gcm = time.time() - start_time
    else: 
        reject_gcm = np.nan
        time_gcm = np.nan
    
    
    #CRT
    if tests['crt']: 
        start_time = time.time()
        reject_crt = (get_pval_crt(X_test, Z_test, Y_test, b, g1, g2, theta, B, loss=loss) <= alpha)
        time_crt = time.time() - start_time
    else: 
        reject_crt = np.nan   
        time_crt = np.nan   
    
    
    #CPT
    if tests['cpt']: 
        start_time = time.time()
        reject_cpt = (get_pval_cpt(X_test, Z_test, Y_test, b, g1, g2, theta, B, loss=loss) <= alpha)
        time_cpt = time.time() - start_time
    else: 
        reject_cpt = np.nan
        time_cpt = np.nan
    
    
    #RBPT
    if tests['rbpt']: 
        start_time = time.time()
        reject_rbpt = (get_pval_rbpt(X_test, Z_test, Y_test, b, g1, theta, loss=loss) <= alpha)
        time_rbpt = time.time() - start_time
    else: 
        reject_rbpt = np.nan
        time_rbpt = np.nan
    
    
    #RBPT2
    if tests['rbpt2']: 
        start_time = time.time()
        h = GridSearchCV(KernelRidge(kernel='poly'), cv=2, n_jobs=1, scoring='neg_mean_squared_error',
                      param_grid={"alpha": [1e0, 0.1, 1e-2, 1e-3], 
                                  "degree": [2]})
        #h = GridSearchCV(KernelRidge(kernel='rbf'), cv=2, n_jobs=1, scoring='neg_mean_squared_error',
        #         param_grid={"alpha": np.logspace(-1,-5,10),
        #                     "gamma": np.logspace(-1,-5,10)}) 
        
        h.fit(Z_train, g1.predict(X_train,Z_train).squeeze())
        reject_rbpt2 = (get_pval_rbpt2(X_test, Z_test, Y_test, g1, h, loss=loss) <= alpha)
        time_rbpt2 = time.time() - start_time
    else: 
        reject_rbpt2 = np.nan
        time_rbpt2 = np.nan
    
    
    #Output
    return [reject_stfr, reject_resit, reject_gcm, reject_crt, reject_cpt, reject_rbpt, reject_rbpt2,
            time_stfr, time_resit, time_gcm, time_crt, time_cpt, time_rbpt, time_rbpt2] 

In [None]:
ocal_state = np.random.RandomState(seed)
    print local_state.uniform(0, 1, 5)

In [None]:
random_state = 1

In [62]:
import numpy as np
from sklearn.linear_model import LinearRegression
from scipy.stats import skewnorm




In [63]:
def exp2(it, theta, gamma, c, a, b, skew, m, n, p):
    import time
    random_state = it
    Z_train=sample_z(m, p, random_state)
    X_train=sample_x(Z_train, b, random_state)
    Y_train=sample_y(X_train, Z_train, a, b, c, gamma, skew, random_state)
    print(X_train[0])
    return(it)

In [65]:
results = {'set':[], 'p':[],
           'ba':[], 'c':[], 'theta':[], 'skew':[], 'gamma':[], 'loss':[],
           'stfr':[], 'resit':[], 'gcm':[], 'crt':[], 'cpt':[],'rbpt':[], 'rbpt2':[]}
np.random.seed(0)
count=0
cs = [.2]
for p in ps:
    for c in cs:
        print("c",c)

        for skew in skews:
            for loss in losses:

                #### Power ####
                tests={'stfr':False, 'resit':False, 'gcm':False, 'crt':False, 'cpt':False,'rbpt':True, 'rbpt2':False}
                theta = 0
                gamma = 0
                for ba in bas:
                    # Parameters a and b
                    b = ba[0]
                    a = ba[1]
                    
                    # Running
                    pool = mp.Pool(processes=cpu)
                    
                    #out = pool.starmap(exp1, [(it, theta, gamma, c, a, b, skew, m, n, p, loss, alpha, B, tests) for it in range(2)])
                    out = pool.starmap(exp2, [(it, theta, gamma, c, a, b, skew, m, n, p) for it in range(20)])

                    print(out)
                    pool.close()
                    
                    print(np.random.normal(1))
                    
                  
                    out = np.array(out).mean(axis=0)
                    print('power',out[5])

                    # Storing results
                    results['set'].append('power')
                    results['p'].append(p)
                    results['ba'].append(count); results['c'].append(c); results['theta'].append(theta); results['skew'].append(skew); results['gamma'].append(gamma); results['loss'].append(loss) 
                    results['stfr'].append(out[0]); results['resit'].append(out[1]); results['gcm'].append(out[2]); results['crt'].append(out[3]); results['cpt'].append(out[4]); results['rbpt'].append(out[5]); results['rbpt2'].append(out[6])

        # Update 'count'
        count+=1

c 0.2
[1.47209308][1.64638292][41.6402054][8.80982596][2.91842434][1.74356023][1.93943619][0.30637589]







[1.57712681][1.24151472][1.33208016][0.96048922][12.33603808][11.17068712][24.32000951][0.47764861]







[1.47724098][0.50653438][0.15401241][1.8949941]


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
2.764052345967664



IndexError: invalid index to scalar variable.

In [16]:
results = {'set':[], 'p':[],
           'ba':[], 'c':[], 'theta':[], 'skew':[], 'gamma':[], 'loss':[],
           'stfr':[], 'resit':[], 'gcm':[], 'crt':[], 'cpt':[],'rbpt':[], 'rbpt2':[]}

count=0
for p in ps:
    for c in cs:
        print("c",c)

        for skew in skews:
            for loss in losses:

                #### Power ####
                tests={'stfr':False, 'resit':False, 'gcm':False, 'crt':False, 'cpt':False,'rbpt':True, 'rbpt2':False}
                theta = 0
                gamma = 0
                for ba in bas:
                    # Parameters a and b
                    b = ba[0]
                    a = ba[1]
                    # Running
                    
                    
                    out = [exp1(it, theta, gamma, c, a, b, skew, m, n, p, loss, alpha, B, tests) for it in range(2)]
                   
                    out = np.array(out).mean(axis=0)
                    print('power',out[5])

                    # Storing results
                    results['set'].append('power')
                    results['p'].append(p)
                    results['ba'].append(count); results['c'].append(c); results['theta'].append(theta); results['skew'].append(skew); results['gamma'].append(gamma); results['loss'].append(loss) 
                    results['stfr'].append(out[0]); results['resit'].append(out[1]); results['gcm'].append(out[2]); results['crt'].append(out[3]); results['cpt'].append(out[4]); results['rbpt'].append(out[5]); results['rbpt2'].append(out[6])

        # Update 'count'
        count+=1

c 0.2
it 0 theta 0 gamma 0 c 0.2 a [[-0.46341769]
 [-0.46572975]
 [ 0.24196227]
 [-1.91328024]
 [-1.72491783]
 [-0.56228753]
 [-1.01283112]
 [ 0.31424733]
 [-0.90802408]
 [-1.4123037 ]] b [[ 0.49671415]
 [-0.1382643 ]
 [ 0.64768854]
 [ 1.52302986]
 [-0.23415337]
 [-0.23413696]
 [ 1.57921282]
 [ 0.76743473]
 [-0.46947439]
 [ 0.54256004]] 0 400 100 10 mse 0.05 100
it 1 theta 0 gamma 0 c 0.2 a [[-0.46341769]
 [-0.46572975]
 [ 0.24196227]
 [-1.91328024]
 [-1.72491783]
 [-0.56228753]
 [-1.01283112]
 [ 0.31424733]
 [-0.90802408]
 [-1.4123037 ]] b [[ 0.49671415]
 [-0.1382643 ]
 [ 0.64768854]
 [ 1.52302986]
 [-0.23415337]
 [-0.23413696]
 [ 1.57921282]
 [ 0.76743473]
 [-0.46947439]
 [ 0.54256004]] 0 400 100 10 mse 0.05 100
power 1.0
it 0 theta 0 gamma 0 c 0.2 a [[-0.60170661]
 [ 1.85227818]
 [-0.01349722]
 [-1.05771093]
 [ 0.82254491]
 [-1.22084365]
 [ 0.2088636 ]
 [-1.95967012]
 [-1.32818605]
 [ 0.19686124]] b [[ 1.46564877]
 [-0.2257763 ]
 [ 0.0675282 ]
 [-1.42474819]
 [-0.54438272]
 [ 0.1109

it 1 theta 0 gamma 0 c 0.2 a [[-1.26088395]
 [ 0.91786195]
 [ 2.1221562 ]
 [ 1.03246526]
 [-1.51936997]
 [-0.48423407]
 [ 1.26691115]
 [-0.70766947]
 [ 0.44381943]
 [ 0.77463405]] b [[-0.79252074]
 [-0.11473644]
 [ 0.50498728]
 [ 0.86575519]
 [-1.20029641]
 [-0.33450124]
 [-0.47494531]
 [-0.65332923]
 [ 1.76545424]
 [ 0.40498171]] 0 400 100 10 mse 0.05 100
power 1.0
it 0 theta 0 gamma 0 c 0.2 a [[ 1.44127329]
 [-1.43586215]
 [ 1.16316375]
 [ 0.01023306]
 [-0.98150865]
 [ 0.46210347]
 [ 0.1990597 ]
 [-0.60021688]
 [ 0.06980208]
 [-0.3853136 ]] b [[-0.92693047]
 [-0.05952536]
 [-3.24126734]
 [-1.02438764]
 [-0.25256815]
 [-1.24778318]
 [ 1.6324113 ]
 [-1.43014138]
 [-0.44004449]
 [ 0.13074058]] 0 400 100 10 mse 0.05 100
it 1 theta 0 gamma 0 c 0.2 a [[ 1.44127329]
 [-1.43586215]
 [ 1.16316375]
 [ 0.01023306]
 [-0.98150865]
 [ 0.46210347]
 [ 0.1990597 ]
 [-0.60021688]
 [ 0.06980208]
 [-0.3853136 ]] b [[-0.92693047]
 [-0.05952536]
 [-3.24126734]
 [-1.02438764]
 [-0.25256815]
 [-1.24778318]


it 1 theta 0 gamma 0 c 0.2 a [[-0.00797264]
 [ 1.47994414]
 [ 0.07736831]
 [-0.8612842 ]
 [ 1.52312408]
 [ 0.53891004]
 [-1.03724615]
 [-0.19033868]
 [-0.87561825]
 [-1.38279973]] b [[-0.12791759]
 [-0.95554044]
 [-1.60644632]
 [ 0.20346364]
 [-0.75635075]
 [-1.42225371]
 [-0.64657288]
 [-1.081548  ]
 [ 1.68714164]
 [ 0.88163976]] 0 400 100 10 mse 0.05 100
power 0.5
