In [9]:
#%%timeit
#%%timeit -n 1 -r 1 # time cost for 1 run with 1 loop

############################################################### Packages
import numpy as np
import numpy.random as npr
from scipy.stats import levy_stable  
from sklearn.covariance import empirical_covariance
import matplotlib.pyplot as plt
#import random
import math 
import pandas as pd
import scipy.special
#from scipy.stats import rankdata
import numba
import timeit
import time
import warnings
import psutil


############################################################### Miscellenous

warnings.filterwarnings('ignore')
npr.seed(0)

############################################################### Section 1: Generating N*n*d real matrix as data sample for different models


######## Inductive model exhibing pairwise independence but not triplewise independence

@numba.njit(parallel=True, fastmath=True)
def inductive_model_iterated_numba(initializing_normal_sample,N,n,d):
    X=np.empty((N,n,d))
    for k in numba.prange(N):
        for i in numba.prange(n):
            X[k,i,0] = initializing_normal_sample[k,i,0]
            X[k,i,1] = initializing_normal_sample[k,i,1]
            for j in numba.prange(2,d):
                if X[k,i,j-1]+X[k,i,j-2] > 1:
                     X[k,i,j] =X[k,i,j-1]+X[k,i,j-2]-1
                else:
                    X[k,i,j] =X[k,i,j-1]+X[k,i,j-2]
    return X

######## d-variate Gumbel distribution. Algorithm by Marshall-Olkin - @numba.njit
######## https://cran.r-project.org/web/packages/copula/vignettes/nacopula-pkg.pdf

def Stable_distrib_data(N,n,t):
    if t == 1:
        V=levy_stable.rvs(1, 1, loc=1, scale=np.cos(math.pi/(2)), size=(N,n,1), random_state=None)
    else:
        V=levy_stable.rvs(1/t, 1, loc=0, scale=np.cos(math.pi/(2*t))**t, size=(N,n,1), random_state=None)
    return V

@numba.njit(parallel=True, fastmath=True)
def Gumbel_Marshall_Olkin_iterated_numba(V,d,t):
    n = V.shape[1]
    X=npr.exponential(1,size=(N,n,d))
    G=np.empty_like(X)
    for k in numba.prange(N):
        for i in numba.prange(n):
            for j in numba.prange(d):
                G[k,i,j] = np.exp(  -(X[k,i,j]/V[k,i,0]**(1/t)  ))
    return G
                                                        

######## Geisser-Mantel Model - @numba.njit

@numba.njit(parallel=True, fastmath=True)
def my_mean_numba(a):
    out = np.empty((1,a.shape[1]))
    for j in numba.prange(a.shape[1]):
        out[0,j] = np.sum(a[:,j])/a.shape[0]
    return out

@numba.njit(parallel=True, fastmath=True)
def empirical_cov_numba(G): 
    d=G.shape[1]
    n=G.shape[0]
    M=np.empty((1,d)) 
    M[0,:]=my_mean_numba(G)[0,:] # same as M[0,:]=np.mean(G,axis=0)
    I = np.ones((n,1))
    out=np.dot( np.transpose(G-np.dot(I,M)), G-np.dot(I,M) )
    return (1/(n))*out # n or n-1 ? the case n matches with scikit-learn empirical_covariance function

@numba.njit(parallel=True, fastmath=True)
def pairwise_corr_numba_numba(G,p,m): # G is a (p+m)*p real matrix
    U = empirical_cov_numba(G) # d*d matrix
    T = np.tril(U,-1) # set to 0 the upper triangular part of the square matrix U, including the diagonal
    T=T.flatten() # flatten the matrix to a row vector
    A = np.nonzero(T) # mask of indices giving nonzero values of T
    out = T[A] # 1*d array of shape (d,) 
    return np.reshape(out, (1,int(p*(p-1)/2))) # reshaping to get an array (1,d) 

@numba.njit(parallel=False, fastmath=True) # Cannot set parallel=True because of issues with slicing
def Geisser_Mantel_numba(G_n,n,p,m): 
    X = np.empty( (n,int(p*(p-1)/2)))
    for k in numba.prange(n):
        X[k,:] = pairwise_corr_numba_numba(G_n[k,:],p,m)
    return X

@numba.njit(parallel=False, fastmath=True)
def Geisser_Mantel_iterated_numba(G_N,N,n,p,m): # d=p*(p-1)/2 
    d = int(p*(p-1)/2)
    X = np.empty((N,n,d))
    for k in numba.prange(N):
        X[k,:,:] = Geisser_Mantel_numba(G_N[k,:,:,:],n,p,m)
    return X


######## Elliptical model: Gaussian vector with determined Kendall's tau L^2 norm - no njit

@numba.jit(parallel=True, fastmath=True) 
def iterated_gaussian_vector_sample(N,n,d,tau):
    p = math.sin((math.pi/2)*math.sqrt((2*tau)/(d*(d-1))))
    cov = p*np.ones((d,d))
    np.fill_diagonal(cov, 1)
    return npr.multivariate_normal(np.zeros(d),cov,size=(N,n))

    
######## Truncated Romano-Siegel Model, Section 4.2 of Genest and Rémillard (2004) - pairwise indep but not jointly indep

def Romano_Siegel_iterated_data(N,n):
    Z = npr.normal(0,1,size=(N,n,5))
    for k in range(N):
        for i in range(n):
            Z[k,i,0]= np.absolute(Z[k,i,0])*np.sign(Z[k,i,1]*Z[k,i,2])
            Z[k,i,4]= Z[k,i,3]/2 + np.sqrt(3)*Z[k,i,4]/2
    return Z

@numba.njit(parallel=True, fastmath=True)
def Romano_Siegel_Numba_iterated_data(g,N,n): #g = npr.normal(0,1,size=(N,n,5))
    for k in numba.prange(N):
        for i in numba.prange(n):
            g[k,i,0]= np.absolute(g[k,i,0])*np.sign(g[k,i,1]*g[k,i,2])
            g[k,i,4]= g[k,i,3]/2 + np.sqrt(3)*g[k,i,4]/2
    return g

@numba.njit(parallel=True, fastmath=True)
def Trunc_Romano_Siegel_Numba_iterated_data(g,N,n): #g = npr.normal(0,1,size=(N,n,3))
    for k in numba.prange(N):
        for i in numba.prange(n):
            g[k,i,0]= np.absolute(g[k,i,0])*np.sign(g[k,i,1]*g[k,i,2])
    return g

@numba.njit(parallel=True, fastmath=True)
def Trunc_d_Romano_Siegel_Numba_iterated_data(g,N,n,p): #g = npr.normal(0,1,size=(N,n,3*p))
    for k in numba.prange(N):
        for i in numba.prange(n):
            for j in numba.prange(p):
                g[k,i,3*j+0]= np.absolute(g[k,i,3*j+0])*np.sign(g[k,i,3*j+1]*g[k,i,3*j+2])
    return g

############################################################### Section 2: Computing the statistics - @numba.njit

@numba.njit(parallel=True, fastmath=True)
def rank(U):
    R = np.empty_like(U)
    for j in numba.prange(U.shape[1]):
        R[:, j] = np.argsort(np.argsort(U[:, j]))+1
    return R

@numba.njit(parallel=True, fastmath=True)
def I_multiarray_Genest_Remillard(U): # Giving a data sample U of size n*d, return the 3D-array of the elementary block I^{(p)}_{i,j} from Genest-Rémillard statistic
    d = U.shape[1]
    n = U.shape[0]
    J = np.empty((d,n,n))
    R = rank(U)
    for p in numba.prange(0,d):
        for i in numba.prange(0,n):
            for j in numba.prange(0,n):
                J[p,i,j] = 1/3 + 1/(6*n) + (R[i,p]*(R[i,p]-1))/(2*n*(n+1)) + (R[j,p]*(R[j,p]-1))/(2*n*(n+1)) - max(R[i,p],R[j,p])/(n+1)
    return J

@numba.njit(parallel=True, fastmath=True)
def raw_statistics_Genest_Remillard_234(U,binom_2,binom_3,binom_4): # U data sample of size n*d
    S_2 = 0
    S_3 = 0
    S_4 = 0
    J = I_multiarray_Genest_Remillard(U)
    d = J.shape[0]
    n = J.shape[1]
    out = np.empty((1,3))
    for p in numba.prange(0,d):
        for q in numba.prange(0,p):
            S_2 += (1/n)*np.sum(np.multiply(J[p,:,:],J[q,:,:]))- 1/36 + 1/(36*n)
            for r in numba.prange(0,q):
                S_3 += (1/n)*np.sum(np.multiply(np.multiply(J[p,:,:],J[q,:,:]),J[r,:,:])) - (n-1)*(n-2)/(216*n*n) 
                for s in numba.prange(0,r):
                        S_4 += (1/n)*np.sum(np.multiply(np.multiply(np.multiply(J[p,:,:],J[q,:,:]),J[r,:,:]),J[s,:,:]))  - ((n - 1)*(n**2 - 3*n + 3))/(1296*n**3)
    out[0,0] = S_2
    out[0,1] = S_3
    out[0,2] = S_4
    return out # np.array of size 1*3 column (S2,S3,S4)
                                  
                                  
############################################################### Section 3: Output data 

@numba.njit(parallel=True, fastmath=True)
def iterated_raw_statistics_GR_234_from_iterated_sample(iterated_data_sample,binom_2,binom_3,binom_4): # data_sample is a N*n*d matrix
    N = iterated_data_sample.shape[0]
    n = iterated_data_sample.shape[1]
    d = iterated_data_sample.shape[2]
    X = np.empty((N,3))
    U = np.empty((n,d))
    for k in numba.prange(N):
        U = iterated_data_sample[k,:,:]
        X[k,:] = raw_statistics_Genest_Remillard_234(U,binom_2,binom_3,binom_4).reshape((3))
    return X # np.array of size N*3 with column: (S2, S3, S4)


@numba.njit(parallel=True, fastmath=True)
def scaled_statistics_GR_234_from_iterated(n,X,binom_2,binom_3,binom_4,choice_scaling): #X is N*3 np.array
    out=np.empty((N,5))
    variance_2 = ((n-2)*(n-2)*(n-1)*(8*n+1))/(32400*n*n*(n+1)*(n+1))
    scaling_finite_2 = 1/math.sqrt(variance_2*binom_2)
    scaling_2 = 90/math.sqrt(2*binom_2)
    variance_3 = (n-2)*(n-1)*(16*n - 96 + 359/n - 269/n**2 - 963/n**3 - 370/n**4)/(5832000*(n+1)**3) 
    scaling_finite_3 = 1/math.sqrt(variance_3*binom_3)
    scaling_3 = 90*math.sqrt(90)/math.sqrt(2*binom_3)
    scaling_4 = (90**2)/math.sqrt(2*binom_4)
    scaling_finite_4 = scaling_4
    for k in numba.prange(N):
        if choice_scaling == 1: #finite variance scaling
            out[k,0] = scaling_finite_2*X[k,0]
            out[k,1] = scaling_finite_3*X[k,1]
            out[k,2] = scaling_finite_4*X[k,3]
            out[k,3] = out[k,0]+out[k,1]
            out[k,4] = out[k,0]+out[k,1]+out[k,2]
        else : #theoretical/asymptotic scaling
            out[k,0] = scaling_2*X[k,0]
            out[k,1] = scaling_3*X[k,1]
            out[k,2] = scaling_4*X[k,2]
            out[k,3] = out[k,0]+out[k,1]
            out[k,4] = out[k,0]+out[k,1]+out[k,2]
    return out # np.array of size N*5 with column: (S2, S3, S4, T3, T4)-rescaled
                        
                                  

############ All-in-one


def all_data_exponential234(N,mean):
    for n in [16,32,64,128]:
        for d in [4,8,16,32,64,256]:
            binom_2 = scipy.special.binom(d,2)
            binom_3 = scipy.special.binom(d,3)
            binom_4 = scipy.special.binom(d,4)
            iterated_data_sample = npr.exponential(1,size=(N,n,d))
            X=iterated_raw_statistics_GR_234_from_iterated_sample(iterated_data_sample,binom_2,binom_3,binom_4)
            pd.DataFrame(X).to_csv("Raw_Exponential_iid_234{}.csv".format((N,n,d)), header=False, index=False)
            for choice_scaling in [0,1]:
                A=scaled_statistics_GR_234_from_iterated(n,X,binom_2,binom_3,binom_4,choice_scaling)
                pd.DataFrame(A).to_csv("GR_234_Exponential_iid{}.csv".format((N,n,d,mean,choice_scaling)), header=False, index=False)
    return 'done!'


def all_data_inductive234(N):
    for n in [16,32,64,128]:
        for d in [4,8,16,32,64,128]:
            binom_2 = scipy.special.binom(d,2)
            binom_3 = scipy.special.binom(d,3)
            binom_4 = scipy.special.binom(d,4)
            initializing_normal_sample=npr.uniform(0,1,size=(N,n,2))
            iterated_data_sample = inductive_model_iterated_numba(initializing_normal_sample,N,n,d)
            X=iterated_raw_statistics_GR_234_from_iterated_sample(iterated_data_sample,binom_2,binom_3,binom_4)
            pd.DataFrame(X).to_csv("Raw_inductive_234{}.csv".format((N,n,d)), header=False, index=False)
            for choice_scaling in [0,1]:
                A=scaled_statistics_GR_234_from_iterated(n,X,binom_2,binom_3,binom_4,choice_scaling)
                pd.DataFrame(A).to_csv("GR_234_inductive{}.csv".format((N,n,d,choice_scaling)), header=False, index=False)
    return 'done!'


def all_data_Gaussian_Vector234(N):
    for tau in [0.7]:
        for n in [16,32]:
            for d in [256]:
                binom_2 = scipy.special.binom(d,2)
                binom_3 = scipy.special.binom(d,3)
                binom_4 = scipy.special.binom(d,4)
                iterated_data_sample = iterated_gaussian_vector_sample(N,n,d,tau)
                X=iterated_raw_statistics_GR_234_from_iterated_sample(iterated_data_sample,binom_2,binom_3,binom_4)
                pd.DataFrame(X).to_csv("Raw_Gaussian_Vector_234{}.csv".format((N,n,d,tau)), header=False, index=False)
                for choice_scaling in [0,1]:
                    A=scaled_statistics_GR_234_from_iterated(n,X,binom_2,binom_3,binom_4,choice_scaling)
                    pd.DataFrame(A).to_csv("GR_234_Gaussian_vector{}.csv".format((N,n,d,tau,choice_scaling)), header=False, index=False)
    return 'done!'


def all_data_Geisser_Mantel234(N):
    for n in [16]:
        for d in [4]:
            p = max(math.floor(math.sqrt(2*d)),3)
            q=int(p*(p-1)/2)
            m = math.floor(p/2)
            G = npr.multivariate_normal(np.zeros(p),np.eye(p),size=p+m)
            G_N = npr.multivariate_normal(np.zeros(p),np.eye(p),size=(int(N),int(n),int(p+m)))
            binom_2 = scipy.special.binom(d,2)
            binom_3 = scipy.special.binom(d,3)
            binom_4 = scipy.special.binom(d,4)
            iterated_data_sample=Geisser_Mantel_iterated_numba(G_N,N,n,p,m)
            X=iterated_raw_statistics_GR_234_from_iterated_sample(iterated_data_sample,binom_2,binom_3,binom_4)
            pd.DataFrame(X).to_csv("Raw_Geisser_Mantel_234{}.csv".format((N,n,q)), header=False, index=False)
            for choice_scaling in [0,1]:
                A=scaled_statistics_GR_234_from_iterated(n,X,binom_2,binom_3,binom_4,choice_scaling)
                pd.DataFrame(A).to_csv("GR_234_Geisser_Mantel{}.csv".format((N,n,q,choice_scaling)), header=False, index=False)
    return 'done!'

def all_data_Romano_Siegel234(N):
    for n in [16]:
        for d in [4]:
            p=int(max(math.floor(d/3),1))
            q=int(3*p)
            g = npr.normal(0,1,size=(N,n,q))
            iterated_data_sample=Trunc_d_Romano_Siegel_Numba_iterated_data(g,N,n,p)
            binom_2 = scipy.special.binom(d,2)
            binom_3 = scipy.special.binom(d,3)
            binom_4 = scipy.special.binom(d,4)
            X=iterated_raw_statistics_GR_234_from_iterated_sample(iterated_data_sample,binom_2,binom_3,binom_4)
            pd.DataFrame(X).to_csv("Raw_Trunc_d_Romano_Siegel_234{}.csv".format((N,n,q)), header=False, index=False)
            for choice_scaling in [0,1]:
                A=scaled_statistics_GR_234_from_iterated(n,X,binom_2,binom_3,binom_4,choice_scaling)
                pd.DataFrame(A).to_csv("GR_234_Trunc_d_Romano_Siegel{}.csv".format((N,n,q,choice_scaling)), header=False, index=False)
    return 'done!'

############################################################### Section 4: Global variables

#start = time.time()

N = 500 # Number of iterations
n = 16
d = 4

#choice_scaling = 0 # 1 is finite variance, 0 is for asymptotic/theoretical scaling


### Pre-computations of the binomial coefficients 
binom_2 = scipy.special.binom(d,2)
binom_3 = scipy.special.binom(d,3)
binom_4 = scipy.special.binom(d,4)

### Pre-computations of the normalizing sequences
#variance_2 = ((n-2)*(n-2)*(n-1)*(8*n+1))/(32400*n*n*(n+1)*(n+1))
#scaling_finite_2 = 1/math.sqrt(variance_2*binom_2)
#scaling_2 = 90/math.sqrt(2*binom_2)
#variance_3 = (n-2)*(n-1)*(16*n - 96 + 359/n - 269/n**2 - 963/n**3 - 370/n**4)/(5832000*(n+1)**3) 
#scaling_finite_3 = 1/math.sqrt(variance_3*binom_3)
#scaling_3 = 90*math.sqrt(90)/math.sqrt(2*binom_3)
#scaling_4 = (90**2)/math.sqrt(2*binom_4)
#scaling_finite_4 = scaling_4

mean = 1

### Pre-sampling the intput data for Geisser-Mantel Numba Model
#p = max(math.floor(math.sqrt(2*d)),3)
#m = math.floor(p/2)
#G = npr.multivariate_normal(np.zeros(p),np.eye(p),size=p+m)
#G_N = npr.multivariate_normal(np.zeros(p),np.eye(p),size=(int(N),int(n),int(p+m)))
#G_n = npr.multivariate_normal(np.zeros(p),np.eye(p),size=(n,p+m))

### Pre-sampling the intput data for Marshall-Olkin Algorithm
#t = 1
#V = Stable_distrib_data(N,n,t)
                                  
                                  
### Pre-sampling the intput data for the inductive model
#initializing_normal_sample=npr.uniform(0,1,size=(N,n,2))

### Parameters for the Gaussian vector model
#tau = 0.1
#tau = 0.3
#tau = 0.7




    

############################################################### Section 5: Application

#all_data_exponential234(N,mean)
#all_data_Gaussian_Vector234_to_delete(N)
#all_data_Romano_Siegel234(N)
#all_data_Geisser_Mantel234(N)
#all_data_Gaussian_Vector234(N)
#all_data_inductive234(N)


########################### Section 5.1: Generating iterated data samples of size N*n*d
                                  
#p=int(max(math.floor(d/3),1))
#g = npr.normal(0,1,size=(N,n,3*p))
#iterated_data_sample = Trunc_d_Romano_Siegel_Numba_iterated_data(g,N,n,p)
#iterated_data_sample = npr.normal(size=(N,n,d))
iterated_data_sample = npr.exponential(1,size=(N,n,d))
#iterated_data_sample = npr.multivariate_normal(np.zeros(d),random_cov,size=(int(N),int(n),int(d)))
#iterated_data_sample = npr.multivariate_normal(np.zeros(d),toeplitz_cov,size=((N),int(n)))
#iterated_data_sample = inductive_model_iterated_numba(initializing_normal_sample,N,n,d)
#iterated_data_sample = Gumbel_Marshall_Olkin_iterated_numba(V,d,t)
#iterated_data_sample = Geisser_Mantel_iterated_numba(G_N,N,n,p,m)
#iterated_data_sample = iterated_gaussian_vector_sample(N,n,d,tau)


########################### Section 5.3: Computing the statistics from the iterated data samples

X=iterated_raw_statistics_GR_234_from_iterated_sample(iterated_data_sample,binom_2,binom_3,binom_4)
#A=scaled_statistics_GR_234_from_iterated(n,X,binom_2,binom_3,binom_4,choice_scaling)
                                  
########################### Section 5.4: Displaying (histogram) and converting into Panda files the computed statistics

#A_df = pd.DataFrame(A)   
#A_df.hist()

########################### Section 5.5: Storing in Excel files the iterated raw statistics N*2 or N*3 with column (S2,S3,S4)
                                  
#pd.DataFrame(X).to_csv("Raw_Normal_iid{}.csv".format((N,n,d)), header=False, index=False)
#pd.DataFrame(X).to_csv("Raw_Inductive{}.csv".format((N,n,d)), header=False, index=False)
#pd.DataFrame(X).to_csv("Raw_Random_Cov{}.csv".format((N,n,d)), header=False, index=False)
#pd.DataFrame(X).to_csv("Raw_Toeplitz{}.csv".format((N,n,d)), header=False, index=False)
#pd.DataFrame(X).to_csv("Raw_Gumbel{}.csv".format((N,n,d,t)), header=False, index=False)
#pd.DataFrame(X).to_csv("Raw_Geisser_Mantel{}.csv".format((N,n,p,m)), header=False, index=False)
#pd.DataFrame(X).to_csv("Raw_Gaussian_Vector{}.csv".format((N,n,d,tau)), header=False, index=False)
pd.DataFrame(X).to_csv("Raw_Exponential_iid{}.csv".format((N,n,d)), header=False, index=False)
#pd.DataFrame(X).to_csv("Raw_Romano{}.csv".format((N,n,d)), header=False, index=False)


########################### Section 5.6: Storing in Excel files the final statistics (rescaled raw)
                                  
#pd.DataFrame(A).to_csv("GR_23_inductive{}.csv".format((N,n,d,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_23_Gumbel_Marshall_Olkin{}.csv".format((N,n,d,t,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_23_Geisser_Mantel{}.csv".format((N,n,d,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_23_Gaussian_vector{}.csv".format((N,n,d,tau,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_23_Exponential_iid{}.csv".format((N,n,d,mean,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_23_Random_Cov_Gauss{}.csv".format((N,n,d,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_23_Toeplitz_Cov_Gauss{}.csv".format((N,n,d,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_23_Tan{}.csv".format((N,n,d,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_23_Trunc_d_Romano_Siegel{}.csv".format((N,n,d,choice_scaling)), header=False, index=False)

#pd.DataFrame(A).to_csv("GR_234_inductive{}.csv".format((N,n,d,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_234_Gumbel_Marshall_Olkin{}.csv".format((N,n,d,t,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_234_Geisser_Mantel{}.csv".format((N,n,d,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_234_Gaussian_vector{}.csv".format((N,n,d,tau,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_234_Exponential_iid{}.csv".format((N,n,d,mean,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_234_Random_Cov_Gauss{}.csv".format((N,n,d,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_234_Toeplitz_Cov_Gauss{}.csv".format((N,n,d,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_234_Tan{}.csv".format((N,n,d,choice_scaling)), header=False, index=False)
#pd.DataFrame(A).to_csv("GR_234_Trunc_d_Romano_Siegel{}.csv".format((N,n,d,choice_scaling)), header=False, index=False)


#end = time.time()
#print("Time: ", end-start)

In [27]:
#%%timeit
#%%timeit -n 1 -r 1 # time cost for 1 run with 1 loop

############################################################### Packages

import numpy as np
import numpy.random as npr
from scipy.stats import levy_stable  
from sklearn.covariance import empirical_covariance
import matplotlib.pyplot as plt
#import random
import math 
import pandas as pd
import scipy.special
#from scipy.stats import rankdata
import numba
import timeit
import time
import warnings
import psutil
import array_to_latex as a2l


#################################################### Miscellenous: time measure and warning filter
#%%timeit
#%%timeit -n 1 -r 1 # time cost for 1 run with 1 loop
warnings.filterwarnings('ignore')
npr.seed(0)
######################################################### Computing the powers
def power(A,x):
    B=A[A>x]
    return B.size/A.shape[0]
######################################################### OLD VERSIONS
def power_for_each_columns_23(A):
    X=np.empty((1,3))
    for i in range(2):
        X[0,i]=power(A[:,i],1.645) # 95-percentile of a N(0,1)
    X[0,2]=power((1/np.sqrt(2))*(A[:,2]),1.645) 
    return X
def power_for_each_columns_234(A):
    X=np.empty((1,5))
    for i in range(3):
        X[0,i]=power(A[:,i],1.645) # 95-percentile of a N(0,1)
    X[0,3]=power((1/np.sqrt(2))*(A[:,3]),1.645) 
    X[0,4]=power((1/np.sqrt(3))*(A[:,0]+A[:,1]+A[:,2]),1.645) 
    return X
####################################################### NEW VERSION
def power_for_each_columns_234_bis(A,choice_scaling):
    if choice_scaling == 0:
        X=np.empty((1,5))
        for i in range(3):
            X[0,i]=power(A[:,i],1.645) # 95-percentile of a N(0,1)
        X[0,3]=power((1/np.sqrt(2))*(A[:,3]),1.645) 
        X[0,4]=power((1/np.sqrt(3))*(A[:,4]),1.645)
    elif choice_scaling == 1:
        X=np.empty((1,3))
        for i in range(2):
            X[0,i]=power(A[:,i],1.645) # 95-percentile of a N(0,1)
        X[0,2]=power((1/np.sqrt(2))*(A[:,0]+A[:,1]),1.645) 
    return X
######################################################### OLD VERSIONS
def total_power_Exponential_iid_23(N,mean,choice_scaling):
    X=np.empty((4,3,6))
    for j in range(3):
        for n in [16, 32, 64, 128]:
            for d in [4,8,16,32,64,128]:
                d2=int(math.log(d)/math.log(2))-2
                n2=int(math.log(n)/math.log(2))-4
                A=pd.read_csv('GR_23_Exponential_iid{}.csv'.format((N,n,d,mean,choice_scaling)),header=None)
                A=pd.DataFrame(A).to_numpy()
                X[n2,j,d2]=power_for_each_columns_23(A)[0,j]
    A_1=X[0,:,:]
    B=X[1,:,:]
    C=X[2,:,:]
    D=X[3,:,:]
    out = np.concatenate((np.concatenate((np.concatenate((A_1,B),axis=0),C),axis=0),D),axis=0)
    pd.DataFrame(out).to_csv("Tabular_Exponential_iid_23{}.csv".format((N,mean,choice_scaling)), header=False, index=False)
    return out 
def total_power_Exponential_iid_234_old(N,mean,choice_scaling):
    X=np.empty((4,5,6))
    gr=1
    for j in range(5):
        for n in [16, 32, 64, 128]:
            for d in [4,8,16,32,64,128]:
                d2=int(math.log(d)/math.log(2))-2
                n2=int(math.log(n)/math.log(2))-4
                A=pd.read_csv('GR_234_Exponential_iid{}.csv'.format((N,n,d,mean,choice_scaling,gr)),header=None)
                A=pd.DataFrame(A).to_numpy()
                X[n2,j,d2]=power_for_each_columns_234(A)[0,j]
    A_1=X[0,:,:]
    B=X[1,:,:]
    C=X[2,:,:]
    D=X[3,:,:]
    out = np.concatenate((np.concatenate((np.concatenate((A_1,B),axis=0),C),axis=0),D),axis=0)
    pd.DataFrame(out).to_csv("Tabular_Exponential_iid_234_old{}.csv".format((N,mean,choice_scaling,gr)), header=False, index=False)
    return out 
##################################################### NEW VERSIONS
def total_power_Exponential_iid_234(N,mean,choice_scaling):
    if choice_scaling == 0:
        X=np.empty((4,5,7))
        for j in range(5):
            for n in [16, 32, 64, 128]:
                for d in [4,8,16,32,64,128,256]:
                    d2=int(math.log(d)/math.log(2))-2
                    n2=int(math.log(n)/math.log(2))-4
                    A=pd.read_csv('GR_234_Exponential_iid{}.csv'.format((N,n,d,mean,choice_scaling)),header=None)
                    A=pd.DataFrame(A).to_numpy()
                    X[n2,j,d2]=power_for_each_columns_234_bis(A,choice_scaling)[0,j]
        A_1=X[0,:,:]
        B=X[1,:,:]
        C=X[2,:,:]
        D=X[3,:,:]
        out = np.concatenate((np.concatenate((np.concatenate((A_1,B),axis=0),C),axis=0),D),axis=0)
        pd.DataFrame(out).to_csv("Tabular_Exponential_iid_234{}.csv".format((N,mean,choice_scaling)), header=False, index=False)
    elif choice_scaling == 1:
        X=np.empty((4,3,7))
        for j in range(3):
            for n in [16, 32, 64, 128]:
                for d in [4,8,16,32,64,128,256]:
                    d2=int(math.log(d)/math.log(2))-2
                    n2=int(math.log(n)/math.log(2))-4
                    A=pd.read_csv('GR_234_Exponential_iid{}.csv'.format((N,n,d,mean,choice_scaling)),header=None)
                    A=pd.DataFrame(A).to_numpy()
                    X[n2,j,d2]=power_for_each_columns_234_bis(A,choice_scaling)[0,j]
        A_1=X[0,:,:]
        B=X[1,:,:]
        C=X[2,:,:]
        D=X[3,:,:]
        out = np.concatenate((np.concatenate((np.concatenate((A_1,B),axis=0),C),axis=0),D),axis=0)
        pd.DataFrame(out).to_csv("Tabular_Exponential_iid_234{}.csv".format((N,mean,choice_scaling)), header=False, index=False)
    return out


def total_power_Gaussian_vector_234(N,tau,choice_scaling):
    if choice_scaling == 0:
        X=np.empty((4,5,7))
        for j in range(5):
            for n in [16, 32, 64, 128]:
                for d in [4,8,16,32,64,128,256]:
                    d2=int(math.log(d)/math.log(2))-2
                    n2=int(math.log(n)/math.log(2))-4
                    A=pd.read_csv('GR_234_Gaussian_vector{}.csv'.format((N,n,d,tau,choice_scaling)),header=None)
                    A=pd.DataFrame(A).to_numpy()
                    X[n2,j,d2]=power_for_each_columns_234_bis(A,choice_scaling)[0,j]
        A_1=X[0,:,:]
        B=X[1,:,:]
        C=X[2,:,:]
        D=X[3,:,:]
        out = np.concatenate((np.concatenate((np.concatenate((A_1,B),axis=0),C),axis=0),D),axis=0)
        pd.DataFrame(out).to_csv("Tabular_Gaussian_Vector_234{}.csv".format((N,tau,choice_scaling)), header=False, index=False)
    elif choice_scaling == 1:
        X=np.empty((4,3,7))
        for j in range(3):
            for n in [16, 32, 64, 128]:
                for d in [4,8,16,32,64,128,256]:
                    d2=int(math.log(d)/math.log(2))-2
                    n2=int(math.log(n)/math.log(2))-4
                    A=pd.read_csv('GR_234_Gaussian_vector{}.csv'.format((N,n,d,tau,choice_scaling)),header=None)
                    A=pd.DataFrame(A).to_numpy()
                    X[n2,j,d2]=power_for_each_columns_234_bis(A,choice_scaling)[0,j]
        A_1=X[0,:,:]
        B=X[1,:,:]
        C=X[2,:,:]
        D=X[3,:,:]
        out = np.concatenate((np.concatenate((np.concatenate((A_1,B),axis=0),C),axis=0),D),axis=0)
        pd.DataFrame(out).to_csv("Tabular_Gaussian_Vector_234{}.csv".format((N,tau,choice_scaling)), header=False, index=False)
    return out

def total_power_inductive_234(N,choice_scaling):
    if choice_scaling == 0:
        X=np.empty((4,5,7))
        for j in range(5):
            for n in [16, 32, 64, 128]:
                for d in [4,8,16,32,64,128,256]:
                    d2=int(math.log(d)/math.log(2))-2
                    n2=int(math.log(n)/math.log(2))-4
                    A=pd.read_csv('GR_234_inductive{}.csv'.format((N,n,d,choice_scaling)),header=None)
                    A=pd.DataFrame(A).to_numpy()
                    X[n2,j,d2]=power_for_each_columns_234_bis(A,choice_scaling)[0,j]
        A_1=X[0,:,:]
        B=X[1,:,:]
        C=X[2,:,:]
        D=X[3,:,:]
        out = np.concatenate((np.concatenate((np.concatenate((A_1,B),axis=0),C),axis=0),D),axis=0)
        pd.DataFrame(out).to_csv("Tabular_Inductive_234{}.csv".format((N,choice_scaling)), header=False, index=False)
    elif choice_scaling == 1:
        X=np.empty((4,3,7))
        for j in range(3):
            for n in [16, 32, 64, 128]:
                for d in [4,8,16,32,64,128,256]:
                    d2=int(math.log(d)/math.log(2))-2
                    n2=int(math.log(n)/math.log(2))-4
                    A=pd.read_csv('GR_234_inductive{}.csv'.format((N,n,d,choice_scaling)),header=None)
                    A=pd.DataFrame(A).to_numpy()
                    X[n2,j,d2]=power_for_each_columns_234_bis(A,choice_scaling)[0,j]
        A_1=X[0,:,:]
        B=X[1,:,:]
        C=X[2,:,:]
        D=X[3,:,:]
        out = np.concatenate((np.concatenate((np.concatenate((A_1,B),axis=0),C),axis=0),D),axis=0)
        pd.DataFrame(out).to_csv("Tabular_Inductive_234{}.csv".format((N,choice_scaling)), header=False, index=False)
    return out

def total_power_Geisser_Mantel_234(N,choice_scaling):
    if choice_scaling == 0:
        X=np.empty((4,5,7))
        for j in range(5):
            for n in [16, 32, 64, 128]:
                for d in [3,6,10,28,55,120,231]:
                    d2=int(math.log(d)/math.log(2))-2
                    n2=int(math.log(n)/math.log(2))-4
                    A=pd.read_csv('GR_234_Geisser_Mantel{}.csv'.format((N,n,d,choice_scaling)),header=None)
                    A=pd.DataFrame(A).to_numpy()
                    X[n2,j,d2]=power_for_each_columns_234_bis(A,choice_scaling)[0,j]
        A_1=X[0,:,:]
        B=X[1,:,:]
        C=X[2,:,:]
        D=X[3,:,:]
        out = np.concatenate((np.concatenate((np.concatenate((A_1,B),axis=0),C),axis=0),D),axis=0)
        pd.DataFrame(out).to_csv("Tabular_Geisser_Mantel_234{}.csv".format((N,choice_scaling)), header=False, index=False)
    elif choice_scaling == 1:
        X=np.empty((4,3,7))
        for j in range(3):
            for n in [16, 32, 64, 128]:
                for d in [3,6,10,28,55,120,231]:
                    d2=int(math.log(d)/math.log(2))-2
                    n2=int(math.log(n)/math.log(2))-4
                    A=pd.read_csv('GR_234_Geisser_Mantel{}.csv'.format((N,n,d,choice_scaling)),header=None)
                    A=pd.DataFrame(A).to_numpy()
                    X[n2,j,d2]=power_for_each_columns_234_bis(A,choice_scaling)[0,j]
        A_1=X[0,:,:]
        B=X[1,:,:]
        C=X[2,:,:]
        D=X[3,:,:]
        out = np.concatenate((np.concatenate((np.concatenate((A_1,B),axis=0),C),axis=0),D),axis=0)
        pd.DataFrame(out).to_csv("Tabular_Geisser_Mantel_234{}.csv".format((N,choice_scaling)), header=False, index=False)
    return out

def total_power_Romano_Siegel_234(N,choice_scaling):
    if choice_scaling == 0:
        X=np.empty((4,5,7))
        for j in range(5):
            for n in [16, 32, 64, 128]:
                for d in [3,6,15,30,63,126,255]:
                    d2=int(math.log(d)/math.log(2))-2
                    n2=int(math.log(n)/math.log(2))-4
                    A=pd.read_csv('GR_234_Trunc_d_Romano_Siegel{}.csv'.format((N,n,d,choice_scaling)),header=None)
                    A=pd.DataFrame(A).to_numpy()
                    X[n2,j,d2]=power_for_each_columns_234_bis(A,choice_scaling)[0,j]
        A_1=X[0,:,:]
        B=X[1,:,:]
        C=X[2,:,:]
        D=X[3,:,:]
        out = np.concatenate((np.concatenate((np.concatenate((A_1,B),axis=0),C),axis=0),D),axis=0)
        pd.DataFrame(out).to_csv("Tabular_Romano_Siegel_234{}.csv".format((N,choice_scaling)), header=False, index=False)
    elif choice_scaling == 1:
        X=np.empty((4,3,7))
        for j in range(3):
            for n in [16, 32, 64, 128]:
                for d in [3,6,15,30,63,126,255]:
                    d2=int(math.log(d)/math.log(2))-2
                    n2=int(math.log(n)/math.log(2))-4
                    A=pd.read_csv('GR_234_Trunc_d_Romano_Siegel{}.csv'.format((N,n,d,choice_scaling)),header=None)
                    A=pd.DataFrame(A).to_numpy()
                    X[n2,j,d2]=power_for_each_columns_234_bis(A,choice_scaling)[0,j]
        A_1=X[0,:,:]
        B=X[1,:,:]
        C=X[2,:,:]
        D=X[3,:,:]
        out = np.concatenate((np.concatenate((np.concatenate((A_1,B),axis=0),C),axis=0),D),axis=0)
        pd.DataFrame(out).to_csv("Tabular_Romano_Siegel_234{}.csv".format((N,choice_scaling)), header=False, index=False)
    return out


N = 500
mean = 1
choice_scaling = 1
tau1 = 0.1
tau2 = 0.3
tau3 = 0.7
#total_power_Exponential_iid_23(N,mean,choice_scaling)
#total_power_Exponential_iid_234_old(N,mean,choice_scaling)
total_power_Exponential_iid_234(N,mean,choice_scaling)
total_power_Gaussian_vector_234(N,tau1,choice_scaling)
total_power_Gaussian_vector_234(N,tau2,choice_scaling)
total_power_Gaussian_vector_234(N,tau3,choice_scaling)
total_power_inductive_234(N,choice_scaling)
total_power_Geisser_Mantel_234(N,choice_scaling)
total_power_Romano_Siegel_234(N,choice_scaling)



array([[0.036, 0.068, 0.052, 0.046, 0.04 , 0.07 , 0.028],
       [0.166, 0.234, 0.264, 0.294, 0.324, 0.338, 0.534],
       [0.126, 0.132, 0.15 , 0.2  , 0.242, 0.276, 0.224],
       [0.022, 0.048, 0.05 , 0.036, 0.042, 0.054, 0.024],
       [0.824, 0.576, 0.398, 0.342, 0.362, 0.336, 0.988],
       [0.458, 0.326, 0.222, 0.204, 0.242, 0.244, 0.982],
       [0.022, 0.052, 0.036, 0.038, 0.046, 0.068, 0.042],
       [1.   , 1.   , 0.848, 0.638, 0.462, 0.372, 1.   ],
       [1.   , 0.904, 0.606, 0.416, 0.308, 0.266, 1.   ],
       [0.008, 0.036, 0.042, 0.04 , 0.05 , 0.048, 0.034],
       [1.   , 1.   , 1.   , 0.992, 0.856, 0.624, 1.   ],
       [1.   , 1.   , 0.992, 0.918, 0.668, 0.464, 1.   ]])

In [None]:
A=npr.randint(5,size=(5,3))
print(A)
print(building_col_sum_234(A))