# Simulation Studies

In [1]:
import sys
sys.path.insert(0,'..')
from dfply import *
from grass_DR import *
import scipy.io
import matplotlib.pyplot as plt
from compute_centroid import *
from PGA import *
from PNG import *
import torch
import pandas as pd
from plotnine import *
from joblib import Parallel, delayed
import multiprocessing
import numpy as np

## 1. Comparison between geodesic distance and projection distance

In [2]:
def exp1(k, s_vec):
    """Run one repetition of the geodesic-vs-projection distance experiment.

    For each noise scale in ``s_vec`` the function samples ``N`` points from
    ``Grassmann(m, p)``, maps them with a random ``n x m`` matrix drawn from
    ``Grassmann(n, m)``, perturbs the mapped points with ``gr.exp`` along
    ``sig``-scaled random tangent vectors, reduces the perturbed data with
    ``NG_dr`` (recorded as ``'proj'``) and ``NG_dr_geod`` (recorded as
    ``'geod'``), and stores the variance of each reduced data set divided by
    the variance of the perturbed data.

    Parameters
    ----------
    k : int
        Repetition index; written to the ``rep`` column and added to the seed.
    s_vec : iterable of float
        Noise scales applied to the random tangent vectors.

    Returns
    -------
    pandas.DataFrame
        Columns ``rep``, ``sig``, ``method``, ``var_ratio``; two rows per noise
        scale, in the order ``'geod'`` then ``'proj'``.
    """
    N, n, m, p = 50, 10, 3, 1
    columns = ['rep', 'sig', 'method', 'var_ratio']

    gr_low = Grassmann(m, p)
    gr = Grassmann(n, p)
    gr_map = Grassmann(n, m)

    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end: DataFrame.append was removed in pandas 2.0 and copied the whole
    # frame on every call.
    records = []

    for j, sig in enumerate(s_vec):
        # NOTE(review): the seed depends only on j + k, so (j, k) pairs with the
        # same sum share a seed (e.g. rep 0 / level 1 and rep 1 / level 0).
        # Left unchanged so previously saved results stay reproducible; confirm
        # whether this reuse is intended.
        np.random.seed(12345 + j + k)
        X_low = np.array([gr_low.rand() for _ in range(N)])  # N x m x p
        W = gr_map.rand()  # n x m
        X_ = np.array([np.matmul(W, x) for x in X_low])  # N x n x p
        # Perturb the embedded X.
        X = np.array([gr.exp(x, sig * gr.randvec(x)) for x in X_])

        FM_X = compute_centroid(gr, X)
        var_X = var(gr, X, FM_X)

        X_low_proj, _, _ = NG_dr(X, m, verbosity=0)
        X_low_geod, _, _ = NG_dr_geod(X, m, verbosity=0)

        FM_proj = compute_centroid(gr_low, X_low_proj)
        var_proj = var(gr_low, X_low_proj, FM_proj)

        FM_geod = compute_centroid(gr_low, X_low_geod)
        var_geod = var(gr_low, X_low_geod, FM_geod)

        records.append({'rep': k,
                        'sig': sig,
                        'method': 'geod',
                        'var_ratio': var_geod / var_X})
        records.append({'rep': k,
                        'sig': sig,
                        'method': 'proj',
                        'var_ratio': var_proj / var_X})

    # Passing `columns` keeps the schema even when s_vec is empty.
    return pd.DataFrame(records, columns=columns)

In [3]:
# Settings for experiment 1.
repetition = 100  # number of repetitions handed to exp1 as k = 0 .. repetition - 1
# 20 evenly spaced noise scales from 0.5 to 2 (both endpoints included).
s_vec = np.linspace(start=0.5, stop=2, num=20)

In [None]:
# Fan the repetitions out over every available CPU core; each worker returns
# the per-repetition DataFrame produced by exp1.
num_cores = multiprocessing.cpu_count()

tmp = Parallel(n_jobs=num_cores)(delayed(exp1)(k, s_vec) for k in range(repetition))

# Stack all repetitions in a single pass. DataFrame.append was removed in
# pandas 2.0 and re-copied the accumulated frame on every iteration.
v_ratio_dist = pd.concat(tmp, ignore_index=True)

# The dict is written as a pickled 0-d object array, so it has to be read back
# with np.load('v_ratio_dist.npy', allow_pickle=True).item().
np.save('v_ratio_dist.npy', {'v_ratio_dist': v_ratio_dist})

## 2. Comparison of PNG and PGA

In [34]:
def exp2(k, s_vec):
    """Run one repetition of the PNG-vs-PGA comparison.

    For each noise scale in ``s_vec`` the function samples ``N`` points from
    ``Grassmann(m, p, N)``, maps them with a random ``n x m`` matrix ``A`` drawn
    from ``Grassmann(n, m)``, orthonormalises each mapped point with a QR
    factorisation, perturbs it with ``gr.exp`` along a ``sig``-scaled random
    tangent vector, and records the explained-variance ratios of the first
    ``n_c`` components obtained from PCA on the ``PNG`` scores and from ``PGA``.

    Parameters
    ----------
    k : int
        Repetition index; written to the ``rep`` column and added to the seed.
    s_vec : iterable of float
        Noise scales applied to the random tangent vectors.

    Returns
    -------
    pandas.DataFrame
        Columns ``rep``, ``sig``, ``method``, ``component``, ``var_ratio``.
        For every noise scale there are ``n_c`` rows with method ``'PNG'``
        followed by ``n_c`` rows with method ``'PGA'``; ``component`` is
        1-based.
    """
    N, n, m, p = 50, 10, 5, 2
    n_c = 5
    columns = ['rep', 'sig', 'method', 'component', 'var_ratio']

    gr_low = Grassmann(m, p, N)
    gr = Grassmann(n, p)
    gr_map = Grassmann(n, m)

    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end: DataFrame.append was removed in pandas 2.0 and copied the whole
    # frame on every call.
    records = []

    for j, sig in enumerate(s_vec):
        # NOTE(review): the seed depends only on j + k, so (j, k) pairs with the
        # same sum share a seed. Left unchanged so previously saved results stay
        # reproducible; confirm whether this reuse is intended.
        np.random.seed(12345 + j + k)
        X_low = gr_low.rand()  # N x m x p
        A = gr_map.rand()  # n x m
        # B = np.random.normal(0, 0.1, (n, p)) # n x p
        # With B fixed at zero the offset term (I - A A^T) B below is zero too.
        B = np.zeros((n, p))
        AAT = np.matmul(A, A.T)
        IAATB = np.matmul(np.eye(n) - AAT, B)
        # Keep only the Q factor so each embedded point has orthonormal columns.
        X_ = np.array([np.linalg.qr(np.matmul(A, x) + IAATB)[0] for x in X_low])  # N x n x p
        # Perturb the embedded X.
        X = np.array([gr.exp(x, sig * gr.randvec(x)) for x in X_])

        scores_PNG = PNG(X, log=False, verbosity=0)

        png = PCA(n_components=n_c)
        png.fit(scores_PNG)
        records.extend(
            {'rep': k,
             'sig': sig,
             'method': 'PNG',
             'component': l + 1,
             'var_ratio': png.explained_variance_ratio_[l]}
            for l in range(n_c)
        )

        pga = PGA(X, n_c, gr)
        records.extend(
            {'rep': k,
             'sig': sig,
             'method': 'PGA',
             'component': l + 1,
             'var_ratio': pga.explained_variance_ratio_[l]}
            for l in range(n_c)
        )

    # Passing `columns` keeps the schema even when s_vec is empty.
    return pd.DataFrame(records, columns=columns)
    

In [35]:
# Settings for experiment 2 (these rebind the names used by experiment 1).
repetition = 100  # number of repetitions handed to exp2 as k = 0 .. repetition - 1
# Noise scales at which PNG and PGA are compared.
s_vec = np.array(
    [0.01, 0.05, 0.1, 0.5]
)

In [None]:
# Fan the repetitions out over every available CPU core; each worker returns
# the per-repetition DataFrame produced by exp2.
num_cores = multiprocessing.cpu_count()

tmp = Parallel(n_jobs=num_cores)(delayed(exp2)(k, s_vec) for k in range(repetition))

# Stack all repetitions in a single pass. DataFrame.append was removed in
# pandas 2.0 and re-copied the accumulated frame on every iteration.
v_ratio = pd.concat(tmp, ignore_index=True)

In [None]:
# Persist the experiment 2 results. The dict is written as a pickled 0-d object
# array; read it back with np.load('v_ratio.npy', allow_pickle=True).item().
np.save(file='v_ratio.npy', arr={'v_ratio': v_ratio})