# Simulation Studies

In [3]:
import sys
sys.path.insert(0,'..')
from dfply import *
from grass_DR import *
import scipy.io
import matplotlib.pyplot as plt
from pymanopt.optimizers.nelder_mead import compute_centroid
from PGA import *
from PNG import *
import torch
import pandas as pd
from plotnine import *
from joblib import Parallel, delayed
import multiprocessing
import numpy as np

## 1. Comparison between geodesic distance and projection distance

In [10]:
def exp1(k, s_vec):
    """Run one replicate of experiment 1 (geodesic vs. projection distance).

    For every perturbation scale in ``s_vec`` this draws N points on a
    low-dimensional Grassmannian, embeds them with a random map, perturbs the
    embedded points, reduces them back with ``NG_dr`` (recorded as 'proj') and
    ``NG_dr_geod`` (recorded as 'geod'), and records the ratio of the variance
    of the reduced data to the variance of the perturbed data.

    Parameters
    ----------
    k : int
        Replicate index; stored in the 'rep' column and added to the seed.
    s_vec : iterable of float
        Perturbation scales; each value is stored in the 'sig' column.

    Returns
    -------
    pandas.DataFrame
        Columns ['rep', 'sig', 'method', 'var_ratio'], with two rows per
        entry of ``s_vec`` ('geod' first, then 'proj').
    """
    N, n, m, p = 50, 10, 3, 1
    columns = ['rep', 'sig', 'method', 'var_ratio']

    gr_low = Grassmann(m, p)
    gr = Grassmann(n, p)
    gr_map = Grassmann(n, m)

    # Collect plain dict records and build the frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    records = []

    for j, sig in enumerate(s_vec):
        # NOTE(review): the seed depends only on j + k, so (j, k) pairs with
        # the same sum share a random stream across replicates - confirm
        # this is intended before running with repetition > 1.
        np.random.seed(12345 + j + k)
        X_low = np.array([gr_low.random_point() for i in range(N)]) # N x m x p
        W = gr_map.random_point() # n x m
        X_ = np.array([np.matmul(W, X_low[i]) for i in range(N)]) # N x n x p
        # perturb the embedded X along a random tangent direction scaled by sig
        X = np.array([gr.exp(X_[i], sig * gr.random_tangent_vector(X_[i])) for i in range(N)])

        # Variance of the perturbed data around its centroid (the denominator).
        FM_X = compute_centroid(gr, X)
        var_X = var(gr, X, FM_X)

        X_low_proj, _, _ = NG_dr(X, m, verbosity = 0)
        X_low_geod, _, _ = NG_dr_geod(X, m, verbosity = 0)

        FM_proj = compute_centroid(gr_low, X_low_proj)
        var_proj = var(gr_low, X_low_proj, FM_proj)

        FM_geod = compute_centroid(gr_low, X_low_geod)
        var_geod = var(gr_low, X_low_geod, FM_geod)

        records.append({'rep': k,
                        'sig': sig,
                        'method': 'geod',
                        'var_ratio': var_geod / var_X})
        records.append({'rep': k,
                        'sig': sig,
                        'method': 'proj',
                        'var_ratio': var_proj / var_X})

    # Passing columns explicitly keeps the schema even when s_vec is empty.
    return pd.DataFrame(records, columns=columns)

In [18]:
# Number of independent replicates of experiment 1.
repetition = 1
# Grid of 20 evenly spaced perturbation scales from 0.5 to 2 (inclusive).
s_vec = np.linspace(0.5, 2, num=20)

In [20]:
# Run the replicates of experiment 1 in parallel, one job per replicate index.
num_cores = multiprocessing.cpu_count()

tmp = Parallel(n_jobs=num_cores)(delayed(exp1)(k, s_vec) for k in range(repetition))

# Stack the per-replicate frames in one pass. DataFrame.append was removed in
# pandas 2.0, and appending in a loop re-copied the accumulated frame each time.
v_ratio_dist = pd.concat(tmp, ignore_index=True)

#np.save('v_ratio_dist.npy', {'v_ratio_dist':v_ratio_dist})

Optimizing...
Iteration    Cost                       Gradient norm     
---------    -----------------------    --------------    
   1         +9.3050587263623941e-01    1.39353651e-01    
   2         +7.3753672895481259e-01    2.92254637e-01    
   3         +7.1002798856517002e-01    5.09575555e-01    
   4         +6.4585763435168664e-01    1.56525034e-01    
   5         +6.3657999869097104e-01    3.35729218e-01    
   6         +6.2983874903004611e-01    2.43867735e-01    
   7         +6.2366602889626677e-01    9.31137477e-02    
   8         +6.2182561994623864e-01    1.94535556e-01    
   9         +6.1870794366011905e-01    9.19875574e-02    
  10         +6.1731782412410219e-01    1.19948192e-01    
  11         +6.1716479065911067e-01    1.97824522e-01    
  12         +6.1658370735273105e-01    1.77896818e-01    
  13         +6.1480471438520590e-01    1.00499547e-01    
  14         +6.1390370192603472e-01    1.46676366e-01    
  15         +6.1220010340661679e-01    8.

KeyboardInterrupt: 

## 2. Comparison of PNG and PGA

In [26]:
def exp2(k, s_vec):
    """Run one replicate of experiment 2 (PNG vs. PGA explained variance).

    For every perturbation scale in ``s_vec`` this draws N points on a
    low-dimensional Grassmannian, embeds them with a random map, perturbs the
    embedded points, and records the explained-variance ratio of the first
    ``n_c`` components obtained from PNG scores (via PCA) and from PGA.

    Parameters
    ----------
    k : int
        Replicate index; stored in the 'rep' column and added to the seed.
    s_vec : iterable of float
        Perturbation scales; each value is stored in the 'sig' column.

    Returns
    -------
    pandas.DataFrame
        Columns ['rep', 'sig', 'method', 'component', 'var_ratio']. For each
        entry of ``s_vec`` there are ``n_c`` 'PNG' rows followed by ``n_c``
        'PGA' rows, with 'component' numbered from 1.
    """
    N, n, m, p = 50, 10, 5, 2
    n_c = 5
    columns = ['rep', 'sig', 'method', 'component', 'var_ratio']

    gr_low = Grassmann(m, p, k = N)
    gr = Grassmann(n, p)
    gr_map = Grassmann(n, m)

    # Collect plain dict records and build the frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    records = []

    for j, sig in enumerate(s_vec):
        # NOTE(review): the seed depends only on j + k, so (j, k) pairs with
        # the same sum share a random stream across replicates - confirm
        # this is intended before running with repetition > 1.
        np.random.seed(12345 + j + k)
        X_low = gr_low.random_point() # N x m x p
        A = gr_map.random_point() # n x m
        #B = np.random.normal(0, 0.1, (n, p)) # n x p
        B = np.zeros((n,p))
        AAT = np.matmul(A, A.T)
        # Component of B outside the column space of A (zero while B is zero).
        IAATB = np.matmul(np.eye(n) - AAT, B)
        X_ = np.array([np.linalg.qr(np.matmul(A, X_low[i]) + IAATB)[0] for i in range(N)]) # N x n x p
        # perturb the embedded X along a random tangent direction scaled by sig
        X = np.array([gr.exp(X_[i], sig * gr.random_tangent_vector(X_[i])) for i in range(N)])

        scores_PNG = PNG(X, log = False, verbosity = 0)

        # NOTE(review): PCA is not imported explicitly in the visible cells;
        # presumably it arrives through one of the wildcard imports - confirm.
        png = PCA(n_components = n_c)
        png.fit(scores_PNG)
        records.extend({'rep': k,
                        'sig': sig,
                        'method': 'PNG',
                        'component': l + 1,
                        'var_ratio': png.explained_variance_ratio_[l]}
                       for l in range(n_c))

        pga = PGA(X, n_c, gr)
        records.extend({'rep': k,
                        'sig': sig,
                        'method': 'PGA',
                        'component': l + 1,
                        'var_ratio': pga.explained_variance_ratio_[l]}
                       for l in range(n_c))

    # Passing columns explicitly keeps the schema even when s_vec is empty.
    return pd.DataFrame(records, columns=columns)
    

In [22]:
# Number of independent replicates of experiment 2.
repetition = 1
# Perturbation scales evaluated in experiment 2.
s_vec = np.asarray([0.01, 0.05, 0.1, 0.5])

In [27]:
# Run the replicates of experiment 2 in parallel, one job per replicate index.
num_cores = multiprocessing.cpu_count()

tmp = Parallel(n_jobs=num_cores)(delayed(exp2)(k, s_vec) for k in range(repetition))

# Stack the per-replicate frames in one pass. DataFrame.append was removed in
# pandas 2.0, and appending in a loop re-copied the accumulated frame each time.
v_ratio = pd.concat(tmp, ignore_index=True)

Optimizing...
Iteration    Cost                       Gradient norm     
---------    -----------------------    --------------    
   1         +4.4689739008069540e-01    4.27115400e-01    
   2         +2.9619863489112025e-01    4.92579637e-01    
   3         +2.1882314382138043e-01    4.37965968e-01    
   4         +1.3675601933673270e-01    2.78922103e-01    
   5         +8.5481890303470462e-02    5.08265138e-01    
   6         +4.6250587788228631e-02    3.81186759e-01    
   7         +1.0530411934136345e-02    1.18555049e-01    
   8         +7.2026183696502445e-03    1.63203717e-01    
   9         +1.4255848264008936e-03    4.46359414e-02    
  10         +7.1923148247006994e-04    3.23860468e-02    
  11         +3.6819682236334170e-04    2.18578585e-02    
  12         +1.9313880376130136e-04    1.61740248e-02    
  13         +1.8928159638297625e-04    2.69837968e-02    
  14         +1.7423772146470663e-04    2.56381925e-02    
  15         +1.2053131734284784e-04    2.

KeyboardInterrupt: 

In [None]:
# Persist the experiment 2 results. np.save stores the dict as a pickled
# object array, so reading it back requires np.load(..., allow_pickle=True).
np.save(file='v_ratio.npy', arr={'v_ratio': v_ratio})