# Examples of planar shapes

In [1]:
import sys
sys.path.insert(0,'..')
from pymanopt.manifolds import ComplexGrassmann
import numpy as np
from PNG import *
from PGA import *
from Complex_PCA import *
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from shape_transform import *
import rpy2.robjects as robjects
import pandas as pd
from plotnine import *
from joblib import Parallel, delayed
import multiprocessing

In [2]:
def example(data, n_c, **kwargs):
    """Compare cumulative explained variance of PNG, PGA and PNSS on one dataset.

    Parameters
    ----------
    data : str
        Path of the ``.npy`` file to load. The same string with ``npy``
        replaced by ``dat`` is spliced, unquoted, into the R call as the
        first argument of ``pnss3d``.
    n_c : int
        Number of components reported per method. ``pnss3d`` is asked for
        ``2 * n_c`` components and every second cumulative value is kept.
    **kwargs
        Forwarded unchanged to ``PNG``.

    Returns
    -------
    pandas.DataFrame
        ``3 * n_c`` rows with columns ``data``, ``method``, ``component``
        (1-based) and ``var_ratio`` (cumulative variance in percent). Rows
        are ordered by component, and within a component as PNG, PGA, PNSS.
    """
    X = np.load(data)
    # Move the first axis of the stored array to the end before transforming.
    X = X.transpose((1, 2, 0))
    X = shape_transform(X)

    # PNG: fit a complex PCA on the PNG scores.  The requested ``n_c`` is used
    # as-is (it used to be overwritten with a hard-coded 5 at this point).
    scores_PNG = PNG(X, **kwargs)
    png = Complex_PCA(n_components=n_c)
    png.fit(scores_PNG)

    # PGA on the complex Grassmannian sized from the transformed data.
    N, n, p = X.shape
    man = ComplexGrassmann(n, p)
    pga, X_trans = Complex_PGA(X, n_c, man)

    # PNSS is delegated to R.  The derived name is inserted unquoted, so R
    # evaluates it as an expression -- presumably a dataset object provided by
    # the `shapes` package (e.g. 'digit3.npy' -> digit3.dat); TODO confirm.
    # sink() silences R's console output while pnss3d runs.
    pnss_ratio = robjects.r('''
                         library(shapes)
                         sink("/dev/null")
                         out <- pnss3d({}, n.pc = {})
                         sink()
                         out$percent   
                         '''.format(data.replace('npy', 'dat'), 2*n_c))

    # Cumulative curves, computed once instead of once per row.  PNG/PGA
    # ratios are fractions and are scaled to percent; the R output is used
    # unscaled, taking every second cumulative value (indices 1, 3, 5, ...).
    curves = (
        ('PNG', np.cumsum(png.explained_variance_ratio_) * 100),
        ('PGA', np.cumsum(pga.explained_variance_ratio_) * 100),
        ('PNSS', np.cumsum(pnss_ratio)[np.arange(1, 2*n_c, 2)]),
    )

    # Collect plain dicts and build the frame in one go; DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    label = data.replace('.npy', '')
    rows = [
        {'data': label,
         'method': method,
         'component': k + 1,
         'var_ratio': curve[k]}
        for k in range(n_c)
        for method, curve in curves
    ]
    return pd.DataFrame(rows, columns=['data', 'method', 'component', 'var_ratio'])
    
    

In [None]:
# Run `example` several times per dataset in parallel, stack the per-run
# tables, and summarise the cumulative variance per (data, method, component).
data = ['digit3.npy', 'gorf.npy', 'gorm.npy']
n_c = 5
n_repeats = 10  # number of runs of `example` per dataset

num_cores = multiprocessing.cpu_count()
tmp = Parallel(n_jobs=num_cores)(
    delayed(example)(d, n_c) for d in data for _ in range(n_repeats)
)

# Stack all per-run frames in a single call; DataFrame.append was removed in
# pandas 2.0 and re-copied the accumulated frame on every iteration.
v_ratio = pd.concat(tmp, ignore_index=True)

# Mean and standard deviation of var_ratio within each group.
# NOTE(review): `group_by`, `summarize` and `X` are not imported by name in
# this file -- presumably a dplyr-style helper pulled in through one of the
# star imports; confirm where they come from.
v_ratio_sum = v_ratio >> group_by('data', 'method', 'component') \
    >> summarize(mean_ratio = X.var_ratio.mean(), sd_ratio = X.var_ratio.std())

In [None]:
# Line plot of the mean cumulative variance ratio against the component
# index, faceted by dataset with one coloured line per method.
p = (
    ggplot(v_ratio_sum)
    + facet_grid('. ~ data')
    + aes(x='component', y='mean_ratio', color='method')
    + geom_line(aes(group='method'), size=1)
    + labs(y='Cum. Var. Ratio (%)', x='Principal Components', color='')
    + scale_color_manual(values=['red', 'blue', 'green'])
)

# Output settings for the saved figure: 30 cm x 10 cm at 320 dpi.
save_options = {
    'filename': 'shape_var_ratio.png',
    'width': 30,
    'height': 10,
    'units': 'cm',
    'dpi': 320,
}
ggsave(plot=p, **save_options)
