In [1]:
import numpy as np
import time
import elpigraph
#import elpigraphgpu # If you have a GPU. Requires Cupy
import matplotlib.pyplot as plt
from copy import deepcopy
import rpy2.robjects.packages as rpackages
import rpy2.robjects
import rpy2.robjects.numpy2ri
import rpy2.robjects.pandas2ri
r_elpigraph = rpackages.importr("ElPiGraph.R")
rpy2.robjects.numpy2ri.activate()
rpy2.robjects.pandas2ri.activate()
plt.style.use('seaborn')
np.random.seed(0)

# I - Checking output
### Step 1 :  generate output for R and Python

In [5]:
# Load example data
X =  np.genfromtxt('data/tree_data.csv', delimiter=',')

# Create desired list of inputs for R and Python
input_data = [X]*5
epg_n_nodes = [20,30,35,20,20]
epg_lambda = [.1,.02,.7,.03,.08]
epg_mu = [.02,.07,.01,.04,.06]
epg_trimmingradius = [float('inf'),.7,.8,.6,.5]
epg_finalenergy = ['Penalized','Base','Penalized','Base','Base']
epg_alpha = [.01,.03,.05,.08,.04]
epg_beta = [.03,.02,.04,.07,.01]
epg_mode = [2,1,1,2,1]
epg_n_processes = [1,2,1,2,1]
epg_collapse_mode = ['PointNumber','PointNumber_Extrema','PointNumber_Leaves','EdgesNumber','EdgesLength']
epg_collapse_par = [5,7,4,6,3]
epg_maxsteps = [float('inf'),1000,100,20,200]
                                  # Python uses WeightedCentroid not Weigthed (corrected typo)
epg_ext_mode =   ['QuantDists','QuantCentroid','WeightedCentroid','QuantCentroid','WeightedCentroid']
r_epg_ext_mode = ['QuantDists','QuantCentroid','WeigthedCentroid','QuantCentroid','WeigthedCentroid'] 
epg_ext_par = [.5,.6,.8,.9,.5]
epg_shift_mode = ['NodeDensity','NodePoints','NodeDensity','NodePoints','NodeDensity']
epg_shift_radius = [0.05,0.07,0.04,0.08,0.03]
epg_shift_max = [5,7,4,8,6]



# Results storage Python
epg_main = []
epg_obj_collapse = []
epg_obj_shift = []
epg_obj_extend = []
epg_obj_fineTune = []
# Results storage R
r_epg_main = []
r_epg_obj_collapse = []
r_epg_obj_shift = []
r_epg_obj_extend = []
r_epg_obj_fineTune = []

for i in range(len(input_data)):
    
    ############################ Run functions, Python version ###################################
    
    epg_main.append(elpigraph.computeElasticPrincipalTree(X=input_data[i],NumNodes = epg_n_nodes[i], 
                                                          Lambda=epg_lambda[i], Mu=epg_mu[i],
                                                          TrimmingRadius = epg_trimmingradius[i],
                                                          FinalEnergy = epg_finalenergy[i],
                                                          alpha = epg_alpha[i],
                                                          beta = epg_beta[i],                                                    
                                                          Do_PCA=False,CenterData=False,
                                                          n_cores = epg_n_processes[i],
                                                          nReps=1,
                                                          EmbPointProb=1.0,
                                                          drawPCAView=False,
                                                          Mode = epg_mode[i],
                                                          MaxSteps = epg_maxsteps[i])[0])
    
    # util functions input
    epg_obj = epg_main[i]
    init_nodes_pos = epg_obj['NodePositions']
    init_edges = epg_obj['Edges'][0]
    #########################################
    try:
        epg_obj_collapse.append(elpigraph.CollapseBranches(X = input_data[i], PG = epg_obj, Mode = epg_collapse_mode[i], ControlPar = epg_collapse_par[i]))
    except:
        epg_obj_collapse.append('bug')

    epg_obj_shift.append(elpigraph.ShiftBranching(X = input_data[i], 
                                                  PG = epg_obj, 
                                                  TrimmingRadius = epg_trimmingradius[i],                       
                                                  SelectionMode = epg_shift_mode[i], 
                                                  DensityRadius = epg_shift_radius[i],
                                                  MaxShift = epg_shift_max[i]))
    
    epg_obj_extend.append(elpigraph.ExtendLeaves(X = input_data[i], 
                                                 PG = epg_obj,
                                                 TrimmingRadius = epg_trimmingradius[i],
                                                 Mode = epg_ext_mode[i], 
                                                 ControlPar = epg_ext_par[i],
                                                 PlotSelected = False,
                                                 DoSA_maxiter=4000)) #number of iterations for simulated annealing
    
    epg_obj_fineTune.append(elpigraph.fineTuneBR(X=input_data[i],
                                                MaxSteps = epg_maxsteps[i],
                                                Mode = 2,
                                                NumNodes = epg_n_nodes[i], 
                                                InitNodePositions = init_nodes_pos,
                                                InitEdges=init_edges,
                                                Lambda=epg_lambda[i], Mu=epg_mu[i],
                                                TrimmingRadius= epg_trimmingradius[i],
                                                FinalEnergy = epg_finalenergy[i],
                                                alpha = epg_alpha[i],
                                                beta = epg_beta[i],                                                    
                                                Do_PCA=False,CenterData=False,
                                                drawAccuracyComplexity = False, drawEnergy = False,drawPCAView = False,
                                                n_cores = epg_n_processes[i],
                                                nReps=1,
                                                ProbPoint=1.0)[0])
    
    ############################ Run functions, R version ###################################

    tmp = r_elpigraph.computeElasticPrincipalTree(X=input_data[i],NumNodes = epg_n_nodes[i], 
                                                  Lambda=epg_lambda[i], Mu=epg_mu[i],
                                                  TrimmingRadius= epg_trimmingradius[i],
                                                  FinalEnergy = epg_finalenergy[i],
                                                  alpha = epg_alpha[i],
                                                  beta = epg_beta[i],                                                    
                                                  Do_PCA=False,CenterData=False,
                                                  n_cores = epg_n_processes[i],
                                                  nReps=1,
                                                  ProbPoint=1.0,
                                                  drawPCAView=False,
                                                  Mode = epg_mode[i],
                                                  MaxSteps = epg_maxsteps[i])[0]
    r_epg_main.append(dict(zip(tmp.names, map(list,np.array(tmp))))) # Convert R result to dict format
    
    # util functions input
    r_epg_obj = tmp
    init_nodes_pos = r_epg_obj[0]
    init_edges = r_epg_obj[1][0]
    #########################################
    obj_collapse = deepcopy(r_epg_obj)
    r_epg_obj_collapse.append(r_elpigraph.CollapseBrances(X = input_data[i], TargetPG = obj_collapse, Mode = epg_collapse_mode, ControlPar = epg_collapse_par))

    r_epg_obj_shift.append(r_elpigraph.ShiftBranching(X = input_data[i], 
                                                      TargetPG = r_epg_obj, 
                                                      TrimmingRadius = epg_trimmingradius[i],                       
                                                      SelectionMode = epg_shift_mode[i], 
                                                      DensityRadius = epg_shift_radius[i],
                                                      MaxShift = epg_shift_max[i]))
    
    obj_extend = deepcopy(r_epg_obj)
    tmp_ext = r_elpigraph.ExtendLeaves(X = input_data[i], 
                                       TargetPG = obj_extend,
                                       TrimmingRadius = epg_trimmingradius[i],
                                       Mode = r_epg_ext_mode[i], 
                                       ControlPar = epg_ext_par[i],
                                       PlotSelected = False)
    r_epg_obj_extend.append(dict(zip(tmp_ext.names, map(list,np.array(tmp_ext)))))
    
    tmp_fineTune = r_elpigraph.fineTuneBR(X=input_data[i],
                                          MaxSteps = epg_maxsteps[i],
                                          Mode = 2,
                                          NumNodes = epg_n_nodes[i], 
                                          InitNodePositions = init_nodes_pos,
                                          InitEdges=init_edges,
                                          Lambda=epg_lambda[i], Mu=epg_mu[i],
                                          TrimmingRadius= epg_trimmingradius[i],
                                          FinalEnergy = epg_finalenergy[i],
                                          alpha = epg_alpha[i],
                                          beta = epg_beta[i],                                                    
                                          Do_PCA=False,CenterData=False,
                                          drawAccuracyComplexity = False, drawEnergy = False,drawPCAView = False,
                                          n_cores = epg_n_processes[i],
                                          nReps=1,
                                          ProbPoint=1.0)[0]
    r_epg_obj_fineTune.append(dict(zip(tmp_fineTune.names, map(list,np.array(tmp_fineTune)))))

Generating the initial configuration
Creating a chain in the 1st PC with 2 nodes
90% of the points have been used as initial conditions. Resetting.
Constructing tree 1 of 1 / Subset 1 of 1
The elastic matrix is being used. Edge configuration will be ignored
Computing EPG with  20  nodes on  492  points and  3  dimensions
Nodes =  2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 

BARCODE	ENERGY	NNODES	NEDGES	NRIBS	NSTARS	NRAYS	NRAYS2	MSE	MSEP	FVE	FVEP	UE	UR	URN	URN2	URSD

2||20	0.0841	20	19	14	2	0	0	0.0357	0.0343	0.9337	0.9363	0.0475	0.0008	0.0165	0.3296	0


MSDEnergyPlot not yet implemented
accuracyComplexityPlot not yet implemented
0.7122  seconds elapsed
Removing the terminal branch with nodes: [14 17]
Moving the branching point at node 0
Moving the branching point at node 14
Performing simulated annealing. This may take a while
Performing simulated annealing. This may take a while
Performing simulated annealing. This may take a while
Performing simulated annealing. This may take a whi

### Step 2 : check final output : computePrincipalTree, ExtendLeaves, fineTuneBR
#### For dict keys in (NodePositions, Edges, FinalReport, ElasticMatrix) -> prints key, iteration index, function if a difference is found in the result dictionary

In [4]:
funcs = ['computeElasticPrincipalTree','ExtendLeaves','fineTuneBR']
j = 0  #funcs index
for res_py,res_R in [(epg_main,r_epg_main),(epg_obj_extend,r_epg_obj_extend),(epg_obj_fineTune,r_epg_obj_fineTune)]: # for func in funcs
    for i in range(len(input_data)): # check each set of output
        one_res_py = res_py[i]
        one_res_R = res_R[i]
        
        for key in one_res_py:
            if key == 'NodePositions':
                try: assert np.allclose(one_res_py[key], one_res_R[key])

                except: print(key,i,funcs[j])

            if key == 'Edges':
                try: assert all(map(lambda x:np.all(x),[one_res_py[key][0]==(one_res_R[key][0]-1), #correcting R indexing that starts at one
                                                        one_res_py[key][1][~np.isnan(one_res_py[key][1])]==one_res_R[key][1][~np.isnan(one_res_R[key][1])],
                                                        one_res_py[key][2][~np.isnan(one_res_py[key][2])]==one_res_R[key][2][~np.isnan(one_res_R[key][2])]]))

                except: print(key,i,funcs[j])

            if key == 'FinalReport':
                try: assert(np.allclose(np.array(list(one_res_py[key].values()))[1:].astype(float), 
                                        np.array(one_res_R[key]).flatten()[1:].astype(float)))
                except: print(key,i, funcs[j])

            if key == 'ElasticMatrix':
                try: assert np.all(one_res_py[key] == one_res_R[key])
                except: print(key,i,funcs[j])
    j+=1

IndexError: list index out of range

In [54]:
i=1
res_py = elpigraph.computeElasticPrincipalTree(X=input_data[i],NumNodes = epg_n_nodes[i], 
                                                Lambda=epg_lambda[i], Mu=epg_mu[i],
                                                TrimmingRadius= epg_trimmingradius[i],
                                                FinalEnergy = epg_finalenergy[i],
                                                alpha = epg_alpha[i],
                                                beta = epg_beta[i],                                                    
                                                Do_PCA=False,CenterData=False,
                                                n_cores = epg_n_processes[i],
                                                nReps=1,
                                                ProbPoint=1.0,
                                                drawPCAView=False,
                                                Mode = epg_mode[i],
                                                MaxSteps = epg_maxsteps[i])[0]



tmp=r_elpigraph.computeElasticPrincipalTree(X=input_data[i],NumNodes = epg_n_nodes[i], 
                                                Lambda=epg_lambda[i], Mu=epg_mu[i],
                                                TrimmingRadius= epg_trimmingradius[i],
                                                FinalEnergy = epg_finalenergy[i],
                                                alpha = epg_alpha[i],
                                                beta = epg_beta[i],                                                    
                                                Do_PCA=False,CenterData=False,
                                                n_cores = epg_n_processes[i],
                                                nReps=1,
                                                ProbPoint=1.0,
                                                drawPCAView=False,
                                                Mode = epg_mode[i],
                                                MaxSteps = epg_maxsteps[i])[0]


epg_obj = res_py
init_nodes_pos = epg_obj['NodePositions']
init_edges = epg_obj['Edges'][0]

fine_tune= elpigraph.(X=input_data[i],
                    MaxSteps = epg_maxsteps[i],
                    Mode = 2,
                    NumNodes = epg_n_nodes[i], 
                    InitNodePositions = init_nodes_pos,
                    InitEdges=init_edges,
                    Lambda=epg_lambda[i], Mu=epg_mu[i],
                    TrimmingRadius= epg_trimmingradius[i],
                    FinalEnergy = epg_finalenergy[i],
                    alpha = epg_alpha[i],
                    beta = epg_beta[i],                                                    
                    Do_PCA=False,CenterData=False,
                    drawAccuracyComplexity = False, drawEnergy = False,drawPCAView = False,
                    n_cores = epg_n_processes[i],
                    nReps=1,
                    ProbPoint=1.0)[0]

r_epg_obj = tmp
init_nodes_pos_r = tmp[0]
init_edges_r = tmp[1][0]

fine_tune_r = r_elpigraph.fineTuneBR(X=input_data[i],
                                      MaxSteps = epg_maxsteps[i],
                                      Mode = 2,
                                      NumNodes = epg_n_nodes[i], 
                                      InitNodePositions = init_nodes_pos,
                                      InitEdges=init_edges_r,
                                      Lambda=epg_lambda[i], Mu=epg_mu[i],
                                      TrimmingRadius= epg_trimmingradius[i],
                                      FinalEnergy = epg_finalenergy[i],
                                      alpha = epg_alpha[i],
                                      beta = epg_beta[i],                                                    
                                      Do_PCA=False,CenterData=False,
                                      drawAccuracyComplexity = False, drawEnergy = False,drawPCAView = False,
                                      n_cores = epg_n_processes[i],
                                      nReps=1,
                                      ProbPoint=1.0)[0]




Constructing tree 1 of 1 / Subset 1 of 1
Computing EPG with  30  nodes on  492  points and  3  dimensions
Using grammar optimization
Nodes =  30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 

R[write to console]: Error in ElPiGraph.R:::ApplyOptimalGraphGrammarOpeation(X = X, NodePositions = UpdatedPG$NodePositions,  : 
  promise already under evaluation: recursive default argument reference or earlier problems?
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> <Anonymous>

R[write to console]: In addition: 

R[write to console]: 1: 
R[write to console]: In UseMethod("t") :
R[write to console]:  closing unused connection 4 (<-localhost:11500)

R[write to console]: 2: 
R[write to console]: In UseMethod("t") :
R[write to console]:  closing unused connection 3 (<-localhost:11500)



RRuntimeError: Error in ElPiGraph.R:::ApplyOptimalGraphGrammarOpeation(X = X, NodePositions = UpdatedPG$NodePositions,  : 
  promise already under evaluation: recursive default argument reference or earlier problems?
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> <Anonymous>


In [56]:
%load_ext rpy2.ipython

In [86]:
Data = X.copy()
Do_PCA = False
AdjustVect = [False]*len(InitNodePositions)
AdjustElasticMatrix = False

if ReduceDimension is None:
    ReduceDimension = np.array(range(np.min(Data.shape)))

elif not Do_PCA:
    print("Cannot reduce dimensionality witout doing PCA (parameter Do_PCA)")
    print("Dimensionality reduction will be ignored")
    ReduceDimension = np.array(range(np.min(Data.shape)))


if CenterData:
    DataCenters = np.mean(Data,axis=0)
    Data = Data - DataCenters
    InitNodePositions = InitNodePositions - DataCenters

if Do_PCA:
    print("Performing PCA")

    if isinstance(ReduceDimension,float):
        if ReduceDimension < 1:
            print("Dimensionality reduction via ratio of explained variance (full PCA will be computed)")
            vglobal, PCAData, explainedVariances = PCA(Data)
            ReduceDimension = range(np.min(np.where(np.cumsum(explainedVariances)/explainedVariances.sum() >= ReduceDimension))+1)
            perc = explainedVariances[ReduceDimension].sum()/explainedVariances.sum()*100

            InitNodePositions = InitNodePositions.dot(vglobal)
        else:
            raise ValueError("if ReduceDimension is a single value it must be < 1")

    else:
        if max(ReduceDimension+1) > min(Data.shape):
            print("Selected dimensions are outside of the available range. ReduceDimension will be updated")
            ReduceDimension = [i for i in ReduceDimension if i in range(min(Data.shape))]
        if(max(ReduceDimension+1) > min(Data.shape)*.75):
            print("Using standard PCA")
            vglobal, PCAData, explainedVariances = PCA(Data)
            perc = explainedVariances[ReduceDimension].sum()/explainedVariances.sum()*100

            InitNodePositions = InitNodePositions.dot(vglobal)

        else:
            print("Centering data and using PCA with truncated SVD")
            if not CenterData:
                # if data was not centered, center it (for SVD)
                DataCenters = np.mean(Data,axis=0)
                Data = Data - DataCenters
                InitNodePositions = InitNodePositions - DataCenters
            PCAData, explainedVariances, U, S, Vt = elpigraph.src.PCA.TruncPCA(data_centered,algorithm='randomized',n_components=max(ReduceDimension+1))
            ExpVariance = np.sum(np.var(Data, axis=0))
            perc = np.sum(explainedVariances)/ExpVariance*100

            vglobal = Vt.T
            InitNodePositions = InitNodePositions.dot(vglobal)


    print(len(ReduceDimension), "dimensions are being used")
    print(np.round(perc,2), "% of the original variance has been retained")

    X = PCAData[:,ReduceDimension]
    InitNodePositions = InitNodePositions[:,ReduceDimension]


else:
    X = Data

if Lambda_Initial is None:
    Lambda_Initial = Lambda

if Mu_Initial is None:
    Mu_Initial = Mu

if ElasticMatrix is None:
    InitElasticMatrix = elpigraph.src.core.Encode2ElasticMatrix(Edges = InitEdges, Lambdas = Lambda_Initial, Mus = Mu_Initial)
else:
    print("The elastic matrix is being used. Edge configuration will be ignored")
    InitElasticMatrix = ElasticMatrix


if InitElasticMatrix.shape[0] != InitNodePositions.shape[0] or InitElasticMatrix.shape[1] != InitNodePositions.shape[0]:
    raise ValueError("Elastic matrix incompatible with the node number. Impossible to proceed.")


# Computing the graph

print("Computing EPG with ", NumNodes," nodes on ", Data.shape[0], " points and ", Data.shape[1], " dimensions")

ElData = elpigraph.src.BaseElPi.ElPrincGraph(X = X, NumNodes = NumNodes, NumEdges = NumEdges, Lambda = Lambda, Mu = Mu,
                         MaxNumberOfIterations = MaxNumberOfIterations, eps = eps, TrimmingRadius = TrimmingRadius,
                         NodePositions = InitNodePositions, ElasticMatrix = InitElasticMatrix, AdjustVect = AdjustVect,
                         CompileReport = True, ShowTimer = ShowTimer,
                         FinalEnergy = FinalEnergy, alpha = alpha, beta = beta, Mode = Mode,
                         GrowGrammars = GrowGrammars, ShrinkGrammars = ShrinkGrammars,
                         GrammarOptimization = GrammarOptimization, MaxSteps = MaxSteps, GrammarOrder = GrammarOrder,
                         ComputeMSEP = ComputeMSEP,
                         verbose = verbose, AvoidSolitary = AvoidSolitary,
                         EmbPointProb = EmbPointProb, AdjustElasticMatrix = AdjustElasticMatrix,
                         AdjustElasticMatrix_Initial = AdjustElasticMatrix_Initial,
                         DisplayWarnings=DisplayWarnings,
                         n_cores=n_cores,MinParOp=MinParOp)

NodePositions = ElData['NodePositions']
Edges = elpigraph.src.core.DecodeElasticMatrix(ElData['ElasticMatrix'])

if drawEnergy and ElData['ReportTable'] is not None:
    print('MSDEnergyPlot not yet implemented')
#     plotMSDEnergyPlot(ReportTable = ElData['ReportTable'])

if drawAccuracyComplexity and ElData['ReportTable'] is not None:
    print('accuracyComplexityPlot not yet implemented')
#     accuracyComplexityPlot(ReportTable = ElData['ReportTable'])

if Do_PCA:
    NodePositions = NodePositions.dot(vglobal[:,ReduceDimension].T)

FinalPG = dict(NodePositions = NodePositions, Edges = Edges, ReportTable = ElData['ReportTable'],
                  FinalReport = ElData['FinalReport'], ElasticMatrix = ElData['ElasticMatrix'],
                  Lambda = ElData['Lambda'], Mu = ElData['Mu'], TrimmingRadius = TrimmingRadius,
                  Mode = ElData['Mode'],
                  MaxNumberOfIterations = ElData['MaxNumberOfIterations'],
                  eps = ElData['eps'], Date = ST, TicToc = EndTimer)

if drawPCAView:
    print(PlotPG(Data, FinalPG))

if Do_PCA or CenterData:
    FinalPG['NodePositions'] = NodePositions + DataCenters

Cannot reduce dimensionality witout doing PCA (parameter Do_PCA)
Dimensionality reduction will be ignored
Computing EPG with  30  nodes on  492  points and  3  dimensions
Using grammar optimization
Nodes =  30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 3

NameError: name 't' is not defined

In [91]:
import copy

In [98]:
X=input_data[i]
NumNodes = epg_n_nodes[i]
NumEdges = float('inf')
InitNodes = 2
Lambda=epg_lambda[i]
Mu=epg_mu[i]
MaxSteps = epg_maxsteps[i]
MaxNumberOfIterations = 10
TrimmingRadius= epg_trimmingradius[i]
eps = .01
Do_PCA = False
NodePositions = init_nodes_pos
InitEdges=init_edges
AdjustVect = [False]*len(InitNodePositions)
ElasticMatrix = InitElasticMatrix
CenterData = False
ComputeMSEP = True
verbose = False
ShowTimer = False
ReduceDimension = None
drawAccuracyComplexity = False
drawPCAView = False
drawEnergy = False
n_cores = epg_n_processes[i]
#ClusType = "Sock",
MinParOp = 20
nReps = 1
#ParallelRep = False,
Subsets = list()
ProbPoint = 1
Mode = 2
FinalEnergy = epg_finalenergy[i]
alpha = epg_alpha[i]
beta = epg_beta[i]
#gamma = 0,
#FastSolve = False,
Configuration = "Line"
ICOver = None
DensityRadius = None
AvoidSolitary = False
EmbPointProb = 1
SampleIC = True
AvoidResampling = True
AdjustElasticMatrix = False
AdjustElasticMatrix_Initial = None
Lambda_Initial = None
Mu_Initial = None
DisplayWarnings = False

# if(NumNodes > len(InitNodePositions)):
#     GrammarOptimization = False
#     GrowGrammars = np.array([['bisectedge_3']])
#     ShrinkGrammars = np.array([['shrinkedge_3']])
#     GrammarOrder = ["Grow", "Shrink", "Grow"]
# else:
GrammarOptimization = True
GrowGrammars = np.array([['bisectedge_3']])
ShrinkGrammars = np.array([['shrinkedge_3']])
GrammarOrder = ["Shrink", "Grow"]


    

AdjustVect       
CompileReport = True
ShowTimer = False
ComputeMSEP = True
MaxBlockSize = 100000000
MaxFailedOperations = float('inf')

AvoidSolitary = False
EmbPointProb = 1
DisplayWarnings = False

if GrammarOptimization:
    print("Using grammar optimization")
    if np.isinf(MaxSteps):
        print("When setting GrammarOptimization to TRUE, MaxSteps must be finite. Using MaxSteps = 1")
        MaxSteps = 1

if not isinstance(X,np.ndarray):
    raise TypeError("Please provide data matrix as an np array")

if not CompileReport:
    verbose = False

if isinstance(ElasticMatrix,np.ndarray):
    if np.any(ElasticMatrix != ElasticMatrix.T):
        raise ValueError('Elastic matrix must be square and symmetric')

if AdjustElasticMatrix_Initial is not None:
    ElasticMatrix, _ = AdjustElasticMatrix_Initial(ElasticMatrix,AdjustVect,verbose=True)

InitNodePositions = elpigraph.src.core.PrimitiveElasticGraphEmbedment(X = X, NodePositions = NodePositions,
                                                   MaxNumberOfIterations = MaxNumberOfIterations,
                                                   TrimmingRadius = TrimmingRadius, eps = eps,
                                                   ElasticMatrix = ElasticMatrix, Mode = Mode)[0]

UpdatedPG = dict(ElasticMatrix = ElasticMatrix, NodePositions = InitNodePositions, AdjustVect = AdjustVect)

ReportTable = []
SquaredX = np.sum(X**2,axis=1,keepdims=1)

#     if n_cores > 1:
#         print('Copying data to shared memory for parallel processing...',end='')
# #         ray.init(num_cpus=n_cores)
# #         Xremote = ray.put(X)
# #         SquaredXremote = ray.put(SquaredX)  
#         X_shape = X.shape
#         SquaredX_shape = SquaredX.shape

#         X_remote = multiprocessing.RawArray('d', X_shape[0] * X_shape[1])
#         SquaredX_remote = multiprocessing.RawArray('d', SquaredX_shape[0] * SquaredX_shape[1])

#         # Wrap remote objects as numpy arrays so we can easily manipulate their data.
#         X_np = np.frombuffer(X_remote).reshape(X_shape)
#         SquaredX_np = np.frombuffer(SquaredX_remote).reshape(SquaredX_shape)

#         # Copy data to our shared array.
#         np.copyto(X_np, X)
#         np.copyto(SquaredX_np, SquaredX)
#         multiproc_shared_variables = (X_remote,X_shape,SquaredX_remote,SquaredX_shape)
#         # Initialize dictionary storing the variables passed from the init_worker.
#         var_dict = {}

#         print('Done')


if verbose:
    print('BARCODE\tENERGY\tNNODES\tNEDGES\tNRIBS\tNSTARS\tNRAYS\tNRAYS2\tMSE\tMSEP\tFVE\tFVEP\tUE\tUR\tURN\tURN2\tURSD\n')

# now we grow the graph up to NumNodes

if (UpdatedPG['NodePositions'].shape[0] >= NumNodes) and not(GrammarOptimization):
    FinalReport = elpigraph.src.reporting.ReportOnPrimitiveGraphEmbedment(X = X, NodePositions = UpdatedPG['NodePositions'],ElasticMatrix = UpdatedPG['ElasticMatrix'],PartData = PartitionData(X = X,NodePositions = UpdatedPG['NodePositions'],MaxBlockSize=100000000,SquaredX = SquaredX,TrimmingRadius = TrimmingRadius),ComputeMSEP = ComputeMSEP)

    print( dict(NodePositions = UpdatedPG['NodePositions'], ElasticMatrix = UpdatedPG['ElasticMatrix'],
           ReportTable = FinalReport, FinalReport = FinalReport, Lambda = Lambda, Mu = Mu))

FailedOperations = 0
Steps = 0
FirstPrint = True
while (UpdatedPG['NodePositions'].shape[0] < NumNodes) or GrammarOptimization:
    nEdges = len(np.triu(UpdatedPG['ElasticMatrix'], 1).nonzero()[0])
    if (((UpdatedPG['NodePositions'].shape[0]) >= NumNodes) or (nEdges >= NumEdges)) and not GrammarOptimization:
        break

    if(not verbose and ShowTimer):
        print("Nodes = ", UpdatedPG['NodePositions'].shape[0])

    if(not verbose and not ShowTimer):
        if(FirstPrint):
            print("Nodes = ",end=' ')
            FirstPrint = False
        print(UpdatedPG['NodePositions'].shape[0],end=" ")
    OldPG = copy.deepcopy(UpdatedPG)

    for OpType in GrammarOrder:
        if OpType == "Grow" and len(GrowGrammars)>0:        

            for k in range(GrowGrammars.shape[0]):
                if ShowTimer:
                    print("Growing")
                    t = time.time()

                UpdatedPG = elpigraph.src.grammar_operations.ApplyOptimalGraphGrammarOperation(X,
                                                              UpdatedPG['NodePositions'], 
                                                              UpdatedPG['ElasticMatrix'],
                                                              GrowGrammars[k],
                                                              MaxBlockSize = MaxBlockSize,
                                                              AdjustVect = UpdatedPG['AdjustVect'],
                                                              SquaredX = SquaredX,
                                                              verbose = False,
                                                              MaxNumberOfIterations = MaxNumberOfIterations,
                                                              eps = eps,
                                                              TrimmingRadius = TrimmingRadius,
                                                              Mode = Mode,
                                                              FinalEnergy = FinalEnergy,
                                                              alpha = alpha,
                                                              beta = beta,
                                                              EmbPointProb = EmbPointProb,
                                                              AvoidSolitary = AvoidSolitary,
                                                              AdjustElasticMatrix = AdjustElasticMatrix,
                                                              DisplayWarnings = DisplayWarnings,
                                                              n_cores=n_cores, MinParOp = MinParOp,
                                                              )

                if UpdatedPG == 'failed operation':
                    print('failed operation')
                    FailedOperations += 1
                    UpdatedPG = copy.deepcopy(OldPG)
                    break
                else:
                    FailedOperations = 0
                    if len(UpdatedPG['NodePositions']) == 3:
                        # this is needed to erase the star elasticity coefficient which was initially assigned to both leaf nodes,
                        # one can erase this information after the number of nodes in the graph is > 2

                        inds = np.where(np.sum(UpdatedPG['ElasticMatrix']-np.diag(np.diag(UpdatedPG['ElasticMatrix']))>0,axis=0)==1)

                        UpdatedPG['ElasticMatrix'][inds, inds] = 0

                if ShowTimer:
                    elapsed = time.time() - t
                    print(np.round(elapsed,4))  


        if OpType == "Shrink" and len(ShrinkGrammars)>0:
            for k in range(ShrinkGrammars.shape[0]):
                if ShowTimer:
                    print("Shrinking")
                    t = time.time()
                UpdatedPG = elpigraph.src.grammar_operations.ApplyOptimalGraphGrammarOperation(X,
                                                              UpdatedPG['NodePositions'], 
                                                              UpdatedPG['ElasticMatrix'],
                                                              ShrinkGrammars[k],
                                                              MaxBlockSize = MaxBlockSize,
                                                              AdjustVect = UpdatedPG['AdjustVect'],
                                                              SquaredX = SquaredX,
                                                              verbose = False,
                                                              MaxNumberOfIterations = MaxNumberOfIterations,
                                                              eps = eps,
                                                              TrimmingRadius = TrimmingRadius,
                                                              Mode = Mode,
                                                              FinalEnergy = FinalEnergy,
                                                              alpha = alpha,
                                                              beta = beta,
                                                              EmbPointProb = EmbPointProb,
                                                              AvoidSolitary = AvoidSolitary,
                                                              AdjustElasticMatrix = AdjustElasticMatrix,
                                                              DisplayWarnings = DisplayWarnings,
                                                              n_cores=n_cores,MinParOp=MinParOp,
                                                              )

                if UpdatedPG == 'failed operation':
                    print('failed operation')
                    FailedOperations += 1
                    UpdatedPG = copy.deepcopy(OldPG)
                    break
                else:
                    FailedOperations = 0

                if ShowTimer:
                    elapsed = time.time() - t
                    print(np.round(elapsed,4))  


    # Count the execution steps
    Steps += 1

    # If the number of execution steps is larger than MaxSteps stop the algorithm
    if Steps > MaxSteps or FailedOperations > MaxFailedOperations:
        break

Using grammar optimization
Nodes =  30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 3

In [118]:
GrowGrammars[0]

array(['bisectedge_3'], dtype='<U12')

In [146]:
UpdatedPG = elpigraph.src.grammar_operations.ApplyOptimalGraphGrammarOperation(X,
                                              InitNodePositions, 
                                              InitElasticMatrix,
                                              GrowGrammars[0],
                                              MaxBlockSize = MaxBlockSize,
                                              AdjustVect = AdjustVect,
                                              SquaredX = SquaredX,
                                              verbose = False,
                                              MaxNumberOfIterations = MaxNumberOfIterations,
                                              eps = eps,
                                              TrimmingRadius = TrimmingRadius,
                                              Mode = Mode,
                                              FinalEnergy = FinalEnergy,
                                              alpha = alpha,
                                              beta = beta,
                                              EmbPointProb = EmbPointProb,
                                              AvoidSolitary = AvoidSolitary,
                                              AdjustElasticMatrix = AdjustElasticMatrix,
                                              DisplayWarnings = DisplayWarnings,
                                              n_cores=n_cores, MinParOp = MinParOp,
                                              )
UpdatedPG['NodePositions']==InitNodePositions



False

In [139]:
%R -o nodep

In [138]:
%%R -i InitNodePositions -i InitElasticMatrix -i X -i AdjustVect

UpdatedPG <- ElPiGraph.R:::ApplyOptimalGraphGrammarOpeation(X = X,
                                                          NodePositions = InitNodePositions,
                                                          ElasticMatrix = InitElasticMatrix,
                                                          AdjustVect = AdjustVect,
                                                          operationtypes = ShrinkGrammars[[1]],
                                                          SquaredX = SquaredX,
                                                          FinalEnergy = FinalEnergy,
                                                          alpha = alpha,
                                                          beta = beta,
                                                          gamma = gamma,
                                                          Mode = Mode,
                                                          MaxNumberOfIterations = MaxNumberOfIterations,
                                                          eps = eps,
                                                          TrimmingRadius = TrimmingRadius,
                                                          verbose = FALSE,
                                                          n.cores = n.cores,
                                                          EnvCl = cl,
                                                          MinParOP = MinParOP,
                                                          FastSolve = FastSolve,
                                                          AvoidSolitary = AvoidSolitary,
                                                          EmbPointProb = EmbPointProb)
nodep<-UpdatedPG$NodePositions

In [149]:
%%R -i init_nodes_pos -i init_edges_r  -i epg_n_nodes -i epg_maxsteps -i InitElasticMatrix -i InitNodePositions -i AdjustVect
X=input_data[[i]]
NumNodes = epg_n_nodes[[i]]
NumEdges = Inf
InitNodes = 2
Lambda=epg_lambda[[i]]
Mu=epg_mu[[i]]
MaxSteps = epg_maxsteps[[i]]
MaxNumberOfIterations = 10
TrimmingRadius= epg_trimmingradius[[i]]
eps = .01
Do_PCA = FALSE
NodesPositions = init_nodes_pos
InitEdges=init_edges_r
ElasticMatrix = InitElasticMatrix
CenterData = FALSE
ComputeMSEP = TRUE
verbose = FALSE
ShowTimer = FALSE
ReduceDimension = NULL
drawAccuracyComplexity = FALSE
drawPCAView = FALSE
drawEnergy = FALSE
n.cores = 1
#ClusType = "Sock",
MinParOP = 20
nReps = 1
#ParallelRep = False,
Subsets = list()
ProbPoint = 1
Mode = 2
FinalEnergy = epg_finalenergy[[i]]
alpha = epg_alpha[[i]]
beta = epg_beta[[i]]
#gamma = 0,
#FastSolve = False,
Configuration = "Line"
ICOver = NULL
DensityRadius = NULL
AvoidSolitary = FALSE
EmbPointProb = 1
SampleIC = TRUE
AvoidResampling = TRUE
AdjustElasticMatrix = FALSE
AdjustElasticMatrix.Initial = NULL
Lambda_Initial = NULL
Mu_Initial = NULL
DisplayWarnings = FALSE
FastSolve = FALSE
# if(NumNodes > len(InitNodePositions)):
#     GrammarOptimization = False
#     GrowGrammars = np.array([['bisectedge_3']])
#     ShrinkGrammars = np.array([['shrinkedge_3']])
#     GrammarOrder = ["Grow", "Shrink", "Grow"]
# else:
GrammarOptimization = TRUE
GrowGrammars = c('bisectedge_3')
ShrinkGrammars = c('shrinkedge_3')
GrammarOrder = c("Shrink", "Grow")

CompileReport = TRUE
ShowTimer = TRUE
ComputeMSEP = TRUE
MaxBlockSize = 100000000
MaxFailedOperations = Inf

AvoidSolitary = FALSE
EmbPointProb = 1
DisplayWarnings = FALSE


  
if(GrammarOptimization){
print("Using grammar optimization")
if(is.infinite(MaxSteps)){
  warning("When setting GrammarOptimization to TRUE, MaxSteps must be finite. Using MaxSteps = 1")
  MaxSteps = 1
}
}

if(is.list(X)){
warning("Data matrix must be a numeric matrix. It will be converted automatically. This can introduce inconsistencies")
X <- data.matrix(X)
}

if(!CompileReport){
verbose = FALSE
}

if(!is.null(ElasticMatrix)){
if(any(ElasticMatrix != t(ElasticMatrix))){
  stop('ERROR: Elastic matrix must be square and symmetric')
}
}

if(verbose){
cat('BARCODE\tENERGY\tNNODES\tNEDGES\tNRIBS\tNSTARS\tNRAYS\tNRAYS2\tMSE\tMSEP\tFVE\tFVEP\tUE\tUR\tURN\tURN2\tURSD\n')
}

if(!is.null(AdjustElasticMatrix.Initial)){
tGraphInfo <- list(ElasticMatrix = ElasticMatrix, AdjustVect = AdjustVect)
ElasticMatrix <- AdjustElasticMatrix.Initial(tGraphInfo, ...)$ElasticMatrix

print(paste(sum(ElasticMatrix != tGraphInfo$ElasticMatrix), "values of the elastic matrix have been updated"))
}

InitNodePositions <- PrimitiveElasticGraphEmbedment(
X = X, NodePositions = NodesPositions,
MaxNumberOfIterations = MaxNumberOfIterations, TrimmingRadius = TrimmingRadius, eps = eps,
ElasticMatrix = ElasticMatrix, Mode = Mode)$EmbeddedNodePositions

UpdatedPG <- list(ElasticMatrix = ElasticMatrix, NodePositions = InitNodePositions, AdjustVect = AdjustVect)

ReportTable <- NULL
ToSrink <- c(2, 9, 10, 11, 12, 13, 14, 15, 16, 17)

SquaredX = rowSums(X^2)

# now we grow the graph up to NumNodes

GlobalCluster <- TRUE

if(all(class(n.cores) %in% c("numeric", "integer"))){
if(n.cores > 1){
  if(ClusType == "Fork"){
    print(paste("Creating a fork cluster with", n.cores, "nodes"))
    cl <- parallel::makeCluster(n.cores, type="FORK")
    GlobalCluster <- FALSE
  } else {
    print(paste("Creating a sock cluster with", n.cores, "nodes"))
    cl <- parallel::makeCluster(n.cores)
    GlobalCluster <- FALSE
    parallel::clusterExport(cl, varlist = c("X", "SquaredX", "MaxNumberOfIterations", "TrimmingRadius", "eps", "verbose",
                                            "EmbPointProb", "alpha", "beta", "FinalEnergy"), envir=environment())
  }
} else {
  print("Using a single core")
  cl <- NULL
}
} else {
if(all(c("SOCKcluster", "cluster") %in% class(n.cores))){
  print("Using a user supplied cluster. It must contains the data points in a matrix X")
  cl <- n.cores
  CheckX <- unlist(parallel::clusterCall(cl, function(){exists("X")}))
  if(all(CheckX)){
    GlobalCluster <- TRUE
    if(ClusType != "Fork"){
      print("Exporting the additional variables to the cluster")
      parallel::clusterExport(cl, varlist = c("SquaredX", "MaxNumberOfIterations", "TrimmingRadius", "eps", "verbose",
                                              "EmbPointProb", "alpha", "beta", "FinalEnergy"), envir=environment())
    }
    n.cores = length(CheckX)

  } else {
    print("Unable to find X on the cluster. Single processor computation will be used")
    n.cores = 1
  }
}
}

if(nrow(UpdatedPG$NodePositions) >= NumNodes & !GrammarOptimization){

FinalReport <- ElPiGraph.R:::ReportOnPrimitiveGraphEmbedment(X = X, NodePositions = UpdatedPG$NodePositions,
                                               ElasticMatrix = UpdatedPG$ElasticMatrix,
                                               PartData = PartitionData(X = X,
                                                                        NodePositions = UpdatedPG$NodePositions,
                                                                        SquaredX = SquaredX,
                                                                        TrimmingRadius = TrimmingRadius,
                                                                        nCores = 1),
                                               ComputeMSEP = ComputeMSEP)

return(
  list(NodePositions = UpdatedPG$NodePositions, ElasticMatrix = UpdatedPG$ElasticMatrix,
       ReportTable = unlist(FinalReport), FinalReport = FinalReport, Lambda = Lambda, Mu = Mu,
       FastSolve = FastSolve)
)
}

StartNodes <- nrow(UpdatedPG$NodePositions)

# print(FinalReport)

FailedOperations <- 0
Steps <- 0
FirstPrint <- TRUE

while((nrow(UpdatedPG$NodePositions) < NumNodes) | GrammarOptimization){

nEdges <- sum(UpdatedPG$ElasticMatrix[lower.tri(UpdatedPG$ElasticMatrix, diag = FALSE)] > 0)

if((nrow(UpdatedPG$NodePositions) >= NumNodes | nEdges >= NumEdges) & !GrammarOptimization){
  break()
}

if(!verbose & ShowTimer){
  print(paste("Nodes = ", nrow(UpdatedPG$NodePositions)))
}

if(!verbose & !ShowTimer){
  if(FirstPrint){
    cat("Nodes = ")
    FirstPrint <- FALSE
  }
  cat(nrow(UpdatedPG$NodePositions))
  cat(" ")
}

OldPG <- UpdatedPG

for(OpType in GrammarOrder){
  print(UpdatedPG$NodePositions[28:,])
  if(OpType == "Grow" & length(GrowGrammars)>0){
    for(k in 1:length(GrowGrammars)){
      if(ShowTimer){
        print("Growing")
        tictoc::tic()
      }

      UpdatedPG <- ElPiGraph.R:::ApplyOptimalGraphGrammarOpeation(X = X,
                                                                  NodePositions = UpdatedPG$NodePositions,
                                                                  ElasticMatrix = UpdatedPG$ElasticMatrix,
                                                                  AdjustVect = UpdatedPG$AdjustVect,
                                                                  operationtypes = GrowGrammars[[k]],
                                                                  SquaredX = SquaredX,
                                                                  FinalEnergy = FinalEnergy,
                                                                  alpha = alpha,
                                                                  beta = beta,
                                                                  gamma = gamma,
                                                                  Mode = Mode,
                                                                  MaxNumberOfIterations = MaxNumberOfIterations,
                                                                  eps = eps,
                                                                  TrimmingRadius = TrimmingRadius,
                                                                  verbose = FALSE,
                                                                  n.cores = n.cores,
                                                                  EnvCl = cl,
                                                                  MinParOP = MinParOP,
                                                                  FastSolve = FastSolve,
                                                                  AvoidSolitary = AvoidSolitary,
                                                                  EmbPointProb = EmbPointProb)


      if(!is.list(UpdatedPG)){

        FailedOperations <- FailedOperations + 1
        UpdatedPG <- OldPG
        break()

      } else {

        FailedOperations <- 0

        if(nrow(UpdatedPG$NodePositions) == 3){
          # this is needed to erase the star elasticity coefficient which was initially assigned to both leaf nodes,
          # one can erase this information after the number of nodes in the graph is > 2

          inds = which(colSums(UpdatedPG$ElasticMatrix-diag(diag(UpdatedPG$ElasticMatrix))>0)==1)

          UpdatedPG$ElasticMatrix[inds, inds] <- 0
        }

      }

      if(ShowTimer){
        tictoc::toc()
      }

    }
  }


  if(OpType == "Shrink" & length(ShrinkGrammars)>0){
    for(k in 1:length(ShrinkGrammars)){

      if(ShowTimer){
        print("Shrinking")
        tictoc::tic()
      }

      UpdatedPG <- ElPiGraph.R:::ApplyOptimalGraphGrammarOpeation(X = X,
                                                                  NodePositions = UpdatedPG$NodePositions,
                                                                  ElasticMatrix = UpdatedPG$ElasticMatrix,
                                                                  AdjustVect = UpdatedPG$AdjustVect,
                                                                  operationtypes = ShrinkGrammars[[k]],
                                                                  SquaredX = SquaredX,
                                                                  Mode = Mode,
                                                                  FinalEnergy = FinalEnergy,
                                                                  alpha = alpha,
                                                                  beta = beta,
                                                                  gamma = gamma,
                                                                  MaxNumberOfIterations = MaxNumberOfIterations,
                                                                  eps = eps,
                                                                  TrimmingRadius = TrimmingRadius,
                                                                  verbose = FALSE,
                                                                  n.cores = n.cores,
                                                                  MinParOP = MinParOP,
                                                                  EnvCl = cl,
                                                                  FastSolve = FastSolve,
                                                                  AvoidSolitary = AvoidSolitary,
                                                                  EmbPointProb = EmbPointProb
                                                                  )


      if(!is.list(UpdatedPG)){

        FailedOperations <- FailedOperations + 1
        UpdatedPG <- OldPG
        break()

      } else {

        FailedOperations <- 0

      }


      if(ShowTimer){
        tictoc::toc()
      }

    }
  }

}

if(CompileReport){
  tReport <- ElPiGraph.R:::ReportOnPrimitiveGraphEmbedment(X = X, NodePositions = UpdatedPG$NodePositions,
                                             ElasticMatrix = UpdatedPG$ElasticMatrix,
                                             PartData = PartitionData(X = X,
                                                                      NodePositions = UpdatedPG$NodePositions,
                                                                      SquaredX = SquaredX,
                                                                      TrimmingRadius = TrimmingRadius,
                                                                      nCores = 1),
                                             ComputeMSEP = ComputeMSEP)
  FinalReport <- tReport
  tReport <- unlist(tReport)
  tReport[ToSrink] <- sapply(tReport[ToSrink], function(x) {
    signif(as.numeric(x), 4)
  })

  ReportTable <- rbind(ReportTable, tReport)

  if(verbose){
    cat(ReportTable[nrow(ReportTable), ], sep = "\t")
    cat("\n")
  }
}

# Count the execution steps
Steps <- Steps + 1

# If the number of execution steps is larger than MaxSteps stop the algorithm
if(Steps > MaxSteps | FailedOperations > MaxFailedOperations){
  break()
}

}

# FinalReport <- NULL

if(!verbose){
if(!CompileReport){
  tReport <- ElPiGraph.R:::ReportOnPrimitiveGraphEmbedment(X = X, NodePositions = UpdatedPG$NodePositions,
                                             ElasticMatrix = UpdatedPG$ElasticMatrix,
                                             PartData = PartitionData(X = X,
                                                                      NodePositions = UpdatedPG$NodePositions,
                                                                      SquaredX = SquaredX,
                                                                      TrimmingRadius = TrimmingRadius,
                                                                      nCores = 1),
                                             ComputeMSEP = ComputeMSEP)
  FinalReport <- tReport
  tReport <- unlist(tReport)
  tReport[ToSrink] <- sapply(tReport[ToSrink], function(x) {
    signif(as.numeric(x), 4)
  })
} else {
  tReport <- ReportTable[nrow(ReportTable),]
  tReport <- unlist(tReport)
}

cat("\n")
cat('BARCODE\tENERGY\tNNODES\tNEDGES\tNRIBS\tNSTARS\tNRAYS\tNRAYS2\tMSE\tMSEP\tFVE\tFVEP\tUE\tUR\tURN\tURN2\tURSD\n')
cat(tReport, sep = "\t")
cat("\n")
}

# ReportTable <- rbind(ReportTable, tReport)

if(!GlobalCluster){
print("Stopping the cluster")
parallel::stopCluster(cl)
}

# print(ReportTable)

if(is.list(ReportTable)){
ReportTable <- unlist(ReportTable)
}

# print(ReportTable)

if(is.null(dim(ReportTable)) & !is.null(ReportTable)){
RPNames <- names(ReportTable)
ReportTable <- matrix(ReportTable, nrow = 1)
colnames(ReportTable) <- RPNames
}

RParsingError: R parsing

### Step 3 : check CollapseBranches, ShiftBranching

In [38]:
for i in range(len(input_data)):
    
    # CollapseBranches
    r_collapse_dict = dict(zip(r_epg_obj_collapse[i].names,r_epg_obj_collapse[i]))          
    try: assert all([np.all(np.array(epg_obj_collapse[i]['Edges'])==(r_collapse_dict['Edges']-1)), #correcting R indexing that starts at one
                     np.allclose(epg_obj_collapse[i]['Nodes'],r_collapse_dict['Nodes'])])
    except: print('CollapseBranches',i)

#     # ShiftBranching
#     r_shift_dict = dict(zip(r_epg_obj_collapse[i].names,r_epg_obj_shift[i]))          
#     try: assert all([np.all(np.array(epg_obj_shift[i]['Edges'])==(r_shift_dict['Edges']-1)),       #correcting R indexing that starts at one
#                      np.allclose(epg_obj_shift[i]['NodePositions'],r_shift_dict['NodePositions'])])
#     except: print('ShiftBranching',i)

CollapseBranches 0
CollapseBranches 1


  """


# II - Speed comparison

In [4]:
### Python
np.random.seed(0)
num_points = [1000,10000,100000]
num_nodes = [10,20,30,40,50,60]

run_points = []
for j in num_points:
    run_nodes = []
    for i in num_nodes:
        X=np.random.random(size=(j,10))
        s = time.time()
        res = elpigraph.computeElasticPrincipalTree(X = X,NumNodes = i,drawPCAView=False)
        end = time.time() - s
        run_nodes.append(end)
    run_points.append(run_nodes)

Generating the initial configuration
Creating a chain in the 1st PC with 2 nodes
90% of the points have been used as initial conditions. Resetting.
Constructing tree 1 of 1 / Subset 1 of 1
Performing PCA
Using standard PCA
10 dimensions are being used
100.0 % of the original variance has been retained
The elastic matrix is being used. Edge configuration will be ignored
Computing EPG with  10  nodes on  1000  points and  10  dimensions
Nodes =  2 3 4 5 6 7 8 9 

BARCODE	ENERGY	NNODES	NEDGES	NRIBS	NSTARS	NRAYS	NRAYS2	MSE	MSEP	FVE	FVEP	UE	UR	URN	URN2	URSD

1|0|0|0|0|0|0||10	0.6063	10	9	0	0	0	0	0.5868	0.5821	0.3003	0.306	0.0194	0.0	0.0004	0.0036	0


MSDEnergyPlot not yet implemented
accuracyComplexityPlot not yet implemented
0.4939  seconds elapsed
Generating the initial configuration
Creating a chain in the 1st PC with 2 nodes
90% of the points have been used as initial conditions. Resetting.
Constructing tree 1 of 1 / Subset 1 of 1
Performing PCA
Using standard PCA
10 dimensions are bein

In [5]:
### R
np.random.seed(0)
num_points = [1000,10000,100000]
num_nodes = [10,20,30,40,50,60]

run_points_r = []
for j in num_points:
    run_nodes = []
    for i in num_nodes:
        X=np.random.random(size=(j,10))
        s = time.time()
        res= r_elpigraph.computeElasticPrincipalTree(X = X,NumNodes = i)
        end = time.time() - s
        run_nodes.append(end)
    run_points_r.append(run_nodes)

[1] "Generating the initial configuration"
[1] "Creating a chain in the 1st PC with 2 nodes"
[1] "Constructing tree 1 of 1 / Subset 1 of 1"
[1] "Performing PCA on the data"
[1] "Using standard PCA"
[1] "10 dimensions are being used"
[1] "100% of the original variance has been retained"
[1] "The elastic matrix is being used. Edge configuration will be ignored"
[1] "Computing EPG with 10 nodes on 1000 points and 10 dimensions"
[1] "Using a single core"
Nodes = 2 3 4 5 6 7 8 9 
BARCODE	ENERGY	NNODES	NEDGES	NRIBS	NSTARS	NRAYS	NRAYS2	MSE	MSEP	FVE	FVEP	UE	UR	URN	URN2	URSD
1|0|0|0|0|0|0||10	0.6063	10	9	0	0	0	0	0.5868	0.5821	0.3003	0.306	0.01943	3.598e-05	0.0003598	0.003598	0
1.678 sec elapsed
[[1]]

[1] "Generating the initial configuration"
[1] "Creating a chain in the 1st PC with 2 nodes"
[1] "Constructing tree 1 of 1 / Subset 1 of 1"
[1] "Performing PCA on the data"
[1] "Using standard PCA"
[1] "10 dimensions are being used"
[1] "100% of the original variance has been retained"
[1] "The el

In [6]:
### Plotting
for i in range(len(num_points)):

    #plt.plot(num_nodes,np.array(run_points_colab_hybrid[i])/60,marker='.') # run hybrid version if you have a gpu (or get results from colab)
    plt.plot(num_nodes,np.array(run_points[i])/60,marker='.')
    plt.plot(num_nodes,np.array(run_points_r[i])/60,marker='.')

    plt.xlabel('Number of nodes',fontsize=16)
    plt.ylabel('Time (minutes)',fontsize=16)
    plt.legend(['Python_Hybrid_cpu_gpu','Python_one_cpu','R_one_cpu'],fontsize=13)
    plt.title('Number of points (10 dimensions) : '+str(num_points[i]),fontsize=16)
    plt.show()

NameError: name 'num_points' is not defined