diff --git a/scripts/algorithms/l2-svm.dml b/scripts/algorithms/l2-svm.dml index 1c2fb9d1772..634503dcf38 100644 --- a/scripts/algorithms/l2-svm.dml +++ b/scripts/algorithms/l2-svm.dml @@ -50,7 +50,7 @@ fmt = ifdef($fmt, "text") intercept = ifdef($icpt, FALSE) epsilon = ifdef($tol, 0.001) -lambda = ifdef($reg, 1.0) +reg = ifdef($reg, 1.0) maxIterations = ifdef($maxiter, 100) verbose = ifdef($verbose, FALSE) @@ -62,7 +62,7 @@ negative_label = min(Y) dimensions = ncol(X) w = l2svm(X=X, Y=Y, intercept=intercept, - epsilon=epsilon, lambda=lambda, + epsilon=epsilon, reg=reg, maxIterations=maxIterations, verbose=verbose) diff --git a/scripts/builtin/als.dml b/scripts/builtin/als.dml index 8048f2f41d0..5fa18ff6da7 100644 --- a/scripts/builtin/als.dml +++ b/scripts/builtin/als.dml @@ -29,18 +29,18 @@ # ---------------------------------------------------------------------------------------------------------------------- # X Matrix[Double] --- Location to read the input matrix X to be factorized # rank Integer 10 Rank of the factorization -# reg String "L2" Regularization: +# regType String "L2" Regularization: # "L2" = L2 regularization; # f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2) -# + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2)) +# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2)) # "wL2" = weighted L2 regularization # f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2) -# + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) +# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros) # + sum (V ^ 2 * col_nonzeros)) -# lambda Double 0.000001 Regularization parameter, no regularization if 0.0 -# maxi Integer 50 Maximum number of iterations -# check Boolean TRUE Check for convergence after every iteration, i.e., updating U and V once -# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared +# reg Double 0.000001 Regularization parameter, no regularization if 0.0 +# maxi Integer 50 Maximum number of iterations +# check Boolean TRUE Check for convergence after every iteration, i.e., updating U and V once +# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared # if the decrease in loss in any two consecutive iterations falls below this threshold; # if check is FALSE thr is ignored # ---------------------------------------------------------------------------------------------------------------------- @@ -53,15 +53,15 @@ # V Matrix An m x r matrix where r is the factorization rank # ---------------------------------------------------------------------------------------------------------------------- -m_als = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Double lambda = 0.000001, +m_als = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001, Integer maxi = 50, Boolean check = TRUE, Double thr = 0.0001, Boolean verbose = TRUE) return (Matrix[Double] U, Matrix[Double] V) { N = 10000; # for large problems, use scalable alsCG if( reg != "L2" | nrow(X) > N | ncol(X) > N ) - [U, V] = alsCG(X=X, rank=rank, reg=reg, lambda=lambda, + [U, V] = alsCG(X=X, rank=rank, regType=regType, reg=reg, maxi=maxi, check=check, thr=thr, verbose=verbose); else - [U, V] = alsDS(X=X, rank=rank, lambda=lambda, maxi=maxi, + [U, V] = alsDS(X=X, rank=rank, reg=reg, maxi=maxi, check=check, thr=thr, verbose=verbose); } diff --git a/scripts/builtin/alsCG.dml b/scripts/builtin/alsCG.dml index d001c41a68b..2d0be0dd808 100644 --- a/scripts/builtin/alsCG.dml +++ b/scripts/builtin/alsCG.dml @@ -29,18 +29,18 @@ # ---------------------------------------------------------------------------------------------------------------------- # X Matrix[Double] --- Location to read the input matrix X to be factorized # rank Integer 10 Rank of the factorization -# reg String "L2" Regularization: +# regType String "L2" Regularization: # "L2" = L2 regularization; # f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2) -# + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2)) +# + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2)) # "wL2" = weighted L2 regularization # f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2) -# + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) +# + 0.5 * reg * (sum (U ^ 2 * row_nonzeros) # + sum (V ^ 2 * col_nonzeros)) -# lambda Double 0.000001 Regularization parameter, no regularization if 0.0 -# maxi Integer 50 Maximum number of iterations -# check Boolean TRUE Check for convergence after every iteration, i.e., updating U and V once -# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared +# reg Double 0.000001 Regularization parameter, no regularization if 0.0 +# maxi Integer 50 Maximum number of iterations +# check Boolean TRUE Check for convergence after every iteration, i.e., updating U and V once +# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared # if the decrease in loss in any two consecutive iterations falls below this threshold; # if check is FALSE thr is ignored # ---------------------------------------------------------------------------------------------------------------------- @@ -53,7 +53,7 @@ # V Matrix[Double] An m x r matrix where r is the factorization rank # ---------------------------------------------------------------------------------------------------------------------- -m_alsCG = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Double lambda = 0.000001, Integer maxi = 50, +m_alsCG = function(Matrix[Double] X, Integer rank = 10, String regType = "L2", Double reg = 0.000001, Integer maxi = 50, Boolean check = TRUE, Double thr = 0.0001, Boolean verbose = TRUE) return (Matrix[Double] U, Matrix[Double] V) { @@ -73,26 +73,26 @@ m_alsCG = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Doubl # check for regularization row_nonzeros = matrix(0,rows=1,cols=1); col_nonzeros = matrix(0,rows=1,cols=1); - if( reg == "L2" ) { + if( regType == "L2" ) { # Loss Function with L2: # f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2) - # + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2)) + # + 0.5 * reg * (sum (U ^ 2) + sum (V ^ 2)) if( verbose ) - print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda); + print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH REG - " + reg); row_nonzeros = matrix(1, nrow(W), 1); col_nonzeros = matrix(1, ncol(W), 1); } - else if( reg == "wL2" ) { + else if( regType == "wL2" ) { # Loss Function with weighted L2: # f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2) - # + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros)) + # + 0.5 * reg * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros)) if( verbose ) - print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda); + print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH REG - " + reg); row_nonzeros = rowSums(W); col_nonzeros = t(colSums(W)); } else { - stop ("wrong regularization! " + reg); + stop ("wrong regularization! " + regType); } is_U = TRUE; # start optimizing U, alternated @@ -101,7 +101,7 @@ m_alsCG = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Doubl loss_init = 0.0; # only used if check is TRUE if( check ) { loss_init = 0.5 * sum( (X != 0) * (U %*% t(V) - X) ^ 2); - loss_init = loss_init + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros)); + loss_init = loss_init + 0.5 * reg * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros)); if( verbose ) print ("----- Initial train loss: " + loss_init + " -----"); } @@ -111,9 +111,9 @@ m_alsCG = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Doubl while( as.integer(it/2) < max_iter & ! converged ) { it = it + 1; if( is_U ) - G = ((X != 0) * (U %*% t(V) - X)) %*% V + lambda * U * row_nonzeros; + G = ((X != 0) * (U %*% t(V) - X)) %*% V + reg * U * row_nonzeros; else - G = t(t(U) %*% ((X != 0) * (U %*% t(V) - X))) + lambda * V * col_nonzeros; + G = t(t(U) %*% ((X != 0) * (U %*% t(V) - X))) + reg * V * col_nonzeros; R = -G; S = R; @@ -124,12 +124,12 @@ m_alsCG = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Doubl tt = 0.000000001; while( norm_R2 > tt * norm_G2 & inneriter <= maxinneriter ) { if( is_U ) { - HS = (W * (S %*% t(V))) %*% V + lambda * S * row_nonzeros; + HS = (W * (S %*% t(V))) %*% V + reg * S * row_nonzeros; alpha = norm_R2 / sum (S * HS); U = U + alpha * S; # OK since U is not used in HS } else { - HS = t(t(U) %*% (W * (U %*% t(S)))) + lambda * S * col_nonzeros; + HS = t(t(U) %*% (W * (U %*% t(S)))) + reg * S * col_nonzeros; alpha = norm_R2 / sum (S * HS); V = V + alpha * S; # OK since V is not used in HS } @@ -146,7 +146,7 @@ m_alsCG = function(Matrix[Double] X, Integer rank = 10, String reg = "L2", Doubl # check for convergence if( check & (it%%2 == 0) ) { loss_cur = 0.5 * sum( (X != 0) * (U %*% t(V) - X) ^ 2); - loss_cur = loss_cur + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros)); + loss_cur = loss_cur + 0.5 * reg * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros)); loss_dec = (loss_init - loss_cur) / loss_init; if( verbose ) diff --git a/scripts/builtin/alsDS.dml b/scripts/builtin/alsDS.dml index 0b5797894f2..4f7a5cffe69 100644 --- a/scripts/builtin/alsDS.dml +++ b/scripts/builtin/alsDS.dml @@ -30,7 +30,7 @@ # ---------------------------------------------------------------------------------------------------------------------- # X Matrix[Double] --- Location to read the input matrix V to be factorized # rank Integer 10 Rank of the factorization -# lambda Double 0.000001 Regularization parameter, no regularization if 0.0 +# reg Double 0.000001 Regularization parameter, no regularization if 0.0 # maxi Integer 50 Maximum number of iterations # check Boolean FALSE Check for convergence after every iteration, i.e., updating L and R once # thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared @@ -46,7 +46,7 @@ # V Matrix[Double] An m x r matrix where r is the factorization rank # ---------------------------------------------------------------------------------------------------------------------- -m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double lambda = 0.000001, +m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double reg = 0.000001, Integer maxi = 50, Boolean check = FALSE, Double thr = 0.0001, Boolean verbose = TRUE) return (Matrix[Double] U, Matrix[Double] V) { @@ -92,17 +92,17 @@ m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double lambda = 0.000001 # check for regularization if ( verbose ) - print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda); + print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH REG - " + reg); loss_init = 0.0; # only used if check is TRUE if (check) { loss_init = sum (X_nonzero_ind * (X - (U %*% t(V)))^2) - + lambda * (sum ((U^2) * row_nonzeros) + sum ((V^2) * col_nonzeros)); + + reg * (sum ((U^2) * row_nonzeros) + sum ((V^2) * col_nonzeros)); if( verbose ) print ("----- Initial train loss: " + loss_init + " -----"); } - lambda_I = diag (matrix (lambda, rows = r, cols = 1)); + lambda_I = diag (matrix (reg, rows = r, cols = 1)); it = 0; converged = FALSE; while ((it < max_iter) & (!converged)) { @@ -126,7 +126,7 @@ m_alsDS = function(Matrix[Double] X, Integer rank = 10, Double lambda = 0.000001 # check for convergence if (check) { loss_cur = sum (X_nonzero_ind * (X - (U %*% t(V)))^2) - + lambda * (sum ((U^2) * row_nonzeros) + sum ((V^2) * col_nonzeros)); + + reg * (sum ((U^2) * row_nonzeros) + sum ((V^2) * col_nonzeros)); loss_dec = (loss_init - loss_cur) / loss_init; if( verbose ) print ("Train loss at iteration (X) " + it + ": " + loss_cur + " loss-dec " + loss_dec); diff --git a/scripts/builtin/bandit.dml b/scripts/builtin/bandit.dml index fa1ff1137d5..04e28517d85 100644 --- a/scripts/builtin/bandit.dml +++ b/scripts/builtin/bandit.dml @@ -54,7 +54,6 @@ m_bandit = function(Matrix[Double] X_train, Matrix[Double] Y_train, Matrix[Double] X_test, Matrix[Double] Y_test, List[Unknown] metaList, String evaluationFunc, Matrix[Double] evalFunHp, Frame[Unknown] lp, Matrix[Double] lpHp, Frame[Unknown] primitives, Frame[Unknown] param, Integer k = 3, Integer R=50, Double baseLineScore, Boolean cv, Integer cvk = 2, Double ref = 0, Integer seed = -1, Boolean enablePruning = FALSE, Boolean verbose = TRUE) - # return(Boolean perf) return (Frame[Unknown] bestPipeline, Matrix[Double] bestHyperparams, Matrix[Double] bestAccuracy, Frame[String] applyFunc) { print("Starting optimizer") diff --git a/scripts/builtin/cox.dml b/scripts/builtin/cox.dml index f6c9363b0c0..68672d63396 100644 --- a/scripts/builtin/cox.dml +++ b/scripts/builtin/cox.dml @@ -84,7 +84,7 @@ # ---------------------------------------------------------------------------------------------------------------------- m_cox = function(Matrix[Double] X, Matrix[Double] TE, Matrix[Double] F, Matrix[Double] R, - Double alpha = 0.05, Double tol = 0.000001, Int moi = 100, Int mii = 0) + Double alpha = 0.05, Double tol = 0.000001, Integer moi = 100, Integer mii = 0) return (Matrix[Double] M, Matrix[Double] S, Matrix[Double] T, Matrix[Double] COV, Matrix[Double] RT, Matrix[Double] XO) { X_orig = X; diff --git a/scripts/builtin/l2svm.dml b/scripts/builtin/l2svm.dml index 9fa6a71c134..9d3fca7d150 100644 --- a/scripts/builtin/l2svm.dml +++ b/scripts/builtin/l2svm.dml @@ -30,7 +30,7 @@ # intercept Boolean False No Intercept ( If set to TRUE then a constant bias column is added to X) # epsilon Double 0.001 Procedure terminates early if the reduction in objective function value is less # than epsilon (tolerance) times the initial objective function value. -# lambda Double 1.0 Regularization parameter (lambda) for L2 regularization +# reg Double 1.0 Regularization parameter (reg) for L2 regularization # maxIterations Int 100 Maximum number of conjugate gradient iterations # maxii Int 20 - # verbose Boolean FALSE Set to true if one wants print statements updating on loss. @@ -46,7 +46,7 @@ # ---------------------------------------------------------------------------------------------------------------------- m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE, - Double epsilon = 0.001, Double lambda = 1, Integer maxIterations = 100, + Double epsilon = 0.001, Double reg = 1, Integer maxIterations = 100, Integer maxii = 20, Boolean verbose = FALSE, Integer columnId = -1) return(Matrix[Double] model) { @@ -55,7 +55,7 @@ m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE stop("L2SVM: Stopping due to invalid inputs: Not possible to learn a binary class classifier without at least 2 rows") if(epsilon < 0) stop("L2SVM: Stopping due to invalid argument: Tolerance (tol) must be non-negative") - if(lambda < 0) + if(reg < 0) stop("L2SVM: Stopping due to invalid argument: Regularization constant (reg) must be non-negative") if(maxIterations < 1) stop("L2SVM: Stopping due to invalid argument: Maximum iterations should be a positive integer") @@ -106,8 +106,8 @@ m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE # minimizing primal obj along direction s step_sz = 0 Xd = X %*% s - wd = lambda * sum(w * s) - dd = lambda * sum(s * s) + wd = reg * sum(w * s) + dd = reg * sum(s * s) continue1 = TRUE iiter = 0 while(continue1 & iiter < maxii){ @@ -129,8 +129,8 @@ m_l2svm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE out = 1 - Y * Xw sv = (out > 0) out = sv * out - obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w) - g_new = t(X) %*% (out * Y) - lambda * w + obj = 0.5 * sum(out * out) + reg/2 * sum(w * w) + g_new = t(X) %*% (out * Y) - reg * w if(verbose) { colstr = ifelse(columnId!=-1, ", Col:"+columnId + " ,", " ,") diff --git a/scripts/builtin/lenetTrain.dml b/scripts/builtin/lenetTrain.dml index 4f02c2dd488..b82acf3d064 100644 --- a/scripts/builtin/lenetTrain.dml +++ b/scripts/builtin/lenetTrain.dml @@ -39,7 +39,7 @@ # lr Double 0.01 Learning rate # mu Double 0.9 Momentum value # decay Double 0.95 Learning rate decay -# lambda Double 5e-04 Regularization strength +# reg Double 5e-04 Regularization strength # seed Integer -1 Seed for model initialization # verbose Boolean FALSE Flag indicates if function should print to stdout # ---------------------------------------------------------------------------------------------------------------------- @@ -64,7 +64,7 @@ source("nn/layers/lenetForwardPass.dml") as lenet_fw m_lenetTrain = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] X_val, Matrix[Double] Y_val, Integer C, Integer Hin, Integer Win, Integer batch_size=64, - Integer epochs=20, Double lr=0.01, Double mu=0.9, Double decay=0.95, Double lambda=5e-04, + Integer epochs=20, Double lr=0.01, Double mu=0.9, Double decay=0.95, Double reg=5e-04, Boolean verbose=FALSE, Integer seed=-1) return (List[unknown] model) { @@ -126,7 +126,7 @@ m_lenetTrain = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] X_val # Compute data backward pass [dW1, db1, dW2, db2, dW3, db3, dW4, db4] = feed_backward( - X_batch, C, Hin, Win, lambda, model, dprobs, cache) + X_batch, C, Hin, Win, reg, model, dprobs, cache) # Optimize with SGD w/ Nesterov momentum [W1, vW1] = sgd_nesterov::update(W1, dW1, lr, mu, vW1) @@ -153,7 +153,7 @@ m_lenetTrain = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] X_val } feed_backward = function(Matrix[Double] X, Integer C, Integer Hin, Integer Win, - Double lambda,list[unknown] model, matrix[Double] dprobs, list[unknown] cache) + Double reg,list[unknown] model, matrix[Double] dprobs, list[unknown] cache) return (Matrix[Double] dW1, Matrix[Double] db1, Matrix[Double] dW2, Matrix[Double] db2, Matrix[Double] dW3, Matrix[Double] db3, @@ -193,10 +193,10 @@ feed_backward = function(Matrix[Double] X, Integer C, Integer Hin, Integer Win, X, as.matrix(model["W1"]), as.matrix(model["b1"]), C, Hin, Win, Hf, Wf, stride, stride, pad, pad) # Compute regularization backward pass - dW1_reg = l2_reg::backward(as.matrix(model["W1"]), lambda) - dW2_reg = l2_reg::backward(as.matrix(model["W2"]), lambda) - dW3_reg = l2_reg::backward(as.matrix(model["W3"]), lambda) - dW4_reg = l2_reg::backward(as.matrix(model["W4"]), lambda) + dW1_reg = l2_reg::backward(as.matrix(model["W1"]), reg) + dW2_reg = l2_reg::backward(as.matrix(model["W2"]), reg) + dW3_reg = l2_reg::backward(as.matrix(model["W3"]), reg) + dW4_reg = l2_reg::backward(as.matrix(model["W4"]), reg) dW1 = dW1 + dW1_reg dW2 = dW2 + dW2_reg dW3 = dW3 + dW3_reg diff --git a/scripts/builtin/mcc.dml b/scripts/builtin/mcc.dml index 60456c74910..644ec37a00d 100644 --- a/scripts/builtin/mcc.dml +++ b/scripts/builtin/mcc.dml @@ -37,7 +37,7 @@ # mattCC Double --- Matthews' Correlation Coefficient # --------------------------------------------------------------------------------------------- -m_mcc = function(Matrix[Double] predictions = matrix(0,0,0), Matrix[Double] labels = matrix(0,0,0)) +m_mcc = function(Matrix[Double] predictions, Matrix[Double] labels) return (Double mattCC) { # # validation checks diff --git a/scripts/builtin/msvm.dml b/scripts/builtin/msvm.dml index 921d694e447..3f2dbde51e0 100644 --- a/scripts/builtin/msvm.dml +++ b/scripts/builtin/msvm.dml @@ -32,7 +32,7 @@ # num_classes integer 10 Number of classes # epsilon Double 0.001 Procedure terminates early if the reduction in objective function # value is less than epsilon (tolerance) times the initial objective function value. -# lambda Double 1.0 Regularization parameter (lambda) for L2 regularization +# reg Double 1.0 Regularization parameter (lambda) for L2 regularization # maxIterations Int 100 Maximum number of conjugate gradient iterations # verbose Boolean False Set to true to print while training. # ---------------------------------------------------------------------------------------------------------------------- @@ -45,7 +45,7 @@ #----------------------------------------------------------------------------------------------------------------------- m_msvm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE, - Double epsilon = 0.001, Double lambda = 1.0, Integer maxIterations = 100, + Double epsilon = 0.001, Double reg = 1.0, Integer maxIterations = 100, Boolean verbose = FALSE) return(Matrix[Double] model) { @@ -76,7 +76,7 @@ m_msvm = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE, parfor(class in 1:max(Y)) { Y_local = 2 * (Y == class) - 1 w[,class] = l2svm(X=X, Y=Y_local, intercept=FALSE, - epsilon=epsilon, lambda=lambda, maxIterations=maxIterations, + epsilon=epsilon, reg=reg, maxIterations=maxIterations, verbose=verbose, columnId=class) } diff --git a/scripts/builtin/setdiff.dml b/scripts/builtin/setdiff.dml index 3c7bdc80739..27721de79c4 100644 --- a/scripts/builtin/setdiff.dml +++ b/scripts/builtin/setdiff.dml @@ -36,7 +36,7 @@ # R Matrix[Double] vector with all elements that are present in X but not in Y # --------------------------------------------------------------------------------------------- -setdiff = function(Matrix[double] X, Matrix[double] Y) +m_setdiff = function(Matrix[double] X, Matrix[double] Y) return (matrix[double] R) { common = intersect(X, Y); diff --git a/scripts/builtin/sherlock.dml b/scripts/builtin/sherlock.dml index f82b02ab230..86b989075f9 100644 --- a/scripts/builtin/sherlock.dml +++ b/scripts/builtin/sherlock.dml @@ -52,7 +52,7 @@ # fb Matrix[Double] biases vectors for combining all trained features (final) # ---------------------------------------------------------------------------------------------------------------------- -source("scripts/builtin/sherlockNet.dml") as sherlockNet +source("scripts/nn/examples/sherlockNet.dml") as sherlockNet m_sherlock = function(Matrix[Double] X_train, Matrix[Double] y_train) return (Matrix[Double] cW1, Matrix[Double] cb1, diff --git a/scripts/builtin/sherlockPredict.dml b/scripts/builtin/sherlockPredict.dml index 8765c989894..e69d0f26158 100644 --- a/scripts/builtin/sherlockPredict.dml +++ b/scripts/builtin/sherlockPredict.dml @@ -51,7 +51,7 @@ # probs Matrix[Double] class probabilities of shape (N, K) # ---------------------------------------------------------------------------------------------------------------------- -source("scripts/builtin/sherlockNet.dml") as sherlockNet +source("scripts/nn/examples/sherlockNet.dml") as sherlockNet m_sherlockPredict = function(Matrix[Double] X, Matrix[Double] cW1, Matrix[Double] cb1, diff --git a/scripts/builtin/symmetricDifference.dml b/scripts/builtin/symmetricDifference.dml index dc18386fa1c..7d875eaafdb 100644 --- a/scripts/builtin/symmetricDifference.dml +++ b/scripts/builtin/symmetricDifference.dml @@ -36,7 +36,7 @@ # R Matrix[Double] vector with all elements in X and Y but not in both # --------------------------------------------------------------------------------------------- -symmetricDifference = function(Matrix[Double] X, Matrix[Double] Y) +m_symmetricDifference = function(Matrix[Double] X, Matrix[Double] Y) return (matrix[double] R) { R = setdiff(union(X,Y), intersect(X,Y)) diff --git a/scripts/builtin/union.dml b/scripts/builtin/union.dml index 73ce3c42027..ff191c2cf60 100644 --- a/scripts/builtin/union.dml +++ b/scripts/builtin/union.dml @@ -36,7 +36,7 @@ # R Matrix matrix with all unique rows existing in X and Y # --------------------------------------------------------------------------------------------- -union = function(Matrix[Double] X, Matrix[Double] Y) +m_union = function(Matrix[Double] X, Matrix[Double] Y) return (matrix[double] R) { R = unique(rbind(X, Y)); diff --git a/scripts/builtin/unique.dml b/scripts/builtin/unique.dml index ac403753f9c..491ac20d3ae 100644 --- a/scripts/builtin/unique.dml +++ b/scripts/builtin/unique.dml @@ -35,7 +35,7 @@ # R Matrix[Double] matrix with only unique rows # --------------------------------------------------------------------------------------------- -unique = function(matrix[double] X) +m_unique = function(matrix[double] X) return (matrix[double] R) { R = X diff --git a/scripts/builtin/sherlockNet.dml b/scripts/nn/examples/sherlockNet.dml similarity index 100% rename from scripts/builtin/sherlockNet.dml rename to scripts/nn/examples/sherlockNet.dml diff --git a/scripts/perftest/scripts/alsDS.dml b/scripts/perftest/scripts/alsDS.dml index 2c3380c4283..6334cc1af4f 100755 --- a/scripts/perftest/scripts/alsDS.dml +++ b/scripts/perftest/scripts/alsDS.dml @@ -20,7 +20,7 @@ #------------------------------------------------------------- rank = ifdef($rank, 10); -lambda = ifdef($lambda, 0.000001); +reg = ifdef($lambda, 0.000001); maxiter = ifdef($maxiter, 50); thr = ifdef($thr, 0.0001); verbose = ifdef($verbose, TRUE); @@ -31,7 +31,7 @@ check = ifdef($check, TRUE); X = read($X); -[U, V] = alsDS(X=X, rank=rank, lambda=lambda, maxi=maxiter, check=check, thr=thr, verbose=verbose); +[U, V] = alsDS(X=X, rank=rank, reg=reg, maxi=maxiter, check=check, thr=thr, verbose=verbose); write(U, $modelU, format=fmt); write(V, $modelV, format=fmt); diff --git a/scripts/perftest/scripts/m-svm.dml b/scripts/perftest/scripts/m-svm.dml index 028356e8f4d..e0fd125f159 100755 --- a/scripts/perftest/scripts/m-svm.dml +++ b/scripts/perftest/scripts/m-svm.dml @@ -29,7 +29,7 @@ tol = as.double ($tol); X = read($X) Y = read($Y) -model = msvm(X = X, Y = Y, intercept = icpt, epsilon = tol, lambda = reg, maxIterations = maxiter, verbose = FALSE) +model = msvm(X = X, Y = Y, intercept = icpt, epsilon = tol, reg = reg, maxIterations = maxiter, verbose = FALSE) extra_model_params = matrix(0, rows=2, cols=ncol(model)) extra_model_params[1, 1] = icpt diff --git a/src/main/python/generator/generator.py b/src/main/python/generator/generator.py index f9a7a1917cf..2a441c5cbbd 100644 --- a/src/main/python/generator/generator.py +++ b/src/main/python/generator/generator.py @@ -19,12 +19,13 @@ # # ------------------------------------------------------------- -from typing import Tuple, List import json import os import re -from parser import FunctionParser +import sys import traceback +from parser import FunctionParser +from typing import List, Tuple class PythonAPIFileGenerator(object): @@ -89,7 +90,7 @@ def generate_init_file(self): init_file.write(self.init_import.format(function=f)) init_file.write("\n") init_file.write(self.init_all.format( - functions=self.function_names).replace(",",",\n")) + functions=self.function_names).replace(",", ",\n")) class PythonAPIFunctionGenerator(object): @@ -141,30 +142,57 @@ def generate_function(self, data: dict) -> str: function_name=function_name, parameters=parameters, header=header, params_dict=params_dict, api_call=api_call) + def replace_types(self, item: str): + pattern = self.__class__.type_mapping_pattern + return self.__class__.type_mapping["type"].get(re.search(pattern, str( + item).lower()).group() if item else item.lower(), item) + def format_param_string(self, parameters: List[Tuple[str]], nameLength: int) -> str: - result = [] - has_optional = False - path = os.path.dirname(__file__) - newline_spacing = "\n" + " " * (nameLength + 5) - for param in parameters: - # map data types - pattern = self.__class__.type_mapping_pattern - param = [self.__class__.type_mapping["type"].get(re.search(pattern, str( - item).lower()).group() if item else str(item).lower(), item) for item in param] - if param[2] is not None: - has_optional = True + try: + result = [] + has_optional = False + path = os.path.dirname(__file__) + newline_spacing = "\n" + " " * (nameLength + 5) + + for param in parameters: + # map data types + # pattern = self.__class__.type_mapping_pattern + # print(param) + param[1] = self.replace_types(param[1]) + # print(param) + if "[" in param[1] or "[" in param[0]: + raise AttributeError( + "Failed parsing param" + str(param) + "\n" + str(parameters)) + if param[2] is not None: + has_optional = True + # result.append("{nl}{name}: {typ},".format( + # result=result, name=param[0], typ=param[1], + # nl=newline_spacing)) + else: + # has_optional = False + result.append("{nl}{name}: {typ},".format( + result=result, name=param[0], typ=param[1], + nl=newline_spacing)) + if len(result) == 0: + result = "" + # if has_optional: + # result = u"{kwargs}".format( + # result=result, kwargs=self.__class__.kwargs_parameter_string, + # nl=newline_spacing) else: - result.append("{nl}{name}: {typ},".format( - result=result, name=param[0], typ=param[1], - nl=newline_spacing)) - result[0] = result[0][len(newline_spacing):] - result[-1] = result[-1][:-1] - result = "".join(result) - if has_optional: - result = u"{result},{nl}{kwargs}".format( - result=result, kwargs=self.__class__.kwargs_parameter_string, - nl=newline_spacing) - return result + result[0] = result[0][len(newline_spacing):] + result[-1] = result[-1][:-1] + result = "".join(result) + if has_optional: + result = u"{result},{nl}{kwargs}".format( + result=result, kwargs=self.__class__.kwargs_parameter_string, + nl=newline_spacing) + + # print("\n\n" +str(parameters) + "\n\n " +result) + return result + except Exception as e: + raise AttributeError("Failed Formatting parameter strings: " + + str(parameters) + " " + format_exception(e)) def format_params_dict_string(self, parameters: List[Tuple[str]]) -> str: if not len(parameters): @@ -219,7 +247,8 @@ def format_api_call(self, if(output_type): output_type = output_type[0].upper() else: - raise AttributeError("Error in pattern match") + raise AttributeError("Error in pattern match: " + str(value) + "\n" + + function_name + "\n" + str(parameters) + "\n" + str(return_values)) result = ("{sds_context}," + "\n \'{function_name}\'," + "\n named_input_nodes=params_dict").format( @@ -317,6 +346,21 @@ def header_return_string(self, parameter: dict) -> str: return meaning_str +def format_exception(e): + exception_list = traceback.format_stack() + exception_list = exception_list[:-2] + exception_list.extend(traceback.format_tb(sys.exc_info()[2])) + exception_list.extend(traceback.format_exception_only( + sys.exc_info()[0], sys.exc_info()[1])) + + exception_str = "Traceback (most recent call last):\n" + exception_str += "".join(exception_list) + # Removing the last \n + exception_str = exception_str[:-1] + + return exception_str + + if __name__ == "__main__": if "python" in os.getcwd(): source_path = os.path.join("../../../", 'scripts', 'builtin') @@ -337,9 +381,8 @@ def header_return_string(self, parameter: dict) -> str: header_data) script_content = fun_generator.generate_function(data) except Exception as e: - traceback.print_exc() - print("[ERROR] error in : \'{file_name}\'.".format( - file_name=dml_file)) + print("[ERROR] error in : \'{file_name}\' \n{err} \n{trace}.".format( + file_name=dml_file, err=e, trace=format_exception(e))) continue file_generator.generate_file( data["function_name"], script_content, dml_file) diff --git a/src/main/python/generator/parser.py b/src/main/python/generator/parser.py index 135d85811c3..72b5b73671c 100644 --- a/src/main/python/generator/parser.py +++ b/src/main/python/generator/parser.py @@ -20,16 +20,16 @@ # ------------------------------------------------------------- +import json import os import re -import json class FunctionParser(object): header_input_pattern = r"^[ \t\n]*[#]+[ \t\n]*input[ \t\n\w:;.,#]*[\s#\-]*[#]+[\w\s\d:,.()\" \t\n\-]*[\s#\-]*$" header_output_pattern = r"[\s#\-]*[#]+[ \t]*(return|output)[ \t\w:;.,#]*[\s#\-]*[#]+[\w\s\d:,.()\" \t\-]*[\s#\-]*$" - function_pattern = r"^m_[\w]+[ \t\n]+=[ \t\n]+function[^#{]*" - parameter_pattern = r"^m_[\w]+[\s]+=[\s]+function[\s]*\([\s]*([\w\[\]\s,\d=.\-'\"_\.]*)[\s]*\)[\s]*return[\s]*\([\s]*([\w\[\]\s,\d=.\-_]*)[\s]*\)[\s]*" + function_pattern = r"^[ms]_[\w]+[ \t\n]*=[ \t\n]+function[^#{]*" + # parameter_pattern = r"^m_[\w]+[\s]+=[\s]+function[\s]*\([\s]*(?=return)[\s]*\)[\s]*return[\s]*\([\s]*([\w\[\]\s,\d=.\-_]*)[\s]*\)[\s]*" header_parameter_pattern = r"[\s#\-]*[#]+[ \t]*([\w|-]+)[\s]+([\w]+)[\s]+([\w,\d.\"\-]+)[\s]+([\w|\W]+)" divider_pattern = r"[\s#\-]*" @@ -57,31 +57,71 @@ def parse_function(self, path: str): """ file_name = os.path.basename(path) function_name, extension = os.path.splitext(file_name) + # try: function_definition = self.find_function_definition(path) - pattern = re.compile( - self.__class__.parameter_pattern, flags=re.I | re.M) - match = pattern.match(function_definition) - if match: - param_str, retval_str = match.group(1, 2) + # pattern = re.compile( + # self.__class__.parameter_pattern, flags=re.I | re.M) + # match = pattern.match(function_definition) + + # if match: + + func_split = function_definition.split("function")[1].split("return") + param_str, retval_str = self.extract_param_str( + func_split[0]), self.extract_param_str(func_split[1]) + if param_str: parameters = self.get_parameters(param_str) return_values = self.get_parameters(retval_str) - else: - # TODO handle default matrix variables. - raise AttributeError("Unable to match to function definition:\n" + function_definition) - data = {'function_name': function_name, + data = {'function_name': function_name, 'parameters': parameters, 'return_values': return_values} - return data + if parameters and return_values: + return data + else: + raise AttributeError("Unable to match to function definition:\n" + function_definition + + "\n parameter_str: " + param_str + "\n retVal: " + retval_str) + else: + raise AttributeError("Unable to match to function definition:\n" + function_definition + + "\n parameter_str: " + param_str + "\n retVal: " + retval_str) + # else: + # # TODO handle default matrix variables. + # raise AttributeError("Unable to match to function definition:\n" + function_definition) + # except Exception as e: + # import generator + # raise AttributeError("Unable to parse " + path + " " + generator.format_exception(e)) + + def extract_param_str(self, a: str): + try: + return a[a.index("(") + 1: a.rindex(")")] + except: + raise AttributeError("failed extracting from: " + a) def get_parameters(self, param_str: str): - + params = re.split(r",[\s]*", param_str) - - parameters = [] + + paramsCombined = [] + inside = 0 + for param in params: - parameters.append(self.parse_single_parameter(param)) + before = inside + start = param.count("(") + end = param.count(")") + inside += start - end + if before > 0: + if inside > 0: + paramsCombined[-1] += param + "," + else: + paramsCombined[-1] += param + "," + else: + paramsCombined.append(param) + + parameters = [] + + for param in paramsCombined: + parameters.append(self.parse_single_parameter(param.strip())) return parameters def parse_single_parameter(self, param: str): + # try: splitted = re.split(r"[\s]+", param) dml_type = splitted[0] name = splitted[1] @@ -94,7 +134,16 @@ def parse_single_parameter(self, param: str): default_split = name.split("=") name = default_split[0] default_value = default_split[1] - return (name, dml_type, default_value) + if default_value is None: + raise AttributeError("Failed parsing " + param) + + if "(" in name or "=" in name or "]" in name or "=" in dml_type: + raise AttributeError("failed Parsing " + + param + " " + str(splitted)) + return [name, dml_type, default_value] + # except Exception as e: + # import generator + # raise AttributeError("Failed parsing " + param + " " + generator.format_exception(e)) def get_header_parameters(self, param_str: str): parameters = list() @@ -109,7 +158,6 @@ def get_header_parameters(self, param_str: str): except Exception as e: if re.search(pattern=self.__class__.divider_pattern, string=param_line, flags=re.I | re.M) is not None: continue - print(e) return parameters return parameters @@ -196,9 +244,9 @@ def check_parameters(self, header, data): header_param_names = [p[0].lower() for p in header["parameters"]] data_param_names = [p[0].lower() for p in data["parameters"]] - if header_param_names != data_param_names: - print("[WARNING] The parameter names of the function does not match with the documentation " - "for file \'{file_name}\'.".format(file_name=data["function_name"])) + # if header_param_names != data_param_names: + # print("[WARNING] The parameter names of the function does not match with the documentation " + # "for file \'{file_name}\'.".format(file_name=data["function_name"])) header_param_type = [p[1].lower() for p in header["parameters"]] header_param_type = [type_mapping["type"].get( @@ -209,6 +257,6 @@ def check_parameters(self, header, data): re.search(type_mapping_pattern, str(item).lower()).group() if item else str(item).lower(), item) for item in data_param_type] - if header_param_type != data_param_type: - print("[WARNING] The parameter type of the function does not match with the documentation " - "for file \'{file_name}\'.".format(file_name=data["function_name"])) + # if header_param_type != data_param_type: + # print("[WARNING] The parameter type of the function does not match with the documentation " + # "for file \'{file_name}\'.".format(file_name=data["function_name"])) diff --git a/src/main/python/generator/resources/type_mapping.json b/src/main/python/generator/resources/type_mapping.json index 7d2041a0302..c45eaec5ad7 100644 --- a/src/main/python/generator/resources/type_mapping.json +++ b/src/main/python/generator/resources/type_mapping.json @@ -5,9 +5,11 @@ "frame": "Frame", "boolean": "bool", "integer": "int", + "int": "int", + "scalar":"float", "double": "float", "string": "str", - "list": "Iterable" + "list": "List" }, "default": { "---": "None" diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py index 5e79d071ea2..feb5342ecca 100644 --- a/src/main/python/systemds/operator/algorithm/__init__.py +++ b/src/main/python/systemds/operator/algorithm/__init__.py @@ -29,11 +29,16 @@ from .builtin.alsDS import alsDS from .builtin.alsPredict import alsPredict from .builtin.alsTopkPredict import alsTopkPredict +from .builtin.apply_pipeline import apply_pipeline from .builtin.arima import arima +from .builtin.autoencoder_2layer import autoencoder_2layer +from .builtin.bandit import bandit from .builtin.bivar import bivar from .builtin.components import components from .builtin.confusionMatrix import confusionMatrix from .builtin.cor import cor +from .builtin.correctTypos import correctTypos +from .builtin.correctTyposApply import correctTyposApply from .builtin.cox import cox from .builtin.cspline import cspline from .builtin.csplineCG import csplineCG @@ -44,19 +49,29 @@ from .builtin.decisionTree import decisionTree from .builtin.decisionTreePredict import decisionTreePredict from .builtin.deepWalk import deepWalk +from .builtin.denialConstraints import denialConstraints from .builtin.discoverFD import discoverFD from .builtin.dist import dist +from .builtin.dmv import dmv +from .builtin.ema import ema from .builtin.executePipeline import executePipeline +from .builtin.ffPredict import ffPredict from .builtin.ffTrain import ffTrain +from .builtin.fit_pipeline import fit_pipeline +from .builtin.fixInvalidLengths import fixInvalidLengths +from .builtin.fixInvalidLengthsApply import fixInvalidLengthsApply +from .builtin.frameSort import frameSort from .builtin.frequencyEncode import frequencyEncode from .builtin.frequencyEncodeApply import frequencyEncodeApply from .builtin.garch import garch from .builtin.gaussianClassifier import gaussianClassifier from .builtin.getAccuracy import getAccuracy from .builtin.glm import glm +from .builtin.glmPredict import glmPredict from .builtin.gmm import gmm from .builtin.gmmPredict import gmmPredict from .builtin.gnmf import gnmf +from .builtin.gridSearch import gridSearch from .builtin.hospitalResidencyMatch import hospitalResidencyMatch from .builtin.hyperband import hyperband from .builtin.img_brightness import img_brightness @@ -83,17 +98,23 @@ from .builtin.km import km from .builtin.kmeans import kmeans from .builtin.kmeansPredict import kmeansPredict +from .builtin.knn import knn from .builtin.knnGraph import knnGraph from .builtin.knnbf import knnbf from .builtin.l2svm import l2svm from .builtin.l2svmPredict import l2svmPredict from .builtin.lasso import lasso +from .builtin.lenetPredict import lenetPredict from .builtin.lenetTrain import lenetTrain from .builtin.lm import lm from .builtin.lmCG import lmCG from .builtin.lmDS import lmDS +from .builtin.lmPredict import lmPredict from .builtin.logSumExp import logSumExp from .builtin.matrixProfile import matrixProfile +from .builtin.mcc import mcc +from .builtin.mdedup import mdedup +from .builtin.mice import mice from .builtin.miceApply import miceApply from .builtin.msvm import msvm from .builtin.msvmPredict import msvmPredict @@ -111,6 +132,8 @@ from .builtin.outlierBySd import outlierBySd from .builtin.outlierBySdApply import outlierBySdApply from .builtin.pca import pca +from .builtin.pcaInverse import pcaInverse +from .builtin.pcaTransform import pcaTransform from .builtin.pnmf import pnmf from .builtin.ppca import ppca from .builtin.randomForest import randomForest @@ -118,6 +141,7 @@ from .builtin.scaleApply import scaleApply from .builtin.scaleMinMax import scaleMinMax from .builtin.selectByVarThresh import selectByVarThresh +from .builtin.setdiff import setdiff from .builtin.sherlock import sherlock from .builtin.sherlockPredict import sherlockPredict from .builtin.shortestPath import shortestPath @@ -130,16 +154,22 @@ from .builtin.stableMarriage import stableMarriage from .builtin.statsNA import statsNA from .builtin.steplm import steplm +from .builtin.stratstats import stratstats +from .builtin.symmetricDifference import symmetricDifference from .builtin.tSNE import tSNE from .builtin.toOneHot import toOneHot from .builtin.tomeklink import tomeklink +from .builtin.topk_cleaning import topk_cleaning from .builtin.underSampling import underSampling +from .builtin.union import union +from .builtin.unique import unique from .builtin.univar import univar from .builtin.vectorToCsv import vectorToCsv from .builtin.winsorize import winsorize from .builtin.winsorizeApply import winsorizeApply from .builtin.xdummy1 import xdummy1 from .builtin.xdummy2 import xdummy2 +from .builtin.xgboost import xgboost from .builtin.xgboostPredictClassification import xgboostPredictClassification from .builtin.xgboostPredictRegression import xgboostPredictRegression @@ -151,11 +181,16 @@ 'alsDS', 'alsPredict', 'alsTopkPredict', + 'apply_pipeline', 'arima', + 'autoencoder_2layer', + 'bandit', 'bivar', 'components', 'confusionMatrix', 'cor', + 'correctTypos', + 'correctTyposApply', 'cox', 'cspline', 'csplineCG', @@ -166,19 +201,29 @@ 'decisionTree', 'decisionTreePredict', 'deepWalk', + 'denialConstraints', 'discoverFD', 'dist', + 'dmv', + 'ema', 'executePipeline', + 'ffPredict', 'ffTrain', + 'fit_pipeline', + 'fixInvalidLengths', + 'fixInvalidLengthsApply', + 'frameSort', 'frequencyEncode', 'frequencyEncodeApply', 'garch', 'gaussianClassifier', 'getAccuracy', 'glm', + 'glmPredict', 'gmm', 'gmmPredict', 'gnmf', + 'gridSearch', 'hospitalResidencyMatch', 'hyperband', 'img_brightness', @@ -205,17 +250,23 @@ 'km', 'kmeans', 'kmeansPredict', + 'knn', 'knnGraph', 'knnbf', 'l2svm', 'l2svmPredict', 'lasso', + 'lenetPredict', 'lenetTrain', 'lm', 'lmCG', 'lmDS', + 'lmPredict', 'logSumExp', 'matrixProfile', + 'mcc', + 'mdedup', + 'mice', 'miceApply', 'msvm', 'msvmPredict', @@ -233,6 +284,8 @@ 'outlierBySd', 'outlierBySdApply', 'pca', + 'pcaInverse', + 'pcaTransform', 'pnmf', 'ppca', 'randomForest', @@ -240,6 +293,7 @@ 'scaleApply', 'scaleMinMax', 'selectByVarThresh', + 'setdiff', 'sherlock', 'sherlockPredict', 'shortestPath', @@ -252,15 +306,21 @@ 'stableMarriage', 'statsNA', 'steplm', + 'stratstats', + 'symmetricDifference', 'tSNE', 'toOneHot', 'tomeklink', + 'topk_cleaning', 'underSampling', + 'union', + 'unique', 'univar', 'vectorToCsv', 'winsorize', 'winsorizeApply', 'xdummy1', 'xdummy2', + 'xgboost', 'xgboostPredictClassification', 'xgboostPredictRegression'] diff --git a/src/main/python/systemds/operator/algorithm/builtin/als.py b/src/main/python/systemds/operator/algorithm/builtin/als.py index d532c68b45c..5357ea6a81a 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/als.py +++ b/src/main/python/systemds/operator/algorithm/builtin/als.py @@ -33,8 +33,8 @@ def als(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]): """ :param rank: Rank of the factorization - :param reg: Regularization: - :param lambda: Regularization parameter, no regularization if 0.0 + :param regType: Regularization: + :param reg: Regularization parameter, no regularization if 0.0 :param maxi: Maximum number of iterations :param check: Check for convergence after every iteration, i.e., updating U and V once :param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared diff --git a/src/main/python/systemds/operator/algorithm/builtin/alsCG.py b/src/main/python/systemds/operator/algorithm/builtin/alsCG.py index a54874a1052..bde4133bc6b 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/alsCG.py +++ b/src/main/python/systemds/operator/algorithm/builtin/alsCG.py @@ -33,8 +33,8 @@ def alsCG(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]): """ :param rank: Rank of the factorization - :param reg: Regularization: - :param lambda: Regularization parameter, no regularization if 0.0 + :param regType: Regularization: + :param reg: Regularization parameter, no regularization if 0.0 :param maxi: Maximum number of iterations :param check: Check for convergence after every iteration, i.e., updating U and V once :param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared diff --git a/src/main/python/systemds/operator/algorithm/builtin/alsDS.py b/src/main/python/systemds/operator/algorithm/builtin/alsDS.py index f1b03c83940..cba1d29f88c 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/alsDS.py +++ b/src/main/python/systemds/operator/algorithm/builtin/alsDS.py @@ -33,7 +33,7 @@ def alsDS(X: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]): """ :param rank: Rank of the factorization - :param lambda: Regularization parameter, no regularization if 0.0 + :param reg: Regularization parameter, no regularization if 0.0 :param maxi: Maximum number of iterations :param check: Check for convergence after every iteration, i.e., updating L and R once :param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared diff --git a/src/main/python/systemds/operator/algorithm/builtin/applyAndEvaluate.py b/src/main/python/systemds/operator/algorithm/builtin/applyAndEvaluate.py new file mode 100644 index 00000000000..3a968bb2675 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/applyAndEvaluate.py @@ -0,0 +1,55 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/applyAndEvaluate.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def applyAndEvaluate(trainData: Frame, + testData: Frame, + pip: Frame, + applyFunc: Frame, + hp: Matrix, + evaluationFunc: str, + evalFunHp: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + + params_dict = {'trainData': trainData, 'testData': testData, 'pip': pip, 'applyFunc': applyFunc, 'hp': hp, 'evaluationFunc': evaluationFunc, 'evalFunHp': evalFunHp} + params_dict.update(kwargs) + + vX_0 = Matrix(trainData.sds_context, '') + vX_1 = Matrix(trainData.sds_context, '') + vX_2 = Matrix(trainData.sds_context, '') + output_nodes = [vX_0, vX_1, vX_2, ] + + op = MultiReturn(trainData.sds_context, 'applyAndEvaluate', output_nodes, named_input_nodes=params_dict) + + vX_0._unnamed_input_nodes = [op] + vX_1._unnamed_input_nodes = [op] + vX_2._unnamed_input_nodes = [op] + + return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py b/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py new file mode 100644 index 00000000000..fa52482eaeb --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py @@ -0,0 +1,44 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/apply_pipeline.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def apply_pipeline(testData: Frame, + pip: Frame, + applyFunc: Frame, + hp: Matrix, + exState: List, + iState: List, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + + params_dict = {'testData': testData, 'pip': pip, 'applyFunc': applyFunc, 'hp': hp, 'exState': exState, 'iState': iState} + params_dict.update(kwargs) + return Matrix(testData.sds_context, + 'apply_pipeline', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/autoencoder_2layer.py b/src/main/python/systemds/operator/algorithm/builtin/autoencoder_2layer.py new file mode 100644 index 00000000000..3f3a061170a --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/autoencoder_2layer.py @@ -0,0 +1,76 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/autoencoder_2layer.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def autoencoder_2layer(X: Matrix, + num_hidden1: int, + num_hidden2: int, + max_epochs: int, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + :param num_hidden1: Number of neurons in the 1st hidden layer + :param num_hidden2: Number of neurons in the 2nd hidden layer + :param max_epochs: Number of epochs to train for + :param full_obj: If TRUE, Computes objective function value (squared-loss) + :param at: of each epoch. Note that, computing the full + :param objective: a lot of time. + :param batch_size: Mini-batch size (training parameter) + :param step: Initial step size (training parameter) + :param decay: Decays step size after each epoch (training parameter) + :param mu: Momentum parameter (training parameter) + :return: 'OperationNode' containing + """ + params_dict = {'X': X, 'num_hidden1': num_hidden1, 'num_hidden2': num_hidden2, 'max_epochs': max_epochs} + params_dict.update(kwargs) + + vX_0 = Matrix(X.sds_context, '') + vX_1 = Matrix(X.sds_context, '') + vX_2 = Matrix(X.sds_context, '') + vX_3 = Matrix(X.sds_context, '') + vX_4 = Matrix(X.sds_context, '') + vX_5 = Matrix(X.sds_context, '') + vX_6 = Matrix(X.sds_context, '') + vX_7 = Matrix(X.sds_context, '') + vX_8 = Matrix(X.sds_context, '') + output_nodes = [vX_0, vX_1, vX_2, vX_3, vX_4, vX_5, vX_6, vX_7, vX_8, ] + + op = MultiReturn(X.sds_context, 'autoencoder_2layer', output_nodes, named_input_nodes=params_dict) + + vX_0._unnamed_input_nodes = [op] + vX_1._unnamed_input_nodes = [op] + vX_2._unnamed_input_nodes = [op] + vX_3._unnamed_input_nodes = [op] + vX_4._unnamed_input_nodes = [op] + vX_5._unnamed_input_nodes = [op] + vX_6._unnamed_input_nodes = [op] + vX_7._unnamed_input_nodes = [op] + vX_8._unnamed_input_nodes = [op] + + return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/bandit.py b/src/main/python/systemds/operator/algorithm/builtin/bandit.py index 5cb87b54970..4adf73c760b 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/bandit.py +++ b/src/main/python/systemds/operator/algorithm/builtin/bandit.py @@ -33,18 +33,31 @@ def bandit(X_train: Matrix, Y_train: Matrix, X_test: Matrix, Y_test: Matrix, - metaList: Iterable, + metaList: List, evaluationFunc: str, evalFunHp: Matrix, lp: Frame, + lpHp: Matrix, primitives: Frame, param: Frame, baseLineScore: float, cv: bool, **kwargs: Dict[str, VALID_INPUT_TYPES]): - params_dict = {'X_train': X_train, 'Y_train': Y_train, 'X_test': X_test, 'Y_test': Y_test, 'metaList': metaList, 'evaluationFunc': evaluationFunc, 'evalFunHp': evalFunHp, 'lp': lp, 'primitives': primitives, 'param': param, 'baseLineScore': baseLineScore, 'cv': cv} + params_dict = {'X_train': X_train, 'Y_train': Y_train, 'X_test': X_test, 'Y_test': Y_test, 'metaList': metaList, 'evaluationFunc': evaluationFunc, 'evalFunHp': evalFunHp, 'lp': lp, 'lpHp': lpHp, 'primitives': primitives, 'param': param, 'baseLineScore': baseLineScore, 'cv': cv} params_dict.update(kwargs) - return Matrix(X_train.sds_context, - 'bandit', - named_input_nodes=params_dict) + + vX_0 = Frame(X_train.sds_context, '') + vX_1 = Matrix(X_train.sds_context, '') + vX_2 = Matrix(X_train.sds_context, '') + vX_3 = Frame(X_train.sds_context, '') + output_nodes = [vX_0, vX_1, vX_2, vX_3, ] + + op = MultiReturn(X_train.sds_context, 'bandit', output_nodes, named_input_nodes=params_dict) + + vX_0._unnamed_input_nodes = [op] + vX_1._unnamed_input_nodes = [op] + vX_2._unnamed_input_nodes = [op] + vX_3._unnamed_input_nodes = [op] + + return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py b/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py new file mode 100644 index 00000000000..acbd0f9448f --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py @@ -0,0 +1,58 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/correctTypos.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def correctTypos(strings: Frame, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + :param frequency_threshold: Strings that occur above this frequency level will not be corrected + :param distance_threshold: Max distance at which strings are considered similar + :param is_verbose: Print debug information + :return: 'OperationNode' containing + """ + params_dict = {'strings': strings} + params_dict.update(kwargs) + + vX_0 = Frame(strings.sds_context, '') + vX_1 = Scalar(strings.sds_context, '') + vX_2 = Scalar(strings.sds_context, '') + vX_3 = Matrix(strings.sds_context, '') + vX_4 = Frame(strings.sds_context, '') + output_nodes = [vX_0, vX_1, vX_2, vX_3, vX_4, ] + + op = MultiReturn(strings.sds_context, 'correctTypos', output_nodes, named_input_nodes=params_dict) + + vX_0._unnamed_input_nodes = [op] + vX_1._unnamed_input_nodes = [op] + vX_2._unnamed_input_nodes = [op] + vX_3._unnamed_input_nodes = [op] + vX_4._unnamed_input_nodes = [op] + + return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py b/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py new file mode 100644 index 00000000000..3aa4c0e2d4a --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py @@ -0,0 +1,45 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/correctTyposApply.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def correctTyposApply(strings: Frame, + distance_matrix: Matrix, + dict: Frame, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + :param frequency_threshold: Strings that occur above this frequency level will not be corrected + :param distance_threshold: Max distance at which strings are considered similar + :return: 'OperationNode' containing + """ + params_dict = {'strings': strings, 'distance_matrix': distance_matrix, 'dict': dict} + params_dict.update(kwargs) + return Matrix(strings.sds_context, + 'correctTyposApply', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py b/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py index 6d731861226..94da3da45cf 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py +++ b/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py @@ -32,7 +32,6 @@ def decisionTree(X: Matrix, Y: Matrix, R: Matrix, - verbose: bool, **kwargs: Dict[str, VALID_INPUT_TYPES]): """ :param a: vector, other positive Integers indicate the number of categories @@ -42,7 +41,7 @@ def decisionTree(X: Matrix, :param verbose: boolean specifying if the algorithm should print information while executing :return: 'OperationNode' containing information: & if the feature is categorical) & looks at if j is an internal node, otherwise 0 & as r input vector & of the subset of values & 6,7,... if j is categorical & a leaf node: number of misclassified samples reaching at node j & at m[6,j] if the feature chosen for j is scale, & feature chosen for j is categorical rows 6,7,... depict the value subset chosen for j & a leaf node 1 if j is impure and the number of samples at j > threshold, otherwise 0 """ - params_dict = {'X': X, 'Y': Y, 'R': R, 'verbose': verbose} + params_dict = {'X': X, 'Y': Y, 'R': R} params_dict.update(kwargs) return Matrix(X.sds_context, 'decisionTree', diff --git a/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py b/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py new file mode 100644 index 00000000000..b2bb53c59b8 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py @@ -0,0 +1,65 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/denialConstraints.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def denialConstraints(dataFrame: Frame, + constraintsFrame: Frame): + """ + :param dataFrame: frame which columns represent the variables of the data and the rows correspond + :param to: or instances. + :param Recommended: a column indexing the instances from 1 to N (N=number of instances). + :param constraintsFrame: frame with fixed columns and each row representing one constraint. + :param ie: value of the variable 1 in instance 1 is lower/higher than the value of variable 1 in instance 2, + :param then: of of variable 2 in instance 2 can't be lower/higher than the value of variable 2 in instance 2. + :param in: of instanceCompare + :param rank: yrs.service sex salary + :param 1: 19 18 Male 139750 + :param 2: 20 16 Male 173200 + :param 3: 3 3 Male 79750.56 + :param 4: 45 39 Male 115000 + :param 5: 40 40 Male 141500 + :param 6: 6 6 Male 97000 + :param 7: 30 23 Male 175000 + :param 8: 45 45 Male 147765 + :param 9: 21 20 Male 119250 + :param 10: 18 18 Female 129000 + :param 11: 12 8 Male 119800 + :param 12: 7 2 Male 79800 + :param 13: 1 1 Male 77700 + :param 1: yrs.since.phd < yrs.service + :param 2: rank Prof yrs.service >< salary + :param 3: salary = 78182 + :param 4: discipline B yrs.service > yrs.since.phd + :return: 'OperationNode' containing shows the indexes of dataframe that are wrong. & shows the index of the denial constraint that is fulfilled & no wrong instances to show (0 constrains fulfilled) --> wronginstances=matrix(0,1,2) + """ + params_dict = {'dataFrame': dataFrame, 'constraintsFrame': constraintsFrame} + return Matrix(dataFrame.sds_context, + 'denialConstraints', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/dmv.py b/src/main/python/systemds/operator/algorithm/builtin/dmv.py new file mode 100644 index 00000000000..2a6eaa49522 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/dmv.py @@ -0,0 +1,43 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/dmv.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def dmv(X: Frame, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + :param threshold: Threshold value in interval [0, 1] for dominant pattern per column (e.g., 0.8 means + :param replace: The string disguised missing values are replaced with + :return: 'OperationNode' containing + """ + params_dict = {'X': X} + params_dict.update(kwargs) + return Matrix(X.sds_context, + 'dmv', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/ema.py b/src/main/python/systemds/operator/algorithm/builtin/ema.py new file mode 100644 index 00000000000..ab4ba1a675d --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/ema.py @@ -0,0 +1,52 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/ema.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def ema(X: Frame, + search_iterations: int, + mode: str, + freq: int, + alpha: float, + beta: float, + gamma: float): + """ + :param search_iterations: Budget iterations for parameter optimisation, + :param used: weren't set + :param mode: Type of EMA method. Either "single", "double" or "triple" + :param freq: Seasonality when using triple EMA. + :param alpha: alpha- value for EMA + :param beta: beta- value for EMA + :param gamma: gamma- value for EMA + :return: 'OperationNode' containing + """ + params_dict = {'X': X, 'search_iterations': search_iterations, 'mode': mode, 'freq': freq, 'alpha': alpha, 'beta': beta, 'gamma': gamma} + return Matrix(X.sds_context, + 'ema', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py b/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py index 470d152ad30..31235e5910f 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py +++ b/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py @@ -29,23 +29,45 @@ from systemds.utils.consts import VALID_INPUT_TYPES -def executePipeline(X: Matrix): +def executePipeline(pipeline: Frame, + Xtrain: Matrix, + Ytrain: Matrix, + Xtest: Matrix, + Ytest: Matrix, + metaList: List, + hyperParameters: Matrix, + flagsCount: int, + verbose: bool, + **kwargs: Dict[str, VALID_INPUT_TYPES]): """ :param flagsCount: --- :param test: --- - :return: 'OperationNode' containing validation check & convert the matrix row-vector into list & flag & append flag & append flag & append flag & of hyper-parameters and loop till that & flag & and remove categorical & and remove numerics & + 1 for nan replacement & matrix & matrix & ohe call, to call inside eval as a function & encoding of categorical features & features & ohe call, to call inside eval as a function & to call inside eval as a function & doing relative over-sampling & count & replace the null with default values & replace the null with default values & flip the noisy labels & best option + :return: 'OperationNode' containing """ - params_dict = {'X': X} + params_dict = {'pipeline': pipeline, 'Xtrain': Xtrain, 'Ytrain': Ytrain, 'Xtest': Xtest, 'Ytest': Ytest, 'metaList': metaList, 'hyperParameters': hyperParameters, 'flagsCount': flagsCount, 'verbose': verbose} + params_dict.update(kwargs) - vX_0 = Matrix(X.sds_context, '') - vX_1 = Matrix(X.sds_context, '') - vX_2 = Matrix(X.sds_context, '') - output_nodes = [vX_0, vX_1, vX_2, ] + vX_0 = Matrix(pipeline.sds_context, '') + vX_1 = Matrix(pipeline.sds_context, '') + vX_2 = Matrix(pipeline.sds_context, '') + vX_3 = Matrix(pipeline.sds_context, '') + vX_4 = Scalar(pipeline.sds_context, '') + vX_5 = Matrix(pipeline.sds_context, '') + vX_6 = Matrix(pipeline.sds_context, '') + vX_7 = Scalar(pipeline.sds_context, '') + vX_8 = List(pipeline.sds_context, '') + output_nodes = [vX_0, vX_1, vX_2, vX_3, vX_4, vX_5, vX_6, vX_7, vX_8, ] - op = MultiReturn(X.sds_context, 'executePipeline', output_nodes, named_input_nodes=params_dict) + op = MultiReturn(pipeline.sds_context, 'executePipeline', output_nodes, named_input_nodes=params_dict) vX_0._unnamed_input_nodes = [op] vX_1._unnamed_input_nodes = [op] vX_2._unnamed_input_nodes = [op] + vX_3._unnamed_input_nodes = [op] + vX_4._unnamed_input_nodes = [op] + vX_5._unnamed_input_nodes = [op] + vX_6._unnamed_input_nodes = [op] + vX_7._unnamed_input_nodes = [op] + vX_8._unnamed_input_nodes = [op] return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/ffPredict.py b/src/main/python/systemds/operator/algorithm/builtin/ffPredict.py new file mode 100644 index 00000000000..3c6244dc1e4 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/ffPredict.py @@ -0,0 +1,43 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/ffPredict.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def ffPredict(model: List, + X: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + :param batch_size: Batch size + :return: 'OperationNode' containing value + """ + params_dict = {'model': model, 'X': X} + params_dict.update(kwargs) + return Matrix(model.sds_context, + 'ffPredict', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py b/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py new file mode 100644 index 00000000000..34bee247be1 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py @@ -0,0 +1,59 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/fit_pipeline.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def fit_pipeline(trainData: Frame, + testData: Frame, + pip: Frame, + applyFunc: Frame, + hp: Matrix, + evaluationFunc: str, + evalFunHp: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + + params_dict = {'trainData': trainData, 'testData': testData, 'pip': pip, 'applyFunc': applyFunc, 'hp': hp, 'evaluationFunc': evaluationFunc, 'evalFunHp': evalFunHp} + params_dict.update(kwargs) + + vX_0 = Matrix(trainData.sds_context, '') + vX_1 = Matrix(trainData.sds_context, '') + vX_2 = Matrix(trainData.sds_context, '') + vX_3 = List(trainData.sds_context, '') + vX_4 = List(trainData.sds_context, '') + output_nodes = [vX_0, vX_1, vX_2, vX_3, vX_4, ] + + op = MultiReturn(trainData.sds_context, 'fit_pipeline', output_nodes, named_input_nodes=params_dict) + + vX_0._unnamed_input_nodes = [op] + vX_1._unnamed_input_nodes = [op] + vX_2._unnamed_input_nodes = [op] + vX_3._unnamed_input_nodes = [op] + vX_4._unnamed_input_nodes = [op] + + return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py new file mode 100644 index 00000000000..d70ca4a42c5 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py @@ -0,0 +1,52 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/fixInvalidLengths.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def fixInvalidLengths(F1: Frame, + mask: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + + params_dict = {'F1': F1, 'mask': mask} + params_dict.update(kwargs) + + vX_0 = Frame(F1.sds_context, '') + vX_1 = Matrix(F1.sds_context, '') + vX_2 = Matrix(F1.sds_context, '') + vX_3 = Matrix(F1.sds_context, '') + output_nodes = [vX_0, vX_1, vX_2, vX_3, ] + + op = MultiReturn(F1.sds_context, 'fixInvalidLengths', output_nodes, named_input_nodes=params_dict) + + vX_0._unnamed_input_nodes = [op] + vX_1._unnamed_input_nodes = [op] + vX_2._unnamed_input_nodes = [op] + vX_3._unnamed_input_nodes = [op] + + return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py new file mode 100644 index 00000000000..2fa9c5f748c --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py @@ -0,0 +1,40 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/fixInvalidLengthsApply.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def fixInvalidLengthsApply(X: Frame, + mask: Matrix, + qLow: Matrix, + qUp: Matrix): + + params_dict = {'X': X, 'mask': mask, 'qLow': qLow, 'qUp': qUp} + return Matrix(X.sds_context, + 'fixInvalidLengthsApply', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/frameSort.py b/src/main/python/systemds/operator/algorithm/builtin/frameSort.py new file mode 100644 index 00000000000..1199c8529c3 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/frameSort.py @@ -0,0 +1,40 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/frameSort.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def frameSort(F: Frame, + mask: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + + params_dict = {'F': F, 'mask': mask} + params_dict.update(kwargs) + return Matrix(F.sds_context, + 'frameSort', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/glmPredict.py b/src/main/python/systemds/operator/algorithm/builtin/glmPredict.py new file mode 100644 index 00000000000..38d7ef570f3 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/glmPredict.py @@ -0,0 +1,48 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/glmPredict.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def glmPredict(X: Matrix, + B: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + :param dfam: GLM distribution family: 1 = Power, 2 = Binomial, 3 = Multinomial Logit + :param vpow: Power for Variance defined as (mean)^power (ignored if dfam != 1): + :param link: Link function code: 0 = canonical (depends on distribution), 1 = Power, + :param lpow: Power for Link function defined as (mean)^power (ignored if link != 1): + :param disp: Dispersion value, when available + :param verbose: Print statistics to stdout + :return: 'OperationNode' containing printed one per each line, in the following & string identifier for the statistic, see the table below. & optional integer value that specifies the y-column for per-column statistics; & binomial/multinomial one-column y input is converted into multi-column. & optional boolean value (true or false) that tells us whether or not the input & value of the statistic. & meaning + """ + params_dict = {'X': X, 'B': B} + params_dict.update(kwargs) + return Matrix(X.sds_context, + 'glmPredict', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/gmm.py b/src/main/python/systemds/operator/algorithm/builtin/gmm.py index 350ee0a835e..e2f74fab8ff 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/gmm.py +++ b/src/main/python/systemds/operator/algorithm/builtin/gmm.py @@ -30,7 +30,6 @@ def gmm(X: Matrix, - verbose: bool, **kwargs: Dict[str, VALID_INPUT_TYPES]): """ :param n_components: Number of n_components in the Gaussian mixture model @@ -41,7 +40,7 @@ def gmm(X: Matrix, :param tol: tolerance value for convergence :return: 'OperationNode' containing of estimated parameters & information criterion for best iteration & kth class """ - params_dict = {'X': X, 'verbose': verbose} + params_dict = {'X': X} params_dict.update(kwargs) vX_0 = Matrix(X.sds_context, '') diff --git a/src/main/python/systemds/operator/algorithm/builtin/gridSearch.py b/src/main/python/systemds/operator/algorithm/builtin/gridSearch.py index 0ddae9ca4e0..42304818b92 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/gridSearch.py +++ b/src/main/python/systemds/operator/algorithm/builtin/gridSearch.py @@ -24,15 +24,43 @@ from typing import Dict, Iterable -from systemds.operator import OperationNode, Matrix +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar from systemds.script_building.dag import OutputType from systemds.utils.consts import VALID_INPUT_TYPES -def gridSearch(X: OperationNode, y: OperationNode, train: str, predict: str, params: Iterable, paramValues: Iterable, **kwargs: Dict[str, VALID_INPUT_TYPES]): - - params_dict = {'X':X, 'y':y, 'train':train, 'predict':predict, 'params':params, 'paramValues':paramValues} + +def gridSearch(X: Matrix, + y: Matrix, + train: str, + predict: str, + params: List, + paramValues: List, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + :param train: Name ft of the train function to call via ft(trainArgs) + :param predict: Name fp of the loss function to call via fp((predictArgs,B)) + :param numB: Maximum number of parameters in model B (pass the max because the size + :param may: parameters like icpt or multi-class classification) + :param columnvectors: hyper-parameters in 'params' + :param gridSearch: hyper-parameter by name, if + :param not: an empty list, the lm parameters are used + :param gridSearch: trained models at the end, if + :param not: an empty list, list(X, y) is used instead + :param cv: flag enabling k-fold cross validation, otherwise training loss + :param cvk: if cv=TRUE, specifies the the number of folds, otherwise ignored + :param verbose: flag for verbose debug output + :return: 'OperationNode' containing returned as a column-major linearized column vector + """ + params_dict = {'X': X, 'y': y, 'train': train, 'predict': predict, 'params': params, 'paramValues': paramValues} params_dict.update(kwargs) - return OperationNode(X.sds_context, 'gridSearch', named_input_nodes=params_dict, output_type=OutputType.LIST, number_of_outputs=2, output_types=[OutputType.MATRIX, OutputType.FRAME]) + + vX_0 = Matrix(X.sds_context, '') + vX_1 = Frame(X.sds_context, '') + output_nodes = [vX_0, vX_1, ] + + op = MultiReturn(X.sds_context, 'gridSearch', output_nodes, named_input_nodes=params_dict) + vX_0._unnamed_input_nodes = [op] + vX_1._unnamed_input_nodes = [op] - \ No newline at end of file + return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/hyperband.py b/src/main/python/systemds/operator/algorithm/builtin/hyperband.py index 4a0065dc064..727bb51bec5 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/hyperband.py +++ b/src/main/python/systemds/operator/algorithm/builtin/hyperband.py @@ -33,7 +33,7 @@ def hyperband(X_train: Matrix, y_train: Matrix, X_val: Matrix, y_val: Matrix, - params: Iterable, + params: List, paramRanges: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]): """ diff --git a/src/main/python/systemds/operator/algorithm/builtin/knn.py b/src/main/python/systemds/operator/algorithm/builtin/knn.py new file mode 100644 index 00000000000..33c871c4271 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/knn.py @@ -0,0 +1,66 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/knn.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def knn(Train: Matrix, + Test: Matrix, + CL: Matrix, + START_SELECTED: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + :param CL_T: Y The target type of matrix CL whether + :param columns: are continuous ( =1 ) or + :param trans_continuous: Y Option flag for continuous feature transformed to [-1,1]: + :param k_value: Y k value for KNN, ignore if select_k enable + :param select_k: Y Use k selection algorithm to estimate k (TRUE means yes) + :param k_min: Y Min k value( available if select_k = 1 ) + :param k_max: Y Max k value( available if select_k = 1 ) + :param select_feature: Y Use feature selection algorithm to select feature (TRUE means yes) + :param feature_max: Y Max feature selection + :param interval: Y Interval value for K selecting ( available if select_k = 1 ) + :param feature_importance: Y Use feature importance algorithm to estimate each feature + :param predict_con_tg: Y Continuous target predict function: mean(=0) or median(=1) + :return: 'OperationNode' containing + """ + params_dict = {'Train': Train, 'Test': Test, 'CL': CL, 'START_SELECTED': START_SELECTED} + params_dict.update(kwargs) + + vX_0 = Matrix(Train.sds_context, '') + vX_1 = Matrix(Train.sds_context, '') + vX_2 = Matrix(Train.sds_context, '') + output_nodes = [vX_0, vX_1, vX_2, ] + + op = MultiReturn(Train.sds_context, 'knn', output_nodes, named_input_nodes=params_dict) + + vX_0._unnamed_input_nodes = [op] + vX_1._unnamed_input_nodes = [op] + vX_2._unnamed_input_nodes = [op] + + return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/l2svm.py b/src/main/python/systemds/operator/algorithm/builtin/l2svm.py index 5b622799800..1a8cfbab8a4 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/l2svm.py +++ b/src/main/python/systemds/operator/algorithm/builtin/l2svm.py @@ -35,7 +35,7 @@ def l2svm(X: Matrix, """ :param intercept: No Intercept ( If set to TRUE then a constant bias column is added to X) :param epsilon: Procedure terminates early if the reduction in objective function value is less - :param lambda: Regularization parameter (lambda) for L2 regularization + :param reg: Regularization parameter (reg) for L2 regularization :param maxIterations: Maximum number of conjugate gradient iterations :param maxii: - :param verbose: Set to true if one wants print statements updating on loss. diff --git a/src/main/python/systemds/operator/algorithm/builtin/lenetPredict.py b/src/main/python/systemds/operator/algorithm/builtin/lenetPredict.py new file mode 100644 index 00000000000..073b8eda24e --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/lenetPredict.py @@ -0,0 +1,49 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/lenetPredict.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def lenetPredict(model: List, + X: Matrix, + C: int, + Hin: int, + Win: int, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + :param C: Number of input channels + :param Hin: Input height + :param Win: Input width + :param batch_size: Batch size + :return: 'OperationNode' containing + """ + params_dict = {'model': model, 'X': X, 'C': C, 'Hin': Hin, 'Win': Win} + params_dict.update(kwargs) + return Matrix(model.sds_context, + 'lenetPredict', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/lenetTrain.py b/src/main/python/systemds/operator/algorithm/builtin/lenetTrain.py index 66bc460decb..ac51610564b 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/lenetTrain.py +++ b/src/main/python/systemds/operator/algorithm/builtin/lenetTrain.py @@ -46,7 +46,7 @@ def lenetTrain(X: Matrix, :param lr: Learning rate :param mu: Momentum value :param decay: Learning rate decay - :param lambda: Regularization strength + :param reg: Regularization strength :param seed: Seed for model initialization :param verbose: Flag indicates if function should print to stdout :return: 'OperationNode' containing diff --git a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py b/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py index 612887be37c..784eea8e483 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py +++ b/src/main/python/systemds/operator/algorithm/builtin/lmPredict.py @@ -31,10 +31,14 @@ def lmPredict(X: Matrix, B: Matrix, - ytest: Matrix, **kwargs: Dict[str, VALID_INPUT_TYPES]): - - params_dict = {'X': X, 'B': B, 'ytest': ytest} + """ + :param if: is not wanted + :param icpt: Intercept presence, shifting and rescaling the columns of X + :param verbose: If TRUE print messages are activated + :return: 'OperationNode' containing + """ + params_dict = {'X': X, 'B': B} params_dict.update(kwargs) return Matrix(X.sds_context, 'lmPredict', diff --git a/src/main/python/systemds/operator/algorithm/builtin/mcc.py b/src/main/python/systemds/operator/algorithm/builtin/mcc.py new file mode 100644 index 00000000000..194ed45c98d --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/mcc.py @@ -0,0 +1,38 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/mcc.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def mcc(predictions: Matrix, + labels: Matrix): + + params_dict = {'predictions': predictions, 'labels': labels} + return Matrix(predictions.sds_context, + 'mcc', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/mdedup.py b/src/main/python/systemds/operator/algorithm/builtin/mdedup.py new file mode 100644 index 00000000000..6e9bacd9a5c --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/mdedup.py @@ -0,0 +1,46 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/mdedup.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def mdedup(X: Frame, + LHSfeatures: Matrix, + LHSthreshold: Matrix, + RHSfeatures: Matrix, + RHSthreshold: Matrix, + verbose: bool): + """ + :param X: Input Frame X + :param verbose: To print the output + :return: 'OperationNode' containing + """ + params_dict = {'X': X, 'LHSfeatures': LHSfeatures, 'LHSthreshold': LHSthreshold, 'RHSfeatures': RHSfeatures, 'RHSthreshold': RHSthreshold, 'verbose': verbose} + return Matrix(X.sds_context, + 'mdedup', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/mice.py b/src/main/python/systemds/operator/algorithm/builtin/mice.py new file mode 100644 index 00000000000..38e957d0375 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/mice.py @@ -0,0 +1,61 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/mice.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def mice(X: Matrix, + cMask: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + :param iter: Number of iteration for multiple imputations + :param threshold: confidence value [0, 1] for robust imputation, values will only be imputed + :param if: value has probability greater than threshold, + :param only: categorical data + :param verbose: Boolean value. + :return: 'OperationNode' containing are represented with empty string i.e ",," in csv file & n are storing continuos/numeric data and variables with & storing categorical data + """ + params_dict = {'X': X, 'cMask': cMask} + params_dict.update(kwargs) + + vX_0 = Matrix(X.sds_context, '') + vX_1 = Matrix(X.sds_context, '') + vX_2 = Scalar(X.sds_context, '') + vX_3 = Frame(X.sds_context, '') + vX_4 = List(X.sds_context, '') + output_nodes = [vX_0, vX_1, vX_2, vX_3, vX_4, ] + + op = MultiReturn(X.sds_context, 'mice', output_nodes, named_input_nodes=params_dict) + + vX_0._unnamed_input_nodes = [op] + vX_1._unnamed_input_nodes = [op] + vX_2._unnamed_input_nodes = [op] + vX_3._unnamed_input_nodes = [op] + vX_4._unnamed_input_nodes = [op] + + return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/miceApply.py b/src/main/python/systemds/operator/algorithm/builtin/miceApply.py index ab14dfe398a..ea484df0fa2 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/miceApply.py +++ b/src/main/python/systemds/operator/algorithm/builtin/miceApply.py @@ -33,7 +33,7 @@ def miceApply(X: Matrix, meta: Matrix, threshold: float, dM: Frame, - betaList: Iterable): + betaList: List): """ :param threshold: confidence value [0, 1] for robust imputation, values will only be imputed :param if: value has probability greater than threshold, diff --git a/src/main/python/systemds/operator/algorithm/builtin/msvm.py b/src/main/python/systemds/operator/algorithm/builtin/msvm.py index d9a2791cccf..53db201cba5 100644 --- a/src/main/python/systemds/operator/algorithm/builtin/msvm.py +++ b/src/main/python/systemds/operator/algorithm/builtin/msvm.py @@ -37,7 +37,7 @@ def msvm(X: Matrix, :param num_classes: Number of classes :param epsilon: Procedure terminates early if the reduction in objective function :param value: than epsilon (tolerance) times the initial objective function value. - :param lambda: Regularization parameter (lambda) for L2 regularization + :param reg: Regularization parameter (lambda) for L2 regularization :param maxIterations: Maximum number of conjugate gradient iterations :param verbose: Set to true to print while training. :return: 'OperationNode' containing diff --git a/src/main/python/systemds/operator/algorithm/builtin/pcaInverse.py b/src/main/python/systemds/operator/algorithm/builtin/pcaInverse.py new file mode 100644 index 00000000000..14ce313c54b --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/pcaInverse.py @@ -0,0 +1,40 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/pcaInverse.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def pcaInverse(Y: Matrix, + Clusters: Matrix, + Centering: Matrix, + ScaleFactor: Matrix): + + params_dict = {'Y': Y, 'Clusters': Clusters, 'Centering': Centering, 'ScaleFactor': ScaleFactor} + return Matrix(Y.sds_context, + 'pcaInverse', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/pcaTransform.py b/src/main/python/systemds/operator/algorithm/builtin/pcaTransform.py new file mode 100644 index 00000000000..53bf5e8a2a3 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/pcaTransform.py @@ -0,0 +1,40 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/pcaTransform.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def pcaTransform(X: Matrix, + Clusters: Matrix, + Centering: Matrix, + ScaleFactor: Matrix): + + params_dict = {'X': X, 'Clusters': Clusters, 'Centering': Centering, 'ScaleFactor': ScaleFactor} + return Matrix(X.sds_context, + 'pcaTransform', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/setdiff.py b/src/main/python/systemds/operator/algorithm/builtin/setdiff.py new file mode 100644 index 00000000000..7559935e32c --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/setdiff.py @@ -0,0 +1,38 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/setdiff.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def setdiff(X: Matrix, + Y: Matrix): + + params_dict = {'X': X, 'Y': Y} + return Matrix(X.sds_context, + 'setdiff', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/stratstats.py b/src/main/python/systemds/operator/algorithm/builtin/stratstats.py new file mode 100644 index 00000000000..64c5679cb89 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/stratstats.py @@ -0,0 +1,46 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/stratstats.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def stratstats(X: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + :param the: empty means "use X in place of Y" + :param the: empty means "use X in place of S" + :param the: empty means "use columns 1 : ncol(X)" + :param the: empty means "use columns 1 : ncol(Y)" + :param Scid: Column index of the stratum column in S + :return: 'OperationNode' containing the following information: + """ + params_dict = {'X': X} + params_dict.update(kwargs) + return Matrix(X.sds_context, + 'stratstats', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/symmetricDifference.py b/src/main/python/systemds/operator/algorithm/builtin/symmetricDifference.py new file mode 100644 index 00000000000..33a82096679 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/symmetricDifference.py @@ -0,0 +1,38 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/symmetricDifference.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def symmetricDifference(X: Matrix, + Y: Matrix): + + params_dict = {'X': X, 'Y': Y} + return Matrix(X.sds_context, + 'symmetricDifference', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/topk_cleaning.py b/src/main/python/systemds/operator/algorithm/builtin/topk_cleaning.py new file mode 100644 index 00000000000..e9165510b4c --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/topk_cleaning.py @@ -0,0 +1,59 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/topk_cleaning.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def topk_cleaning(dataTrain: Frame, + primitives: Frame, + parameters: Frame, + evaluationFunc: str, + evalFunHp: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + + params_dict = {'dataTrain': dataTrain, 'primitives': primitives, 'parameters': parameters, 'evaluationFunc': evaluationFunc, 'evalFunHp': evalFunHp} + params_dict.update(kwargs) + + vX_0 = Frame(dataTrain.sds_context, '') + vX_1 = Matrix(dataTrain.sds_context, '') + vX_2 = Matrix(dataTrain.sds_context, '') + vX_3 = Scalar(dataTrain.sds_context, '') + vX_4 = Matrix(dataTrain.sds_context, '') + vX_5 = Frame(dataTrain.sds_context, '') + output_nodes = [vX_0, vX_1, vX_2, vX_3, vX_4, vX_5, ] + + op = MultiReturn(dataTrain.sds_context, 'topk_cleaning', output_nodes, named_input_nodes=params_dict) + + vX_0._unnamed_input_nodes = [op] + vX_1._unnamed_input_nodes = [op] + vX_2._unnamed_input_nodes = [op] + vX_3._unnamed_input_nodes = [op] + vX_4._unnamed_input_nodes = [op] + vX_5._unnamed_input_nodes = [op] + + return op diff --git a/src/main/python/systemds/operator/algorithm/builtin/union.py b/src/main/python/systemds/operator/algorithm/builtin/union.py new file mode 100644 index 00000000000..013de4bc3c9 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/union.py @@ -0,0 +1,42 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/union.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def union(X: Matrix, + Y: Matrix): + """ + :param X: input vector + :param Y: input vector + :return: 'OperationNode' containing with all unique rows existing in x and y + """ + params_dict = {'X': X, 'Y': Y} + return Matrix(X.sds_context, + 'union', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/unique.py b/src/main/python/systemds/operator/algorithm/builtin/unique.py new file mode 100644 index 00000000000..cceb3fadea3 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/unique.py @@ -0,0 +1,37 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/unique.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def unique(X: Matrix): + + params_dict = {'X': X} + return Matrix(X.sds_context, + 'unique', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/algorithm/builtin/xgboost.py b/src/main/python/systemds/operator/algorithm/builtin/xgboost.py new file mode 100644 index 00000000000..80db85bf0e8 --- /dev/null +++ b/src/main/python/systemds/operator/algorithm/builtin/xgboost.py @@ -0,0 +1,49 @@ +# ------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------- + +# Autogenerated By : src/main/python/generator/generator.py +# Autogenerated From : scripts/builtin/xgboost.dml + +from typing import Dict, Iterable + +from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar +from systemds.script_building.dag import OutputType +from systemds.utils.consts import VALID_INPUT_TYPES + + +def xgboost(X: Matrix, + y: Matrix, + **kwargs: Dict[str, VALID_INPUT_TYPES]): + """ + :param Feature: a scalar feature and features 2 is a categorical feature + :param If: not provided by default all variables are assumed to be scale (1) + :param sml_type: Supervised machine learning type: 1 = Regression(default), 2 = Classification + :param num_trees: Number of trees to be created in the xgboost model + :param learning_rate: Alias: eta. After each boosting step the learning rate controls the weights of the new predictions + :param max_depth: Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit + :param lambda: L2 regularization term on weights. Increasing this value will make model more conservative and reduce amount of leaves of a tree + :return: 'OperationNode' containing feature id if the feature is categorical) & looks at if j is an internal node, otherwise 0 & stored at m[6,j] if the feature chosen for j is scale, & feature chosen for j is categorical rows 6,7,... depict & chosen for j & a leaf node 1 if j is impure and the number of samples at j > threshold, otherwise 0 + """ + params_dict = {'X': X, 'y': y} + params_dict.update(kwargs) + return Matrix(X.sds_context, + 'xgboost', + named_input_nodes=params_dict) diff --git a/src/main/python/systemds/operator/nodes/scalar.py b/src/main/python/systemds/operator/nodes/scalar.py index a4d62928ea5..815858fec76 100644 --- a/src/main/python/systemds/operator/nodes/scalar.py +++ b/src/main/python/systemds/operator/nodes/scalar.py @@ -37,7 +37,7 @@ class Scalar(OperationNode): __assign: bool - def __init__(self, sds_context: 'SystemDSContext', operation: str, + def __init__(self, sds_context, operation: str, unnamed_input_nodes: Iterable[VALID_INPUT_TYPES] = None, named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None, output_type: OutputType = OutputType.DOUBLE, diff --git a/src/main/python/tests/algorithms/test_gmm.py b/src/main/python/tests/algorithms/test_gmm.py index 10a561fe0e9..37475570daa 100644 --- a/src/main/python/tests/algorithms/test_gmm.py +++ b/src/main/python/tests/algorithms/test_gmm.py @@ -49,7 +49,7 @@ def test_lm_simple(self): n_gaussian = 4 [_, _, _, _, mu, precision_cholesky, weight] = gmm( - features, False, n_components=n_gaussian, seed=10) + features, n_components=n_gaussian, seed=10) [_, pp] = gmmPredict( test, weight, mu, precision_cholesky, model=self.sds.scalar("VVV")) diff --git a/src/main/python/tests/algorithms/test_gmm_train_predict.py b/src/main/python/tests/algorithms/test_gmm_train_predict.py index 26c3128cfa6..151b5315490 100644 --- a/src/main/python/tests/algorithms/test_gmm_train_predict.py +++ b/src/main/python/tests/algorithms/test_gmm_train_predict.py @@ -47,7 +47,7 @@ def train(self): n_gaussian = 4 [_, _, _, _, mu, precision_cholesky, weight] = gmm( - features, False, n_components=n_gaussian, seed=10) + features, n_components=n_gaussian, seed=10) model = sds_train.list(mu, precision_cholesky, weight) model.write(self.model_path).compute() diff --git a/src/test/scripts/functions/builtin/l2svm.dml b/src/test/scripts/functions/builtin/l2svm.dml index 9b9502dbfa8..20438ee457f 100644 --- a/src/test/scripts/functions/builtin/l2svm.dml +++ b/src/test/scripts/functions/builtin/l2svm.dml @@ -21,5 +21,5 @@ X = read($X) Y = read($Y) -model= l2svm(X=X, Y=Y, intercept = $inc, epsilon = $eps, lambda = $lam, maxIterations = $max ) +model= l2svm(X=X, Y=Y, intercept = $inc, epsilon = $eps, reg = $lam, maxIterations = $max ) write(model, $model) diff --git a/src/test/scripts/functions/builtin/multisvm.dml b/src/test/scripts/functions/builtin/multisvm.dml index b95b56f9386..b1fb5fd9e7f 100644 --- a/src/test/scripts/functions/builtin/multisvm.dml +++ b/src/test/scripts/functions/builtin/multisvm.dml @@ -22,5 +22,5 @@ X = read($X) Y = read($Y) model = msvm(X=X, Y=Y, intercept = $inc, - epsilon = $eps, lambda = $lam, maxIterations = $max ) + epsilon = $eps, reg = $lam, maxIterations = $max ) write(model, $model) diff --git a/src/test/scripts/functions/federated/FederatedAlsCGTest.dml b/src/test/scripts/functions/federated/FederatedAlsCGTest.dml index 05258f4ac6f..1b386034683 100644 --- a/src/test/scripts/functions/federated/FederatedAlsCGTest.dml +++ b/src/test/scripts/functions/federated/FederatedAlsCGTest.dml @@ -23,12 +23,12 @@ X = federated(addresses=list($in_X1, $in_X2), ranges=list(list(0, 0), list($rows, $cols), list($rows, 0), list($rows * 2, $cols))); rank = $in_rank; -reg = $in_reg; -lambda = $in_lambda; +regType = $in_reg; +reg = $in_lambda; maxi = $in_maxi; thr = $in_thr; -[U, V] = alsCG(X = X, rank = rank, reg = reg, lambda = lambda, maxi = maxi, check = TRUE, thr = thr); +[U, V] = alsCG(X = X, rank = rank, regType = regType, reg = reg, maxi = maxi, check = TRUE, thr = thr); Z = U %*% V; diff --git a/src/test/scripts/functions/federated/FederatedAlsCGTestReference.dml b/src/test/scripts/functions/federated/FederatedAlsCGTestReference.dml index a73efba1a68..64ef02469e3 100644 --- a/src/test/scripts/functions/federated/FederatedAlsCGTestReference.dml +++ b/src/test/scripts/functions/federated/FederatedAlsCGTestReference.dml @@ -22,12 +22,12 @@ X = rbind(read($in_X1), read($in_X2)); rank = $in_rank; -reg = $in_reg; -lambda = $in_lambda; +regType = $in_reg; +reg = $in_lambda; maxi = $in_maxi; thr = $in_thr; -[U, V] = alsCG(X = X, rank = rank, reg = reg, lambda = lambda, maxi = maxi, check = TRUE, thr = thr); +[U, V] = alsCG(X = X, rank = rank, regType = regType, reg = reg, maxi = maxi, check = TRUE, thr = thr); Z = U %*% V; diff --git a/src/test/scripts/functions/federated/FederatedL2SVMTest.dml b/src/test/scripts/functions/federated/FederatedL2SVMTest.dml index 7ae1a57fcb2..e31a08d29fe 100644 --- a/src/test/scripts/functions/federated/FederatedL2SVMTest.dml +++ b/src/test/scripts/functions/federated/FederatedL2SVMTest.dml @@ -30,6 +30,6 @@ else { ranges=list(list(0, 0), list($rows / 2, $cols), list($rows / 2, 0), list($rows, $cols))) } -model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100) +model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100) write(model, $out) diff --git a/src/test/scripts/functions/federated/FederatedL2SVMTestReference.dml b/src/test/scripts/functions/federated/FederatedL2SVMTestReference.dml index b5439d425e9..e5e428abe68 100644 --- a/src/test/scripts/functions/federated/FederatedL2SVMTestReference.dml +++ b/src/test/scripts/functions/federated/FederatedL2SVMTestReference.dml @@ -27,6 +27,6 @@ if( $4 ) { else X = rbind(read($1), read($2)) -model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100) +model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100) write(model, $5) diff --git a/src/test/scripts/functions/federated/FederatedMSVMTest.dml b/src/test/scripts/functions/federated/FederatedMSVMTest.dml index 3d9cc8c9104..b44dd727e15 100644 --- a/src/test/scripts/functions/federated/FederatedMSVMTest.dml +++ b/src/test/scripts/functions/federated/FederatedMSVMTest.dml @@ -30,6 +30,6 @@ else { ranges=list(list(0, 0), list($rows / 2, $cols), list($rows / 2, 0), list($rows, $cols))) } -model = msvm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100, verbose = FALSE) +model = msvm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100, verbose = FALSE) write(model, $out) diff --git a/src/test/scripts/functions/federated/FederatedMSVMTestReference.dml b/src/test/scripts/functions/federated/FederatedMSVMTestReference.dml index 19fad3a09cd..e4da0f346a6 100644 --- a/src/test/scripts/functions/federated/FederatedMSVMTestReference.dml +++ b/src/test/scripts/functions/federated/FederatedMSVMTestReference.dml @@ -27,6 +27,6 @@ if( $4 ) { else X = rbind(read($1), read($2)) -model = msvm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100, verbose = FALSE) +model = msvm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100, verbose = FALSE) write(model, $5) diff --git a/src/test/scripts/functions/federated/FederatedYL2SVMTest.dml b/src/test/scripts/functions/federated/FederatedYL2SVMTest.dml index b1f5fdc6d70..a56619f1aae 100644 --- a/src/test/scripts/functions/federated/FederatedYL2SVMTest.dml +++ b/src/test/scripts/functions/federated/FederatedYL2SVMTest.dml @@ -23,5 +23,5 @@ X = federated(addresses=list($in_X1, $in_X2), ranges=list(list(0, 0), list($rows / 2, $cols), list($rows / 2, 0), list($rows, $cols))) Y = federated(addresses=list($in_Y1, $in_Y2), ranges=list(list(0, 0), list($rows / 2, 1), list($rows / 2, 0), list($rows, 1))) -model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100) +model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100) write(model, $out) diff --git a/src/test/scripts/functions/federated/FederatedYL2SVMTest2.dml b/src/test/scripts/functions/federated/FederatedYL2SVMTest2.dml index 4e72b492669..aa73e3a463c 100644 --- a/src/test/scripts/functions/federated/FederatedYL2SVMTest2.dml +++ b/src/test/scripts/functions/federated/FederatedYL2SVMTest2.dml @@ -23,5 +23,5 @@ X = federated(addresses=list($in_X1), ranges=list(list(0, 0), list($rows / 2, $cols))) Y = federated(addresses=list($in_Y1), ranges=list(list(0, 0), list($rows / 2, 1))) -model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100) +model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100) write(model, $out) diff --git a/src/test/scripts/functions/federated/FederatedYL2SVMTest2Reference.dml b/src/test/scripts/functions/federated/FederatedYL2SVMTest2Reference.dml index 486e856f71b..03c2b5f0bfe 100644 --- a/src/test/scripts/functions/federated/FederatedYL2SVMTest2Reference.dml +++ b/src/test/scripts/functions/federated/FederatedYL2SVMTest2Reference.dml @@ -21,5 +21,5 @@ X = read($1) Y = read($3) -model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100) +model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100) write(model, $5) diff --git a/src/test/scripts/functions/federated/FederatedYL2SVMTestReference.dml b/src/test/scripts/functions/federated/FederatedYL2SVMTestReference.dml index c17a6559c17..26233a3ab4c 100644 --- a/src/test/scripts/functions/federated/FederatedYL2SVMTestReference.dml +++ b/src/test/scripts/functions/federated/FederatedYL2SVMTestReference.dml @@ -21,5 +21,5 @@ X = rbind(read($1), read($2)) Y = rbind(read($3), read($4)) -model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, lambda = 1, maxIterations = 100) +model = l2svm(X=X, Y=Y, intercept = FALSE, epsilon = 1e-12, reg = 1, maxIterations = 100) write(model, $5) diff --git a/src/test/scripts/functions/lineage/LineageReuseAlg5.dml b/src/test/scripts/functions/lineage/LineageReuseAlg5.dml index 57af57542e5..660c705f28b 100644 --- a/src/test/scripts/functions/lineage/LineageReuseAlg5.dml +++ b/src/test/scripts/functions/lineage/LineageReuseAlg5.dml @@ -31,7 +31,7 @@ return (Matrix[Double] loss) { N = 1000; no_lamda = 10; stp = (0.1 - 0.0001)/no_lamda; -lamda = 0.0001; +reg = 0.0001; Rbeta = matrix(0, rows=N+1, cols=no_lamda*2); Rloss = matrix(0, rows=no_lamda*2, cols=1); i = 1; @@ -43,18 +43,18 @@ y = ceil(y); for (l in 1:no_lamda) { beta = l2svm(X=X, Y=y, intercept=FALSE, epsilon=1e-12, - lambda = lamda, verbose=FALSE); + reg = reg, verbose=FALSE); Rbeta[1:nrow(beta),i] = beta; Rloss[i,] = l2norm(X, y, beta, FALSE); i = i + 1; beta = l2svm(X=X, Y=y, intercept=TRUE, epsilon=1e-12, - lambda = lamda, verbose=FALSE); + reg = reg, verbose=FALSE); Rbeta[1:nrow(beta),i] = beta; Rloss[i,] = l2norm(X, y, beta, TRUE); i = i + 1; - lamda = lamda + stp; + reg = reg + stp; } leastLoss = rowIndexMin(t(Rloss)); diff --git a/src/test/scripts/functions/lineage/LineageTraceParforMSVM.dml b/src/test/scripts/functions/lineage/LineageTraceParforMSVM.dml index 23f39b08145..ebd360b7316 100644 --- a/src/test/scripts/functions/lineage/LineageTraceParforMSVM.dml +++ b/src/test/scripts/functions/lineage/LineageTraceParforMSVM.dml @@ -20,7 +20,7 @@ #------------------------------------------------------------- msvm2 = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE, - Double epsilon = 0.001, Double lambda = 1.0, Integer maxIterations = 100, Boolean verbose = FALSE) + Double epsilon = 0.001, Double reg = 1.0, Integer maxIterations = 100, Boolean verbose = FALSE) return(Matrix[Double] model) { if(min(Y) < 0) @@ -43,7 +43,7 @@ msvm2 = function(Matrix[Double] X, Matrix[Double] Y, Boolean intercept = FALSE, parfor(class in 1:max(Y), opt=CONSTRAINED, par=4, mode=REMOTE_SPARK) { Y_local = 2 * (Y == class) - 1 w[,class] = l2svm(X=X, Y=Y_local, intercept=intercept, - epsilon=epsilon, lambda=lambda, maxIterations=maxIterations, + epsilon=epsilon, reg=reg, maxIterations=maxIterations, verbose= verbose, columnId=class) } diff --git a/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml b/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml index 296165c0292..1b035b88f3f 100644 --- a/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml +++ b/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml @@ -118,9 +118,9 @@ return(Matrix[Double] output, Matrix[Double] error) if(is.na(as.scalar(evalFunHp[1,1]))) { nc = max(Y); - params = list("intercept", "lambda", "epsilon") + params = list("intercept", "reg", "epsilon") paramRanges = list(seq(0, 1), 10^seq(1,-3), 10^seq(1,-5)); - trainArgs = list(X=X, Y=Y, intercept=-1, lambda=-1, epsilon=-1, maxIterations=1000, verbose=FALSE); + trainArgs = list(X=X, Y=Y, intercept=-1, reg=-1, epsilon=-1, maxIterations=1000, verbose=FALSE); dataArgs = list("X", "Y"); [B1,opt] = gridSearch(X=X, y=Y, train="msvm", predict="accuracyMSVM", numB=(ncol(X)+1)*(nc), params=params, paramValues=paramRanges, dataArgs=dataArgs, trainArgs=trainArgs, cv=TRUE, cvk=3, verbose=TRUE); @@ -134,7 +134,7 @@ return(Matrix[Double] output, Matrix[Double] error) a = 0 } else { - beta = msvm(X=X, Y=Y, intercept=as.scalar(evalFunHp[1,1]), lambda=as.scalar(evalFunHp[1,2]), epsilon=as.scalar(evalFunHp[1,3]), + beta = msvm(X=X, Y=Y, intercept=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), epsilon=as.scalar(evalFunHp[1,3]), maxIterations=1000, verbose=FALSE); yhat = msvmPredict(X=Xtest, W=beta); yhat = rowIndexMax(yhat)