From a4d7183fea48ec8555c799190bcd13d497c24495 Mon Sep 17 00:00:00 2001 From: Orhan Kislal Date: Wed, 15 Aug 2018 12:27:50 -0700 Subject: [PATCH] Documentation: Remove online examples from sql functions. JIRA: MADLIB-1260 For a madlib module, we can call `select madlib_schema.module_name('example');` to print out examples of this module. These examples are hard to maintain and not that useful, since we already have examples in our user documentation at http://madlib.apache.org/docs/latest/index.html. This commit removes those examples from every module that has them, and makes sure madlib returns a proper error message when a user requests them. Co-authored-by: Orhan Kislal Co-authored-by: Nandish Jayaram --- .../modules/assoc_rules/assoc_rules.py_in | 63 +- .../postgres/modules/convex/mlp_igd.py_in | 375 +---------- .../modules/elastic_net/elastic_net.py_in | 125 ---- src/ports/postgres/modules/glm/glm.py_in | 181 ----- src/ports/postgres/modules/glm/multinom.py_in | 97 --- src/ports/postgres/modules/glm/ordinal.py_in | 97 --- src/ports/postgres/modules/graph/apsp.py_in | 72 -- src/ports/postgres/modules/graph/bfs.py_in | 58 -- src/ports/postgres/modules/graph/hits.py_in | 86 +-- .../postgres/modules/graph/measures.py_in | 120 +--- .../postgres/modules/graph/pagerank.py_in | 109 +-- src/ports/postgres/modules/graph/sssp.py_in | 73 -- src/ports/postgres/modules/graph/wcc.py_in | 125 +--- src/ports/postgres/modules/knn/knn.py_in | 134 +--- .../modules/linalg/matrix_help_message.py_in | 624 +++--------------- src/ports/postgres/modules/linalg/svd.py_in | 46 -- .../linear_systems/dense_linear_systems.py_in | 15 - .../sparse_linear_systems.py_in | 19 - src/ports/postgres/modules/pca/pca.py_in | 114 +--- .../postgres/modules/pca/pca_project.py_in | 161 +---- .../postgres/modules/pmml/table_to_pmml.py_in | 38 -- .../decision_tree.py_in | 65 -- .../random_forest.py_in | 66 -- .../postgres/modules/regress/linear.py_in | 43 -- .../postgres/modules/regress/logistic.py_in | 48 --
.../modules/regress/multilogistic.py_in | 86 --- .../modules/sample/balance_sample.py_in | 55 -- .../modules/sample/stratified_sample.py_in | 59 +- .../modules/sample/stratified_sample.sql_in | 6 +- .../modules/sample/train_test_split.py_in | 73 +- .../postgres/modules/stats/correlation.py_in | 57 -- .../modules/stats/cox_prop_hazards.py_in | 51 -- .../postgres/modules/summary/summary.py_in | 40 -- src/ports/postgres/modules/svm/svm.py_in | 155 ----- .../utilities/minibatch_preprocessing.py_in | 50 -- .../postgres/modules/utilities/path.py_in | 69 -- .../modules/utilities/sessionize.py_in | 84 +-- 37 files changed, 119 insertions(+), 3620 deletions(-) diff --git a/src/ports/postgres/modules/assoc_rules/assoc_rules.py_in b/src/ports/postgres/modules/assoc_rules/assoc_rules.py_in index abc8b5080..243851dd7 100644 --- a/src/ports/postgres/modules/assoc_rules/assoc_rules.py_in +++ b/src/ports/postgres/modules/assoc_rules/assoc_rules.py_in @@ -564,65 +564,6 @@ in each row, with the following columns: independent, to the observed support of X occuring without Y """.format(schema_madlib=schema_madlib) else: - if message.lower() in ("example", "examples"): - return """ ------------------------------------------------------------------------- - EXAMPLES ------------------------------------------------------------------------- -DROP TABLE IF EXISTS test_data; -CREATE TABLE test_data ( - trans_id INT, - product TEXT -); -INSERT INTO test_data VALUES (1, 'beer'); -INSERT INTO test_data VALUES (1, 'diapers'); -INSERT INTO test_data VALUES (1, 'chips'); -INSERT INTO test_data VALUES (2, 'beer'); -INSERT INTO test_data VALUES (2, 'diapers'); -INSERT INTO test_data VALUES (3, 'beer'); -INSERT INTO test_data VALUES (3, 'diapers'); -INSERT INTO test_data VALUES (4, 'beer'); -INSERT INTO test_data VALUES (4, 'chips'); -INSERT INTO test_data VALUES (5, 'beer'); -INSERT INTO test_data VALUES (6, 'beer'); -INSERT INTO test_data VALUES (6, 'diapers'); -INSERT INTO test_data VALUES (6, 
'chips'); -INSERT INTO test_data VALUES (7, 'beer'); -INSERT INTO test_data VALUES (7, 'diapers'); - -Find all association rules with a support and threshold value of -at least 0.25 and 0.5 respectively. - -SELECT * FROM {schema_madlib}.assoc_rules( .25, - .5, - 'trans_id', - 'product', - 'test_data', - NULL, - TRUE - ); - -View output results: -SELECT * FROM assoc_rules; - -Find association rules generated from itemsets of size at most 2, -and a support and threshold value of at least 0.25 and 0.5 respectively. - -SELECT * FROM {schema_madlib}.assoc_rules( .25, - .5, - 'trans_id', - 'product', - 'test_data', - NULL, - TRUE, - 2 - ); - -View output results: -SELECT * FROM assoc_rules; - """.format(schema_madlib=schema_madlib) - else: - return """ + return """ For an overview on usage, run: SELECT {schema_madlib}.assoc_rules('usage'); -For an example of using assoc_rules, run: SELECT {schema_madlib}.assoc_rules('example'); - """.format(schema_madlib=schema_madlib) + """.format(schema_madlib=schema_madlib) diff --git a/src/ports/postgres/modules/convex/mlp_igd.py_in b/src/ports/postgres/modules/convex/mlp_igd.py_in index 7df44ec88..0126d312f 100644 --- a/src/ports/postgres/modules/convex/mlp_igd.py_in +++ b/src/ports/postgres/modules/convex/mlp_igd.py_in @@ -1134,10 +1134,7 @@ def mlp_help(schema_madlib, message, is_classification): functions. For more details on function usage: - SELECT {schema_madlib}.{method}('usage') - - For a small example on using the function: - SELECT {schema_madlib}.{method}('example')""".format(**args) + SELECT {schema_madlib}.{method}('usage')""".format(**args) usage = """ --------------------------------------------------------------------------- @@ -1239,304 +1236,6 @@ def mlp_help(schema_madlib, message, is_classification): """.format(**args) - regression_example = """ - -- Create input table - - CREATE TABLE lin_housing (id serial, x float8[], zipcode int, y float8); - COPY lin_housing (x, zipcode, y) FROM STDIN NULL '?' 
DELIMITER '|'; - {{1,0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,396.90,4.98}}|94016|24.00 - {{1,0.02731,0.00,7.070,0,0.4690,6.4210,78.90,4.9671,2,242.0,17.80,396.90,9.14}}|94016|21.60 - {{1,0.02729,0.00,7.070,0,0.4690,7.1850,61.10,4.9671,2,242.0,17.80,392.83,4.03}}|94016|34.70 - {{1,0.03237,0.00,2.180,0,0.4580,6.9980,45.80,6.0622,3,222.0,18.70,394.63,2.94}}|94016|33.40 - {{1,0.06905,0.00,2.180,0,0.4580,7.1470,54.20,6.0622,3,222.0,18.70,396.90,5.33}}|94016|36.20 - {{1,0.02985,0.00,2.180,0,0.4580,6.4300,58.70,6.0622,3,222.0,18.70,394.12,5.21}}|94016|28.70 - {{1,0.08829,12.50,7.870,0,0.5240,6.0120,66.60,5.5605,5,311.0,15.20,395.60,12.43}}|94016|22.90 - {{1,0.14455,12.50,7.870,0,0.5240,6.1720,96.10,5.9505,5,311.0,15.20,396.90,19.15}}|94016|27.10 - {{1,0.21124,12.50,7.870,0,0.5240,5.6310,100.00,6.0821,5,311.0,15.20,386.63,29.93}}|94016|16.50 - {{1,0.17004,12.50,7.870,0,0.5240,6.0040,85.90,6.5921,5,311.0,15.20,386.71,17.10}}|94016|18.90 - {{1,0.22489,12.50,7.870,0,0.5240,6.3770,94.30,6.3467,5,311.0,15.20,392.52,20.45}}|94016|15.00 - {{1,0.11747,12.50,7.870,0,0.5240,6.0090,82.90,6.2267,5,311.0,15.20,396.90,13.27}}|20001|18.90 - {{1,0.09378,12.50,7.870,0,0.5240,5.8890,39.00,5.4509,5,311.0,15.20,390.50,15.71}}|20001|21.70 - {{1,0.62976,0.00,8.140,0,0.5380,5.9490,61.80,4.7075,4,307.0,21.00,396.90,8.26}}|20001|20.40 - {{1,0.63796,0.00,8.140,0,0.5380,6.0960,84.50,4.4619,4,307.0,21.00,380.02,10.26}}|20001|18.20 - {{1,0.62739,0.00,8.140,0,0.5380,5.8340,56.50,4.4986,4,307.0,21.00,395.62,8.47}}|20001|19.90 - {{1,1.05393,0.00,8.140,0,0.5380,5.9350,29.30,4.4986,4,307.0,21.00,386.85,6.58}}|20001| 23.10 - {{1,0.78420,0.00,8.140,0,0.5380,5.9900,81.70,4.2579,4,307.0,21.00,386.75,14.67}}|20001|17.50 - {{1,0.80271,0.00,8.140,0,0.5380,5.4560,36.60,3.7965,4,307.0,21.00,288.99,11.69}}|20001|20.20 - {{1,0.72580,0.00,8.140,0,0.5380,5.7270,69.50,3.7965,4,307.0,21.00,390.95,11.28}}|20001|18.20 - \. 
- - -- Generate a multilayer perception with a two hidden layers of 25 units - -- each. Use the x column as the independent variables, and use the class - -- column as the classification. Set the tolerance to 0 so that 500 - -- iterations will be run. Use a sigmoid activation function. - -- The model will be written to mlp_regress_result. - - DROP TABLE IF EXISTS mlp_regress, mlp_regress_summary, mlp_regress_standardization; - SELECT {schema_madlib}.{method}( - 'lin_housing', -- Source table - 'mlp_regress', -- Desination table - 'x', -- Input features - 'y', -- Dependent variable - ARRAY[25,25], -- Number of units per layer - 'learning_rate_init=0.001, - n_iterations=500, - lambda=0.001, - tolerance=0', -- Optimizer params - 'relu', -- Activation function - NULL, -- Default weight (1) - FALSE, -- No warm start - FALSE -- Not verbose - ); - SELECT * FROM mlp_regress; - - -- Use the n_tries optimizer param to learn the best of multiple models: - DROP TABLE IF EXISTS mlp_regress, mlp_regress_summary, mlp_regress_standardization; - SELECT {schema_madlib}.{method}( - 'lin_housing', -- Source table - 'mlp_regress', -- Desination table - 'x', -- Input features - 'y', -- Dependent variable - ARRAY[25,25], -- Number of units per layer - 'learning_rate_init=0.001, - n_iterations=50, - n_tries=3, - lambda=0.001, - tolerance=0', -- Optimizer params, with n_tries - 'relu', -- Activation function - NULL, -- Default weight (1) - FALSE, -- No warm start - FALSE -- Not verbose - ); - SELECT * FROM mlp_regress; - - -- Use the warm start param to improve the model present in mlp_regress. - -- Note that mlp_regress should not be dropped. 
- SELECT {schema_madlib}.{method}( - 'lin_housing', -- Source table - 'mlp_regress', -- Desination table - 'x', -- Input features - 'y', -- Dependent variable - ARRAY[25,25], -- Number of units per layer - 'learning_rate_init=0.001, - n_iterations=50, - n_tries=3 - lambda=0.001, - tolerance=0', - 'relu', -- Activation function - NULL, -- Default weight (1) - TRUE, -- Warm start - FALSE -- Verbose - ); - SELECT * FROM mlp_regress; - - -- Use the grouping feature to learn a different model for each zipcode: - DROP TABLE IF EXISTS mlp_regress_group, mlp_regress_group_summary; - DROP TABLE IF EXISTS mlp_regress_group_standardization; - SELECT {schema_madlib}.{method}( - 'lin_housing', -- Source table - 'mlp_regress_group', -- Desination table - 'x', -- Input features - 'y', -- Dependent variable - ARRAY[25,25], -- Number of units per layer - 'learning_rate_init=0.001, - n_iterations=50, - lambda=0.001, - tolerance=0', -- Optimizer params, with n_tries - 'relu', -- Activation function - NULL, -- Default weight (1) - FALSE, -- No warm start - FALSE, -- Not verbose - 'zipcode' -- Grouping column - ); - SELECT * FROM mlp_regress_group; - - -- n_tries and warm_start can be used with grouping too, similar to as - -- shown above without grouping. - - -- Pre-process source table so that the solver uses mini-batch gradient descent. - DROP TABLE IF EXISTS lin_housing_batch, lin_housing_batch_summary; - DROP TABLE IF EXISTS lin_housing_batch_standardization; - SELECT {schema_madlib}.minibatch_preprocessor( - 'lin_housing', -- Source table - 'lin_housing_batch', -- Destination table of preprocessor - 'y', -- Dependent variable - 'x', -- Independent variable - 10 -- Buffer size (optional) - ); - - -- Train MLP with lin_housing_batch, the solver automatically uses mini-batch - -- gradient descent. 
- DROP TABLE IF EXISTS mlp_regress_group, mlp_regress_group_summary; - DROP TABLE IF EXISTS mlp_regress_group_standardization; - SELECT {schema_madlib}.{method}( - 'lin_housing_batch', -- Source table - 'mlp_regress_batch', -- Desination table - 'independent_varname', -- Input features - 'dependent_varname', -- Dependent variable - ARRAY[25,25], -- Number of units per layer - 'learning_rate_init=0.001, - n_iterations=50, - lambda=0.001, - tolerance=0', - n_epochs=20, -- Optimizer params, with n_tries - 'relu', -- Activation function - NULL, -- Default weight (1) - FALSE, -- No warm start - FALSE -- Not verbose - ); - SELECT * FROM mlp_regress_batch; - """ - - classification_example = """ - -- Create input table - - CREATE TABLE iris_data( - id INTEGER, - attributes NUMERIC[], - class_text VARCHAR, - class INTEGER, - state VARCHAR - ); - - COPY iris_data (attributes, class_text, class, state) FROM STDIN NULL '?' DELIMITER '|'; - {{4.4,3.2,1.3,0.2}}|Iris_setosa|1|Alaska - {{5.0,3.5,1.6,0.6}}|Iris_setosa|1|Alaska - {{5.1,3.8,1.9,0.4}}|Iris_setosa|1|Alaska - {{4.8,3.0,1.4,0.3}}|Iris_setosa|1|Alaska - {{5.1,3.8,1.6,0.2}}|Iris_setosa|1|Alaska - {{5.7,2.8,4.5,1.3}}|Iris_versicolor|2|Alaska - {{6.3,3.3,4.7,1.6}}|Iris_versicolor|2|Alaska - {{4.9,2.4,3.3,1.0}}|Iris_versicolor|2|Alaska - {{6.6,2.9,4.6,1.3}}|Iris_versicolor|2|Alaska - {{5.2,2.7,3.9,1.4}}|Iris_versicolor|2|Alaska - {{5.0,2.0,3.5,1.0}}|Iris_versicolor|2|Alaska - {{4.8,3.0,1.4,0.1}}|Iris_setosa|1|Tennessee - {{4.3,3.0,1.1,0.1}}|Iris_setosa|1|Tennessee - {{5.8,4.0,1.2,0.2}}|Iris_setosa|1|Tennessee - {{5.7,4.4,1.5,0.4}}|Iris_setosa|1|Tennessee - {{5.4,3.9,1.3,0.4}}|Iris_setosa|1|Tennessee - {{6.0,2.9,4.5,1.5}}|Iris_versicolor|2|Tennessee - {{5.7,2.6,3.5,1.0}}|Iris_versicolor|2|Tennessee - {{5.5,2.4,3.8,1.1}}|Iris_versicolor|2|Tennessee - {{5.5,2.4,3.7,1.0}}|Iris_versicolor|2|Tennessee - {{5.8,2.7,3.9,1.2}}|Iris_versicolor|2|Tennessee - {{6.0,2.7,5.1,1.6}}|Iris_versicolor|2|Tennessee - \. 
- - - -- Generate a multilayer perception with a single hidden layer of 5 units. - -- Use the attributes column as the independent variables, and use the class - -- column as the classification. Set the tolerance to 0 so that 500 - -- iterations will be run. Use a hyperbolic tangent activation function. - -- The model will be written to mlp_model. - - DROP TABLE IF EXISTS mlp_model, mlp_model_summary, mlp_model_standardization; - SELECT madlib.mlp_classification( - 'iris_data', -- Source table - 'mlp_model', -- Destination table - 'attributes', -- Input features - 'class_text', -- Label - ARRAY[5], -- Number of units per layer - 'learning_rate_init=0.003, - n_iterations=500, - tolerance=0', -- Optimizer params - 'tanh', -- Activation function - NULL, -- Default weight (1) - FALSE, -- No warm start - FALSE -- Not verbose - ); - - SELECT * FROM mlp_model; - - -- Use the n_tries optimizer param to learn the best of multiple models: - DROP TABLE IF EXISTS mlp_model, mlp_model_summary, mlp_model_standardization; - SELECT madlib.mlp_classification( - 'iris_data', -- Source table - 'mlp_model', -- Destination table - 'attributes', -- Input features - 'class_text', -- Label - ARRAY[5], -- Number of units per layer - 'learning_rate_init=0.003, - n_iterations=500, - n_tries=3, - tolerance=0', -- Optimizer params, with n_tries - 'tanh', -- Activation function - NULL, -- Default weight (1) - FALSE, -- No warm start - FALSE -- Not verbose - ); - - -- Use the warm start param to improve the model present in mlp_model. - -- Note that mlp_model should not be dropped. 
- SELECT madlib.mlp_classification( - 'iris_data', -- Source table - 'mlp_model', -- Destination table - 'attributes', -- Input features - 'class_text', -- Label - ARRAY[5], -- Number of units per layer - 'learning_rate_init=0.003, - n_iterations=500, - tolerance=0', -- Optimizer params - 'tanh', -- Activation function - NULL, -- Default weight (1) - FALSE, -- Warm start - FALSE -- Not verbose - ); - - -- Use the grouping feature to learn a different model for each state: - DROP TABLE IF EXISTS mlp_model_group, mlp_model_group_summary; - DROP TABLE IF EXISTS mlp_model_group_standardization; - SELECT madlib.mlp_classification( - 'iris_data', -- Source table - 'mlp_model_group',-- Destination table - 'attributes', -- Input features - 'class_text', -- Label - ARRAY[5], -- Number of units per layer - 'learning_rate_init=0.003, - n_iterations=500, - tolerance=0', -- Optimizer params - 'tanh', -- Activation function - NULL, -- Default weight (1) - FALSE, -- No warm start - FALSE, -- Not verbose - 'state' -- Grouping column - ); - - -- n_tries and warm_start can be used with grouping too, similar to as - -- shown above without grouping. - - -- Pre-process source table so that the solver uses mini-batch gradient descent. - DROP TABLE IF EXISTS iris_data_batch, iris_data_batch_summary; - DROP TABLE IF EXISTS iris_data_batch_standardization; - SELECT {schema_madlib}.minibatch_preprocessor( - 'iris_data', -- Source table - 'iris_data_batch', -- Destination table of preprocessor - 'y', -- Dependent variable - 'x' -- Independent variable - ); - - -- Train MLP with lin_housing_batch, the solver automatically uses mini-batch - -- gradient descent. 
- DROP TABLE IF EXISTS mlp_model_batch, mlp_model_batch_summary; - DROP TABLE IF EXISTS mlp_model_batch_standardization; - SELECT madlib.mlp_classification( - 'iris_data_batch', -- Source table - 'mlp_model_batch', -- Destination table - 'attributes', -- Input features - 'class_text', -- Label - ARRAY[5], -- Number of units per layer - 'learning_rate_init=0.003, - n_iterations=500, - tolerance=0', -- Optimizer params - 'tanh', -- Activation function - NULL, -- Default weight (1) - FALSE, -- No warm start - FALSE -- Not verbose - ); - - """.format(**args) - example = classification_example if is_classification else regression_example optimizer_params = """ ------------------------------------------------------------------------------------------------ OPTIMIZER PARAMS @@ -1605,8 +1304,6 @@ def mlp_help(schema_madlib, message, is_classification): return summary elif message.lower() in ('usage', 'help', '?'): return usage - elif message.lower() == 'example': - return example elif message.lower() == 'optimizer_params': return optimizer_params return """ @@ -1629,10 +1326,7 @@ def mlp_predict_help(schema_madlib, message): functions. 
For more details on function usage: - SELECT {schema_madlib}.mlp_predict('usage') - - For a small example on using the function: - SELECT {schema_madlib}.mlp_predict('example')""".format(**args) + SELECT {schema_madlib}.mlp_predict('usage')""".format(**args) usage = """ --------------------------------------------------------------------------- @@ -1669,75 +1363,10 @@ def mlp_predict_help(schema_madlib, message): """.format(**args) - example = """ - -- See {schema_madlib}.mlp_classification('example') for test - -- and model tables - - -- Predict classes using - SELECT {schema_madlib}.mlp_predict( - 'mlp_model', -- Model table - 'iris_data', -- Test data table - 'id', -- Id column in test table - 'mlp_prediction', -- Output table for predictions - 'response' -- Output classes, not probabilities - ); - SELECT * FROM mlp_prediction; - - WITH total_count AS (SELECT count(*) AS c FROM iris_data) - SELECT count(*)/((SELECT c FROM total_count)::DOUBLE PRECISION) - AS train_accuracy - FROM - ( - SELECT iris_data.class_text AS actual_label, - mlp_prediction.estimated_class_text AS predicted_label - FROM mlp_prediction - INNER JOIN iris_data ON iris_data.id=mlp_prediction.id - ) q - WHERE q.actual_label=q.predicted_label; - - -- Predict using models specific to states: - SELECT {schema_madlib}.mlp_predict( - 'mlp_model_group', -- Grouping based model table - 'iris_data', -- Test data table - 'id', -- Id column in test table - 'mlp_prediction', -- Output table for predictions - 'response' -- Output classes, not probabilities - ); - SELECT * FROM mlp_prediction; - - -- See {schema_madlib}.mlp_regression('example') for test - -- and model tables. 
- - -- Predict using the regression model: - DROP TABLE IF EXISTS mlp_regress_prediction; - SELECT madlib.mlp_predict( - 'mlp_regress', -- Model table - 'lin_housing', -- Test data table - 'id', -- Id column in test table - 'mlp_regress_prediction', -- Output table for predictions - 'response' -- Output values, not probabilities - ); - SELECT * FROM mlp_regress_prediction; - - -- Predict using the zipcode specific regression models: - DROP TABLE IF EXISTS mlp_regress_prediction; - SELECT madlib.mlp_predict( - 'mlp_regress_group', -- Grouping based model table - 'lin_housing', -- Test data table - 'id', -- Id column in test table - 'mlp_regress_prediction', -- Output table for predictions - 'response' -- Output values, not probabilities - ); - SELECT * FROM mlp_regress_prediction; - - """.format(**args) - if not message: return summary elif message.lower() in ('usage', 'help', '?'): return usage - elif message.lower() == 'example': - return example return """ No such option. Use "SELECT {schema_madlib}.mlp_predict()" for help. """.format(**args) diff --git a/src/ports/postgres/modules/elastic_net/elastic_net.py_in b/src/ports/postgres/modules/elastic_net/elastic_net.py_in index 555840a35..c6dec53ea 100644 --- a/src/ports/postgres/modules/elastic_net/elastic_net.py_in +++ b/src/ports/postgres/modules/elastic_net/elastic_net.py_in @@ -62,133 +62,8 @@ def elastic_net_help(schema_madlib, family_or_optimizer=None, **kwargs): -- Run: SELECT {schema_madlib}.elastic_net_train('predict'); to see how to predict. - -- - Run: SELECT {schema_madlib}.elastic_net_train('example'); - to see some examples. 
""".format(schema_madlib=schema_madlib) - if (family_or_optimizer.lower() in ('example', 'examples')): - return """ - ---------------------------------------------------------------- - EXAMPLE - ---------------------------------------------------------------- - Create an input data set: - DROP TABLE IF EXISTS houses; - CREATE TABLE houses ( id INT, - tax INT, - bedroom INT, - bath FLOAT, - price INT, - size INT, - lot INT, - zipcode INT); - INSERT INTO houses VALUES - (1, 590, 2, 1, 50000, 770, 22100, 94301), - (2, 1050, 3, 2, 85000, 1410, 12000, 94301), - (3, 20, 3, 1, 22500, 1060, 3500, 94301), - (4, 870, 2, 2, 90000, 1300, 17500, 94301), - (5, 1320, 3, 2, 133000, 1500, 30000, 94301), - (6, 1350, 2, 1, 90500, 820, 25700, 94301), - (7, 2790, 3, 2.5, 260000, 2130, 25000, 94301), - (8, 680, 2, 1, 142500, 1170, 22000, 94301), - (9, 1840, 3, 2, 160000, 1500, 19000, 94301), - (10, 3680, 4, 2, 240000, 2790, 20000, 94301), - (11, 1660, 3, 1, 87000, 1030, 17500, 94301), - (12, 1620, 3, 2, 118600, 1250, 20000, 94301), - (13, 3100, 3, 2, 140000, 1760, 38000, 94301), - (14, 2070, 2, 3, 148000, 1550, 14000, 94301), - (15, 650, 3, 1.5, 65000, 1450, 12000, 94301), - (16, 770, 2, 2, 91000, 1300, 17500, 76010), - (17, 1220, 3, 2, 132300, 1500, 30000, 76010), - (18, 1150, 2, 1, 91100, 820, 25700, 76010), - (19, 2690, 3, 2.5, 260011, 2130, 25000, 76010), - (20, 780, 2, 1, 141800, 1170, 22000, 76010), - (21, 1910, 3, 2, 160900, 1500, 19000, 76010), - (22, 3600, 4, 2, 239000, 2790, 20000, 76010), - (23, 1600, 3, 1, 81010, 1030, 17500, 76010), - (24, 1590, 3, 2, 117910, 1250, 20000, 76010), - (25, 3200, 3, 2, 141100, 1760, 38000, 76010), - (26, 2270, 2, 3, 148011, 1550, 14000, 76010), - (27, 750, 3, 1.5, 66000, 1450, 12000, 76010); - - Train a model: - DROP TABLE IF EXISTS houses_en, houses_en_summary; - SELECT {schema_madlib}.elastic_net_train( - 'houses', -- source table - 'houses_en', -- result table - 'price', -- dependent variable - 'array[tax, bath, size]', -- independent 
variable - 'gaussian', -- regression family - 0.5, -- alpha value - 0.1, -- lambda value - TRUE, -- standardize - NULL, -- grouping column(s) - 'fista', -- optimizer - '', -- optimizer parameters - NULL, -- excluded columns - 10000, -- maximum iterations - 1e-6 -- tolerance value - ); - - View the resulting model: - \\x on - SELECT * FROM houses_en; - \\x off - - Use the prediction function to evaluate residuals: - SELECT id, price, predict, price - predict AS residual - FROM ( - SELECT - houses.*, - {schema_madlib}.elastic_net_gaussian_predict( - m.coef_all, - m.intercept, - ARRAY[tax,bath,size] - ) AS predict - FROM houses, houses_en m) s - ORDER BY id; - - Additional Example (with grouping): - DROP TABLE IF EXISTS houses_en1, houses_en1_summary; - SELECT {schema_madlib}.elastic_net_train( 'houses', - 'houses_en1', - 'price', - 'array[tax, bath, size]', - 'gaussian', - 1, - 30000, - TRUE, - 'zipcode', - 'fista', - '', - NULL, - 10000, - 1e-6 - ); - - View the resulting model and see a separate model for each group: - \\x on - SELECT * FROM houses_en1; - \\x off - - Use the prediction function to evaluate residuals: - SELECT {schema_madlib}.elastic_net_predict( - 'houses_en1', -- model table - 'houses', -- new source data table - 'id', -- unique ID associated with each row - 'houses_en1_prediction' -- table to store prediction result - ); - - View the results: - SELECT houses.id, - houses.price, - houses_en1_prediction.prediction, - houses.price - houses_en1_prediction.prediction AS residual - FROM houses_en1_prediction, houses - WHERE houses.id=houses_en1_prediction.id; - - """ - if (family_or_optimizer.lower() in ('usage', 'help', '?')): return """ ---------------------------------------------------------------- diff --git a/src/ports/postgres/modules/glm/glm.py_in b/src/ports/postgres/modules/glm/glm.py_in index 49dd9c898..718837e96 100644 --- a/src/ports/postgres/modules/glm/glm.py_in +++ b/src/ports/postgres/modules/glm/glm.py_in @@ -378,10 +378,6 @@ Function 
to fit a generalized linear model, relating responses to linear combina of predictor variables. For details on function usage: - SELECT {schema_madlib}.glm('usage') - -For a small example on using the function: - SELECT {schema_madlib}.glm('example') """ elif message in ['usage', 'help', '?']: @@ -449,88 +445,6 @@ A summary table named _summary is also created at the same time, whic total_rows_processed bigint, -- total numbers of rows processed total_rows_skipped bigint, -- total numbers of rows skipped """ - elif message in ['example', 'examples']: - - help_string = """ -CREATE TABLE warpbreaks( - id serial, - breaks integer, - wool char(1), - tension char(1) -); -INSERT INTO warpbreaks(breaks, wool, tension) VALUES -(26, 'A', 'L'), -(30, 'A', 'L'), -(54, 'A', 'L'), -(25, 'A', 'L'), -(70, 'A', 'L'), -(52, 'A', 'L'), -(51, 'A', 'L'), -(26, 'A', 'L'), -(67, 'A', 'L'), -(18, 'A', 'M'), -(21, 'A', 'M'), -(29, 'A', 'M'), -(17, 'A', 'M'), -(12, 'A', 'M'), -(18, 'A', 'M'), -(35, 'A', 'M'), -(30, 'A', 'M'), -(36, 'A', 'M'), -(36, 'A', 'H'), -(21, 'A', 'H'), -(24, 'A', 'H'), -(18, 'A', 'H'), -(10, 'A', 'H'), -(43, 'A', 'H'), -(28, 'A', 'H'), -(15, 'A', 'H'), -(26, 'A', 'H'), -(27, 'B', 'L'), -(14, 'B', 'L'), -(29, 'B', 'L'), -(19, 'B', 'L'), -(29, 'B', 'L'), -(31, 'B', 'L'), -(41, 'B', 'L'), -(20, 'B', 'L'), -(44, 'B', 'L'), -(42, 'B', 'M'), -(26, 'B', 'M'), -(19, 'B', 'M'), -(16, 'B', 'M'), -(39, 'B', 'M'), -(28, 'B', 'M'), -(21, 'B', 'M'), -(39, 'B', 'M'), -(29, 'B', 'M'), -(20, 'B', 'H'), -(21, 'B', 'H'), -(24, 'B', 'H'), -(17, 'B', 'H'), -(13, 'B', 'H'), -(15, 'B', 'H'), -(15, 'B', 'H'), -(16, 'B', 'H'), -(28, 'B', 'H'); - -SELECT create_indicator_variables('warpbreaks', 'warpbreaks_dummy', 'wool,tension'); - --- Drop output tables before calling the function -DROP TABLE IF EXISTS glm_model; -DROP TABLE IF EXISTS glm_model_summary; - -SELECT glm('warpbreaks_dummy', - 'glm_model', - 'breaks', - 'ARRAY[1.0,"wool_B","tension_M", "tension_H"]', - 'family=poisson, 
link=log', - NULL, - 'max_iter=100,optimizer=irls,tolerance=1e-6', - true); - -SELECT * from glm_model; - """ else: help_string = "No such option. Use {schema_madlib}.glm('help')" @@ -561,9 +475,6 @@ coefficients should match the number of variables in the new predictors. For details on function usage: SELECT {schema_madlib}.glm_predict('usage') -For a small example on using the function: - SELECT {schema_madlib}.glm_predict('example') - For prediction functions related to specific distributions: SELECT {schema_madlib}.glm_predict_poisson('help') SELECT {schema_madlib}.glm_predict_binomial('help') @@ -586,92 +497,6 @@ SELECT {schema_madlib}.glm_predict( The output is a table with one column which gives the estimated conditional means for the new predictors. """ - elif message in ['example', 'examples']: - - help_string = """ -DROP TABLE IF EXISTS warpbreaks, warpbreaks_dummy, glm_model, glm_model_summary; -CREATE TABLE warpbreaks( - id serial, - breaks integer, - wool char(1), - tension char(1) -); -INSERT INTO warpbreaks(breaks, wool, tension) VALUES -(26, 'A', 'L'), -(30, 'A', 'L'), -(54, 'A', 'L'), -(25, 'A', 'L'), -(70, 'A', 'L'), -(52, 'A', 'L'), -(51, 'A', 'L'), -(26, 'A', 'L'), -(67, 'A', 'L'), -(18, 'A', 'M'), -(21, 'A', 'M'), -(29, 'A', 'M'), -(17, 'A', 'M'), -(12, 'A', 'M'), -(18, 'A', 'M'), -(35, 'A', 'M'), -(30, 'A', 'M'), -(36, 'A', 'M'), -(36, 'A', 'H'), -(21, 'A', 'H'), -(24, 'A', 'H'), -(18, 'A', 'H'), -(10, 'A', 'H'), -(43, 'A', 'H'), -(28, 'A', 'H'), -(15, 'A', 'H'), -(26, 'A', 'H'), -(27, 'B', 'L'), -(14, 'B', 'L'), -(29, 'B', 'L'), -(19, 'B', 'L'), -(29, 'B', 'L'), -(31, 'B', 'L'), -(41, 'B', 'L'), -(20, 'B', 'L'), -(44, 'B', 'L'), -(42, 'B', 'M'), -(26, 'B', 'M'), -(19, 'B', 'M'), -(16, 'B', 'M'), -(39, 'B', 'M'), -(28, 'B', 'M'), -(21, 'B', 'M'), -(39, 'B', 'M'), -(29, 'B', 'M'), -(20, 'B', 'H'), -(21, 'B', 'H'), -(24, 'B', 'H'), -(17, 'B', 'H'), -(13, 'B', 'H'), -(15, 'B', 'H'), -(15, 'B', 'H'), -(16, 'B', 'H'), -(28, 'B', 'H'); - -SELECT 
create_indicator_variables('warpbreaks', 'warpbreaks_dummy', 'wool,tension'); - --- Drop output tables before calling the function -DROP TABLE IF EXISTS glm_model; -DROP TABLE IF EXISTS glm_model_summary; - -SELECT glm('warpbreaks_dummy', - 'glm_model', - 'breaks', - 'ARRAY[1.0,"wool_B","tension_M", "tension_H"]', - 'family=poisson, link=log', - NULL, - 'max_iter=100,optimizer=irls,tolerance=1e-6', - true); - -SELECT * from glm_model; -SELECT w.id, madlib.glm_predict(coef, ARRAY[1, "wool_B", "tension_M", "tension_H"]::float8[],'log') as mu -FROM warpbreaks_dummy w, glm_model m -ORDER BY w.id; - """ else: help_string = "No such option. Use {schema_madlib}.glm_predict('help')" @@ -709,9 +534,6 @@ mean for the new predictors, rounded to the nearest integral value. For more details on glm predict functions: SELECT {schema_madlib}.glm_predict('usage') - -For examples: - SELECT {schema_madlib}.glm_predict('example') """ else: help_string = "No such option. Use {schema_madlib}.glm_predict_poisson('help')" @@ -747,9 +569,6 @@ of the dependent variable as a boolean value. For more details on glm predict functions: SELECT {schema_madlib}.glm_predict('usage') - -For examples: - SELECT {schema_madlib}.glm_predict('example') """ else: help_string = "No such option. Use {schema_madlib}.glm_predict_binomial('help')" diff --git a/src/ports/postgres/modules/glm/multinom.py_in b/src/ports/postgres/modules/glm/multinom.py_in index f5469d145..1293f5758 100644 --- a/src/ports/postgres/modules/glm/multinom.py_in +++ b/src/ports/postgres/modules/glm/multinom.py_in @@ -369,9 +369,6 @@ Currently only logit link functions are supported. 
For more details on function usage: SELECT {schema_madlib}.multinom('usage') - -For a small example on using the function: - SELECT {schema_madlib}.multinom('example') """ elif message in ['usage', 'help', '?']: @@ -422,89 +419,6 @@ A summary table named _summary is also created at the same time, whic total_rows_processed bigint, -- total numbers of rows processed total_rows_skipped bigint, -- total numbers of rows skipped """ - - elif message in ['example', 'examples']: - - help_string = """ - -DROP TABLE IF EXISTS test3; -CREATE TABLE test3 ( - feat1 INTEGER, - feat2 INTEGER, - cat INTEGER -); -INSERT INTO test3(feat1, feat2, cat) VALUES -(1,35,1), -(2,33,0), -(3,39,1), -(1,37,1), -(2,31,1), -(3,36,0), -(2,36,1), -(2,31,1), -(2,41,1), -(2,37,1), -(1,44,1), -(3,33,2), -(1,31,1), -(2,44,1), -(1,35,1), -(1,44,0), -(1,46,0), -(2,46,1), -(2,46,2), -(3,49,1), -(2,39,0), -(2,44,1), -(1,47,1), -(1,44,1), -(1,37,2), -(3,38,2), -(1,49,0), -(2,44,0), -(3,61,2), -(1,65,2), -(3,67,1), -(3,65,2), -(1,65,2), -(2,67,2), -(1,65,2), -(1,62,2), -(3,52,2), -(3,63,2), -(2,59,2), -(3,65,2), -(2,59,0), -(3,67,2), -(3,67,2), -(3,60,2), -(3,67,2), -(3,62,2), -(2,54,2), -(3,65,2), -(3,62,2), -(2,59,2), -(3,60,2), -(3,63,2), -(3,65,2), -(2,63,1), -(2,67,2), -(2,65,2), -(2,62,2); - --- Run the multilogistic regression function. -DROP TABLE IF EXISTS test3_output; -DROP TABLE IF EXISTS test3_output_summary; -SELECT madlib.multinom('test3', - 'test3_output', - 'cat', - 'ARRAY[1, feat1, feat2]', - '0', - 'logit' - ); - -SELECT * from test3_output; - """ else: help_string = "No such option. Use {schema_madlib}.multinom('help')" @@ -732,9 +646,6 @@ a new set of predictors. 
For more details on function usage: SELECT {schema_madlib}.multinom_predict('usage') - -For a small example on using the function: - SELECT {schema_madlib}.multinom_predict('example') """ elif message in ['usage', 'help', '?']: @@ -757,14 +668,6 @@ SELECT {schema_madlib}.multinom_predict( The output is a table with one column which gives the predicted category when predict_type is response and probability when predict_type is probability. """ - elif message in ['example', 'examples']: - help_string = """ --- run the training example first -ALTER TABLE test3 ADD COLUMN id SERIAL; -DROP TABLE IF EXISTS test3_predict; -SELECT multinom_predict('test3_out', 'test3', 'test3_predict', 'response', 'id'); -SELECT * FROM test3_predict; - """ else: help_string = "No such option. Use {schema_madlib}.multinom_predict('help')" diff --git a/src/ports/postgres/modules/glm/ordinal.py_in b/src/ports/postgres/modules/glm/ordinal.py_in index ffa7ccfd7..bbc33a730 100644 --- a/src/ports/postgres/modules/glm/ordinal.py_in +++ b/src/ports/postgres/modules/glm/ordinal.py_in @@ -360,9 +360,6 @@ Currently logit and probit link functions are supported. 
For more details on function usage: SELECT {schema_madlib}.ordinal('usage') - -For a small example on using the function: - SELECT {schema_madlib}.ordinal('example') """ elif message in ['usage', 'help', '?']: @@ -415,89 +412,6 @@ A summary table named _summary is also created at the same time, whic total_rows_processed bigint, -- total numbers of rows processed total_rows_skipped bigint, -- total numbers of rows skipped """ - - elif message in ['example', 'examples']: - - help_string = """ - -DROP TABLE IF EXISTS test3; -CREATE TABLE test3 ( - feat1 INTEGER, - feat2 INTEGER, - cat INTEGER -); -INSERT INTO test3(feat1, feat2, cat) VALUES -(1,35,1), -(2,33,0), -(3,39,1), -(1,37,1), -(2,31,1), -(3,36,0), -(2,36,1), -(2,31,1), -(2,41,1), -(2,37,1), -(1,44,1), -(3,33,2), -(1,31,1), -(2,44,1), -(1,35,1), -(1,44,0), -(1,46,0), -(2,46,1), -(2,46,2), -(3,49,1), -(2,39,0), -(2,44,1), -(1,47,1), -(1,44,1), -(1,37,2), -(3,38,2), -(1,49,0), -(2,44,0), -(3,61,2), -(1,65,2), -(3,67,1), -(3,65,2), -(1,65,2), -(2,67,2), -(1,65,2), -(1,62,2), -(3,52,2), -(3,63,2), -(2,59,2), -(3,65,2), -(2,59,0), -(3,67,2), -(3,67,2), -(3,60,2), -(3,67,2), -(3,62,2), -(2,54,2), -(3,65,2), -(3,62,2), -(2,59,2), -(3,60,2), -(3,63,2), -(3,65,2), -(2,63,1), -(2,67,2), -(2,65,2), -(2,62,2); - --- Run the ordinal logistic regression function. -DROP TABLE IF EXISTS test3_output; -DROP TABLE IF EXISTS test3_output_summary; -SELECT madlib.ordinal('test3', - 'test3_output', - 'cat', - 'ARRAY[feat1, feat2]', - '0<1<2', - 'logit' - ); - -SELECT * from test3_output; - """ else: help_string = "No such option. Use {schema_madlib}.ordinal('help')" @@ -818,9 +732,6 @@ a new set of predictors. 
For more details on function usage: SELECT {schema_madlib}.ordinal_predict('usage') - -For a small example on using the function: - SELECT {schema_madlib}.ordinal_predict('example') """ elif message in ['usage', 'help', '?']: @@ -842,14 +753,6 @@ SELECT {schema_madlib}.ordinal_predict( The output is a table with one column which gives the predicted category when predict_type is response and probability when predict_type is probability. """ - elif message in ['example', 'examples']: - help_string = """ --- run the training example first -ALTER TABLE test3 ADD COLUMN id SERIAL; -DROP TABLE IF EXISTS test3_predict; -SELECT ordinal_predict('test3_out', 'test3', 'test3_predict', 'probability'); -SELECT * FROM test3_predict; - """ else: help_string = "No such option. Use {schema_madlib}.ordinal_predict('help')" diff --git a/src/ports/postgres/modules/graph/apsp.py_in b/src/ports/postgres/modules/graph/apsp.py_in index d0bba005f..4da2fd289 100644 --- a/src/ports/postgres/modules/graph/apsp.py_in +++ b/src/ports/postgres/modules/graph/apsp.py_in @@ -659,78 +659,6 @@ every group and has the following columns: will not exist and the table will have a single row. - path (ARRAY) : The shortest path from the source vertex to the destination vertex. -""" - elif message.lower() in ("example", "examples"): - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- --- Create a graph, represented as vertex and edge tables. 
-DROP TABLE IF EXISTS vertex,edge,out,out_summary,out_path; -CREATE TABLE vertex( - id INTEGER - ); -CREATE TABLE edge( - src INTEGER, - dest INTEGER, - weight DOUBLE PRECISION -); - -INSERT INTO vertex VALUES -(0), -(1), -(2), -(3), -(4), -(5), -(6), -(7) -; -INSERT INTO edge VALUES -(0, 1, 1), -(0, 2, 1), -(0, 4, 10), -(1, 2, 2), -(1, 3, 10), -(2, 3, 1), -(2, 5, 1), -(2, 6, 3), -(3, 0, 1), -(4, 0, -2), -(5, 6, 1), -(6, 7, 1) -; - --- Compute the apsp: -DROP TABLE IF EXISTS out; -SELECT madlib.graph_apsp( - 'vertex', -- Vertex table - 'id', -- Vertix id column - 'edge', -- Edge table - 'src=src, dest=dest, weight=weight', -- Comma delimited string of edge arguments - 'out' -- Output table of apsp -); --- View the apsp costs for every vertex: -SELECT * FROM out ORDER BY src, dest; - --- View the actual shortest path for a vertex: -SELECT graph_apsp_get_path('out',0, 5,'out_path'); -SELECT * FROM out_path; - --- Create a graph with 2 groups: -DROP TABLE IF EXISTS edge_gr; -CREATE TABLE edge_gr AS -( - SELECT *, 0 AS grp FROM edge - UNION - SELECT *, 1 AS grp FROM edge WHERE src < 6 AND dest < 6 -); -INSERT INTO edge_gr VALUES -(4,5,-20,1); - --- Find apsp for all groups: -DROP TABLE IF EXISTS out_gr, out_gr_summary; -SELECT graph_apsp('vertex',NULL,'edge_gr',NULL,'out_gr','grp'); """ else: help_string = "No such option. Use {schema_madlib}.graph_apsp()" diff --git a/src/ports/postgres/modules/graph/bfs.py_in b/src/ports/postgres/modules/graph/bfs.py_in index 0504d91cd..f0c07ac64 100644 --- a/src/ports/postgres/modules/graph/bfs.py_in +++ b/src/ports/postgres/modules/graph/bfs.py_in @@ -427,64 +427,6 @@ grouping columns): - parent : The parent of this vertex in BFS traversal of the graph from source_vertex. Will use 'parent' for column naming. For the case where vertex_id = source_vertex, the value for parent is NULL. 
-""" - elif message.lower() in ("example", "examples"): - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- --- Create a graph, represented as vertex and edge tables. -DROP TABLE IF EXISTS vertex, edge; -CREATE TABLE vertex( - id INTEGER - ); -CREATE TABLE edge( - src INTEGER, - dest INTEGER - ); -INSERT INTO vertex VALUES -(0), -(1), -(2), -(3), -(4), -(5), -(6), -(7), -(8), -(9), -(10), -(11) -; -INSERT INTO edge VALUES -(0, 5), -(1, 0), -(1, 3), -(2, 6), -(3, 4), -(3, 5), -(4, 2), -(8, 9), -(9, 10), -(9, 11), -(10, 8) -; - --- Traverse undirected graph from vertex 3: -DROP TABLE IF EXISTS out, out_summary; -SELECT madlib.graph_bfs( - 'vertex', -- Vertex table - NULL, -- Vertix id column (NULL means use default naming) - 'edge', -- Edge table - NULL, -- Edge arguments (NULL means use default naming) - 3, -- Source vertex for BFS - 'out' -- Output table of nodes reachable from source_vertex - ); - -- Default values used for the other arguments -SELECT * FROM out ORDER BY dist,id; - -SELECT * FROM out_summary; - """ else: help_string = "No such option. Use {schema_madlib}.graph_bfs()" diff --git a/src/ports/postgres/modules/graph/hits.py_in b/src/ports/postgres/modules/graph/hits.py_in index 5299a4638..23b6b1cb7 100644 --- a/src/ports/postgres/modules/graph/hits.py_in +++ b/src/ports/postgres/modules/graph/hits.py_in @@ -525,87 +525,7 @@ number of iterations required for convergence. It is named by adding the suffix '_summary' to the 'out_table' parameter. """ else: - if message is not None and \ - message.lower() in ("example", "examples"): - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- --- Create a graph, represented as vertex and edge tables. 
-DROP TABLE IF EXISTS vertex, edge; -CREATE TABLE vertex( - id INTEGER - ); -CREATE TABLE edge( - src INTEGER, - dest INTEGER, - user_id INTEGER - ); -INSERT INTO vertex VALUES -(0), -(1), -(2), -(3), -(4), -(5), -(6); -INSERT INTO edge VALUES -(0, 1, 1), -(0, 2, 1), -(0, 4, 1), -(1, 2, 1), -(1, 3, 1), -(2, 3, 1), -(2, 5, 1), -(2, 6, 1), -(3, 0, 1), -(4, 0, 1), -(5, 6, 1), -(6, 3, 1), -(0, 1, 2), -(0, 2, 2), -(0, 4, 2), -(1, 2, 2), -(1, 3, 2), -(2, 3, 2), -(3, 0, 2), -(4, 0, 2), -(5, 6, 2), -(6, 3, 2); - --- Compute the HITS score: -DROP TABLE IF EXISTS hits_out, hits_out_summary; -SELECT {schema_madlib}.hits( - 'vertex', -- Vertex table - 'id', -- Vertex id column - 'edge', -- Edge table - 'src=src, dest=dest', -- Comma delimited string of edge arguments - 'hits_out'); -- Output table of HITS --- View the authority and hub scores of all vertices, ordered by their id. -SELECT * FROM hits_out ORDER BY id; - --- Compute the HITS score of nodes associated with each user: -DROP TABLE IF EXISTS hits_out, hits_out_summary; -SELECT {schema_madlib}.hits( - 'vertex', -- Vertex table - 'id', -- Vertix id column - 'edge', -- Edge table - 'src=src, dest=dest', -- Comma delimted string of edge arguments - 'hits_out', -- Output table of HITS - NULL, -- Default max_iter - NULL, -- Threshold - 'user_id'); -- Grouping column - --- View the authority and hub scores of all vertices, ordered by the grouping column. -SELECT * FROM hits_out ORDER BY user_id, id; - --- View the summary table to find the number of iterations required for --- convergence. -SELECT * FROM hits_out_summary; - -""" - else: - help_string = """ + help_string = """ ---------------------------------------------------------------------------- SUMMARY ---------------------------------------------------------------------------- @@ -614,10 +534,6 @@ all the vertices in the graph. 
-- For an overview on usage, run: SELECT {schema_madlib}.hits('usage'); - -For some examples, run: -SELECT {schema_madlib}.hits('example') --- """ return help_string.format(schema_madlib=schema_madlib) diff --git a/src/ports/postgres/modules/graph/measures.py_in b/src/ports/postgres/modules/graph/measures.py_in index eb79adbf0..1f0752245 100644 --- a/src/ports/postgres/modules/graph/measures.py_in +++ b/src/ports/postgres/modules/graph/measures.py_in @@ -368,58 +368,6 @@ def graph_vertex_degrees(schema_madlib, vertex_table, vertex_id, edge_table, # All help functions # ----------------------------------------------------------------------- - -CREATE_GRAPH_EXAMPLE = """ --- Create a graph, represented as vertex and edge tables. -DROP TABLE IF EXISTS vertex,edge,out,out_summary,out_path; -CREATE TABLE vertex( - id INTEGER - ); -CREATE TABLE edge( - src INTEGER, - dest INTEGER, - weight DOUBLE PRECISION -); - -INSERT INTO vertex VALUES -(0), -(1), -(2), -(3), -(4), -(5), -(6), -(7) -; -INSERT INTO edge VALUES -(0, 1, 1), -(0, 2, 1), -(0, 4, 10), -(1, 2, 2), -(1, 3, 10), -(2, 3, 1), -(2, 5, 1), -(2, 6, 3), -(3, 0, 1), -(4, 0, -2), -(5, 6, 1), -(6, 7, 1) -; -""" - -COMPUTE_APSP_EXAMPLE = """ --- Compute the apsp: -DROP TABLE IF EXISTS out; -SELECT graph_apsp( - 'vertex', -- Vertex table - 'id', -- Vertix id column - 'edge', -- Edge table - 'src=src, dest=dest, weight=weight', -- Comma delimited string of edge arguments - 'out' -- Output table of apsp -); -""" - - def graph_closeness_help(schema_madlib, message, **kwargs): intro = """ @@ -460,26 +408,11 @@ the following columns (in addition to the grouping columns): vertices. - k_degree : Total number of reachable vertices. 
- """ - elif message.lower() in ['example', 'examples']: - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- -{create_graph_example} -{compute_apsp_example} - --# Compute the closeness measure for all nodes: -DROP TABLE IF EXISTS out_closeness; -SELECT {schema_madlib}.graph_closeness('out_apsp', 'out_closeness'); -SELECT * FROM out_closeness; """ else: help_string = "No such option. Use {schema_madlib}.graph_closeness()" - return help_string.format(schema_madlib=schema_madlib, - create_graph_example=CREATE_GRAPH_EXAMPLE, - compute_apsp_example=COMPUTE_APSP_EXAMPLE) + return help_string.format(schema_madlib=schema_madlib) # ------------------------------------------------------------------------- @@ -511,25 +444,10 @@ SELECT {schema_madlib}.graph_diameter( It contains a row for every group, the diameter value and the two vertices that are the farthest apart. """ - elif message.lower() in ['example', 'examples']: - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- -{create_graph_example} -{compute_apsp_example} - --# Compute the diameter measure for the graph: -DROP TABLE IF EXISTS out_diameter; -SELECT {schema_madlib}.graph_diameter('out_apsp', 'out_diameter'); -SELECT * FROM out_diameter; - """ else: help_string = "No such option. 
Use {schema_madlib}.graph_diameter()" - return help_string.format(schema_madlib=schema_madlib, - create_graph_example=CREATE_GRAPH_EXAMPLE, - compute_apsp_example=COMPUTE_APSP_EXAMPLE) + return help_string.format(schema_madlib=schema_madlib) # ------------------------------------------------------------------------- @@ -562,25 +480,10 @@ SELECT {schema_madlib}.graph_avg_path_length( ---------------------------------------------------------------------------- It contains a row for every group, and the average path value. """ - elif message.lower() in ['example', 'examples']: - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- -{create_graph_example} -{compute_apsp_example} - --# Compute the average path length for the graph: -DROP TABLE IF EXISTS out_avg_path_length; -SELECT {schema_madlib}.graph_avg_path_length('out_apsp', 'out_avg_path_length'); -SELECT * FROM out_avg_path_length; - """ else: help_string = "No such option. Use {schema_madlib}.graph_avg_path_length()" - return help_string.format(schema_madlib=schema_madlib, - create_graph_example=CREATE_GRAPH_EXAMPLE, - compute_apsp_example=COMPUTE_APSP_EXAMPLE) + return help_string.format(schema_madlib=schema_madlib) # ------------------------------------------------------------------------- @@ -618,26 +521,9 @@ It contains a row for every vertex of every group and has the following columns - outdegree : Number of outgoing edges from the vertex. 
""" - elif message.lower() in ['example', 'examples']: - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- -{create_graph_example} - -DROP TABLE IF EXISTS degrees; -SELECT {schema_madlib}.graph_vertex_degrees( - 'vertex', -- Vertex table - 'id', -- Vertix id column (NULL means use default naming) - 'edge', -- Edge table - 'src=src, dest=dest, weight=weight', - 'degrees'); -- Output table of shortest paths -SELECT * FROM degrees ORDER BY id; - """ else: help_string = "No such option. Use {schema_madlib}.graph_vertex_degrees()" return help_string.format(schema_madlib=schema_madlib, - create_graph_example=CREATE_GRAPH_EXAMPLE, graph_usage=usage_text) # ------------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/graph/pagerank.py_in b/src/ports/postgres/modules/graph/pagerank.py_in index 71cddd24e..e39d21623 100644 --- a/src/ports/postgres/modules/graph/pagerank.py_in +++ b/src/ports/postgres/modules/graph/pagerank.py_in @@ -765,110 +765,7 @@ number of iterations required for convergence. It is named by adding the suffix '_summary' to the 'out_table' parameter. """ else: - if message is not None and \ - message.lower() in ("example", "examples"): - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- --- Create a graph, represented as vertex and edge tables. 
-DROP TABLE IF EXISTS vertex, edge; -CREATE TABLE vertex( - id INTEGER - ); -CREATE TABLE edge( - src INTEGER, - dest INTEGER, - user_id INTEGER - ); -INSERT INTO vertex VALUES -(0), -(1), -(2), -(3), -(4), -(5), -(6); -INSERT INTO edge VALUES -(0, 1, 1), -(0, 2, 1), -(0, 4, 1), -(1, 2, 1), -(1, 3, 1), -(2, 3, 1), -(2, 5, 1), -(2, 6, 1), -(3, 0, 1), -(4, 0, 1), -(5, 6, 1), -(6, 3, 1), -(0, 1, 2), -(0, 2, 2), -(0, 4, 2), -(1, 2, 2), -(1, 3, 2), -(2, 3, 2), -(3, 0, 2), -(4, 0, 2), -(5, 6, 2), -(6, 3, 2); - --- Compute the PageRank: -DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary; -SELECT madlib.pagerank( - 'vertex', -- Vertex table - 'id', -- Vertix id column - 'edge', -- Edge table - 'src=src, dest=dest', -- Comma delimted string of edge arguments - 'pagerank_out'); -- Output table of PageRank - --- View the PageRank of all vertices, sorted by their scores. -SELECT * FROM pagerank_out ORDER BY pagerank DESC; --- View the summary table to find the number of iterations required for --- convergence. -SELECT * FROM pagerank_out_summary; - --- Compute PageRank of nodes associated with each user: -DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary; -SELECT madlib.pagerank( - 'vertex', -- Vertex table - 'id', -- Vertix id column - 'edge', -- Edge table - 'src=src, dest=dest', -- Comma delimted string of edge arguments - 'pagerank_out', -- Output table of PageRank - NULL, -- Default damping factor - NULL, -- Default max_iter - 0.00000001, -- Threshold - 'user_id'); -- Grouping column - --- View the PageRank of all vertices, sorted by their scores. -SELECT * FROM pagerank_out ORDER BY user_id, pagerank DESC; --- View the summary table to find the number of iterations required for --- convergence for each group. 
-SELECT * FROM pagerank_out_summary; - --- Compute the Personalized PageRank: -DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary; -SELECT madlib.pagerank( - 'vertex', -- Vertex table - 'id', -- Vertix id column - 'edge', -- Edge table - 'src=src, dest=dest', -- Comma delimted string of edge arguments - 'pagerank_out', -- Output table of PageRank - NULL, -- Default damping factor (0.85) - NULL, -- Default max iters (100) - NULL, -- Default Threshold - NULL, -- No Grouping - ARRAY[2,4]); -- Personlized Nodes - --- View the Personalized PageRank of all vertices, sorted by their scores. -SELECT * FROM pagerank_out ORDER BY pagerank DESC; --- View the summary table to find the number of iterations required for --- convergence. -SELECT * FROM pagerank_out_summary; -""" - else: - help_string = """ + help_string = """ ---------------------------------------------------------------------------- SUMMARY ---------------------------------------------------------------------------- @@ -877,10 +774,6 @@ the vertices in the graph. -- For an overview on usage, run: SELECT {schema_madlib}.pagerank('usage'); - -For some examples, run: -SELECT {schema_madlib}.pagerank('example') --- """ return help_string.format(schema_madlib=schema_madlib) diff --git a/src/ports/postgres/modules/graph/sssp.py_in b/src/ports/postgres/modules/graph/sssp.py_in index a78eea4af..d7f2c8013 100644 --- a/src/ports/postgres/modules/graph/sssp.py_in +++ b/src/ports/postgres/modules/graph/sssp.py_in @@ -626,79 +626,6 @@ every group and has the following columns: will not exist and the table will have a single row. - path (ARRAY) : The shortest path from the source vertex (as specified in the SSSP execution) to the destination vertex. 
-""" - elif message.lower() in ("example", "examples"): - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- --- Create a graph, represented as vertex and edge tables. -DROP TABLE IF EXISTS vertex,edge,out,out_summary,out_path; -CREATE TABLE vertex( - id INTEGER - ); -CREATE TABLE edge( - src INTEGER, - dest INTEGER, - weight DOUBLE PRECISION -); - -INSERT INTO vertex VALUES -(0), -(1), -(2), -(3), -(4), -(5), -(6), -(7) -; -INSERT INTO edge VALUES -(0, 1, 1), -(0, 2, 1), -(0, 4, 10), -(1, 2, 2), -(1, 3, 10), -(2, 3, 1), -(2, 5, 1), -(2, 6, 3), -(3, 0, 1), -(4, 0, -2), -(5, 6, 1), -(6, 7, 1) -; - --- Compute the SSSP: -DROP TABLE IF EXISTS out; -SELECT madlib.graph_sssp( - 'vertex', -- Vertex table - 'id', -- Vertix id column - 'edge', -- Edge table - 'src=src, dest=dest, weight=weight', -- Comma delimted string of edge arguments - 0, -- The source vertex - 'out' -- Output table of SSSP -); --- View the SSSP costs for every vertex: -SELECT * FROM out ORDER BY id; - --- View the actual shortest path for a vertex: -SELECT graph_sssp_get_path('out',5,'out_path'); -SELECT * FROM out_path; - --- Create a graph with 2 groups: -DROP TABLE IF EXISTS edge_gr; -CREATE TABLE edge_gr AS -( - SELECT *, 0 AS grp FROM edge - UNION - SELECT *, 1 AS grp FROM edge WHERE src < 6 AND dest < 6 -); -INSERT INTO edge_gr VALUES -(4,5,-20,1); - --- Find SSSP for all groups: -DROP TABLE IF EXISTS out_gr, out_gr_summary; -SELECT graph_sssp('vertex',NULL,'edge_gr',NULL,0,'out_gr','grp'); """ else: help_string = "No such option. 
Use {schema_madlib}.graph_sssp()" diff --git a/src/ports/postgres/modules/graph/wcc.py_in b/src/ports/postgres/modules/graph/wcc.py_in index 09f8fea60..de1af27c1 100644 --- a/src/ports/postgres/modules/graph/wcc.py_in +++ b/src/ports/postgres/modules/graph/wcc.py_in @@ -652,126 +652,7 @@ def wcc_help(schema_madlib, message, **kwargs): -- of number of components. );""" else: - if message is not None and \ - message.lower() in ("example", "examples"): - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- --- Create a graph, represented as vertex and edge tables. -DROP TABLE IF EXISTS vertex, edge; -CREATE TABLE vertex( - id INTEGER -); -CREATE TABLE edge( - src INTEGER, - dest INTEGER, - user_id INTEGER -); -INSERT INTO vertex VALUES -(0), -(1), -(2), -(3), -(4), -(5), -(6), -(10), -(11), -(12), -(13), -(14), -(15), -(16); -INSERT INTO edge VALUES -(0, 1, 1), -(0, 2, 1), -(1, 2, 1), -(1, 3, 1), -(2, 3, 1), -(2, 5, 1), -(2, 6, 1), -(3, 0, 1), -(5, 6, 1), -(6, 3, 1), -(10, 11, 2), -(10, 12, 2), -(11, 12, 2), -(11, 13, 2), -(12, 13, 2), -(13, 10, 2), -(15, 16, 2), -(15, 14, 2); - --- Find all weakly connected components in the graph: -DROP TABLE IF EXISTS wcc_out; -SELECT madlib.weakly_connected_components( - 'vertex', -- Vertex table - 'id', -- Vertix id column - 'edge', -- Edge table - 'src=src, dest=dest', -- Comma delimted string of edge arguments - 'wcc_out'); -- Output table of weakly connected components - --- View the component ID associated with each vertex in the graph: -SELECT * FROM wcc_out ORDER BY component_id; - --- Find all weakly connected components associated with each user, using the --- grouping feature: -DROP TABLE IF EXISTS wcc_out; -SELECT madlib.weakly_connected_components( - 'vertex', -- Vertex table - 'id', -- Vertix id column - 'edge', -- Edge table - 'src=src, dest=dest', -- Comma delimted string of edge 
arguments - 'wcc_out', -- Output table of weakly connected components - 'user_id'); -- Grouping column - --- View the component ID associated with each vertex within the sub-graph --- associated with each user: -SELECT * FROM wcc_out ORDER BY user_id, component_id; - --- Retrieve the largest connected component -DROP TABLE IF EXISTS largest_cpt_table; -SELECT madlib.graph_wcc_largest_cpt( - 'wcc_out', -- WCC's output table - 'largest_cpt_table'); -- output table with largest component IDs -DROP TABLE largest_cpt_table; - --- There are several helper functions to use after wcc_out is obtained: --- Retrieve Histogram of Vertices Per Connected Component -DROP TABLE IF EXISTS histogram_table; -SELECT madlib.graph_wcc_histogram( - 'wcc_out', -- WCC's output table - 'histogram_table'); -- output table containing the histogram of vertices -DROP TABLE histogram_table; - --- Check if Two Vertices Belong to the Same Component -DROP TABLE IF EXISTS vc_table; -SELECT madlib.graph_wcc_vertex_check( - 'wcc_out', -- WCC's output table - '14,15', -- Pair of vertex IDs - 'vc_table'); -- output table containing components that contain the - -- two vertices -DROP TABLE vc_table; - --- Retrieve All Vertices Reachable from a Vertex -DROP TABLE IF EXISTS reach_table; -SELECT madlib.graph_wcc_reachable_vertices( - 'wcc_out', -- WCC's output table - '0', -- source vertex - 'reach_table'); -- output table containing all vertices reachable from - -- source vertex -DROP TABLE reach_table; - --- Count of Connected Components -DROP TABLE IF EXISTS count_table; -SELECT madlib.graph_wcc_num_cpts( - 'wcc_out', -- WCC's output table - 'count_table'); -- output table containing number of components per group -DROP TABLE count_table; -""" - else: - help_string = """ + help_string = """ ---------------------------------------------------------------------------- SUMMARY ---------------------------------------------------------------------------- @@ -782,10 +663,6 @@ connected component is also a 
strongly connected component. -- For an overview on usage, run: SELECT {schema_madlib}.weakly_connected_components('usage'); - -For some examples, run: -SELECT {schema_madlib}.weakly_connected_components('example') --- """ return help_string.format(schema_madlib=schema_madlib) diff --git a/src/ports/postgres/modules/knn/knn.py_in b/src/ports/postgres/modules/knn/knn.py_in index cfd93d92e..c9ae91872 100644 --- a/src/ports/postgres/modules/knn/knn.py_in +++ b/src/ports/postgres/modules/knn/knn.py_in @@ -342,135 +342,7 @@ prediction The output of KNN- label in case of classification, average k_nearest_neighbours The list of k-nearest neighbors that were used in the voting/averaging. """ else: - if message is not None and \ - message.lower() in ("example", "examples"): - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- --- Prepare some training data for classification: -DROP TABLE IF EXISTS knn_train_data; -CREATE TABLE knn_train_data ( - id integer, - data integer[], - label integer -- Integer label means for classification - ); -INSERT INTO knn_train_data VALUES -(1, '{{1,1}}', 1), -(2, '{{2,2}}', 1), -(3, '{{3,3}}', 1), -(4, '{{4,4}}', 1), -(5, '{{4,5}}', 1), -(6, '{{20,50}}', 0), -(7, '{{10,31}}', 0), -(8, '{{81,13}}', 0), -(9, '{{1,111}}', 0); - --- Prepare some training data for regression: -DROP TABLE IF EXISTS knn_train_data_reg; -CREATE TABLE knn_train_data_reg ( - id integer, - data integer[], - label float -- Float label means for regression - ); -INSERT INTO knn_train_data_reg VALUES -(1, '{{1,1}}', 1.0), -(2, '{{2,2}}', 1.0), -(3, '{{3,3}}', 1.0), -(4, '{{4,4}}', 1.0), -(5, '{{4,5}}', 1.0), -(6, '{{20,50}}', 0.0), -(7, '{{10,31}}', 0.0), -(8, '{{81,13}}', 0.0), -(9, '{{1,111}}', 0.0); - --- Prepare some testing data: -DROP TABLE IF EXISTS knn_test_data; -CREATE TABLE knn_test_data ( - id integer, - data integer[] - ); 
-INSERT INTO knn_test_data VALUES -(1, '{{2,1}}'), -(2, '{{2,6}}'), -(3, '{{15,40}}'), -(4, '{{12,1}}'), -(5, '{{2,90}}'), -(6, '{{50,45}}'); - --- Run KNN for classification: -DROP TABLE IF EXISTS knn_result_classification; -SELECT * FROM {schema_madlib}.knn( - 'knn_train_data', -- Table of training data - 'data', -- Col name of training data - 'id', -- Col name of id in train data - 'label', -- Training labels - 'knn_test_data', -- Table of test data - 'data', -- Col name of test data - 'id', -- Col name of id in test data - 'knn_result_classification', -- Output table - 3, -- Number of nearest neighbors - True, -- True to list nearest-neighbors by id - 'madlib.squared_dist_norm2', -- Distance function - False -- False for not using weighted average - ); -SELECT * from knn_result_classification ORDER BY id; - -Note that the nearest neighbors are sorted from closest -to furthest from the corresponding test point. - --- Run KNN for regression: -DROP TABLE IF EXISTS knn_result_regression; -SELECT * FROM {schema_madlib}.knn( - 'knn_train_data_reg', -- Table of training data - 'knn_test_data', -- Table of test data - 'data', -- Col name of test data - 'id', -- Col name of id in test data - 'knn_result_regression', -- Output table - 3, -- Number of nearest neighbors - True, -- True to list nearest-neighbors by id - 'madlib.dist_norm2', -- Distance function - False -- False for not using weighted average - ); -SELECT * FROM knn_result_regression ORDER BY id; - --- List nearest neighbors only, without doing classification -or regression: -DROP TABLE IF EXISTS knn_result_list_neighbors; -SELECT * FROM {schema_madlib}.knn( - 'knn_train_data_reg', -- Table of training data - 'data', -- Col name of training data - 'id', -- Col Name of id in train data - NULL, -- NULL training labels means just list neighbors - 'knn_test_data', -- Table of test data - 'data', -- Col name of test data - 'id', -- Col name of id in test data - 'knn_result_list_neighbors', -- Output table - 3 -- 
Number of nearest neighbors - ); -SELECT * FROM knn_result_list_neighbors ORDER BY id; - --- Run KNN for classification using weighted average: -DROP TABLE IF EXISTS knn_result_classification; -SELECT * FROM {schema_madlib}.knn( - 'knn_train_data', -- Table of training data - 'data', -- Col name of training data - 'id', -- Col name of id in train data - 'label', -- Training labels - 'knn_test_data', -- Table of test data - 'data', -- Col name of test data - 'id', -- Col name of id in test data - 'knn_result_classification', -- Output table - 3, -- Number of nearest neighbors - True, -- True to list nearest-neighbors by id - 'madlib.squared_dist_norm2', -- Distance function - True -- Calculation using weighted average - ); -SELECT * from knn_result_classification ORDER BY id; - -""" - else: - help_string = """ + help_string = """ ---------------------------------------------------------------------------- SUMMARY ---------------------------------------------------------------------------- @@ -486,10 +358,6 @@ of k nearest neighbors of the given testing example. -- For an overview on usage, run: SELECT {schema_madlib}.knn('usage'); - -For some examples, run: -SELECT {schema_madlib}.knn('example') --- """ return help_string.format(schema_madlib=schema_madlib) diff --git a/src/ports/postgres/modules/linalg/matrix_help_message.py_in b/src/ports/postgres/modules/linalg/matrix_help_message.py_in index 053fd7570..62b2535a2 100644 --- a/src/ports/postgres/modules/linalg/matrix_help_message.py_in +++ b/src/ports/postgres/modules/linalg/matrix_help_message.py_in @@ -47,7 +47,7 @@ The column names in {} are set using the options provided in 'out_args'. 
def _get_help_message(schema_madlib, message, function_name, functionality_str, - usage_str, example_str, **kwargs): + usage_str, **kwargs): format_dict = dict(locals().items() + globals().items()) if not message: help_string = """ @@ -58,15 +58,11 @@ Functionality: {functionality_str} For more details on the function usage: SELECT {schema_madlib}.{function_name}('usage'); -For an example on using this function: - SELECT {schema_madlib}.{function_name}('example'); For more details on the two input formats (dense or sparse): SELECT {schema_madlib}.matrix_info(); """ elif message.lower().strip() in ['usage', 'help', '?']: help_string = usage_str - elif message.lower().strip() in ['example', 'examples']: - help_string = example_str else: help_string = "No such option. Use {schema_madlib}.{function_name}('usage')" return help_string.format(**format_dict) @@ -82,7 +78,14 @@ def matrix_info_help_message(schema_madlib, message, **kwargs): Returns: STR. """ - dense_format = """ + message = message.lower() + if not message: + help_string = """ + Run "SELECT matrix_info('dense');" or "SELECT matrix_info('sparse');" + for examples of the specific data format. + """ + elif message == 'dense': + help_string = """ A dense matrix is represented as a distributed collection of 1-D arrays. An example 3x10 matrix would be the below table: @@ -95,7 +98,8 @@ An example 3x10 matrix would be the below table: The column names above can be user-defined - the matrix functions provide options to input these column names. The default names expected are 'row_num' and 'val'. """ - sparse_format = """ + elif message == 'sparse': + help_string = """ A sparse matrix is represented using the row and column indices for each non-zero entry of the matrix. This representation is useful for sparse matrices, containing multiple zero elements. 
Given below is an example of a sparse 4x7 matrix @@ -121,89 +125,6 @@ The column names above can be user-defined - the matrix functions provide option to input these column names. The default names expected are 'row_num', 'col_num' and 'val'. """ - message = message.lower() - if not message: - help_string = dense_format + sparse_format + """ - Run "SELECT matrix_info('dense');" or "SELECT matrix_info('sparse');" - for examples of the specific data format. - """ - elif message == 'dense': - help_string = dense_format + """ - --- Example to create dense matices --- These matrices are used in all the matrix operation help message examples. - -DROP TABLE IF EXISTS "matrix_A"; -CREATE TABLE "matrix_A" ( - row_id integer, - row_vec integer[] -); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (1, '{{9,6,5,8,5,6,6,3,10,8}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (2, '{{8,2,2,6,6,10,2,1,9,9}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (3, '{{3,9,9,9,8,6,3,9,5,6}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (4, '{{6,4,2,2,2,7,8,8,0,7}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (5, '{{6,8,9,9,4,6,9,5,7,7}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (6, '{{4,10,7,3,9,5,9,2,3,4}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (7, '{{8,10,7,10,1,9,7,9,8,7}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (8, '{{7,4,5,6,2,8,1,1,4,8}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (9, '{{8,8,8,5,2,6,9,1,8,3}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (10, '{{4,6,3,2,6,4,1,2,3,8}}'); - -DROP TABLE IF EXISTS "matrix_B"; -CREATE TABLE "matrix_B" ( - row_id integer, - row_vec integer[] -); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (1, '{{9,10,2,4,6,5,3,7,5,6}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (2, '{{5,3,5,2,8,6,9,7,7,6}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (3, '{{0,1,2,3,2,7,7,3,10,1}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (4, '{{2,9,0,4,3,6,8,6,3,4}}'); 
-INSERT INTO "matrix_B" (row_id, row_vec) VALUES (5, '{{3,8,7,7,0,5,3,9,2,10}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (6, '{{5,3,1,7,6,3,5,3,6,4}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (7, '{{4,8,4,4,2,7,10,0,3,3}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (8, '{{4,6,0,1,3,1,6,6,9,8}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (9, '{{6,5,1,7,2,7,10,6,0,6}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (10, '{{1,4,4,4,8,5,2,8,5,5}}'); -""" - elif message == 'sparse': - # TODO - help_string = sparse_format + """ - -- Example data for sparse matrices -CREATE TABLE "mat_A_sparse"( - "rowNum" integer, - col_num integer, - entry integer -); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (1, 1, 9); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (1, 2, 6); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (1, 8, 3); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (1, 9, 10); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (1, 10, 8); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (2, 1, 8); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (2, 2, 2); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (2, 3, 2); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (2, 4, 6); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (2, 6, 6); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (2, 7, 3); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (8, 1, 7); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (9, 3, 8); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (9, 4, 5); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (10, 2, 6); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (10, 3, 3); - -CREATE TABLE "mat_B_sparse"( - row_id integer, - col_id integer, - val integer -); -INSERT INTO "mat_B_sparse" (row_id, col_id, 
val) VALUES (1, 1, 9); -INSERT INTO "mat_B_sparse" (row_id, col_id, val) VALUES (1, 8, 3); -INSERT INTO "mat_B_sparse" (row_id, col_id, val) VALUES (2, 2, 2); -INSERT INTO "mat_B_sparse" (row_id, col_id, val) VALUES (2, 3, 2); -INSERT INTO "mat_B_sparse" (row_id, col_id, val) VALUES (2, 4, 6); -INSERT INTO "mat_B_sparse" (row_id, col_id, val) VALUES (10, 2, 6); -INSERT INTO "mat_B_sparse" (row_id, col_id, val) VALUES (10, 3, 3); - """ else: help_string = "No such option. Use {schema_madlib}.matrix_add('usage')" return help_string.format(schema_madlib=schema_madlib) @@ -229,22 +150,8 @@ SELECT {schema_madlib}.matrix_identity( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example returns an identity matrix of size 4 x 4 - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_identity(4, 'mat_r', 'row=row_id, val=val, fmt=dense'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_identity(4, 'mat_r', 'row=row,col=col,val=val, fmt=sparse'); -SELECT * FROM mat_r ORDER BY row; - """ return _get_help_message(schema_madlib, message, "matrix_identity", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -267,24 +174,9 @@ SELECT {schema_madlib}.matrix_diag( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example creates a diagonal matrix. The output by default is a sparse --- matrix. 
A dense matrix can be obtained by using 'fmt=dense' - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_diag(array[1.0, 2.5, 3.4, 10, 6.8], - 'matrix_r', 'row=row_id, val=val, fmt=dense'); -SELECT * FROM matrix_r ORDER BY row_id; ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_diag(array[1.0, 2.5, 3.4, 10, 6.8], 'matrix_r', 'row=row_id, col=col_id,val=val'); -SELECT * FROM matrix_r ORDER BY row_id; - """ return _get_help_message(schema_madlib, message, "matrix_diag", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------------------------ @@ -309,22 +201,9 @@ SELECT {schema_madlib}.matrix_extract_diag( ------------------------------------------------------------ The output is an array containing the main diagonal. """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------ --- Below example extracts the main diagonal. The function call is the same --- for dense and sparse matrices. 
--- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense input format -------------------------------- -SELECT madlib.matrix_extract_diag('"matrix_A"', 'row=row_id, val=row_vec'); ------------------------ Sparse input format -------------------------------- -SELECT madlib.matrix_extract_diag('"mat_B_sparse"', 'row=row_id, col=col_id, val=val'); - """ return _get_help_message(schema_madlib, message, "matrix_extract_diag", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -352,27 +231,8 @@ SELECT {schema_madlib}.matrix_add( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example computes A + B --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_add('"matrix_A"', 'row=row_id, val=row_vec', - '"matrix_B"', 'row=row_id, val=vector', - 'mat_r', 'val=vector'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_add('"mat_A_sparse"', 'row="rowNum", val=entry', - '"mat_B_sparse"', 'row=row_id, col=col_id, val=vector', - 'matrix_r_sparse', 'col=col_out'); -SELECT * FROM matrix_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_add", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -395,23 +255,8 @@ SELECT {schema_madlib}.matrix_zeros( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example returns a matrix 
initialized with all zeros. The default output --- format is sparse. - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_zeros(5, 4, 'matrix_r_dense', 'row=row_id, val=val, fmt=dense'); -SELECT * FROM matrix_r_dense ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_zeros(5, 4, 'matrix_r_sparse', 'row=row_id, col=col_id, val=val'); -SELECT * FROM matrix_r_sparse ORDER BY row_id; - """ return _get_help_message(schema_madlib, message, "matrix_zeros", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -434,23 +279,8 @@ SELECT {schema_madlib}.matrix_ones( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example returns a matrix initialized with all ones. The default --- output format is sparse. 
- ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_ones(5, 4, 'matrix_r_dense', 'row=row_id, val=val, fmt=dense'); -SELECT * FROM mat_r_dense ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_ones(3, 2, 'matrix_r_sparse', 'row=row_id, col=col_id, val=val'); -SELECT * FROM matrix_r_sparse ORDER BY row_id, col_id; - """ return _get_help_message(schema_madlib, message, "matrix_ones", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -478,27 +308,8 @@ SELECT {schema_madlib}.matrix_sub( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example computes A - B --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_sub('"matrix_A"', 'row=row_id, val=row_vec', - '"matrix_B"', 'row=row_id, val=vector', - 'mat_r', 'val=vector'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_sub('"mat_A_sparse"', 'row="rowNum", val=entry', - '"mat_B_sparse"', 'row=row_id, col=col_id, val=vector', - 'matrix_r_sparse', 'col=col_out'); -SELECT * FROM matrix_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_sub", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ def matrix_ndims_help_message(schema_madlib, message, **kwargs): @@ -515,8 +326,6 @@ This function provides dimension information of a matrix either in dense or spar For more details on the function usage: SELECT {schema_madlib}.matrix_ndims('usage'); -For an example on using 
this function: - SELECT {schema_madlib}.matrix_ndims('example'); For more details on the two input formats (dense or sparse): SELECT {schema_madlib}.matrix_info(); """ @@ -555,25 +364,6 @@ If not provided, out_args uses same value as in_args. ------------------------------------------------------------ An array with matrix_in dimension information in format of (number of rows,number of columns) """ - elif message.lower().strip() in ['example', 'examples']: - help_string = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example computes B' - ------------------------ Dense format -------------------------------- --- Data for "matrix_B" can be obtained from --- SELECT matrix_info('dense'); - -SELECT madlib.matrix_ndims('"mat_B"', 'row=row_id, val=vector'); - ------------------------ Sparse format -------------------------------- --- Data for "matrix_A_sparse" can be obtained from --- SELECT matrix_info('sparse'); - -SELECT madlib.matrix_ndims('"matrix_A_sparse"', 'row="rowNum", col=col_num, val=entry'); - """ else: help_string = "No such option. 
Use {schema_madlib}.matrix_trans('usage')" return help_string.format(schema_madlib=schema_madlib) @@ -604,27 +394,8 @@ SELECT {schema_madlib}.matrix_elem_mult( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example computes A .* B --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_elem_mult('"matrix_A"', 'row=row_id, val=row_vec', - '"matrix_B"', 'row=row_id, val=vector', - 'mat_r', 'val=vector'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_elem_mult('"mat_A_sparse"', 'row="rowNum", val=entry', - '"mat_B_sparse"', 'row=row_id, col=col_id, val=vector', - 'matrix_r_sparse', 'col=col_out'); -SELECT * FROM matrix_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_elem_mult", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -652,27 +423,8 @@ SELECT {schema_madlib}.matrix_mult( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example computes A * B --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_mult('"matrix_A"', 'row=row_id, val=row_vec', - '"matrix_B"', 'row=row_id, val=vector', - 'mat_r', 'val=vector'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_mult('"mat_A_sparse"', 'row="rowNum", val=entry', - '"mat_B_sparse"', 'row=row_id, 
col=col_id, val=vector', - 'matrix_r_sparse', 'col=col_out'); -SELECT * FROM matrix_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_mult", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -697,25 +449,8 @@ SELECT {schema_madlib}.matrix_trans( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example computes A' --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_trans('"matrix_A"', 'row=row_id, val=row_vec', - 'mat_r', 'val=vector'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_trans('"mat_A_sparse"', 'row="rowNum", val=entry', - 'matrix_r_sparse', 'col=col_out'); -SELECT * FROM matrix_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_trans", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -738,20 +473,8 @@ SELECT {schema_madlib}.matrix_extract_row( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_extract_row('"mat_A"', 'row=row_id, val=row_vec', 0); - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_extract_row('"mat_A_sparse"', 'row="rowNum", val=entry', 0); - """ return _get_help_message(schema_madlib, 
message, "matrix_extract_row", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -774,20 +497,8 @@ SELECT {schema_madlib}.matrix_extract_col( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_extract_col('"mat_A"', 'row=row_id, val=row_vec', 0); - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_extract_col('"mat_A_sparse"', 'row="rowNum", val=entry', 0); - """ return _get_help_message(schema_madlib, message, "matrix_extract_col", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -800,7 +511,7 @@ def _min_max_help_message(schema_madlib, message, suffix, **kwargs): USAGE ------------------------------------------------------------ -SELECT {schema_madlib}.matrix_{0}( +SELECT {1}.matrix_{0}( 'matrix_in', -- Name of the table containing input matrix 'in_args', -- String argument containing matrix_in specific arguments -- (see matrix arguments below for options) @@ -820,23 +531,9 @@ The output table ('matrix_r' above) has the following columns '{0}' -- Vector of ordered {0} values 'index' -- Vector of ordered corresponding indices of {0} values - """.format(suffix) - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_{0}('"mat_A"', 'row=row_id, val=row_vec', 1, 'mat_r', 
true, true); -SELECT madlib.matrix_{0}('"mat_A"', 'row=row_id, val=row_vec', 2, 'mat_r', true, true); - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_{0}('"mat_A_sparse"', 'row="rowNum", val=entry', 1, 'mat_r', true, true); -SELECT madlib.matrix_{0}('"mat_A_sparse"', 'row="rowNum", val=entry', 2, 'mat_r', true, true); - """.format(suffix) + """.format(suffix, schema_madlib) return _get_help_message(schema_madlib, message, "matrix_" + suffix, - functionality_str, usage_str, example_str) + functionality_str, usage_str) def matrix_max_help_message(schema_madlib, message, **kwargs): @@ -848,21 +545,19 @@ def matrix_min_help_message(schema_madlib, message, **kwargs): # ------------------------------------------------------------ def matrix_norm_help_message(schema_madlib, message, **kwargs): - """ Help message for Matrix norm + """ Help message for Matrix norm """ if not message: help_string = """ ------------------------------------------------------------ SUMMARY ------------------------------------------------------------ -Functionality: Matrix norm +Functionality: Matrix norm This function computes matrix norm values either in dense or sparse format. For more details on the function usage: SELECT {schema_madlib}.matrix_norm('usage'); -For an example on using this function: - SELECT {schema_madlib}.matrix_norm('example'); For more details on the two input formats (dense or sparse): SELECT {schema_madlib}.matrix_info(); """ @@ -904,27 +599,6 @@ These string arguments can be NULL if the default values are to be used. ------------------------------------------------------------ The output is a value which computes matrix norm. 
""" - elif message.lower().strip() in ['example', 'examples']: - help_string = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example computes matrix norm - ------------------------ Dense format -------------------------------- --- Data for "matrix_A" can be obtained from --- SELECT matrix_info('dense'); - -SELECT madlib.matrix_norm('"mat_A"', 'row=row_id, val=row_vec', '2'); -SELECT madlib.matrix_norm('"mat_A"', 'row=row_id, val=row_vec', 'inf'); - ------------------------ Sparse format -------------------------------- --- Data for "matrix_A_sparse" can be obtained from --- SELECT matrix_info('sparse'); - -SELECT madlib.matrix_norm('"mat_A_sparse"', 'row="rowNum", val=entry', '2'); -SELECT madlib.matrix_norm('"mat_A_sparse"', 'row="rowNum", val=entry', 'm'); - """ else: help_string = "No such option. Use {schema_madlib}.matrix_norm('usage')" return help_string.format(schema_madlib=schema_madlib) @@ -940,7 +614,7 @@ def _agg_help_message(schema_madlib, message, suffix, **kwargs): USAGE ------------------------------------------------------------ -SELECT {schema_madlib}.matrix_{0}( +SELECT {1}.matrix_{0}( 'matrix_in', -- Name of the table containing input matrix 'in_args', -- String argument containing matrix_in specific arguments -- (see matrix arguments below for options) @@ -955,23 +629,9 @@ SELECT {schema_madlib}.matrix_{0}( OUTPUT ------------------------------------------------------------ The output is a vector containing the {0} along given dimension. 
- """.format(suffix) - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_{0}('"mat_A"', 'row=row_id, val=row_vec', 1); -SELECT madlib.matrix_{0}('"mat_A"', 'row=row_id, val=row_vec', 2); - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_{0}('"mat_A_sparse"', 'row="rowNum", val=entry', 1); -SELECT madlib.matrix_{0}('"mat_A_sparse"', 'row="rowNum", val=entry', 2); - """ + """.format(suffix, schema_madlib) return _get_help_message(schema_madlib, message, "matrix_" + suffix, - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1004,24 +664,8 @@ SELECT {schema_madlib}.matrix_scalar_mult( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_scalar_mult('"mat_A"', 'row=row_id, val=row_vec', - 10, 'mat_r', 'val=vector'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_scalar_mult('"mat_A_sparse"', 'row="rowNum", val=entry', - 10, 'matrix_r_sparse', 'col=col_out'); -SELECT * FROM matrix_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_scalar_mult", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1047,29 +691,13 @@ SELECT {schema_madlib}.matrix_vec_mult( 
------------------------------------------------------------ The output is an array representing the result of the vector multiplication. """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_vec_mult('"mat_A"', 'row=row_id, val=row_vec', - array[1,2,3,4,5,6,7,8,9,10]::float8[]); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_scalar_mult('"mat_A_sparse"', 'row="rowNum", val=entry', - array[1,2,3,4,5,6,7]::float8[]); -SELECT * FROM matrix_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_vec_mult", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ def matrix_eigen_help_message(schema_madlib, message, **kwargs): - """ Help message for Matrix eigen values extraction + """ Help message for Matrix eigen values extraction """ functionality_str = "Extract eigen values of matrix" usage_str = """ @@ -1090,22 +718,8 @@ SELECT {schema_madlib}.matrix_eigen( ------------------------------------------------------------ The output are eigen values of the matrix. 
""" - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_eigen('"mat_A"', 'row=row_id, val=row_vec', 'mat_r'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_eigen('"mat_A_sparse"', 'row="rowNum", val=entry', 'mat_r'); -SELECT * FROM mat_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_eigen", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1133,22 +747,8 @@ SELECT {schema_madlib}.matrix_pinv( ------------------------------------------------------------ The output is generic inverse of the matrix. """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_pinv('"mat_A"', 'row=row_id, val=row_vec', 'mat_r'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_pinv('"mat_A_sparse"', 'row="rowNum", val=entry', 'mat_r'); -SELECT * FROM mat_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_pinv", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1174,26 +774,8 @@ SELECT {schema_madlib}.matrix_cholesky( ------------------------------------------------------------ The output is cholesky decomposition of the matrix. 
""" - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_cholesky('"mat_A"', 'row=row_id, val=row_vec', 'mat_result'); -SELECT * FROM mat_result_p ORDER BY row_id; -SELECT * FROM mat_result_l ORDER BY row_id; -SELECT * FROM mat_result_d ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_cholesky('"mat_A_sparse"', 'row="rowNum", val=entry', 'mat_result'); -SELECT * FROM mat_result_p ORDER BY "rowNum"; -SELECT * FROM mat_result_l ORDER BY "rowNum"; -SELECT * FROM mat_result_d ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_cholesky", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1221,24 +803,8 @@ SELECT {schema_madlib}.matrix_qr( ------------------------------------------------------------ The output is QR decomposition of the matrix. 
""" - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_qr('"mat_A"', 'row=row_id, val=row_vec', 'mat_result'); -SELECT * FROM mat_result_q ORDER BY row_id; -SELECT * FROM mat_result_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_qr('"mat_A_sparse"', 'row="rowNum", val=entry', 'mat_q', 'mat_r'); -SELECT * FROM mat_result_q ORDER BY "rowNum"; -SELECT * FROM mat_result_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_qr", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1266,28 +832,8 @@ SELECT {schema_madlib}.matrix_lu( ------------------------------------------------------------ The output is LU decomposition of the matrix. 
""" - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_lu('"mat_A"', 'row=row_id, val=row_vec', 'mat_result'); -SELECT * FROM mat_result_p ORDER BY row_id; -SELECT * FROM mat_result_l ORDER BY row_id; -SELECT * FROM mat_result_u ORDER BY row_id; -SELECT * FROM mat_result_q ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_lu('"mat_A_sparse"', 'row="rowNum", val=entry', 'mat_result'); -SELECT * FROM mat_result_p ORDER BY "rowNum"; -SELECT * FROM mat_result_l ORDER BY "rowNum"; -SELECT * FROM mat_result_u ORDER BY "rowNum"; -SELECT * FROM mat_result_q ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_lu", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1312,20 +858,8 @@ SELECT {schema_madlib}.matrix_nuclear_norm( ------------------------------------------------------------ The output is nuclear norm computing of the matrix. 
""" - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_nuclear_norm('"mat_A"', 'row=row_id, val=row_vec'); - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_nuclear_norm('"mat_A_sparse"', 'row="rowNum", val=entry'); - """ return _get_help_message(schema_madlib, message, "matrix_nuclear_norm", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1350,20 +884,8 @@ SELECT {schema_madlib}.matrix_rank( ------------------------------------------------------------ The output is rank computing of the matrix. """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_rank('"mat_A"', 'row=row_id, val=row_vec'); - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_rank('"mat_A_sparse"', 'row="rowNum", val=entry'); - """ return _get_help_message(schema_madlib, message, "matrix_rank", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1391,20 +913,62 @@ SELECT {schema_madlib}.matrix_inverse( ------------------------------------------------------------ The output is inverse of the matrix. 
""" - example_str = """ + return _get_help_message(schema_madlib, message, "matrix_inverse", + functionality_str, usage_str) +# ------------------------------------------------------------ + +def matrix_sparsify_help_message(schema_madlib, message, **kwargs): + """ Help message for sparsifying a matrix + """ + functionality_str = "Matrix sparsify" + usage_str = """ ------------------------------------------------------------ - EXAMPLE + USAGE ------------------------------------------------------------ --- Use `matrix_info()' to get the data/table definitions for below matrices ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_inverse('"mat_A"', 'row=row_id, val=row_vec', 'mat_r'); -SELECT row_vec FROM mat_r ORDER BY row_id; +SELECT {schema_madlib}.matrix_sparsify( + 'matrix_in', -- Name of the table containing input matrix + 'in_args', -- String argument containing matrix_in specific arguments + -- (see matrix arguments below for options) + 'matrix_out' -- Name of the table to store result matrix + 'out_args' -- String argument containing matrix_out specific arguments + -- (see matrix arguments below for options) +); ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_inverse('"mat_A_sparse"', 'row="rowNum", val=entry', 'mat_r'); -SELECT row_vec FROM mat_r ORDER BY row_id; +{matrix_arg_str} +------------------------------------------------------------ + OUTPUT +------------------------------------------------------------ +The output is the sparse version of the matrix. 
""" - return _get_help_message(schema_madlib, message, "matrix_inverse", - functionality_str, usage_str, example_str) + return _get_help_message(schema_madlib, message, "matrix_sparsify", + functionality_str, usage_str) +# ------------------------------------------------------------ + +def matrix_densify_help_message(schema_madlib, message, **kwargs): + """ Help message for densifying a matrix + """ + functionality_str = "Matrix densify" + usage_str = """ +------------------------------------------------------------ + USAGE +------------------------------------------------------------ + +SELECT {schema_madlib}.matrix_densify( + 'matrix_in', -- Name of the table containing input matrix + 'in_args', -- String argument containing matrix_in specific arguments + -- (see matrix arguments below for options) + 'matrix_out' -- Name of the table to store result matrix + 'out_args' -- String argument containing matrix_out specific arguments + -- (see matrix arguments below for options) +); + +{matrix_arg_str} +------------------------------------------------------------ + OUTPUT +------------------------------------------------------------ +The output is the dense version of the matrix. + """ + return _get_help_message(schema_madlib, message, "matrix_densify", + functionality_str, usage_str) # ------------------------------------------------------------ diff --git a/src/ports/postgres/modules/linalg/svd.py_in b/src/ports/postgres/modules/linalg/svd.py_in index 1ad2ac9ab..3534479b8 100644 --- a/src/ports/postgres/modules/linalg/svd.py_in +++ b/src/ports/postgres/modules/linalg/svd.py_in @@ -1244,49 +1244,6 @@ def svd_help_message(schema_madlib, message, **kwargs): recon_error FLOAT8 -- Total quality score (i.e. 
approximation quality) for this set of orthonormal basis """.format(schema_madlib=schema_madlib) - elif message is not None and message.lower() in ('example', 'examples'): - return """ - CREATE TABLE mat ( - row_id integer, - row_vec double precision[] - ); - - -- example input data - COPY mat (row_id, row_vec) FROM stdin; - 1 {{691,58,899,163,159,533,604,582,269,390}} - 0 {{396,840,353,446,318,886,15,584,159,383}} - 3 {{462,532,787,265,982,306,600,608,212,885}} - 2 {{293,742,298,75,404,857,941,662,846,2}} - 5 {{327,946,368,943,7,516,272,24,591,204}} - 4 {{304,151,337,387,643,753,603,531,459,652}} - 7 {{458,959,774,376,228,354,300,669,718,565}} - 6 {{877,59,260,302,891,498,710,286,864,675}} - 9 {{882,761,398,688,761,405,125,484,222,873}} - 8 {{824,390,818,844,180,943,424,520,65,913}} - 11 {{492,220,576,289,321,261,173,1,44,241}} - 10 {{528,1,860,18,814,242,314,965,935,809}} - 13 {{350,192,211,633,53,783,30,444,176,932}} - 12 {{415,701,221,503,67,393,479,218,219,916}} - 15 {{739,651,678,577,273,935,661,47,373,618}} - 14 {{909,472,871,695,930,455,398,893,693,838}} - \. 
- - DROP TABLE if exists svd_u; - DROP TABLE if exists svd_v; - DROP TABLE if exists svd_s; - -- SVD for dense matrices - SELECT {schema_madlib}.svd('mat', 'svd', 'row_id', 10); - ---------------------------------------------------------------- - DROP TABLE if exists mat_sparse; - SELECT {schema_madlib}.matrix_sparsify('mat', NULL, 'mat_sparse'); - - DROP TABLE if exists svd_u; - DROP TABLE if exists svd_v; - DROP TABLE if exists svd_s; - -- SVD for sparse matrices - SELECT {schema_madlib}.svd_sparse('mat_sparse', 'svd', 'row_id', - 'col_id', 'value', 10); - """.format(schema_madlib=schema_madlib) else: return """ In linear algebra, the singular value decomposition (SVD) is a @@ -1295,9 +1252,6 @@ def svd_help_message(schema_madlib, message, **kwargs): ------- For an overview on usage, run: SELECT {schema_madlib}.svd('usage'); - ------- - For an example, run: - SELECT {schema_madlib}.svd('example') """.format(schema_madlib=schema_madlib) # ------------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/linear_systems/dense_linear_systems.py_in b/src/ports/postgres/modules/linear_systems/dense_linear_systems.py_in index 7e5b55ae2..4d8da94fe 100644 --- a/src/ports/postgres/modules/linear_systems/dense_linear_systems.py_in +++ b/src/ports/postgres/modules/linear_systems/dense_linear_systems.py_in @@ -155,21 +155,6 @@ def linear_solver_dense_help(schema_madlib, input_string=None, **kwargs): residual_norm DOUBLE PRECISION, -- Norm of the residual iters INTEGER -- Iterations of the algorithm - - ---------------------------------------------------------------- - Examples - ---------------------------------------------------------------- - SELECT {schema_madlib}.linear_solver_dense( - 'tbl_input', -- Input table which contains the matrix - 'tbl_result', -- Output table where the results are stored - 'row_id', -- Column name containing the row_id (zero base) - 'LHS', -- Column name containing the LHS - 'RHS', -- Column name 
containing the RHS - NULL, -- Grouping columns - 'direct', -- Classification of method used (direct) - 'algorithm = householderqr' -- Optional parameters - ); - ---------------------------------------------------------------- Summary ---------------------------------------------------------------- diff --git a/src/ports/postgres/modules/linear_systems/sparse_linear_systems.py_in b/src/ports/postgres/modules/linear_systems/sparse_linear_systems.py_in index 95514c71a..214500981 100644 --- a/src/ports/postgres/modules/linear_systems/sparse_linear_systems.py_in +++ b/src/ports/postgres/modules/linear_systems/sparse_linear_systems.py_in @@ -210,25 +210,6 @@ def linear_solver_sparse_help(schema_madlib, input_string = None, **kwargs): residual_norm DOUBLE PRECISION, -- Norm of the residual iters INTEGER -- Iterations of the algorithm - - ---------------------------------------------------------------- - Examples - ---------------------------------------------------------------- - SELECT {schema_madlib}.linear_solver_sparse( - 'lhs_tbl_source', -- Data table (A matrix) - 'rhs_tbl_source', -- Data table (b vector) - 'tbl_result', -- Result table - 'lhs_row_id', -- Name of column containing row_id - 'lhs_col_id', -- Name of column containing col_id - 'lhs_value' , -- Name of column containing value - 'rhs_row_id', -- Name of column containing row_id - 'rhs_value' , -- Name of column containing value - 50 , -- Number of variables - 'grouping_cols', -- Grouping columns (default: NULL) - 'direct', -- Method used (direct vs iterative) - 'algorithm = ldlt' -- Optimizer optional parameters - ); - ---------------------------------------------------------------- Summary ---------------------------------------------------------------- diff --git a/src/ports/postgres/modules/pca/pca.py_in b/src/ports/postgres/modules/pca/pca.py_in index 5c71acc4a..286c57e2c 100644 --- a/src/ports/postgres/modules/pca/pca.py_in +++ b/src/ports/postgres/modules/pca/pca.py_in @@ -783,62 +783,7 @@ The 
result summary table ("rslt_summary_table" above) has the following columns specified in grouping_cols """.format(schema_madlib=schema_madlib) else: - if message is not None and \ - message.lower() in ("example", "examples"): - return """ ----------------------------------------------------------------- - Examples ----------------------------------------------------------------- -DROP TABLE IF EXISTS mat_sparse; -CREATE TABLE mat_sparse ( - row_id integer, - col_id integer, - value double precision -); -INSERT INTO mat_sparse VALUES -(1, 1, 1.0), -(2, 2, 2.0), -(3, 3, 3.0), -(4, 4, 4.0), -(1, 5, 5.0), -(2, 4, 6.0), -(3, 2, 7.0), -(4, 3, 8.0); -\. - -DROP TABLE IF EXISTS result_table_sparse; -DROP TABLE IF EXISTS result_table_sparse_mean; -SELECT {schema_madlib}.pca_sparse_train('mat_sparse', 'result_table_sparse', -'row_id', 'col_id', 'val_id', 4, 5, 3); - -SELECT * FROM result_table_sparse ORDER BY row_id; - -DROP TABLE IF EXISTS mat_sparse_group; -CREATE TABLE mat_sparse_group ( - row_id integer, - col_id integer, - value double precision, - matrix_id integer); -INSERT INTO mat_sparse_group VALUES -(1, 1, 1.0, 1), -(2, 2, 2.0, 1), -(3, 3, 3.0, 1), -(4, 4, 4.0, 1), -(1, 5, 5.0, 1), -(2, 4, 6.0, 2), -(3, 2, 7.0, 2), -(4, 3, 8.0, 2); -\. 
- -DROP TABLE IF EXISTS result_table_sparsed_grouped; -DROP TABLE IF EXISTS result_table_sparsed_grouped_mean; -SELECT {schema_madlib}.pca_sparse_train('mat_sparse_group', 'result_table_sparsed_grouped', -'row_id', 'col_id', 'val_id', 4, 5, 0.8, 'matrix_id'); - -SELECT * FROM result_table_sparsed_grouped ORDER BY matrix_id, row_id; - """.format(schema_madlib=schema_madlib) - else: - return """ + return """ ---------------------------------------------------------------- Summary: Sparse PCA Training ---------------------------------------------------------------- @@ -928,62 +873,9 @@ The result summary table ("rslt_summary_table" above) has the following columns grouping_cols -- The grouping columns (with their types), if any, specified in grouping_cols """.format(schema_madlib=schema_madlib) + else: - if message is not None and \ - message.lower() in ("example", "examples"): - return """ ----------------------------------------------------------------- - Examples ----------------------------------------------------------------- -DROP TABLE IF EXISTS mat; -CREATE TABLE mat ( - id integer, - row_vec double precision[] -); -COPY mat (id, row_vec) FROM stdin DELIMITER '|'; -1|{{1,2,3}} -2|{{2,1,2}} -3|{{3,2,1}} -\. - -DROP TABLE IF EXISTS result_table; -DROP TABLE IF EXISTS result_table_mean; -SELECT {schema_madlib}.pca_train( 'mat', - 'result_table', - 'id', - 3 - ); - -SELECT * FROM result_table ORDER BY row_id; - -DROP TABLE IF EXISTS mat_group; -CREATE TABLE mat_group ( - id integer, - row_vec double precision[], - matrix_id integer -); -INSERT INTO mat_group VALUES -(1, '{{1,2,3}}', 1), -(2, '{{2,1,2}}', 1), -(3, '{{3,2,1}}', 1), -(4, '{{1,2,3,4,5}}', 2), -(5, '{{2,5,2,4,1}}', 2), -(6, '{{5,4,3,2,1}}', 2); -\. 
- -DROP TABLE IF EXISTS result_table_grp; -DROP TABLE IF EXISTS result_table_grp_mean; -SELECT {schema_madlib}.pca_train( 'mat_group', - 'result_table_grp', - 'row_id', - 0.9, - 'matrix_id' - ); - -SELECT * FROM result_table_grp ORDER BY matrix_id, row_id; - """.format(schema_madlib=schema_madlib) - else: - return """ + return """ ---------------------------------------------------------------- Summary: PCA Training ---------------------------------------------------------------- diff --git a/src/ports/postgres/modules/pca/pca_project.py_in b/src/ports/postgres/modules/pca/pca_project.py_in index 62bf2b12d..cc7a4ba26 100644 --- a/src/ports/postgres/modules/pca/pca_project.py_in +++ b/src/ports/postgres/modules/pca/pca_project.py_in @@ -86,83 +86,9 @@ The output is divided into three tables (two of which are optional) grouping_col -- The grouping columns present in the 'pc_table', if any ---------------------------------------------------------------- """.format(schema_madlib=schema_madlib) - else: - if usage_string is not None and \ - usage_string.lower() in ("example", "examples"): - return """ ----------------------------------------------------------------- - Examples ----------------------------------------------------------------- --- Run pca_project() using a model table generated without grouping_cols. --- Create input table for pca_project() -DROP TABLE IF EXISTS mat_proj; -CREATE TABLE mat_proj ( - row_id integer, - row_vec double precision[] -); -COPY mat_proj (row_id, row_vec) FROM stdin DELIMITER '|'; -1|{{1,2,3}} -2|{{2,1,2}} -3|{{3,2,1}} -11|{{1,2,3}} -21|{{2,1,2}} -31|{{3,2,1}} -41|{{1,2,4}} -12|{{1,3,3}} -\. - --- NOTE: Use the 'result_table' created using the example shown in --- {schema_madlib}.pca_train('examples'), as the 'pc_table' parameter here. 
- -DROP TABLE IF EXISTS mat_proj_out; -SELECT {schema_madlib}.pca_project( - 'mat_proj', - 'result_table', - 'mat_proj_out', - 'row_id' - ); - -SELECT * FROM mat_proj_out; - ------------------------------------------------------------------------ - --- Run pca_project() using a model table generated with grouping_cols. --- Create input table for pca_project(), with grouping - -DROP TABLE IF EXISTS mat_proj_grouped; -CREATE TABLE mat_proj_grouped ( - row_id integer, - row_vec double precision[], - matrix_id integer -); -COPY mat_proj_grouped (row_id, row_vec, matrix_id) FROM stdin DELIMITER '|'; -1|{{1,2,3}}|1 -2|{{2,1,2}}|1 -3|{{3,2,1}}|1 -4|{{1,2,3,4,5}}|2 -5|{{2,1,2,4,5}}|2 -6|{{3,2,1,4,5}}|2 -\. - --- NOTE: Use the 'result_table_grp' created using the example shown --- in {schema_madlib}.pca_train('examples'), as the 'pc_table' parameter --- here. 'result_table_grp' was created with 'matrix_id' as the --- grouping column, and the table 'mat_proj_grouped' should also have the --- 'matrix_id' column in it. - -DROP TABLE IF EXISTS mat_proj_grouped_out; -SELECT {schema_madlib}.pca_project( - 'mat_proj_grouped', - 'result_table_grp', - 'mat_proj_grouped_out', - 'row_id' - ); - -SELECT * FROM mat_proj_grouped_out; - """.format(schema_madlib=schema_madlib) - else: - return """ + else: + return """ ---------------------------------------------------------------- Summary: PCA Projection ---------------------------------------------------------------- @@ -238,88 +164,7 @@ The output is divided into three tables (two of which are optional) ---------------------------------------------------------------- """.format(schema_madlib=schema_madlib) else: - if usage_string is not None and \ - usage_string.lower() in ("example", "examples"): - return """ ----------------------------------------------------------------- - Examples ----------------------------------------------------------------- --- Run pca_sparse_project() using a model table generated without grouping_cols. 
--- Create input table for pca_sparse_project() - -DROP TABLE IF EXISTS sparse_proj_mat; -CREATE TABLE sparse_proj_mat ( - row_id integer, - col_id integer, - val_id integer -); -COPY sparse_proj_mat (row_id, col_id, val_id) FROM stdin delimiter '|'; -1|2|4 -1|5|6 -3|8|4 -8|1|2 -8|7|2 -9|3|4 -9|8|2 -\. - --- NOTE: Use the 'result_table_sparse' created using the example shown in --- {schema_madlib}.pca_sparse_train('examples'), as the 'pc_table' parameter here. - -SELECT {schema_madlib}.pca_sparse_project( - 'sparse_proj_mat', - 'result_table_sparse', - 'sparse_proj_mat_out', - 'row_id', - 'col_id', - 'val_id', - 10, - 10 - ); - -SELECT * FROM sparse_proj_mat_out; - - --- Run pca_sparse_project() using a model table generated with grouping_cols. --- Create input table for pca_sparse_project(), with grouping - -DROP TABLE IF EXISTS sparse_proj_mat_with_grouping; -CREATE TABLE sparse_proj_mat_with_grouping ( - row_id integer, - col_id integer, - val_id integer, - matrix_id integer -); -COPY sparse_proj_mat_with_grouping (row_id, col_id, val_id, matrix_id) FROM stdin delimiter '|'; -8|7|2|1 -9|3|4|1 -9|8|2|1 -1|2|4|2 -1|5|6|2 -6|6|12|2 -\. - --- NOTE: Use the 'result_table_sparsed_grouped' created using the example shown --- in {schema_madlib}.pca_sparse_train('examples'), as the 'pc_table' parameter --- here. 'result_table_sparsed_grouped' was created with 'matrix_id' as the --- grouping column, and the table 'sparse_proj_mat_with_grouping' should also have --- the 'matrix_id' column in it. 
- -SELECT {schema_madlib}.pca_sparse_project( - 'sparse_proj_mat_with_grouping', - 'result_table_sparsed_grouped', - 'sparse_proj_mat_with_grouping_out', - 'row_id', - 'col_id', - 'val_id', - 10, - 10 - ); - -SELECT * FROM sparse_proj_mat_with_grouping_out; - """.format(schema_madlib=schema_madlib) - else: - return """ + return """ ---------------------------------------------------------------- Summary: PCA Projection ---------------------------------------------------------------- diff --git a/src/ports/postgres/modules/pmml/table_to_pmml.py_in b/src/ports/postgres/modules/pmml/table_to_pmml.py_in index 5fae7b949..16d5125b8 100644 --- a/src/ports/postgres/modules/pmml/table_to_pmml.py_in +++ b/src/ports/postgres/modules/pmml/table_to_pmml.py_in @@ -82,44 +82,6 @@ SELECT {schema_madlib}.pmml( OUTPUT ------------------------------------------------------------------ The output of this function is a standard PMML document. - ------------------------------------------------------------------- - OUTPUT ------------------------------------------------------------------- --- Create data set -CREATE TABLE patients( id integer NOT NULL, - second_attack integer, - treatment integer, - trait_anxiety integer); -INSERT INTO patients(id, second_attack, treatment, trait_anxiety) VALUES -( 1, 1, 1, 70), -( 3, 1, 1, 50), -( 5, 1, 0, 40), -( 7, 1, 0, 75), -( 9, 1, 0, 70), -(11, 0, 1, 65), -(13, 0, 1, 45), -(15, 0, 1, 40), -(17, 0, 0, 55), -(19, 0, 0, 50), -( 2, 1, 1, 80), -( 4, 1, 0, 60), -( 6, 1, 0, 65), -( 8, 1, 0, 80), -(10, 1, 0, 60), -(12, 0, 1, 50), -(14, 0, 1, 35), -(16, 0, 1, 50), -(18, 0, 0, 45), -(20, 0, 0, 60); --- train the model -SELECT madlib.logregr_train( - 'patients', - 'patients_logregr', - 'second_attack', - 'ARRAY[1, treatment, trait_anxiety]'); --- pmml export -SELECT madlib.pmml('patients_logregr'); """ return help_string.format(schema_madlib=schema_madlib) diff --git a/src/ports/postgres/modules/recursive_partitioning/decision_tree.py_in 
b/src/ports/postgres/modules/recursive_partitioning/decision_tree.py_in index f9a1ed9f6..26b0e1fd0 100644 --- a/src/ports/postgres/modules/recursive_partitioning/decision_tree.py_in +++ b/src/ports/postgres/modules/recursive_partitioning/decision_tree.py_in @@ -2413,8 +2413,6 @@ predict the value of a target variable based on several input variables. For more details on the function usage: SELECT {schema_madlib}.tree_train('usage'); -For an example on using this function: - SELECT {schema_madlib}.tree_train('example'); """ elif message.lower().strip() in ['usage', 'help', '?']: help_string = """ @@ -2508,54 +2506,6 @@ The output summary table ('output_table_summary') has the following columns: null_proxy -- String used as replacement for NULL values (NULL if null_as_category = False) - """ - elif message.lower().strip() in ['example', 'examples']: - help_string = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- -DROP TABLE IF EXISTS dummy_dt_con_src CASCADE; -CREATE TABLE dummy_dt_con_src ( - id INTEGER, - cat INTEGER[], - con FLOAT8[], - y FLOAT8 -); - -INSERT INTO dummy_dt_src VALUES -(1, '{{0}}'::INTEGER[], ARRAY[0], 0.5), -(2, '{{0}}'::INTEGER[], ARRAY[1], 0.5), -(3, '{{0}}'::INTEGER[], ARRAY[4], 0.5), -(4, '{{0}}'::INTEGER[], ARRAY[4], 0.5), -(5, '{{0}}'::INTEGER[], ARRAY[4], 0.5), -(6, '{{0}}'::INTEGER[], ARRAY[5], 0.1), -(7, '{{0}}'::INTEGER[], ARRAY[6], 0.1), -(8, '{{1}}'::INTEGER[], ARRAY[9], 0.1); -(9, '{{1}}'::INTEGER[], ARRAY[9], 0.1); -(10, '{{1}}'::INTEGER[], ARRAY[9], 0.1); -(11, '{{1}}'::INTEGER[], ARRAY[9], 0.1); - -DROP TABLE IF EXISTS tree_out, tree_out_summary; -SELECT madlib.tree_train( - 'dummy_dt_src', - 'tree_out', - 'id', - 'y', - 'cat, con', - '', - 'mse', - NULL::Text, - NULL::Text, - 3, - 2, - 1, - 5); - -SELECT madlib.tree_display('tree_out'); --- View the impurity importance value of each feature -DROP TABLE IF EXISTS var_imp_out; -SELECT 
madlib.get_var_importance('tree_out', 'var_imp_out'); -SELECT * FROM var_imp_out; """ else: help_string = "No such option. Use {schema_madlib}.tree_train('usage')" @@ -2613,21 +2563,6 @@ possible value of the response variable. The columns are labeled as 'estimated_prob_', where represents for each value of the response. """ - elif message.lower().strip() in ['example', 'examples']: - help_string = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Assuming the example of tree_train() has been run -SELECT {schema_madlib}.tree_predict( - 'tree_out', - 'dummy_dt_src', - 'tree_predict_out', - 'response' -); - -SELECT * FROM tree_predict_out; - """ else: help_string = "No such option. Use {schema_madlib}.tree_predict('usage')" return help_string.format(schema_madlib=schema_madlib) diff --git a/src/ports/postgres/modules/recursive_partitioning/random_forest.py_in b/src/ports/postgres/modules/recursive_partitioning/random_forest.py_in index e10e2ecaf..fae91bdbc 100644 --- a/src/ports/postgres/modules/recursive_partitioning/random_forest.py_in +++ b/src/ports/postgres/modules/recursive_partitioning/random_forest.py_in @@ -65,8 +65,6 @@ predict the value of a target variable based on several input variables. For more details on the function usage: SELECT {schema_madlib}.forest_train('usage'); -For an example on using this function: - SELECT {schema_madlib}.forest_train('example'); """ elif message.lower().strip() in ['usage', 'help', '?']: help_string = """ @@ -184,57 +182,6 @@ it has the following columns: features. The order corresponds to the order of the variables as found in con_features in _summary. 
""" - elif message.lower().strip() in ['example', 'examples']: - help_string = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- -DROP TABLE IF EXISTS dt_golf; -CREATE TABLE dt_golf ( - id integer NOT NULL, - "OUTLOOK" text, - temperature double precision, - humidity double precision, - windy text, - class text -); - -INSERT INTO dt_golf (id,"OUTLOOK",temperature,humidity,windy,class) VALUES -(1, 'sunny', 85, 85, 'false', 'Don''t Play'), -(2, 'sunny', 80, 90, 'true', 'Don''t Play'), -(3, 'overcast', 83, 78, 'false', 'Play'), -(4, 'rain', 70, 96, 'false', 'Play'), -(5, 'rain', 68, 80, 'false', 'Play'), -(6, 'rain', 65, 70, 'true', 'Don''t Play'), -(7, 'overcast', 64, 65, 'true', 'Play'), -(8, 'sunny', 72, 95, 'false', 'Don''t Play'), -(9, 'sunny', 69, 70, 'false', 'Play'), -(10, 'rain', 75, 80, 'false', 'Play'), -(11, 'sunny', 75, 70, 'true', 'Play'), -(12, 'overcast', 72, 90, 'true', 'Play'), -(13, 'overcast', 81, 75, 'false', 'Play'), -(14, 'rain', 71, 80, 'true', 'Don''t Play'); - -DROP TABLE IF EXISTS train_output, train_output_group, train_output_summary; -SELECT madlib.forest_train('dt_golf', -- source table - 'train_output', -- output model table - 'id', -- id column - 'class', -- response - '"OUTLOOK", temperature, humidity, windy', -- features - NULL, -- exclude columns - NULL, -- grouping columns - 20::integer, -- number of trees - 2::integer, -- number of random features - TRUE::boolean, -- variable importance - 1::integer, -- num_permutations - 8::integer, -- max depth - 3::integer, -- min split - 1::integer, -- min bucket - 10::integer -- number of splits per continuous variable -); -SELECT madlib.get_tree('train_output',1,2,FALSE); - - """ else: help_string = "No such option. Use {schema_madlib}.forest_train('usage')" return help_string.format(schema_madlib=schema_madlib) @@ -1680,19 +1627,6 @@ possible value of the response variable. 
The columns are labeled as of the response. This is only for the classification models, and the value is the fraction of votes in each category. - """ - elif message.lower().strip() in ['example', 'examples']: - help_string = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Assuming the example of forest_train has been run -SELECT {schema_madlib}.forest_predict( - 'forest_out', - 'dummy_dt_src', - 'forest_predict_out', - 'response' -); """ else: help_string = "No such option. Use {schema_madlib}.forest_predict('usage')" diff --git a/src/ports/postgres/modules/regress/linear.py_in b/src/ports/postgres/modules/regress/linear.py_in index a81b78d29..70fe78a6e 100644 --- a/src/ports/postgres/modules/regress/linear.py_in +++ b/src/ports/postgres/modules/regress/linear.py_in @@ -232,9 +232,6 @@ def linregr_help_message(schema_madlib, message, **kwargs): For more details on function usage: SELECT {schema_madlib}.linregr_train('usage') - - For an example on using the function: - SELECT {schema_madlib}.linregr_train('example') """ elif message in ['usage', 'help', '?']: help_string = """ @@ -276,46 +273,6 @@ def linregr_help_message(schema_madlib, message, **kwargs): 'num_rows_processed' INTEGER, -- total number of rows that are used 'num_missing_rows_skipped' INTEGER -- total number of rows that are skipped because of NULL values """ - elif message in ['example', 'examples']: - help_string = """ - CREATE TABLE houses (id INT, tax INT, - bedroom INT, bath FLOAT, - price INT, size INT, lot INT); - COPY houses FROM STDIN WITH DELIMITER '|'; - 1 | 590 | 2 | 1 | 50000 | 770 | 22100 - 2 | 1050 | 3 | 2 | 85000 | 1410 | 12000 - 3 | 20 | 3 | 1 | 22500 | 1060 | 3500 - 4 | 870 | 2 | 2 | 90000 | 1300 | 17500 - 5 | 1320 | 3 | 2 | 133000 | 1500 | 30000 - 6 | 1350 | 2 | 1 | 90500 | 820 | 25700 - 7 | 2790 | 3 | 2.5 | 260000 | 2130 | 25000 - 8 | 680 | 2 | 1 | 142500 | 1170 | 22000 - 9 | 1840 | 3 | 2 
| 160000 | 1500 | 19000 - 10 | 3680 | 4 | 2 | 240000 | 2790 | 20000 - 11 | 1660 | 3 | 1 | 87000 | 1030 | 17500 - 12 | 1620 | 3 | 2 | 118600 | 1250 | 20000 - 13 | 3100 | 3 | 2 | 140000 | 1760 | 38000 - 14 | 2070 | 2 | 3 | 148000 | 1550 | 14000 - 15 | 650 | 3 | 1.5 | 65000 | 1450 | 12000 - \. - - -- Train a regression model. First, single regression for all data. - SELECT {schema_madlib}.linregr_train( 'houses', - 'houses_linregr', - 'price', - 'ARRAY[1, tax, bath, size]' - ); - -- Generate three output models, one for each value of "bedroom". - SELECT {schema_madlib}.linregr_train('houses', - 'houses_linregr_bedroom', - 'price', - 'ARRAY[1, tax, bath, size]', - 'bedroom' - ); - -- Examine the resulting models. - SELECT * FROM houses_linregr; - SELECT * FROM houses_linregr_bedroom; - """ else: help_string = "No such option. Use {schema_madlib}.linregr_train()" diff --git a/src/ports/postgres/modules/regress/logistic.py_in b/src/ports/postgres/modules/regress/logistic.py_in index 77ea465bf..812d4cd08 100644 --- a/src/ports/postgres/modules/regress/logistic.py_in +++ b/src/ports/postgres/modules/regress/logistic.py_in @@ -416,9 +416,6 @@ that can be represented with a Boolean expression. 
For more details on function usage: SELECT {schema_madlib}.logregr_train('usage') - -For a small example on using the function: - SELECT {schema_madlib}.logregr_train('example') """ elif message in ['usage', 'help', '?']: @@ -467,51 +464,6 @@ A summary table named _summary is also created at the same time, whic 'num_missing_rows_skipped' integer, -- total number of rows skipped 'grouping_col' varchar -- grouping columns used in the regression """ - elif message in ['example', 'examples']: - - help_string = """ -CREATE TABLE patients( id INTEGER NOT NULL, - second_attack BOOLEAN, - treatment INTEGER, - trait_anxiety INTEGER); -COPY patients FROM STDIN WITH DELIMITER '|'; - 1 | True | 1 | 70 - 3 | True | 1 | 50 - 5 | True | 0 | 40 - 7 | True | 0 | 75 - 9 | True | 0 | 70 - 11 | False | 1 | 65 - 13 | False | 1 | 45 - 15 | False | 1 | 40 - 17 | False | 0 | 55 - 19 | False | 0 | 50 - 2 | True | 1 | 80 - 4 | True | 0 | 60 - 6 | True | 0 | 65 - 8 | True | 0 | 80 - 10 | True | 0 | 60 - 12 | False | 1 | 50 - 14 | False | 1 | 35 - 16 | False | 1 | 50 - 18 | False | 0 | 45 - 20 | False | 0 | 60 -\. - --- Drop output tables before calling the function -DROP TABLE IF EXISTS patients_logregr; -DROP TABLE IF EXISTS patients_logregr_summary; - -SELECT madlib.logregr_train( 'patients', - 'patients_logregr', - 'second_attack', - 'ARRAY[1, treatment, trait_anxiety]', - NULL, - 20, - 'irls' - ); - -SELECT * from patients_logregr; - """ else: help_string = "No such option. Use {schema_madlib}.logregr_train('help')" diff --git a/src/ports/postgres/modules/regress/multilogistic.py_in b/src/ports/postgres/modules/regress/multilogistic.py_in index 66de5efc4..51d9190d0 100644 --- a/src/ports/postgres/modules/regress/multilogistic.py_in +++ b/src/ports/postgres/modules/regress/multilogistic.py_in @@ -560,9 +560,6 @@ coefficients that maximizes the likelihood of the observations. 
For more details on function usage: SELECT {schema_madlib}.mlogregr_train('usage') - -For an example on using the function: - SELECT {schema_madlib}.mlogregr_train('example') """ elif message in ['usage', 'help', '?']: help_string = """ @@ -614,89 +611,6 @@ The output summary table named as <'output_table'>_summary has the following col vcov -- DOUBLE PRECISION[], Covariance matrix coef -- DOUBLE PRECISION[], Coefficients of regression """ - elif message in ['example', 'examples']: - help_string = """ --- Create sample data set -DROP TABLE IF EXISTS test3; -CREATE TABLE test3 ( - feat1 INTEGER, - feat2 INTEGER, - cat INTEGER -); -INSERT INTO test3(feat1, feat2, cat) VALUES -(1,35,1), -(2,33,0), -(3,39,1), -(1,37,1), -(2,31,1), -(3,36,0), -(2,36,1), -(2,31,1), -(2,41,1), -(2,37,1), -(1,44,1), -(3,33,2), -(1,31,1), -(2,44,1), -(1,35,1), -(1,44,0), -(1,46,0), -(2,46,1), -(2,46,2), -(3,49,1), -(2,39,0), -(2,44,1), -(1,47,1), -(1,44,1), -(1,37,2), -(3,38,2), -(1,49,0), -(2,44,0), -(3,61,2), -(1,65,2), -(3,67,1), -(3,65,2), -(1,65,2), -(2,67,2), -(1,65,2), -(1,62,2), -(3,52,2), -(3,63,2), -(2,59,2), -(3,65,2), -(2,59,0), -(3,67,2), -(3,67,2), -(3,60,2), -(3,67,2), -(3,62,2), -(2,54,2), -(3,65,2), -(3,62,2), -(2,59,2), -(3,60,2), -(3,63,2), -(3,65,2), -(2,63,1), -(2,67,2), -(2,65,2), -(2,62,2), -(NULL,67,2), -(2,NULL,2), -(NULL,NULL,2), -(2,62,NULL); - --- Run the multilogistic regression function. -DROP TABLE IF EXISTS test3_output; -DROP TABLE IF EXISTS test3_output_summary; -SELECT madlib.mlogregr_train('test3', - 'test3_output', - 'cat', - 'ARRAY[1, feat1, feat2]', - 0, - 'max_iter=20, optimizer=irls, precision=0.0001' - ); - """ else: help_string = "No such option. 
Use {schema_madlib}.mlogregr_train()" diff --git a/src/ports/postgres/modules/sample/balance_sample.py_in b/src/ports/postgres/modules/sample/balance_sample.py_in index 28cd11c6f..391d1b4e4 100644 --- a/src/ports/postgres/modules/sample/balance_sample.py_in +++ b/src/ports/postgres/modules/sample/balance_sample.py_in @@ -722,7 +722,6 @@ output table size. For more details on function usage: SELECT {schema_madlib}.balance_sample('usage'); - SELECT {schema_madlib}.balance_sample('example'); """ elif message.lower() in ['usage', 'help', '?']: help_string = """ @@ -768,60 +767,6 @@ is FALSE), a row can be selected at most once. The output_table would contain the required number of samples, along with a new column named __madlib_id__, that contain unique numbers for all sampled rows. -""" - elif message.lower() in ("example", "examples"): - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- - --- Create an input table -DROP TABLE IF EXISTS test; - -CREATE TABLE test( - id1 INTEGER, - id2 INTEGER, - gr1 INTEGER, - gr2 INTEGER -); - -INSERT INTO test VALUES -(1,0,1,1), -(2,0,1,1), -(3,0,1,1), -(4,0,1,1), -(5,0,1,1), -(6,0,1,1), -(7,0,1,1), -(8,0,1,1), -(9,0,1,1), -(9,0,1,1), -(9,0,1,1), -(9,0,1,1), -(0,1,1,2), -(0,2,1,2), -(0,3,1,2), -(0,4,1,2), -(0,5,1,2), -(0,6,1,2), -(10,10,2,2), -(20,20,2,2), -(30,30,2,2), -(40,40,2,2), -(50,50,2,2), -(60,60,2,2), -(70,70,2,2) -; - --- Sample without replacement -DROP TABLE IF EXISTS out; -SELECT balance_sample('test', 'out', 'gr1', 'undersample', NULL, NULL, FALSE); -SELECT * FROM out; - ---- Sample with replacement -DROP TABLE IF EXISTS out_sr2; -SELECT balance_sample('test', 'out', 'gr1', 'undersample', NULL, NULL, TRUE); -SELECT * FROM out; """ else: help_string = "No such option. 
Use {schema_madlib}.balance_sample()" diff --git a/src/ports/postgres/modules/sample/stratified_sample.py_in b/src/ports/postgres/modules/sample/stratified_sample.py_in index 0621d61c1..90d12d544 100644 --- a/src/ports/postgres/modules/sample/stratified_sample.py_in +++ b/src/ports/postgres/modules/sample/stratified_sample.py_in @@ -208,7 +208,6 @@ whole table is a single strata. For more details on function usage: SELECT {schema_madlib}.stratified_sample('usage'); - SELECT {schema_madlib}.stratified_sample('example'); """ elif message.lower() in ['usage', 'help', '?']: help_string = """ @@ -243,64 +242,8 @@ If with_replacement is TRUE, each sample is independent (the same row may be selected in the sample set more than once). Else (if with_replacement is FALSE), a row can be selected at most once. ); -""" - elif message.lower() in ("example", "examples"): - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- - --- Create an input table -DROP TABLE IF EXISTS test; - -CREATE TABLE test( - id1 INTEGER, - id2 INTEGER, - gr1 INTEGER, - gr2 INTEGER -); - -INSERT INTO test VALUES -(1,0,1,1), -(2,0,1,1), -(3,0,1,1), -(4,0,1,1), -(5,0,1,1), -(6,0,1,1), -(7,0,1,1), -(8,0,1,1), -(9,0,1,1), -(9,0,1,1), -(9,0,1,1), -(9,0,1,1), -(0,1,1,2), -(0,2,1,2), -(0,3,1,2), -(0,4,1,2), -(0,5,1,2), -(0,6,1,2), -(10,10,2,2), -(20,20,2,2), -(30,30,2,2), -(40,40,2,2), -(50,50,2,2), -(60,60,2,2), -(70,70,2,2) -; - --- Sample without replacement -DROP TABLE IF EXISTS out; -SELECT madlib.stratified_sample('test', 'out', 0.5, 'gr1,gr2', 'id1,id2', - FALSE); -SELECT * FROM out; - --- Sample with replacement -DROP TABLE IF EXISTS out; -SELECT madlib.stratified_sample('test', 'out', 0.5, 'gr1,gr2', 'id1,id2', - TRUE); -SELECT * FROM out; """ else: - help_string = "No such option. Use {schema_madlib}.graph_sssp()" + help_string = "No such option. 
Use {schema_madlib}.stratified_sample()" return help_string.format(schema_madlib=schema_madlib) diff --git a/src/ports/postgres/modules/sample/stratified_sample.sql_in b/src/ports/postgres/modules/sample/stratified_sample.sql_in index c76211579..6f0c23339 100644 --- a/src/ports/postgres/modules/sample/stratified_sample.sql_in +++ b/src/ports/postgres/modules/sample/stratified_sample.sql_in @@ -253,7 +253,7 @@ m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); ------------------------------------------------------------------------------- -- Online help -CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.stratified_sample_help( +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.stratified_sample( message VARCHAR ) RETURNS VARCHAR AS $$ PythonFunction(sample, stratified_sample, stratified_sample_help) @@ -262,9 +262,9 @@ m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `'); ------------------------------------------------------------------------------- -CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.stratified_sample_help() +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.stratified_sample() RETURNS VARCHAR AS $$ - SELECT MADLIB_SCHEMA.stratified_sample_help(''); + SELECT MADLIB_SCHEMA.stratified_sample(''); $$ LANGUAGE sql IMMUTABLE m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `'); ------------------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/sample/train_test_split.py_in b/src/ports/postgres/modules/sample/train_test_split.py_in index 011b14f6d..17388bb5a 100644 --- a/src/ports/postgres/modules/sample/train_test_split.py_in +++ b/src/ports/postgres/modules/sample/train_test_split.py_in @@ -199,7 +199,6 @@ performed. 
For more details on function usage: SELECT {schema_madlib}.train_test_split('usage'); - SELECT {schema_madlib}.train_test_split('example'); """ elif message.lower() in ['usage', 'help', '?']: help_string = """ @@ -242,78 +241,8 @@ If with_replacement is TRUE, each sample is independent (the same row may be selected in the sample set more than once). Else (if with_replacement is FALSE), a row can be selected at most once. ); -""" - elif message.lower() in ("example", "examples"): - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- - --- Create an input table -DROP TABLE IF EXISTS test; - -CREATE TABLE test( - id1 INTEGER, - id2 INTEGER, - gr1 INTEGER, - gr2 INTEGER -); - -INSERT INTO test VALUES -(1,0,1,1), -(2,0,1,1), -(3,0,1,1), -(4,0,1,1), -(5,0,1,1), -(6,0,1,1), -(7,0,1,1), -(8,0,1,1), -(9,0,1,1), -(9,0,1,1), -(9,0,1,1), -(9,0,1,1), -(0,1,1,2), -(0,2,1,2), -(0,3,1,2), -(0,4,1,2), -(0,5,1,2), -(0,6,1,2), -(10,10,2,2), -(20,20,2,2), -(30,30,2,2), -(40,40,2,2), -(50,50,2,2), -(60,60,2,2), -(70,70,2,2) -; - --- Sample without replacement -DROP TABLE IF EXISTS out; -SELECT madlib.train_test_split( - 'test', -- Source table - 'out', -- Output table - 0.5, -- Sample proportion - 0.5, -- Sample proportion - 'gr1,gr2', -- Strata definition - 'id1,id2', -- Columns to output - FALSE, -- Sample without replacement - FALSE); -- Do not separate output tables -SELECT * FROM out ORDER BY split,gr1,gr2,id1,id2; - --- Sample with replacement -DROP TABLE IF EXISTS out_train, out_test; -SELECT madlib.train_test_split( - 'test', -- Source table - 'out', -- Output table - 0.5, -- train_proportion - NULL, -- Default = 1 - train_proportion = 0.5 - 'gr1,gr2', -- Strata definition - 'id1,id2', -- Columns to output - TRUE, -- Sample with replacement - TRUE); -- Separate output tables -SELECT * FROM out_train ORDER BY gr1,gr2,id1,id2; """ else: - 
help_string = "No such option. Use {schema_madlib}.graph_sssp()" + help_string = "No such option. Use {schema_madlib}.train_test_split()" return help_string.format(schema_madlib=schema_madlib) diff --git a/src/ports/postgres/modules/stats/correlation.py_in b/src/ports/postgres/modules/stats/correlation.py_in index 45ff05e91..005de7501 100644 --- a/src/ports/postgres/modules/stats/correlation.py_in +++ b/src/ports/postgres/modules/stats/correlation.py_in @@ -414,57 +414,6 @@ triangle set to NULL. To obtain the result from the output_table in this matrix format ensure to order the elements using the 'column_position' column. """.format(schema_madlib=schema_madlib, func=func) - elif message is not None and message.lower() in ('example', 'examples'): - return """ -DROP TABLE IF EXISTS example_data; -CREATE TABLE example_data( - id SERIAL, - outlook text, - temperature float8, - humidity float8, - windy text, - class text) ; - -INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES('sunny', 85, 85, 'false', E'Dont Play'); -INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES('sunny', 80, 90, 'true', E'Dont Play'); -INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES('overcast', 83, 78, 'false', 'Play'); -INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES('rain', 70, 96, 'false', 'Play'); -INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES('rain', 68, 80, 'false', 'Play'); -INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES('rain', 65, 70, 'true', E'Dont Play'); -INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES('overcast', 64, 65, 'true', 'Play'); -INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES('sunny', 72, 95, 'false', E'Dont Play'); -INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES('sunny', 69, 70, 'false', 'Play'); 
-INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES('rain', 75, 80, 'false', 'Play'); -INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES('sunny', 75, 70, 'true', 'Play'); -INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES('overcast', 72, 90, 'true', 'Play'); -INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES('overcast', 81, 75, 'false', 'Play'); -INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES('rain', 71, 80, 'true', E'Dont Play'); -INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES(NULL, 100, 100, 'true', NULL); -INSERT INTO example_data(outlook, temperature, humidity, windy, class) -VALUES(NULL, 110, 100, 'true', NULL); - -SELECT madlib.{func}('example_data', 'example_data_output'); -SELECT madlib.{func}('example_data', 'example_data_output', '*'); -SELECT madlib.{func}('example_data', 'example_data_output', 'temperature, humidity'); - --- To get the {func} matrix from output table: -SELECT * from example_data_output order by column_position; - """.format(func=func) else: if cov: return """ @@ -478,9 +427,6 @@ covariance is negative. The sign of the covariance therefore shows the tendency ------- For an overview on usage, run: SELECT {schema_madlib}.covariance('usage'); -------- -For examples: - SELECT {schema_madlib}.covariance('example'); """.format(schema_madlib=schema_madlib) else: return """ @@ -492,8 +438,5 @@ perfectly anti-correlated. 
------- For an overview on usage, run: SELECT {schema_madlib}.correlation('usage'); -------- -For examples: - SELECT {schema_madlib}.correlation('example'); """.format(schema_madlib=schema_madlib) # ------------------------------------------------------------------------------ diff --git a/src/ports/postgres/modules/stats/cox_prop_hazards.py_in b/src/ports/postgres/modules/stats/cox_prop_hazards.py_in index 706503a9e..f73c14fcf 100644 --- a/src/ports/postgres/modules/stats/cox_prop_hazards.py_in +++ b/src/ports/postgres/modules/stats/cox_prop_hazards.py_in @@ -64,8 +64,6 @@ the probability that death has happened before time t. For more details on function usage: SELECT {schema_madlib}.coxph_train('usage') -For an example on using the function: - SELECT {schema_madlib}.coxph_train('example') """ elif message in ['usage', 'help', '?']: @@ -110,55 +108,6 @@ The output summary table is named as _summary has the following co due to missing values """ - - elif message in ['example', 'examples']: - help_string = """ -DROP TABLE IF EXISTS sample_data; -CREATE TABLE sample_data ( - id INTEGER NOT NULL, - grp DOUBLE PRECISION, - wbc DOUBLE PRECISION, - timedeath INTEGER, - status BOOLEAN -); - -COPY sample_data FROM STDIN DELIMITER '|'; - 0 | 0 | 1.45 | 35 | t - 1 | 0 | 1.47 | 34 | t - 3 | 0 | 2.2 | 32 | t - 4 | 0 | 1.78 | 25 | t - 5 | 0 | 2.57 | 23 | t - 6 | 0 | 2.32 | 22 | t - 7 | 0 | 2.01 | 20 | t - 8 | 0 | 2.05 | 19 | t - 9 | 0 | 2.16 | 17 | t - 10 | 0 | 3.6 | 16 | t - 11 | 1 | 2.3 | 15 | t - 12 | 0 | 2.88 | 13 | t - 13 | 1 | 1.5 | 12 | t - 14 | 0 | 2.6 | 11 | t - 15 | 0 | 2.7 | 10 | t - 16 | 0 | 2.8 | 9 | t - 17 | 1 | 2.32 | 8 | t - 18 | 0 | 4.43 | 7 | t - 19 | 0 | 2.31 | 6 | t - 20 | 1 | 3.49 | 5 | t - 21 | 1 | 2.42 | 4 | t - 22 | 1 | 4.01 | 3 | t - 23 | 1 | 4.91 | 2 | t - 24 | 1 | 5 | 1 | t -\. 
- -SELECT {schema_madlib}.coxph_train( - 'sample_data', - 'sample_cox', - 'timedeath', - 'ARRAY[grp,wbc]', - 'status'); - -SELECT * FROM sample_cox; - """ - else: help_string = "No such option. Use {schema_madlib}.coxph_train()" diff --git a/src/ports/postgres/modules/summary/summary.py_in b/src/ports/postgres/modules/summary/summary.py_in index 1dd6c61d2..ecd872646 100644 --- a/src/ports/postgres/modules/summary/summary.py_in +++ b/src/ports/postgres/modules/summary/summary.py_in @@ -159,43 +159,6 @@ def summary_help_message(schema_madlib, message, **kwargs): - most_frequent_values : Most frequent values - mfv_frequencies : Frequency of the most frequent values """.format(madlib=schema_madlib) - elif message is not None and message.lower() in ('example', 'examples'): - return """ - DROP TABLE IF EXISTS example_data; - CREATE TABLE example_data( - id SERIAL, - outlook text, - temperature float8, - humidity float8, - windy text, - class text) ; - - INSERT INTO example_data(outlook, temperature, humidity, windy, class) VALUES('sunny', 85, 85, 'false', E'Don\\'t Play'); - INSERT INTO example_data(outlook, temperature, humidity, windy, class) VALUES('sunny', 80, 90, 'true', E'Don\\'t Play'); - INSERT INTO example_data(outlook, temperature, humidity, windy, class) VALUES('overcast', 83, 78, 'false', 'Play'); - INSERT INTO example_data(outlook, temperature, humidity, windy, class) VALUES('rain', 70, 96, 'false', 'Play'); - INSERT INTO example_data(outlook, temperature, humidity, windy, class) VALUES('rain', 68, 80, 'false', 'Play'); - INSERT INTO example_data(outlook, temperature, humidity, windy, class) VALUES('rain', 65, 70, 'true', E'Don\\'t Play'); - INSERT INTO example_data(outlook, temperature, humidity, windy, class) VALUES('overcast', 64, 65, 'true', 'Play'); - INSERT INTO example_data(outlook, temperature, humidity, windy, class) VALUES('sunny', 72, 95, 'false', E'Don\\'t Play'); - INSERT INTO example_data(outlook, temperature, humidity, windy, class) 
VALUES('sunny', 69, 70, 'false', 'Play'); - INSERT INTO example_data(outlook, temperature, humidity, windy, class) VALUES('rain', 75, 80, 'false', 'Play'); - INSERT INTO example_data(outlook, temperature, humidity, windy, class) VALUES('sunny', 75, 70, 'true', 'Play'); - INSERT INTO example_data(outlook, temperature, humidity, windy, class) VALUES('overcast', 72, 90, 'true', 'Play'); - INSERT INTO example_data(outlook, temperature, humidity, windy, class) VALUES('overcast', 81, 75, 'false', 'Play'); - INSERT INTO example_data(outlook, temperature, humidity, windy, class) VALUES('rain', 71, 80, 'true', E'Don\\'t Play'); - INSERT INTO example_data(outlook, temperature, humidity, windy, class) VALUES(' ', 100, 100, 'true', ' '); - INSERT INTO example_data(outlook, temperature, humidity, windy, class) VALUES('', 110, 100, 'true', ''); - - SELECT madlib.summary('example_data', 'example_data_output'); - SELECT madlib.summary('example_data', 'example_data_output', 'windy'); - SELECT madlib.summary('example_data', 'example_data_output', 'windy,humidity'); - SELECT madlib.summary('example_data', 'example_data_output', 'id', 'windy'); - SELECT madlib.summary('example_data', 'example_data_output', NULL, NULL, True, True, array[0.1, 0.2, 0.3]); - SELECT madlib.summary('example_data', 'example_data_output', NULL, NULL, True, True, array[0.1, 0.2, 0.3], 2); - SELECT madlib.summary('example_data', 'example_data_output', NULL, NULL, True, True, array[0.1, 0.2, 0.3], 2, False); - SELECT madlib.summary('example_data', 'example_data_output', NULL, NULL, True, True, array[0.1, 0.2, 0.3], 2, False, 2); - """ else: return """ 'summary' is a generic function used to produce summary statistics @@ -204,7 +167,4 @@ def summary_help_message(schema_madlib, message, **kwargs): ------- For an overview on usage, run: SELECT {madlib}.summary('usage'); - ------- - For an example, run: - SELECT {madlib}.summary('example') """.format(madlib=schema_madlib) diff --git 
a/src/ports/postgres/modules/svm/svm.py_in b/src/ports/postgres/modules/svm/svm.py_in index b8780ab71..d2d22c46f 100644 --- a/src/ports/postgres/modules/svm/svm.py_in +++ b/src/ports/postgres/modules/svm/svm.py_in @@ -516,9 +516,6 @@ def svm_one_class_help(schema_madlib, message, is_svc, **kwargs): For more details on function usage: SELECT {schema_madlib}.{method}('usage') - - For a small example on using the function: - SELECT {schema_madlib}.{method}('example') """.format(**args) usage = """ @@ -627,89 +624,11 @@ def svm_one_class_help(schema_madlib, message, is_svc, **kwargs): gaussian_usage = get_svc_gaussian_usage_string() poly_usage = get_svc_poly_usage_string() - example_usage = """ - --------------------------------------------------------------------------- - EXAMPLES - --------------------------------------------------------------------------- - - Create an input data set. - - CREATE TABLE houses (id INT, tax INT, bedroom INT, bath FLOAT, price INT, - size INT, lot INT); - COPY houses FROM STDIN WITH DELIMITER '|'; - 1 | 590 | 2 | 1 | 50000 | 770 | 22100 - 2 | 1050 | 3 | 2 | 85000 | 1410 | 12000 - 3 | 20 | 3 | 1 | 22500 | 1060 | 3500 - 4 | 870 | 2 | 2 | 90000 | 1300 | 17500 - 5 | 1320 | 3 | 2 | 133000 | 1500 | 30000 - 6 | 1350 | 2 | 1 | 90500 | 820 | 25700 - 7 | 2790 | 3 | 2.5 | 260000 | 2130 | 25000 - 8 | 680 | 2 | 1 | 142500 | 1170 | 22000 - 9 | 1840 | 3 | 2 | 160000 | 1500 | 19000 - 10 | 3680 | 4 | 2 | 240000 | 2790 | 20000 - 11 | 1660 | 3 | 1 | 87000 | 1030 | 17500 - 12 | 1620 | 3 | 2 | 118600 | 1250 | 20000 - 13 | 3100 | 3 | 2 | 140000 | 1760 | 38000 - 14 | 2070 | 2 | 3 | 148000 | 1550 | 14000 - 15 | 650 | 3 | 1.5 | 65000 | 1450 | 12000 - \. - - - Generate a non-linear one-class SVM using a Gaussian kernel. We - specify the initial step size and maximum number of iterations to run. - As part of the kernel parameter, we choose 10 as the dimension of the - space where we train SVM. 
A larger number will lead to a more powerful - model but run the risk of overfitting. As a result, the model will be a - 10 dimensional vector. - - select {schema_madlib}.svm_one_class('houses', - 'houses_one_class_gaussian', - 'ARRAY[1,tax,bedroom,bath,size,lot,price]', - 'gaussian', - 'gamma=0.01,n_components=10', - NULL, - 'max_iter=250, init_stepsize=100,lambda=0.9' - ); - - - Create a test data set. - DROP TABLE IF EXISTS houses_novelty_test; - CREATE TABLE houses_novelty_test (id INT, tax INT, bedroom INT, bath FLOAT, price INT, - size INT, lot INT); - COPY houses_novelty_test FROM STDIN WITH DELIMITER '|'; - 1 | 33590 | 12 | 11 | 5000000 | 12770 | 221100 - 2 | 1050 | 31 | 21 | 85000000 | 141210 | 120010 - 3 | 233330 | 13 | 11 | 22500000 | 112060 | 351100 - 4 | 833370 | 12 | 12 | 9000000 | 130120 | 1751100 - 5 | 132330 | 31 | 12 | 133000000 | 150120 | 30011100 - 6 | 135330 | 21 | 11 | 90500000 | 8212120 | 25711100 - 7 | 279330 | 31 | 21.5 | 260000000 | 213012 | 25011100 - 8 | 6803333 | 12 | 11 | 142500000 | 117012 | 22111000 - 9 | 33331840 | 31 | 12 | 160000000 | 150120 | 19011100 - 10 | 3780 | 4 | 2 | 220000 | 2790 | 21000 - 11 | 1760 | 3 | 1 | 77000 | 1030 | 18500 - 12 | 1520 | 3 | 2 | 128600 | 1250 | 21000 - 13 | 3000 | 3 | 2 | 130000 | 1760 | 37000 - 14 | 2170 | 2 | 3 | 138000 | 1550 | 13000 - 15 | 750 | 3 | 1.5 | 75000 | 1450 | 13000 - \. - - - Use the prediction function to evaluate the models. The predicted - results are in the prediction column and the actual data is in the - target column. 
- -- For the Gaussian model: - SELECT {schema_madlib}.svm_predict('houses_one_class_gaussian', - 'houses_test', - 'id', - 'houses_pred_gaussian'); - -- View the results of the prediction function: - SELECT * FROM houses_novelty_test JOIN houses_pred_gaussian USING (id) ORDER BY id; - - """.format(**args) if not message: return summary elif message.lower() in ('usage', 'help', '?'): return usage - elif message.lower() == 'example': - return example_usage elif message.lower() == 'params': return params_usage elif message.lower() == 'gaussian': @@ -849,84 +768,10 @@ def svm_help(schema_madlib, message, is_svc, **kwargs): gaussian_usage = get_svc_gaussian_usage_string() poly_usage = get_svc_poly_usage_string() - example_usage = """ - --------------------------------------------------------------------------- - EXAMPLES - --------------------------------------------------------------------------- - - Create an input data set. - - CREATE TABLE houses (id INT, tax INT, bedroom INT, bath FLOAT, price INT, - size INT, lot INT); - COPY houses FROM STDIN WITH DELIMITER '|'; - 1 | 590 | 2 | 1 | 50000 | 770 | 22100 - 2 | 1050 | 3 | 2 | 85000 | 1410 | 12000 - 3 | 20 | 3 | 1 | 22500 | 1060 | 3500 - 4 | 870 | 2 | 2 | 90000 | 1300 | 17500 - 5 | 1320 | 3 | 2 | 133000 | 1500 | 30000 - 6 | 1350 | 2 | 1 | 90500 | 820 | 25700 - 7 | 2790 | 3 | 2.5 | 260000 | 2130 | 25000 - 8 | 680 | 2 | 1 | 142500 | 1170 | 22000 - 9 | 1840 | 3 | 2 | 160000 | 1500 | 19000 - 10 | 3680 | 4 | 2 | 240000 | 2790 | 20000 - 11 | 1660 | 3 | 1 | 87000 | 1030 | 17500 - 12 | 1620 | 3 | 2 | 118600 | 1250 | 20000 - 13 | 3100 | 3 | 2 | 140000 | 1760 | 38000 - 14 | 2070 | 2 | 3 | 148000 | 1550 | 14000 - 15 | 650 | 3 | 1.5 | 65000 | 1450 | 12000 - \. - - - Train a classification model, using a linear model. - - SELECT {schema_madlib}.svm_classification('houses', - 'houses_svm', - 'price < 100000', - 'ARRAY[1, tax, bath, size]'); - - - Generate a nonlinear model using a Gaussian kernel. 
This time we - specify the initial step size and maximum number of iterations to run. - As part of the kernel parameter, we choose 10 as the dimension of the - space where we train SVM. A larger number will lead to a more powerful - model but run the risk of overfitting. As a result, the model will be a - 10 dimensional vector, instead of 4 as in the case of linear model. - - SELECT {schema_madlib}.svm_classification( 'houses', - 'houses_svm_gaussian', - 'price < 100000', - 'ARRAY[1, tax, bath, size]', - 'gaussian', - 'n_components=10', - '', - 'init_stepsize=1, max_iter=200'); - - - Use the prediction function to evaluate the models. The predicted - results are in the prediction column and the actual data is in the - target column. - - -- For the linear model: - SELECT {schema_madlib}.svm_predict('houses_svm', - 'houses', - 'id', - 'houses_pred'); - SELECT *, price < 100000 AS target - FROM houses JOIN houses_pred - USING (id) ORDER BY id; - - -- For the Gaussian model: - SELECT {schema_madlib}.svm_predict('houses_svm_gaussian', - 'houses', - 'id', - 'houses_pred_gaussian'); - SELECT *, price < 100000 AS target - FROM houses JOIN houses_pred_gaussian - USING (id) ORDER BY id; - """.format(**args) - if not message: return summary elif message.lower() in ('usage', 'help', '?'): return usage - elif message.lower() in ('example', 'examples'): - return example_usage elif message.lower() == 'params': return params_usage elif message.lower() == 'gaussian': diff --git a/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in b/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in index cbcd9b7f0..1238104de 100644 --- a/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in +++ b/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in @@ -490,9 +490,6 @@ class MiniBatchDocumentation: For more details on function usage: SELECT {schema_madlib}.{method}('usage') - - For a small example on using the function: - SELECT 
{schema_madlib}.{method}('example') """.format(**locals()) usage = """ @@ -557,58 +554,11 @@ class MiniBatchDocumentation: for normalization). """.format(**locals()) - example = """ - -- Create input table - CREATE TABLE iris_data( - id INTEGER, - attributes NUMERIC[], - class_text text, - class INTEGER, - state VARCHAR - ); - - COPY iris_data (attributes, class_text, class, state) FROM STDIN NULL '?' DELIMITER '|'; - {4.4,3.2,1.3,0.2}|Iris_setosa|1|Alaska - {5.0,3.5,1.6,0.6}|Iris_setosa|1|Alaska - {5.1,3.8,1.9,0.4}|Iris_setosa|1|Alaska - {4.8,3.0,1.4,0.3}|Iris_setosa|1|Alaska - {5.1,3.8,1.6,0.2}|Iris_setosa|1|Alaska - {5.7,2.8,4.5,1.3}|Iris_versicolor|2|Alaska - {6.3,3.3,4.7,1.6}|Iris_versicolor|2|Alaska - {4.9,2.4,3.3,1.0}|Iris_versicolor|2|Alaska - {6.6,2.9,4.6,1.3}|Iris_versicolor|2|Alaska - {5.2,2.7,3.9,1.4}|Iris_versicolor|2|Alaska - {5.0,2.0,3.5,1.0}|Iris_versicolor|2|Alaska - {4.8,3.0,1.4,0.1}|Iris_setosa|1|Tennessee - {4.3,3.0,1.1,0.1}|Iris_setosa|1|Tennessee - {5.8,4.0,1.2,0.2}|Iris_setosa|1|Tennessee - {5.7,4.4,1.5,0.4}|Iris_setosa|1|Tennessee - {5.4,3.9,1.3,0.4}|Iris_setosa|1|Tennessee - {6.0,2.9,4.5,1.5}|Iris_versicolor|2|Tennessee - {5.7,2.6,3.5,1.0}|Iris_versicolor|2|Tennessee - {5.5,2.4,3.8,1.1}|Iris_versicolor|2|Tennessee - {5.5,2.4,3.7,1.0}|Iris_versicolor|2|Tennessee - {5.8,2.7,3.9,1.2}|Iris_versicolor|2|Tennessee - {6.0,2.7,5.1,1.6}|Iris_versicolor|2|Tennessee - \. 
- - -- #TODO add description here - DROP TABLE IF EXISTS iris_data_batch, iris_data_batch_standardization, iris_data_batch_summary; - SELECT madlib.minibatch_preprocessor('iris_data', 'iris_data_batch', 'class_text', 'attributes', 3); - - - -- #TODO add description here NULL buffer size - DROP TABLE IF EXISTS iris_data_batch, iris_data_batch_standardization, iris_data_batch_summary; - SELECT madlib.minibatch_preprocessor('iris_data', 'iris_data_batch', 'class_text', 'attributes'); - - """ if not message: return summary elif message.lower() in ('usage', 'help', '?'): return usage - elif message.lower() == 'example': - return example return """ No such option. Use "SELECT {schema_madlib}.minibatch_preprocessor()" for help. diff --git a/src/ports/postgres/modules/utilities/path.py_in b/src/ports/postgres/modules/utilities/path.py_in index 37457ff24..acbaf8d52 100644 --- a/src/ports/postgres/modules/utilities/path.py_in +++ b/src/ports/postgres/modules/utilities/path.py_in @@ -321,9 +321,6 @@ involved like aggregation. For more details on function usage: SELECT {schema_madlib}.path('usage'); - -For a small example on using the function: - SELECT {schema_madlib}.path('example'); """.format(schema_madlib=schema_madlib) usage_string = """ @@ -345,76 +342,10 @@ SELECT {schema_madlib}.path( ); """.format(schema_madlib=schema_madlib) - example_string = """ ---------------------------------------------------------------------------- - EXAMPLE ---------------------------------------------------------------------------- -- Create an input data set. 
- -DROP TABLE IF EXISTS eventlog, path_output, path_output_tuples; -CREATE TABLE eventlog (event_timestamp TIMESTAMP, - user_id INT, - session_id INT, - page TEXT, - revenue FLOAT); -INSERT INTO eventlog VALUES -('04/15/2015 01:03:00', 100821, 100, 'LANDING', 0), -('04/15/2015 01:04:00', 100821, 100, 'WINE', 0), -('04/15/2015 01:05:00', 100821, 100, 'CHECKOUT', 39), -('04/15/2015 02:06:00', 100821, 101, 'WINE', 0), -('04/15/2015 02:09:00', 100821, 101, 'WINE', 0), -('04/15/2015 01:15:00', 101121, 102, 'LANDING', 0), -('04/15/2015 01:16:00', 101121, 102, 'WINE', 0), -('04/15/2015 01:17:00', 101121, 102, 'CHECKOUT', 15), -('04/15/2015 01:18:00', 101121, 102, 'LANDING', 0), -('04/15/2015 01:19:00', 101121, 102, 'HELP', 0), -('04/15/2015 01:21:00', 101121, 102, 'WINE', 0), -('04/15/2015 01:22:00', 101121, 102, 'CHECKOUT', 23), -('04/15/2015 02:15:00', 101331, 103, 'LANDING', 0), -('04/15/2015 02:16:00', 101331, 103, 'WINE', 0), -('04/15/2015 02:17:00', 101331, 103, 'HELP', 0), -('04/15/2015 02:18:00', 101331, 103, 'WINE', 0), -('04/15/2015 02:19:00', 101331, 103, 'CHECKOUT', 16), -('04/15/2015 02:22:00', 101443, 104, 'BEER', 0), -('04/15/2015 02:25:00', 101443, 104, 'CHECKOUT', 12), -('04/15/2015 02:29:00', 101881, 105, 'LANDING', 0), -('04/15/2015 02:30:00', 101881, 105, 'BEER', 0), -('04/15/2015 01:05:00', 102201, 106, 'LANDING', 0), -('04/15/2015 01:06:00', 102201, 106, 'HELP', 0), -('04/15/2015 01:09:00', 102201, 106, 'LANDING', 0), -('04/15/2015 02:15:00', 102201, 107, 'WINE', 0), -('04/15/2015 02:16:00', 102201, 107, 'BEER', 0), -('04/15/2015 02:17:00', 102201, 107, 'WINE', 0), -('04/15/2015 02:18:00', 102871, 108, 'BEER', 0), -('04/15/2015 02:19:00', 102871, 108, 'WINE', 0), -('04/15/2015 02:22:00', 102871, 108, 'CHECKOUT', 21), -('04/15/2015 02:25:00', 102871, 108, 'LANDING', 0), -('04/15/2015 02:17:00', 103711, 109, 'BEER', 0), -('04/15/2015 02:18:00', 103711, 109, 'LANDING', 0), -('04/15/2015 02:19:00', 103711, 109, 'WINE', 0); - -- Calculate the revenue by 
checkout: - -SELECT {schema_madlib}.path( - 'eventlog', -- Name of input table - 'path_output', -- Table name to store path results - 'session_id', -- Partition input table by session - 'event_timestamp ASC', -- Order partitions in input table by time - 'buy:=page=''CHECKOUT''', -- Define a symbol for checkout events - '(buy)', -- Pattern search: purchase - 'sum(revenue) as checkout_rev', -- Aggregate: sum revenue by checkout - TRUE -- Persist matches - ); - -SELECT * FROM path_output ORDER BY session_id, match_id; - """.format(schema_madlib=schema_madlib) - if not message: return summary_string elif message.lower() in ('usage', 'help', '?'): return usage_string - elif message.lower() in ('example', 'examples'): - return example_string else: return """ No such option. Use "SELECT {schema_madlib}.path()" for help. diff --git a/src/ports/postgres/modules/utilities/sessionize.py_in b/src/ports/postgres/modules/utilities/sessionize.py_in index ccd0a3e23..278e1f8d2 100644 --- a/src/ports/postgres/modules/utilities/sessionize.py_in +++ b/src/ports/postgres/modules/utilities/sessionize.py_in @@ -131,15 +131,12 @@ def sessionize_help_message(schema_madlib, message, **kwargs): ----------------------------------------------------------------------------------- Functionality: Sessionize -The MADlib sessionize function performs time-oriented session reconstruction on a -data set comprising a sequence of events. A defined period of inactivity indicates +The MADlib sessionize function performs time-oriented session reconstruction on a +data set comprising a sequence of events. A defined period of inactivity indicates the end of one session and beginning of the next session. 
For more details on function usage: SELECT {schema_madlib}.sessionize('usage'); - -For a small example on using the function: - SELECT {schema_madlib}.sessionize('example'); """.format(schema_madlib=schema_madlib) usage_string = """ @@ -157,92 +154,17 @@ SELECT {schema_madlib}.sessionize( -- a session 'output_cols' -- str, An optional valid postgres SELECT expression for the -- output table/view (default *) - 'create_view' -- boolean, Optional parameter to specify if output is a + 'create_view' -- boolean, Optional parameter to specify if output is a -- view or materilized to a table (default True) ); """.format(schema_madlib=schema_madlib) - example_string = """ ------------------------------------------------------------------------------------ - EXAMPLE ------------------------------------------------------------------------------------ -- Create an input data set: - -DROP TABLE IF EXISTS eventlog; -CREATE TABLE eventlog (event_timestamp TIMESTAMP, - user_id INT, - page TEXT, - revenue FLOAT); -INSERT INTO eventlog VALUES -('04/15/2015 02:19:00', 101331, 'CHECKOUT', 16), -('04/15/2015 02:17:00', 202201, 'WINE', 0), -('04/15/2015 03:18:00', 202201, 'BEER', 0), -('04/15/2015 01:03:00', 100821, 'LANDING', 0), -('04/15/2015 01:04:00', 100821, 'WINE', 0), -('04/15/2015 01:05:00', 100821, 'CHECKOUT', 39), -('04/15/2015 02:06:00', 100821, 'WINE', 0), -('04/15/2015 02:09:00', 100821, 'WINE', 0), -('04/15/2015 02:15:00', 101331, 'LANDING', 0), -('04/15/2015 02:16:00', 101331, 'WINE', 0), -('04/15/2015 02:17:00', 101331, 'HELP', 0), -('04/15/2015 02:18:00', 101331, 'WINE', 0), -('04/15/2015 02:29:00', 201881, 'LANDING', 0), -('04/15/2015 02:30:00', 201881, 'BEER', 0), -('04/15/2015 01:05:00', 202201, 'LANDING', 0), -('04/15/2015 01:06:00', 202201, 'HELP', 0), -('04/15/2015 01:09:00', 202201, 'LANDING', 0), -('04/15/2015 02:15:00', 202201, 'WINE', 0), -('04/15/2015 02:16:00', 202201, 'BEER', 0), -('04/15/2015 03:19:00', 202201, 'WINE', 0), -('04/15/2015 03:22:00', 202201, 
'CHECKOUT', 21); - -- Sessionize the table for each user_id, and obtain only the user_id, with partition -expression, event_timestamp and session_id: - -SELECT {schema_madlib}.sessionize( - 'eventlog', -- Name of input table - 'sessionize_output', -- Table name to store sessionized results - 'user_id', -- Partition input table by session - 'event_timestamp', -- Order partitions in input table by time - '0:30:0' -- Use 30 minute time out to define sessions - ); - -- View the output table containing the session IDs: - -SELECT * FROM sessionize_output; - -DROP VIEW sessionize_output; - -- Sessionize the table for each user_id, and materialize all columns from -source table into an output table: - -SELECT {schema_madlib}.sessionize( - 'eventlog', -- Name of input table - 'sessionize_output', -- Table name to store sessionized results - 'user_id < 200000', -- Partition input table by session - 'event_timestamp', -- Order partitions in input table by time - '180', -- Use 3 minutes (180 seconds) to define sessions - 'event_timestamp, user_id, user_id < 200000 AS "Department-A1"', - -- Select only the required columns, along with the - -- session id column that is selected by default - 'false' -- Materialize results into a table, and not a view - ); - -- View the output table containing the session IDs: - -SELECT * FROM sessionize_output WHERE "Department-A1"='TRUE'; - -DROP TABLE sessionize_output; - """.format(schema_madlib=schema_madlib) - help_string = summary_string if not message: return summary_string elif message.lower() in ('usage', 'help', '?'): return usage_string - elif message.lower() == 'example': - return example_string else: return """ No such option. Use "SELECT {schema_madlib}.sessionize()" for help.