From 23081cd3fbda244d682aaf0da7b23b6bb7575317 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Tue, 21 Apr 2020 11:56:38 -0700 Subject: [PATCH 01/28] Copy histogram utils from custom monitoring --- .../verta/_internal_utils/_data_utils.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 client/verta/verta/_internal_utils/_data_utils.py diff --git a/client/verta/verta/_internal_utils/_data_utils.py b/client/verta/verta/_internal_utils/_data_utils.py new file mode 100644 index 0000000000..79a9aab869 --- /dev/null +++ b/client/verta/verta/_internal_utils/_data_utils.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- + +def calculate_bin_boundaries(data, num_bins=10): + """ + Calculates boundaries for `num_bins` equally-spaced histogram bins. + + Parameters + ---------- + data : sequence of numbers + Continuous data to be binned. + num_bins : int, default 10 + Number of bins to use. + + Returns + ------- + bin_boundaries : list of float with length `num_bins`+1 + Boundaries of histogram bins. + + """ + start, stop = min(data), max(data) + space = (stop - start)/num_bins + return [start + space*i for i in range(num_bins+1)] + + +def calculate_reference_counts(data, bin_boundaries): + """ + Fits `data` into the histogram bins defined by `bin_boundaries`. + + Parameters + ---------- + data : sequence of numbers + Numerical data to be binned. + bin_boundaries : list of float of length N+1 + Boundaries for a histogram's N bins. + + Returns + ------- + list of float with length N + Counts of `data` values in each bin defined by `bin_boundaries`. + + Raises + ------ + ValueError + If `data` contains a value outside the range defined by `bin_boundaries`. + + """ + # TODO: there is definitely a faster way to do this + reference_counts = [] + for l, r in zip(bin_boundaries[:-1], bin_boundaries[1:]): + count = len([datum for datum in data if l <= datum < r]) + reference_counts.append(count) + + return reference_counts From 47f3c2b75dea6a8a08ea3c8fc6eb4a4ac7a355f6 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Tue, 21 Apr 2020 15:54:26 -0700 Subject: [PATCH 02/28] Consolidate into one fn --- .../verta/_internal_utils/_data_utils.py | 47 +++++++------------ 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/client/verta/verta/_internal_utils/_data_utils.py b/client/verta/verta/_internal_utils/_data_utils.py index 79a9aab869..4e2851b55d 100644 --- a/client/verta/verta/_internal_utils/_data_utils.py +++ b/client/verta/verta/_internal_utils/_data_utils.py @@ -1,53 +1,38 @@ # -*- coding: utf-8 -*- -def calculate_bin_boundaries(data, num_bins=10): +def calculate_float_histogram(data, num_bins=10): """ - Calculates boundaries for `num_bins` equally-spaced histogram bins. + Calculates a histogram for continuous `data`. Parameters ---------- - data : sequence of numbers + data : list-like of float Continuous data to be binned. num_bins : int, default 10 Number of bins to use. Returns ------- - bin_boundaries : list of float with length `num_bins`+1 - Boundaries of histogram bins. + histogram : dict """ + # calculate bin boundaries start, stop = min(data), max(data) space = (stop - start)/num_bins - return [start + space*i for i in range(num_bins+1)] + bin_boundaries = [start + space*i for i in range(num_bins+1)] - -def calculate_reference_counts(data, bin_boundaries): - """ - Fits `data` into the histogram bins defined by `bin_boundaries`. - - Parameters - ---------- - data : sequence of numbers - Numerical data to be binned. - bin_boundaries : list of float of length N+1 - Boundaries for a histogram's N bins. - - Returns - ------- - list of float with length N - Counts of `data` values in each bin defined by `bin_boundaries`. - - Raises - ------ - ValueError - If `data` contains a value outside the range defined by `bin_boundaries`. - - """ - # TODO: there is definitely a faster way to do this + # fit `data` into bins reference_counts = [] for l, r in zip(bin_boundaries[:-1], bin_boundaries[1:]): count = len([datum for datum in data if l <= datum < r]) reference_counts.append(count) - return reference_counts + return { + 'histogram': { + 'float': { + 'bucket_limits': bin_boundaries, + 'count': reference_counts, + }, + }, + 'type': "float", + } From 54afc6a73c0c2c377fd18146d8a8aa7da9da78ce Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Thu, 23 Apr 2020 07:50:12 -0700 Subject: [PATCH 03/28] Rename utils file --- .../verta/_internal_utils/{_data_utils.py => _histogram_utils.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename client/verta/verta/_internal_utils/{_data_utils.py => _histogram_utils.py} (100%) diff --git a/client/verta/verta/_internal_utils/_data_utils.py b/client/verta/verta/_internal_utils/_histogram_utils.py similarity index 100% rename from client/verta/verta/_internal_utils/_data_utils.py rename to client/verta/verta/_internal_utils/_histogram_utils.py From d37ef13729b9a17efbf7b6ef43a28cb558352b7f Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Thu, 23 Apr 2020 08:22:03 -0700 Subject: [PATCH 04/28] Create and PUT float histogram --- .../verta/_internal_utils/_histogram_utils.py | 10 ++++++ client/verta/verta/client.py | 31 ++++++++++++++++--- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/client/verta/verta/_internal_utils/_histogram_utils.py b/client/verta/verta/_internal_utils/_histogram_utils.py index 4e2851b55d..81439dbd7c 100644 --- a/client/verta/verta/_internal_utils/_histogram_utils.py +++ b/client/verta/verta/_internal_utils/_histogram_utils.py @@ -1,5 +1,11 @@ # -*- coding: utf-8 -*- +def calculate_binary_histogram(data): + raise NotImplementedError + +def calculate_discrete_histogram(data): + raise NotImplementedError + def calculate_float_histogram(data, num_bins=10): """ Calculates a histogram for continuous `data`. @@ -36,3 +42,7 @@ def calculate_float_histogram(data, num_bins=10): }, 'type': "float", } + + +class HistogramError(TypeError): + pass diff --git a/client/verta/verta/client.py b/client/verta/verta/client.py index c88b4ca0a5..3753891740 100644 --- a/client/verta/verta/client.py +++ b/client/verta/verta/client.py @@ -39,6 +39,7 @@ from ._internal_utils import _artifact_utils from ._internal_utils import _config_utils from ._internal_utils import _git_utils +from ._internal_utils import _histogram_utils from ._internal_utils import _pip_requirements_utils from ._internal_utils import _utils @@ -3586,11 +3587,33 @@ def log_training_data(self, train_features, train_targets, overwrite=False): train_df = train_features.join(train_targets) - tempf = tempfile.TemporaryFile('w+') - train_df.to_csv(tempf, index=False) - tempf.seek(0) + histograms = {'features': {}} + for colname in train_df: + col = train_df[colname] + histogram = None - self._log_artifact("train_data", tempf, _CommonService.ArtifactTypeEnum.DATA, 'csv', overwrite=overwrite) + try: # binary + pass#histogram = _histogram_utils.calculate_binary_histogram(col) + except _histogram_utils.HistogramError: + pass + + try: # discrete/categorical + pass#histogram = _histogram_utils.calculate_discrete_histogram(col) + except _histogram_utils.HistogramError: + pass + + # continuous + histogram = _histogram_utils.calculate_float_histogram(col) + + histograms['features'][colname] = histogram + + endpoint = "{}://{}/api/v1/monitoring/data/references/{}".format( + self._conn.scheme, + self._conn.socket, + self.id, + ) + response = _utils.make_request("PUT", endpoint, self._conn, json=histograms) + _utils.raise_for_http_error(response) def fetch_artifacts(self, keys): """ From 8d548cadb4e581c3779ddd991c04833d5b790f2a Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Thu, 23 Apr 2020 08:31:00 -0700 Subject: [PATCH 05/28] Move branch logic to util fn --- .../verta/_internal_utils/_histogram_utils.py | 14 ++++++++++++++ client/verta/verta/client.py | 17 +---------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/client/verta/verta/_internal_utils/_histogram_utils.py b/client/verta/verta/_internal_utils/_histogram_utils.py index 81439dbd7c..bd87836c64 100644 --- a/client/verta/verta/_internal_utils/_histogram_utils.py +++ b/client/verta/verta/_internal_utils/_histogram_utils.py @@ -1,5 +1,19 @@ # -*- coding: utf-8 -*- +def calculate_histogram(data): + try: # binary + pass#return calculate_binary_histogram(col) + except HistogramError: + pass + + try: # discrete/categorical + pass#return calculate_discrete_histogram(col) + except HistogramError: + pass + + # continuous + return calculate_float_histogram(data) + def calculate_binary_histogram(data): raise NotImplementedError diff --git a/client/verta/verta/client.py b/client/verta/verta/client.py index 3753891740..9c21e5aff9 100644 --- a/client/verta/verta/client.py +++ b/client/verta/verta/client.py @@ -3589,22 +3589,7 @@ def log_training_data(self, train_features, train_targets, overwrite=False): histograms = {'features': {}} for colname in train_df: - col = train_df[colname] - histogram = None - - try: # binary - pass#histogram = _histogram_utils.calculate_binary_histogram(col) - except _histogram_utils.HistogramError: - pass - - try: # discrete/categorical - pass#histogram = _histogram_utils.calculate_discrete_histogram(col) - except _histogram_utils.HistogramError: - pass - - # continuous - histogram = _histogram_utils.calculate_float_histogram(col) - + histogram = _histogram_utils.calculate_histogram(train_df[colname]) histograms['features'][colname] = histogram endpoint = "{}://{}/api/v1/monitoring/data/references/{}".format( From cf79e5c69381aebc4e670b85ea422700bce5d407 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Thu, 23 Apr 2020 08:38:52 -0700 Subject: [PATCH 06/28] Add TODO --- client/verta/verta/_internal_utils/_histogram_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/_internal_utils/_histogram_utils.py b/client/verta/verta/_internal_utils/_histogram_utils.py index bd87836c64..b58fc14ee6 100644 --- a/client/verta/verta/_internal_utils/_histogram_utils.py +++ b/client/verta/verta/_internal_utils/_histogram_utils.py @@ -58,5 +58,5 @@ def calculate_float_histogram(data, num_bins=10): } -class HistogramError(TypeError): +class HistogramError(TypeError): # TODO: move to exceptions submodule pass From b75fe2d940f692a86bd613477c6e04147d57547b Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Thu, 23 Apr 2020 16:52:11 -0700 Subject: [PATCH 07/28] Impl discrete histogram --- .../verta/_internal_utils/_histogram_utils.py | 69 ++++++++++++++++--- 1 file changed, 61 insertions(+), 8 deletions(-) diff --git a/client/verta/verta/_internal_utils/_histogram_utils.py b/client/verta/verta/_internal_utils/_histogram_utils.py index b58fc14ee6..bedbd23b2b 100644 --- a/client/verta/verta/_internal_utils/_histogram_utils.py +++ b/client/verta/verta/_internal_utils/_histogram_utils.py @@ -2,23 +2,76 @@ def calculate_histogram(data): try: # binary - pass#return calculate_binary_histogram(col) + pass#return calculate_binary_histogram(data) except HistogramError: pass - try: # discrete/categorical - pass#return calculate_discrete_histogram(col) + try: # discrete + return calculate_discrete_histogram(data) except HistogramError: pass # continuous return calculate_float_histogram(data) -def calculate_binary_histogram(data): - raise NotImplementedError - def calculate_discrete_histogram(data): - raise NotImplementedError + """ + Calculates a histogram for discrete `data`. + + Parameters + ---------- + data : pandas.Series of int + Discrete data to be binned. + + Returns + ------- + histogram : dict + + Raises + ------ + HistogramError + + """ + value_counts = data.value_counts().sort_index() + values = value_counts.index.tolist() + counts = value_counts.values.tolist() + + # reject non-numbers + try: + values = list(map(float, values)) + except ValueError: + raise HistogramError( + "values must be castable to numbers" + ) + + # reject non-integral floats + if not all(value.is_integer() for value in values): + raise HistogramError( + "values must be integers" + ) + values = list(map(int, values)) + + # heuristic: reject if too many values + if len(values) > 10: + raise HistogramError( + "got {} possible discrete values but heuristic says the maximum is 10".format(len(values)) + ) + + # heuristic: reject if counts don't seem high enough + if value_counts.mean() < 10: + raise HistogramError( + "heuristic says that each discrete value should average at least 10 appearances" + ) + + return { + 'histogram': { + 'discrete': { + 'bucket_values': values, + 'count': counts, + }, + }, + 'type': "discrete", + } def calculate_float_histogram(data, num_bins=10): """ @@ -26,7 +79,7 @@ def calculate_float_histogram(data, num_bins=10): Parameters ---------- - data : list-like of float + data : pandas.Series of float Continuous data to be binned. num_bins : int, default 10 Number of bins to use. From 68e3c88888e20735eeb017daac1a1e1e55bc7d91 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Mon, 27 Apr 2020 11:45:17 -0700 Subject: [PATCH 08/28] Impl binanry histogram --- .../verta/_internal_utils/_histogram_utils.py | 69 +- .../workflows/demos/census-end-to-end.ipynb | 643 +++++++++++++----- 2 files changed, 550 insertions(+), 162 deletions(-) diff --git a/client/verta/verta/_internal_utils/_histogram_utils.py b/client/verta/verta/_internal_utils/_histogram_utils.py index bedbd23b2b..bce84b44c9 100644 --- a/client/verta/verta/_internal_utils/_histogram_utils.py +++ b/client/verta/verta/_internal_utils/_histogram_utils.py @@ -1,8 +1,11 @@ # -*- coding: utf-8 -*- +from ..external import six + + def calculate_histogram(data): try: # binary - pass#return calculate_binary_histogram(data) + return calculate_binary_histogram(data) except HistogramError: pass @@ -14,6 +17,70 @@ def calculate_histogram(data): # continuous return calculate_float_histogram(data) +def calculate_binary_histogram(data): + """ + Parameters + ---------- + data : pandas.Series + Binary data to be binned. + + Returns + ------- + histogram : dict + + Raises + ------ + HistogramError + + """ + values = data.values.tolist() + + zeros = 0 + ones = 0 + for value in values: + if isinstance(value, bool): + if value == False: + zeros += 1 + continue + elif value == True: + ones += 1 + continue + + if isinstance(value, six.string_types): + # handle bool-like strings + if value.lower() == "false": + zeros += 1 + continue + elif value.lower() == "true": + ones += 1 + continue + + # handle num-like strings (falls through to numeric case) + try: + value = float(value) + except ValueError: + pass + + if isinstance(value, (six.integer_types, float)): + if value == 0: + zeros += 1 + continue + elif value == 1: + ones += 1 + continue + + # unsupported value + raise HistogramError("invalid binanry value {}".format(value)) + + return { + 'histogram': { + 'binary': { + 'count': [zeros, ones], + }, + }, + 'type': "binary", + } + def calculate_discrete_histogram(data): """ Calculates a histogram for discrete `data`. diff --git a/client/workflows/demos/census-end-to-end.ipynb b/client/workflows/demos/census-end-to-end.ipynb index 6df10250b6..1cc025a92c 100644 --- a/client/workflows/demos/census-end-to-end.ipynb +++ b/client/workflows/demos/census-end-to-end.ipynb @@ -44,7 +44,7 @@ "metadata": {}, "outputs": [], "source": [ - "HOST = \"app.verta.ai\"\n", + "HOST = \"dev.verta.ai\"\n", "\n", "PROJECT_NAME = \"Census Income Classification\"\n", "EXPERIMENT_NAME = \"Logistic Regression\"" @@ -152,7 +152,235 @@ "cell_type": "code", "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agecapital-gaincapital-losshours-per-weekworkclass_local-govworkclass_privateworkclass_self-emp-incworkclass_self-emp-not-incworkclass_state-govworkclass_without-pay...occupation_handlers-cleanersoccupation_machine-op-inspctoccupation_other-serviceoccupation_priv-house-servoccupation_prof-specialtyoccupation_protective-servoccupation_salesoccupation_tech-supportoccupation_transport-moving>50k
0440040010000...0000000000
1210040010000...0010000000
2537298060010000...0000001001
3490040010000...0010000000
4530148540010000...0000001001
\n", + "

5 rows × 44 columns

\n", + "
" + ], + "text/plain": [ + " age capital-gain capital-loss hours-per-week workclass_local-gov \\\n", + "0 44 0 0 40 0 \n", + "1 21 0 0 40 0 \n", + "2 53 7298 0 60 0 \n", + "3 49 0 0 40 0 \n", + "4 53 0 1485 40 0 \n", + "\n", + " workclass_private workclass_self-emp-inc workclass_self-emp-not-inc \\\n", + "0 1 0 0 \n", + "1 1 0 0 \n", + "2 1 0 0 \n", + "3 1 0 0 \n", + "4 1 0 0 \n", + "\n", + " workclass_state-gov workclass_without-pay ... \\\n", + "0 0 0 ... \n", + "1 0 0 ... \n", + "2 0 0 ... \n", + "3 0 0 ... \n", + "4 0 0 ... \n", + "\n", + " occupation_handlers-cleaners occupation_machine-op-inspct \\\n", + "0 0 0 \n", + "1 0 0 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + "\n", + " occupation_other-service occupation_priv-house-serv \\\n", + "0 0 0 \n", + "1 1 0 \n", + "2 0 0 \n", + "3 1 0 \n", + "4 0 0 \n", + "\n", + " occupation_prof-specialty occupation_protective-serv occupation_sales \\\n", + "0 0 0 0 \n", + "1 0 0 0 \n", + "2 0 0 1 \n", + "3 0 0 0 \n", + "4 0 0 1 \n", + "\n", + " occupation_tech-support occupation_transport-moving >50k \n", + "0 0 0 0 \n", + "1 0 0 0 \n", + "2 0 0 1 \n", + "3 0 0 0 \n", + "4 0 0 1 \n", + "\n", + "[5 rows x 44 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_train = pd.read_csv(train_data_filename)\n", "X_train = df_train.iloc[:,:-1]\n", @@ -197,7 +425,19 @@ "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "set email from environment\n", + "set developer key from environment\n", + "connection successfully established\n", + "set existing Project: Census Income Classification from personal workspace\n", + "set existing Experiment: Logistic Regression\n" + ] + } + ], "source": [ "from verta import Client\n", "from verta.utils import ModelAPI\n", @@ -220,46 +460,53 @@ "metadata": { "scrolled": true }, - "outputs": [], - "source": [ - "def run_experiment(hyperparams):\n", - " # create object to track experiment run\n", - " run = client.set_experiment_run()\n", - " \n", - " # create validation split\n", - " (X_val_train, X_val_test,\n", - " y_val_train, y_val_test) = model_selection.train_test_split(X_train, y_train,\n", - " test_size=0.2,\n", - " shuffle=True)\n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "created new ExperimentRun: Run 558931588011935496772\n", + "{'C': 1e-06, 'solver': 'lbfgs', 'max_iter': 15} Validation accuracy: 0.7952\n", + "upload complete (custom_modules.zip)\n", + "upload complete (model.pkl)\n", + "upload complete (model_api.json)\n", + "upload complete (requirements.txt)\n" + ] + } + ], + "source": [ + "hyperparams = hyperparam_sets[0]\n", + "\n", + "# create object to track experiment run\n", + "run = client.set_experiment_run()\n", + "\n", + "# create validation split\n", + "(X_val_train, X_val_test,\n", + " y_val_train, y_val_test) = model_selection.train_test_split(X_train, y_train,\n", + " test_size=0.2,\n", + " shuffle=True)\n", + "\n", + "# log hyperparameters\n", + "run.log_hyperparameters(hyperparams)\n", + "print(hyperparams, end=' ')\n", "\n", - " # log hyperparameters\n", - " run.log_hyperparameters(hyperparams)\n", - " print(hyperparams, end=' ')\n", - " \n", - " # create and train model\n", - " model = linear_model.LogisticRegression(**hyperparams)\n", - " model.fit(X_train, y_train)\n", - " \n", - " # calculate and log validation accuracy\n", - " val_acc = model.score(X_val_test, y_val_test)\n", - " run.log_metric(\"val_acc\", val_acc)\n", - " print(\"Validation accuracy: {:.4f}\".format(val_acc))\n", - " \n", - " # create deployment artifacts\n", - " model_api = ModelAPI(X_train, model.predict(X_train))\n", - " requirements = [\"scikit-learn\"]\n", - " \n", - " # save and log model\n", - " run.log_model(model, model_api=model_api)\n", - " run.log_requirements(requirements)\n", - " run.log_training_data(X_train, y_train)\n", - " \n", - " # log Git information as code version\n", - " run.log_code()\n", - " \n", - "pool = multiprocessing.Pool()\n", - "pool.map(run_experiment, hyperparam_sets)\n", - "pool.close()" + "# create and train model\n", + "model = linear_model.LogisticRegression(**hyperparams)\n", + "model.fit(X_train, y_train)\n", + "\n", + "# calculate and log validation accuracy\n", + "val_acc = model.score(X_val_test, y_val_test)\n", + "run.log_metric(\"val_acc\", val_acc)\n", + "print(\"Validation accuracy: {:.4f}\".format(val_acc))\n", + "\n", + "# create deployment artifacts\n", + "model_api = ModelAPI(X_train, model.predict(X_train))\n", + "requirements = [\"scikit-learn\"]\n", + "\n", + "# save and log model\n", + "run.log_model(model, model_api=model_api)\n", + "run.log_requirements(requirements)\n", + "run.log_training_data(X_train, y_train)" ] }, { @@ -269,141 +516,215 @@ "---" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Revisit Workflow" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Retrieve Best Run" - ] - }, { "cell_type": "code", "execution_count": 11, "metadata": {}, - "outputs": [], - "source": [ - "best_run = expt.expt_runs.sort(\"metrics.val_acc\", descending=True)[0]\n", - "print(\"Validation Accuracy: {:.4f}\".format(best_run.get_metric(\"val_acc\")))\n", - "\n", - "best_hyperparams = best_run.get_hyperparameters()\n", - "print(\"Hyperparameters: {}\".format(best_hyperparams))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Train on Full Dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "model = linear_model.LogisticRegression(multi_class='auto', **best_hyperparams)\n", - "model.fit(X_train, y_train)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Calculate Accuracy on Full Training Set" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "train_acc = model.score(X_train, y_train)\n", - "print(\"Training accuracy: {:.4f}\".format(train_acc))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Make Live Predictions" + "outputs": [ + { + "data": { + "text/plain": [ + "name: Run 558931588011935496772\n", + "url: https://dev.verta.ai/Convoliution/projects/8f7dc23e-3e0a-40b6-a885-d37f21ad810b/exp-runs/764fbf70-c579-4186-b8b6-9487f34b71a3\n", + "description: \n", + "tags: []\n", + "attributes: {}\n", + "id: 764fbf70-c579-4186-b8b6-9487f34b71a3\n", + "experiment id: cdefd3f4-b54d-434f-8e02-38a3eec7ad0b\n", + "project id: 8f7dc23e-3e0a-40b6-a885-d37f21ad810b\n", + "hyperparameters: {'C': 1e-06, 'solver': 'lbfgs', 'max_iter': 15}\n", + "observations: {}\n", + "metrics: {'val_acc': 0.7951907131011609}\n", + "artifact keys: ['model.pkl', 'requirements.txt', 'custom_modules', 'model_api.json']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "run.deploy(wait=True)\n", + "run" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, - "outputs": [], - "source": [ - "model_id = " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Prepare Data" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "df_test = pd.read_csv(test_data_filename)\n", - "X_test = df_test.iloc[:,:-1]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load Deployed Model" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "from verta._demo_utils import DeployedModel\n", - "\n", - "deployed_model = DeployedModel(HOST, model_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Query Deployed Model" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0]\n", + "[0]\n", + "[1]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[1]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[1]\n", + "[0]\n", + "[1]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[1]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[1]\n", + "[0]\n", + "[0]\n", + "[1]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[1]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[1]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[1]\n", + "[0]\n", + "[0]\n", + "[0]\n", + "[1]\n", + "[0]\n", + "[1]\n", + "[0]\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mdeployed_model\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrun\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_deployed_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrow\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miterrows\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdeployed_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Documents/modeldb/client/verta/verta/deployment.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, x, compress, max_retries, always_retry_404, always_retry_429)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0mnum_retries\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0mnum_retries\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mmax_retries\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_predict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompress\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 213\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mok\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Documents/modeldb/client/verta/verta/deployment.py\u001b[0m in \u001b[0;36m_predict\u001b[0;34m(self, x, compress)\u001b[0m\n\u001b[1;32m 170\u001b[0m )\n\u001b[1;32m 171\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 172\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prediction_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 173\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompress\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_retries\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malways_retry_404\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malways_retry_429\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mpost\u001b[0;34m(self, url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m 576\u001b[0m \"\"\"\n\u001b[1;32m 577\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 578\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'POST'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 579\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 528\u001b[0m }\n\u001b[1;32m 529\u001b[0m \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 530\u001b[0;31m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 531\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 532\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 643\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 644\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 645\u001b[0m \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[0mdecode_content\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 448\u001b[0m \u001b[0mretries\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 449\u001b[0;31m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 450\u001b[0m )\n\u001b[1;32m 451\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 670\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 671\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 672\u001b[0;31m \u001b[0mchunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mchunked\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 673\u001b[0m )\n\u001b[1;32m 674\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 419\u001b[0m \u001b[0;31m# Python 3 (including for exceptions like SystemExit).\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 420\u001b[0m \u001b[0;31m# Otherwise it looks like a bug in the code.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 421\u001b[0;31m \u001b[0msix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraise_from\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 422\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mSocketTimeout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mBaseSSLError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSocketError\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 423\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_raise_timeout\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout_value\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mread_timeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/urllib3/packages/six.py\u001b[0m in \u001b[0;36mraise_from\u001b[0;34m(value, from_value)\u001b[0m\n", + "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[0;31m# Python 3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 415\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 416\u001b[0;31m \u001b[0mhttplib_response\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 417\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mBaseException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 418\u001b[0m \u001b[0;31m# Remove the TypeError from the exception chain in\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/Cellar/python/3.7.5/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py\u001b[0m in \u001b[0;36mgetresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1342\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1343\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1344\u001b[0;31m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbegin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1345\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1346\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/Cellar/python/3.7.5/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py\u001b[0m in \u001b[0;36mbegin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 304\u001b[0m \u001b[0;31m# read until we get a non-100 response\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 305\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 306\u001b[0;31m \u001b[0mversion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreason\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 307\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mCONTINUE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 308\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/Cellar/python/3.7.5/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py\u001b[0m in \u001b[0;36m_read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m \u001b[0mline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_MAXLINE\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"iso-8859-1\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 268\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0m_MAXLINE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mLineTooLong\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"status line\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/Cellar/python/3.7.5/Frameworks/Python.framework/Versions/3.7/lib/python3.7/socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 587\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 589\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 590\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 591\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/Cellar/python/3.7.5/Frameworks/Python.framework/Versions/3.7/lib/python3.7/ssl.py\u001b[0m in \u001b[0;36mrecv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1069\u001b[0m \u001b[0;34m\"non-zero flags not allowed in calls to recv_into() on %s\"\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1070\u001b[0m self.__class__)\n\u001b[0;32m-> 1071\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnbytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1072\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1073\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/Cellar/python/3.7.5/Frameworks/Python.framework/Versions/3.7/lib/python3.7/ssl.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 927\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 928\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mbuffer\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 929\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 930\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 931\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "deployed_model = run.get_deployed_model()\n", + "for _, row in X_train.iterrows():\n", + " print(deployed_model.predict([row.tolist()]))\n", + " time.sleep(2)" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "for x in itertools.cycle(X_test.values.tolist()):\n", - " print(deployed_model.predict([x]))\n", - " time.sleep(.5)" + "run.undeploy()" ] }, { @@ -430,7 +751,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.5" } }, "nbformat": 4, From a6c1746e255690c1cecf8327907677e39b9960c0 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Mon, 27 Apr 2020 12:27:47 -0700 Subject: [PATCH 09/28] Remove notebook --- .../workflows/demos/census-end-to-end.ipynb | 643 +++++------------- 1 file changed, 161 insertions(+), 482 deletions(-) diff --git a/client/workflows/demos/census-end-to-end.ipynb b/client/workflows/demos/census-end-to-end.ipynb index 1cc025a92c..6df10250b6 100644 --- a/client/workflows/demos/census-end-to-end.ipynb +++ b/client/workflows/demos/census-end-to-end.ipynb @@ -44,7 +44,7 @@ "metadata": {}, "outputs": [], "source": [ - "HOST = \"dev.verta.ai\"\n", + "HOST = \"app.verta.ai\"\n", "\n", "PROJECT_NAME = \"Census Income Classification\"\n", "EXPERIMENT_NAME = \"Logistic Regression\"" @@ -152,235 +152,7 @@ "cell_type": "code", "execution_count": 7, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
agecapital-gaincapital-losshours-per-weekworkclass_local-govworkclass_privateworkclass_self-emp-incworkclass_self-emp-not-incworkclass_state-govworkclass_without-pay...occupation_handlers-cleanersoccupation_machine-op-inspctoccupation_other-serviceoccupation_priv-house-servoccupation_prof-specialtyoccupation_protective-servoccupation_salesoccupation_tech-supportoccupation_transport-moving>50k
0440040010000...0000000000
1210040010000...0010000000
2537298060010000...0000001001
3490040010000...0010000000
4530148540010000...0000001001
\n", - "

5 rows × 44 columns

\n", - "
" - ], - "text/plain": [ - " age capital-gain capital-loss hours-per-week workclass_local-gov \\\n", - "0 44 0 0 40 0 \n", - "1 21 0 0 40 0 \n", - "2 53 7298 0 60 0 \n", - "3 49 0 0 40 0 \n", - "4 53 0 1485 40 0 \n", - "\n", - " workclass_private workclass_self-emp-inc workclass_self-emp-not-inc \\\n", - "0 1 0 0 \n", - "1 1 0 0 \n", - "2 1 0 0 \n", - "3 1 0 0 \n", - "4 1 0 0 \n", - "\n", - " workclass_state-gov workclass_without-pay ... \\\n", - "0 0 0 ... \n", - "1 0 0 ... \n", - "2 0 0 ... \n", - "3 0 0 ... \n", - "4 0 0 ... \n", - "\n", - " occupation_handlers-cleaners occupation_machine-op-inspct \\\n", - "0 0 0 \n", - "1 0 0 \n", - "2 0 0 \n", - "3 0 0 \n", - "4 0 0 \n", - "\n", - " occupation_other-service occupation_priv-house-serv \\\n", - "0 0 0 \n", - "1 1 0 \n", - "2 0 0 \n", - "3 1 0 \n", - "4 0 0 \n", - "\n", - " occupation_prof-specialty occupation_protective-serv occupation_sales \\\n", - "0 0 0 0 \n", - "1 0 0 0 \n", - "2 0 0 1 \n", - "3 0 0 0 \n", - "4 0 0 1 \n", - "\n", - " occupation_tech-support occupation_transport-moving >50k \n", - "0 0 0 0 \n", - "1 0 0 0 \n", - "2 0 0 1 \n", - "3 0 0 0 \n", - "4 0 0 1 \n", - "\n", - "[5 rows x 44 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_train = pd.read_csv(train_data_filename)\n", "X_train = df_train.iloc[:,:-1]\n", @@ -425,19 +197,7 @@ "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "set email from environment\n", - "set developer key from environment\n", - "connection successfully established\n", - "set existing Project: Census Income Classification from personal workspace\n", - "set existing Experiment: Logistic Regression\n" - ] - } - ], + "outputs": [], "source": [ "from verta import Client\n", "from verta.utils import ModelAPI\n", @@ -460,53 +220,46 @@ "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "created new ExperimentRun: Run 558931588011935496772\n", - "{'C': 1e-06, 'solver': 'lbfgs', 'max_iter': 15} Validation accuracy: 0.7952\n", - "upload complete (custom_modules.zip)\n", - "upload complete (model.pkl)\n", - "upload complete (model_api.json)\n", - "upload complete (requirements.txt)\n" - ] - } - ], - "source": [ - "hyperparams = hyperparam_sets[0]\n", - "\n", - "# create object to track experiment run\n", - "run = client.set_experiment_run()\n", - "\n", - "# create validation split\n", - "(X_val_train, X_val_test,\n", - " y_val_train, y_val_test) = model_selection.train_test_split(X_train, y_train,\n", - " test_size=0.2,\n", - " shuffle=True)\n", - "\n", - "# log hyperparameters\n", - "run.log_hyperparameters(hyperparams)\n", - "print(hyperparams, end=' ')\n", - "\n", - "# create and train model\n", - "model = linear_model.LogisticRegression(**hyperparams)\n", - "model.fit(X_train, y_train)\n", - "\n", - "# calculate and log validation accuracy\n", - "val_acc = model.score(X_val_test, y_val_test)\n", - "run.log_metric(\"val_acc\", val_acc)\n", - "print(\"Validation accuracy: {:.4f}\".format(val_acc))\n", - "\n", - "# create deployment artifacts\n", - "model_api = ModelAPI(X_train, model.predict(X_train))\n", - "requirements = [\"scikit-learn\"]\n", + "outputs": [], + "source": [ + "def run_experiment(hyperparams):\n", + " # create object to track experiment run\n", + " run = client.set_experiment_run()\n", + " \n", + " # create validation split\n", + " (X_val_train, X_val_test,\n", + " y_val_train, y_val_test) = model_selection.train_test_split(X_train, y_train,\n", + " test_size=0.2,\n", + " shuffle=True)\n", "\n", - "# save and log model\n", - "run.log_model(model, model_api=model_api)\n", - "run.log_requirements(requirements)\n", - "run.log_training_data(X_train, y_train)" + " # log hyperparameters\n", + " run.log_hyperparameters(hyperparams)\n", + " print(hyperparams, end=' ')\n", + " \n", + " # create and train model\n", + " model = linear_model.LogisticRegression(**hyperparams)\n", + " model.fit(X_train, y_train)\n", + " \n", + " # calculate and log validation accuracy\n", + " val_acc = model.score(X_val_test, y_val_test)\n", + " run.log_metric(\"val_acc\", val_acc)\n", + " print(\"Validation accuracy: {:.4f}\".format(val_acc))\n", + " \n", + " # create deployment artifacts\n", + " model_api = ModelAPI(X_train, model.predict(X_train))\n", + " requirements = [\"scikit-learn\"]\n", + " \n", + " # save and log model\n", + " run.log_model(model, model_api=model_api)\n", + " run.log_requirements(requirements)\n", + " run.log_training_data(X_train, y_train)\n", + " \n", + " # log Git information as code version\n", + " run.log_code()\n", + " \n", + "pool = multiprocessing.Pool()\n", + "pool.map(run_experiment, hyperparam_sets)\n", + "pool.close()" ] }, { @@ -516,215 +269,141 @@ "---" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Revisit Workflow" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Retrieve Best Run" + ] + }, { "cell_type": "code", "execution_count": 11, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "name: Run 558931588011935496772\n", - "url: https://dev.verta.ai/Convoliution/projects/8f7dc23e-3e0a-40b6-a885-d37f21ad810b/exp-runs/764fbf70-c579-4186-b8b6-9487f34b71a3\n", - "description: \n", - "tags: []\n", - "attributes: {}\n", - "id: 764fbf70-c579-4186-b8b6-9487f34b71a3\n", - "experiment id: cdefd3f4-b54d-434f-8e02-38a3eec7ad0b\n", - "project id: 8f7dc23e-3e0a-40b6-a885-d37f21ad810b\n", - "hyperparameters: {'C': 1e-06, 'solver': 'lbfgs', 'max_iter': 15}\n", - "observations: {}\n", - "metrics: {'val_acc': 0.7951907131011609}\n", - "artifact keys: ['model.pkl', 'requirements.txt', 'custom_modules', 'model_api.json']" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "run.deploy(wait=True)\n", - "run" + "outputs": [], + "source": [ + "best_run = expt.expt_runs.sort(\"metrics.val_acc\", descending=True)[0]\n", + "print(\"Validation Accuracy: {:.4f}\".format(best_run.get_metric(\"val_acc\")))\n", + "\n", + "best_hyperparams = best_run.get_hyperparameters()\n", + "print(\"Hyperparameters: {}\".format(best_hyperparams))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train on Full Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "model = linear_model.LogisticRegression(multi_class='auto', **best_hyperparams)\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Calculate Accuracy on Full Training Set" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "train_acc = model.score(X_train, y_train)\n", + "print(\"Training accuracy: {:.4f}\".format(train_acc))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Make Live Predictions" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0]\n", - "[0]\n", - "[1]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[1]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[1]\n", - "[0]\n", - "[1]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[1]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[1]\n", - "[0]\n", - "[0]\n", - "[1]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[1]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[1]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[1]\n", - "[0]\n", - "[0]\n", - "[0]\n", - "[1]\n", - "[0]\n", - "[1]\n", - "[0]\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mdeployed_model\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrun\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_deployed_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrow\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miterrows\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdeployed_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/modeldb/client/verta/verta/deployment.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, x, compress, max_retries, always_retry_404, always_retry_429)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0mnum_retries\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0mnum_retries\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mmax_retries\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_predict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompress\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 213\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mok\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/modeldb/client/verta/verta/deployment.py\u001b[0m in \u001b[0;36m_predict\u001b[0;34m(self, x, compress)\u001b[0m\n\u001b[1;32m 170\u001b[0m )\n\u001b[1;32m 171\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 172\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prediction_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 173\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompress\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_retries\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malways_retry_404\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malways_retry_429\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mpost\u001b[0;34m(self, url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m 576\u001b[0m \"\"\"\n\u001b[1;32m 577\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 578\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'POST'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 579\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 528\u001b[0m }\n\u001b[1;32m 529\u001b[0m \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 530\u001b[0;31m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 531\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 532\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 643\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 644\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 645\u001b[0m \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[0mdecode_content\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 448\u001b[0m \u001b[0mretries\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 449\u001b[0;31m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 450\u001b[0m )\n\u001b[1;32m 451\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 670\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 671\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 672\u001b[0;31m \u001b[0mchunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mchunked\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 673\u001b[0m )\n\u001b[1;32m 674\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 419\u001b[0m \u001b[0;31m# Python 3 (including for exceptions like SystemExit).\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 420\u001b[0m \u001b[0;31m# Otherwise it looks like a bug in the code.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 421\u001b[0;31m \u001b[0msix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraise_from\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 422\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mSocketTimeout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mBaseSSLError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSocketError\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 423\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_raise_timeout\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout_value\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mread_timeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/urllib3/packages/six.py\u001b[0m in \u001b[0;36mraise_from\u001b[0;34m(value, from_value)\u001b[0m\n", - "\u001b[0;32m~/Documents/modeldb/client/workflows/venv-flow/lib/python3.7/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[0;31m# Python 3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 415\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 416\u001b[0;31m \u001b[0mhttplib_response\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 417\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mBaseException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 418\u001b[0m \u001b[0;31m# Remove the TypeError from the exception chain in\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.7.5/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py\u001b[0m in \u001b[0;36mgetresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1342\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1343\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1344\u001b[0;31m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbegin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1345\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1346\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.7.5/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py\u001b[0m in \u001b[0;36mbegin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 304\u001b[0m \u001b[0;31m# read until we get a non-100 response\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 305\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 306\u001b[0;31m \u001b[0mversion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreason\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 307\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mCONTINUE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 308\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.7.5/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py\u001b[0m in \u001b[0;36m_read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m \u001b[0mline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_MAXLINE\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"iso-8859-1\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 268\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0m_MAXLINE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mLineTooLong\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"status line\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.7.5/Frameworks/Python.framework/Versions/3.7/lib/python3.7/socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 587\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 589\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 590\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 591\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.7.5/Frameworks/Python.framework/Versions/3.7/lib/python3.7/ssl.py\u001b[0m in \u001b[0;36mrecv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1069\u001b[0m \u001b[0;34m\"non-zero flags not allowed in calls to recv_into() on %s\"\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1070\u001b[0m self.__class__)\n\u001b[0;32m-> 1071\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnbytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1072\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1073\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.7.5/Frameworks/Python.framework/Versions/3.7/lib/python3.7/ssl.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 927\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 928\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mbuffer\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 929\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 930\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 931\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "deployed_model = run.get_deployed_model()\n", - "for _, row in X_train.iterrows():\n", - " print(deployed_model.predict([row.tolist()]))\n", - " time.sleep(2)" + "outputs": [], + "source": [ + "model_id = " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare Data" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "df_test = pd.read_csv(test_data_filename)\n", + "X_test = df_test.iloc[:,:-1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Deployed Model" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "from verta._demo_utils import DeployedModel\n", + "\n", + "deployed_model = DeployedModel(HOST, model_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Query Deployed Model" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ - "run.undeploy()" + "for x in itertools.cycle(X_test.values.tolist()):\n", + " print(deployed_model.predict([x]))\n", + " time.sleep(.5)" ] }, { @@ -751,7 +430,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.5" + "version": "3.7.3" } }, "nbformat": 4, From b36118d5854418df9205e023360f8200639eab0e Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Mon, 27 Apr 2020 12:30:40 -0700 Subject: [PATCH 10/28] Add total_count --- client/verta/verta/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/client.py b/client/verta/verta/client.py index 9c21e5aff9..bde53ca401 100644 --- a/client/verta/verta/client.py +++ b/client/verta/verta/client.py @@ -3587,7 +3587,7 @@ def log_training_data(self, train_features, train_targets, overwrite=False): train_df = train_features.join(train_targets) - histograms = {'features': {}} + histograms = {'total_count': len(train_df.index), 'features': {}} for colname in train_df: histogram = _histogram_utils.calculate_histogram(train_df[colname]) histograms['features'][colname] = histogram From 05fa5475a756b0019147c814fceff13475738c87 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Mon, 27 Apr 2020 13:01:01 -0700 Subject: [PATCH 11/28] Enhance docstrings --- .../verta/_internal_utils/_histogram_utils.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/client/verta/verta/_internal_utils/_histogram_utils.py b/client/verta/verta/_internal_utils/_histogram_utils.py index bce84b44c9..4ad2c7fcdd 100644 --- a/client/verta/verta/_internal_utils/_histogram_utils.py +++ b/client/verta/verta/_internal_utils/_histogram_utils.py @@ -4,6 +4,19 @@ def calculate_histogram(data): + """ + Calculates a histogram for `data`. + + Parameters + ---------- + data : pandas.Series + Data to be binned. + + Returns + ------- + histogram : dict + + """ try: # binary return calculate_binary_histogram(data) except HistogramError: @@ -19,6 +32,8 @@ def calculate_histogram(data): def calculate_binary_histogram(data): """ + Calculates a histogram for binary `data`. + Parameters ---------- data : pandas.Series @@ -31,6 +46,7 @@ def calculate_binary_histogram(data): Raises ------ HistogramError + If a binary histogram cannot be calculated from `data`. """ values = data.values.tolist() @@ -70,7 +86,7 @@ def calculate_binary_histogram(data): continue # unsupported value - raise HistogramError("invalid binanry value {}".format(value)) + raise HistogramError("invalid binary value {}".format(value)) return { 'histogram': { @@ -97,6 +113,7 @@ def calculate_discrete_histogram(data): Raises ------ HistogramError + If a discrete histogram cannot be calculated from `data`. """ value_counts = data.value_counts().sort_index() From 84e5d2f167176ad4c0bf5e58d87de924fa433355 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Mon, 27 Apr 2020 13:05:57 -0700 Subject: [PATCH 12/28] Add comment --- client/verta/verta/_internal_utils/_histogram_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/_internal_utils/_histogram_utils.py b/client/verta/verta/_internal_utils/_histogram_utils.py index 4ad2c7fcdd..f63875ecea 100644 --- a/client/verta/verta/_internal_utils/_histogram_utils.py +++ b/client/verta/verta/_internal_utils/_histogram_utils.py @@ -142,7 +142,7 @@ def calculate_discrete_histogram(data): ) # heuristic: reject if counts don't seem high enough - if value_counts.mean() < 10: + if value_counts.mean() < 10: # `value_counts` instead of `counts` for mean() method raise HistogramError( "heuristic says that each discrete value should average at least 10 appearances" ) From d84b159f573fe2ff1e48141dd6abdad084217025 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Mon, 27 Apr 2020 16:00:32 -0700 Subject: [PATCH 13/28] Add happy-path test case --- client/verta/tests/test_deployment.py | 41 +++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/client/verta/tests/test_deployment.py b/client/verta/tests/test_deployment.py index 6d33d766fb..073ec79e91 100644 --- a/client/verta/tests/test_deployment.py +++ b/client/verta/tests/test_deployment.py @@ -578,6 +578,47 @@ def test_dataframe(self, experiment_run, model_for_deployment): data_csv = experiment_run.get_artifact("train_data").read() assert X_train.join(y_train).to_csv(index=False) == six.ensure_str(data_csv) + def test_histogram(self, experiment_run): + np = pytest.importorskip("numpy") + pd = pytest.importorskip("pandas") + + df = pd.concat( + objs=[ + pd.Series([True]*10 + [False]*20, name='binary col'), + pd.Series([0]*5 + [1]*10 + [2]*15, name='discrete col'), + pd.Series(range(30), name='continuous col'), + ], + axis='columns', + ) + X = df[['binary col', 'discrete col']] + y = df['continuous col'] + + # TODO: generate a histogram w/ discrete using Historical Data Processor + histograms = { + 'features': { + 'binary col': {'histogram': {'binary': {'count': [20, 10]}}, 'type': 'binary'}, + 'discrete col': {'histogram': {'discrete': { + 'bucket_values': [0, 1, 2], + 'count': [5, 10, 15]}}, 'type': 'discrete'}, + 'continuous col': {'histogram': {'float': { + 'bucket_limits': [0, 2.9, 5.8, 8.7, 11.6, 14.5, 17.4, 20.3, 23.2, 26.099999999999998, 29], + 'count': [3, 3, 3, 3, 3, 3, 3, 3, 3, 2]}}, 'type': 'float'}, + }, + 'total_count': 30, + } + + experiment_run.log_training_data(X, y) + endpoint = "{}://{}/api/v1/monitoring/data/references/{}".format( + experiment_run._conn.scheme, + experiment_run._conn.socket, + experiment_run.id, + ) + response = _utils.make_request("GET", endpoint, experiment_run._conn) + _utils.raise_for_http_error(response) + generated_histograms = response.json() + + assert generated_histograms == histograms + @pytest.mark.not_oss class TestDeploy: From 35a1d5501109fab4d92ea69e3c675e33c7c4e56f Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Mon, 27 Apr 2020 17:13:48 -0700 Subject: [PATCH 14/28] Remove TODO --- client/verta/tests/test_config.py | 15 +++++++++++++++ client/verta/tests/test_deployment.py | 1 - 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 client/verta/tests/test_config.py diff --git a/client/verta/tests/test_config.py b/client/verta/tests/test_config.py new file mode 100644 index 0000000000..f28e667d36 --- /dev/null +++ b/client/verta/tests/test_config.py @@ -0,0 +1,15 @@ +import os +import shutil +import tempfile + +import pytest + +from verta._internal_utils import _config_utils + + +class TestRead: + def test_merge(self, tempdir): + pass + + def test_merge_overwrite(self, tempdir): + pass diff --git a/client/verta/tests/test_deployment.py b/client/verta/tests/test_deployment.py index 073ec79e91..4a1bfaf061 100644 --- a/client/verta/tests/test_deployment.py +++ b/client/verta/tests/test_deployment.py @@ -593,7 +593,6 @@ def test_histogram(self, experiment_run): X = df[['binary col', 'discrete col']] y = df['continuous col'] - # TODO: generate a histogram w/ discrete using Historical Data Processor histograms = { 'features': { 'binary col': {'histogram': {'binary': {'count': [20, 10]}}, 'type': 'binary'}, From 6afaccfb177a6deea06ba2b508daf9f0acf3f3b7 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Mon, 27 Apr 2020 17:14:50 -0700 Subject: [PATCH 15/28] Remove accidental addition --- client/verta/tests/test_config.py | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 client/verta/tests/test_config.py diff --git a/client/verta/tests/test_config.py b/client/verta/tests/test_config.py deleted file mode 100644 index f28e667d36..0000000000 --- a/client/verta/tests/test_config.py +++ /dev/null @@ -1,15 +0,0 @@ -import os -import shutil -import tempfile - -import pytest - -from verta._internal_utils import _config_utils - - -class TestRead: - def test_merge(self, tempdir): - pass - - def test_merge_overwrite(self, tempdir): - pass From 2bf5a4830674bbbdd5ecedc670ad60fcbf1af799 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Mon, 27 Apr 2020 17:33:47 -0700 Subject: [PATCH 16/28] Raise error for invalid cols --- .../verta/_internal_utils/_histogram_utils.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/client/verta/verta/_internal_utils/_histogram_utils.py b/client/verta/verta/_internal_utils/_histogram_utils.py index f63875ecea..ce16ff1e8e 100644 --- a/client/verta/verta/_internal_utils/_histogram_utils.py +++ b/client/verta/verta/_internal_utils/_histogram_utils.py @@ -173,15 +173,25 @@ def calculate_float_histogram(data, num_bins=10): histogram : dict """ + values = data.values.tolist() + + # reject non-numbers + try: + values = list(map(float, values)) + except ValueError: + raise TypeError( + "unable to generate histogram from non-numeric column {}".format(data.name) + ) + # calculate bin boundaries - start, stop = min(data), max(data) + start, stop = min(values), max(values) space = (stop - start)/num_bins bin_boundaries = [start + space*i for i in range(num_bins+1)] # fit `data` into bins reference_counts = [] for l, r in zip(bin_boundaries[:-1], bin_boundaries[1:]): - count = len([datum for datum in data if l <= datum < r]) + count = len([value for value in values if l <= value < r]) reference_counts.append(count) return { From 51b50365c4283d429ede0c80d8070c5ce758b4ad Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Mon, 27 Apr 2020 21:22:34 -0700 Subject: [PATCH 17/28] Ensure float histogram bins incl max val --- client/verta/verta/_internal_utils/_histogram_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/client/verta/verta/_internal_utils/_histogram_utils.py b/client/verta/verta/_internal_utils/_histogram_utils.py index ce16ff1e8e..aed9fc96f2 100644 --- a/client/verta/verta/_internal_utils/_histogram_utils.py +++ b/client/verta/verta/_internal_utils/_histogram_utils.py @@ -186,7 +186,8 @@ def calculate_float_histogram(data, num_bins=10): # calculate bin boundaries start, stop = min(values), max(values) space = (stop - start)/num_bins - bin_boundaries = [start + space*i for i in range(num_bins+1)] + bin_boundaries = [start + space*i for i in range(num_bins)] + bin_boundaries.append(stop) # ensure last bin captures max value # fit `data` into bins reference_counts = [] From 57401f0e0bf1ff4b8817c6032cacb9a570195138 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Mon, 27 Apr 2020 22:03:43 -0700 Subject: [PATCH 18/28] Ensure float histogram counts incl max val --- client/verta/verta/_internal_utils/_histogram_utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/client/verta/verta/_internal_utils/_histogram_utils.py b/client/verta/verta/_internal_utils/_histogram_utils.py index aed9fc96f2..db9b05d917 100644 --- a/client/verta/verta/_internal_utils/_histogram_utils.py +++ b/client/verta/verta/_internal_utils/_histogram_utils.py @@ -187,13 +187,18 @@ def calculate_float_histogram(data, num_bins=10): start, stop = min(values), max(values) space = (stop - start)/num_bins bin_boundaries = [start + space*i for i in range(num_bins)] - bin_boundaries.append(stop) # ensure last bin captures max value + # ensure last bin covers max value + bin_boundaries.append(stop) # fit `data` into bins reference_counts = [] - for l, r in zip(bin_boundaries[:-1], bin_boundaries[1:]): + bin_windows = list(zip(bin_boundaries[:-1], bin_boundaries[1:])) + for l, r in bin_windows[:-1]: # handle last bin shortly count = len([value for value in values if l <= value < r]) reference_counts.append(count) + # ensure last bin includes max value + count = len([value for value in values if bin_boundaries[-2] <= value]) + reference_counts.append(count) return { 'histogram': { From 40af00ecdf78b1e2a1bf2cc6adf95bc9be671529 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Tue, 28 Apr 2020 08:37:54 -0700 Subject: [PATCH 19/28] Move full df-to-hist logic to util --- .../verta/_internal_utils/_histogram_utils.py | 24 ++++++++++++++++++- client/verta/verta/client.py | 5 +--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/client/verta/verta/_internal_utils/_histogram_utils.py b/client/verta/verta/_internal_utils/_histogram_utils.py index db9b05d917..256e317130 100644 --- a/client/verta/verta/_internal_utils/_histogram_utils.py +++ b/client/verta/verta/_internal_utils/_histogram_utils.py @@ -3,7 +3,29 @@ from ..external import six -def calculate_histogram(data): +def calculate_histograms(df): + """ + Calculates histograms for the columns of `df`. + + Parameters + ---------- + df : pandas.DataFrame + Data to be binned. + + Returns + ------- + histograms : dict + + """ + histograms = {'total_count': len(df.index), 'features': {}} + for colname in df: + histogram = calculate_single_histogram(df[colname]) + histograms['features'][colname] = histogram + + return histograms + + +def calculate_single_histogram(data): """ Calculates a histogram for `data`. diff --git a/client/verta/verta/client.py b/client/verta/verta/client.py index bde53ca401..3db9c4a998 100644 --- a/client/verta/verta/client.py +++ b/client/verta/verta/client.py @@ -3587,10 +3587,7 @@ def log_training_data(self, train_features, train_targets, overwrite=False): train_df = train_features.join(train_targets) - histograms = {'total_count': len(train_df.index), 'features': {}} - for colname in train_df: - histogram = _histogram_utils.calculate_histogram(train_df[colname]) - histograms['features'][colname] = histogram + histograms = _histogram_utils.calculate_histograms(train_df) endpoint = "{}://{}/api/v1/monitoring/data/references/{}".format( self._conn.scheme, From e9ce4a4bba47f5a9994f3d510f6f41788fad1502 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Tue, 28 Apr 2020 08:41:54 -0700 Subject: [PATCH 20/28] Fix bug --- client/verta/verta/_internal_utils/_histogram_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/_internal_utils/_histogram_utils.py b/client/verta/verta/_internal_utils/_histogram_utils.py index 256e317130..07b969852a 100644 --- a/client/verta/verta/_internal_utils/_histogram_utils.py +++ b/client/verta/verta/_internal_utils/_histogram_utils.py @@ -20,7 +20,7 @@ def calculate_histograms(df): histograms = {'total_count': len(df.index), 'features': {}} for colname in df: histogram = calculate_single_histogram(df[colname]) - histograms['features'][colname] = histogram + histograms['features'][str(colname)] = histogram # TODO: directly store non-str column names return histograms From 564e818f0a8772ae31d4626c2ac9ab309efbba57 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Tue, 28 Apr 2020 09:11:33 -0700 Subject: [PATCH 21/28] Add unit tests --- client/verta/tests/test_deployment.py | 106 ++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/client/verta/tests/test_deployment.py b/client/verta/tests/test_deployment.py index 4a1bfaf061..9c44ea4417 100644 --- a/client/verta/tests/test_deployment.py +++ b/client/verta/tests/test_deployment.py @@ -15,6 +15,7 @@ import requests import verta +from verta._internal_utils import _histogram_utils from verta._internal_utils import _utils @@ -619,6 +620,111 @@ def test_histogram(self, experiment_run): assert generated_histograms == histograms +class TestHistogram: + @staticmethod + def assert_histograms_match_dataframe(histograms, df): + """Common assertions for this test suite.""" + # features match + assert set(histograms['features'].keys()) == set(df.columns) + # all rows counted + assert histograms['total_count'] == len(df.index) + + for feature_name, histogram in histograms['features'].items(): + series = df[feature_name] + histogram_type = histogram['type'] + histogram_data = histogram['histogram'][histogram_type] + + # all data points counted + assert sum(histogram_data['count']) == len(series) + + if histogram_type == "binary": + num_false = sum(~series) + num_true = sum(series) + + assert histogram_data['count'] == [num_false, num_true] + elif histogram_type == "discrete": + buckets = histogram_data['bucket_values'] + counts = histogram_data['count'] + + # buckets in ascending order + assert buckets == list(sorted(buckets)) + + # data within buckets + assert all(buckets[0] <= series) + assert all(series <= buckets[-1]) + + # counts correct + for value, count in zip(buckets, counts): + assert sum(series == value) == count + elif histogram_type == "float": + buckets = histogram_data['bucket_limits'] + counts = histogram_data['count'] + + # buckets in ascending order + assert buckets == list(sorted(buckets)) + + # data within buckets + assert all(buckets[0] <= series) + assert all(series <= buckets[-1]) + + # counts correct + bin_windows = list(zip(buckets[:-1], buckets[1:])) + for i, (l, r) in enumerate(bin_windows[:-1]): + assert sum((l <= series) & (series < r)) == counts[i] + assert sum(buckets[-2] <= series) == counts[-1] + + def test_binary(self): + np = pytest.importorskip("numpy") + pd = pytest.importorskip("pandas") + num_rows = 30 + + df = pd.concat( + objs=[ + pd.Series(np.random.random(size=num_rows).round().astype(bool), name="A"), + pd.Series(np.random.random(size=num_rows).round().astype(bool), name="B"), + pd.Series(np.random.random(size=num_rows).round().astype(bool), name="C"), + ], + axis='columns', + ) + histograms = _histogram_utils.calculate_histograms(df) + + self.assert_histograms_match_dataframe(histograms, df) + + def test_discrete(self): + np = pytest.importorskip("numpy") + pd = pytest.importorskip("pandas") + num_rows = 30 + + df = pd.concat( + objs=[ + pd.Series(np.random.randint(0, 12, size=num_rows), name="A"), + pd.Series(np.random.randint(-12, -6, size=num_rows), name="B"), + pd.Series(np.random.randint(-30, 24, size=num_rows), name="C"), + ], + axis='columns', + ) + histograms = _histogram_utils.calculate_histograms(df) + + self.assert_histograms_match_dataframe(histograms, df) + + def test_float(self): + np = pytest.importorskip("numpy") + pd = pytest.importorskip("pandas") + num_rows = 30 + + df = pd.concat( + objs=[ + pd.Series(np.random.normal(loc=9, size=num_rows), name="A"), + pd.Series(np.random.normal(scale=12, size=num_rows), name="B"), + pd.Series(np.random.normal(loc=-3, scale=6, size=num_rows), name="C"), + ], + axis='columns', + ) + histograms = _histogram_utils.calculate_histograms(df) + + self.assert_histograms_match_dataframe(histograms, df) + + @pytest.mark.not_oss class TestDeploy: def test_auto_path_auto_token_deploy(self, experiment_run, model_for_deployment): From 33075a272f9c78e6014ba983a110293a2be7bfcc Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Tue, 28 Apr 2020 09:17:16 -0700 Subject: [PATCH 22/28] Add assertion --- client/verta/tests/test_deployment.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/client/verta/tests/test_deployment.py b/client/verta/tests/test_deployment.py index 9c44ea4417..fca0e064f3 100644 --- a/client/verta/tests/test_deployment.py +++ b/client/verta/tests/test_deployment.py @@ -676,7 +676,7 @@ def assert_histograms_match_dataframe(histograms, df): def test_binary(self): np = pytest.importorskip("numpy") pd = pytest.importorskip("pandas") - num_rows = 30 + num_rows = 90 df = pd.concat( objs=[ @@ -688,29 +688,39 @@ def test_binary(self): ) histograms = _histogram_utils.calculate_histograms(df) + assert all( + histogram['type'] == "binary" + for histogram + in histograms['features'].values() + ) self.assert_histograms_match_dataframe(histograms, df) def test_discrete(self): np = pytest.importorskip("numpy") pd = pytest.importorskip("pandas") - num_rows = 30 + num_rows = 90 df = pd.concat( objs=[ - pd.Series(np.random.randint(0, 12, size=num_rows), name="A"), + pd.Series(np.random.randint(6, 12, size=num_rows), name="A"), pd.Series(np.random.randint(-12, -6, size=num_rows), name="B"), - pd.Series(np.random.randint(-30, 24, size=num_rows), name="C"), + pd.Series(np.random.randint(-3, 3, size=num_rows), name="C"), ], axis='columns', ) histograms = _histogram_utils.calculate_histograms(df) + assert all( + histogram['type'] == "discrete" + for histogram + in histograms['features'].values() + ) self.assert_histograms_match_dataframe(histograms, df) def test_float(self): np = pytest.importorskip("numpy") pd = pytest.importorskip("pandas") - num_rows = 30 + num_rows = 90 df = pd.concat( objs=[ @@ -722,6 +732,11 @@ def test_float(self): ) histograms = _histogram_utils.calculate_histograms(df) + assert all( + histogram['type'] == "float" + for histogram + in histograms['features'].values() + ) self.assert_histograms_match_dataframe(histograms, df) From be089c9d41cfbbf073f3e9444d2a0612f221dbae Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Tue, 28 Apr 2020 09:39:57 -0700 Subject: [PATCH 23/28] Fix e2e test --- client/verta/tests/test_deployment.py | 89 +++++++++++++++------------ 1 file changed, 49 insertions(+), 40 deletions(-) diff --git a/client/verta/tests/test_deployment.py b/client/verta/tests/test_deployment.py index fca0e064f3..4f7332bb46 100644 --- a/client/verta/tests/test_deployment.py +++ b/client/verta/tests/test_deployment.py @@ -579,46 +579,6 @@ def test_dataframe(self, experiment_run, model_for_deployment): data_csv = experiment_run.get_artifact("train_data").read() assert X_train.join(y_train).to_csv(index=False) == six.ensure_str(data_csv) - def test_histogram(self, experiment_run): - np = pytest.importorskip("numpy") - pd = pytest.importorskip("pandas") - - df = pd.concat( - objs=[ - pd.Series([True]*10 + [False]*20, name='binary col'), - pd.Series([0]*5 + [1]*10 + [2]*15, name='discrete col'), - pd.Series(range(30), name='continuous col'), - ], - axis='columns', - ) - X = df[['binary col', 'discrete col']] - y = df['continuous col'] - - histograms = { - 'features': { - 'binary col': {'histogram': {'binary': {'count': [20, 10]}}, 'type': 'binary'}, - 'discrete col': {'histogram': {'discrete': { - 'bucket_values': [0, 1, 2], - 'count': [5, 10, 15]}}, 'type': 'discrete'}, - 'continuous col': {'histogram': {'float': { - 'bucket_limits': [0, 2.9, 5.8, 8.7, 11.6, 14.5, 17.4, 20.3, 23.2, 26.099999999999998, 29], - 'count': [3, 3, 3, 3, 3, 3, 3, 3, 3, 2]}}, 'type': 'float'}, - }, - 'total_count': 30, - } - - experiment_run.log_training_data(X, y) - endpoint = "{}://{}/api/v1/monitoring/data/references/{}".format( - experiment_run._conn.scheme, - experiment_run._conn.socket, - experiment_run.id, - ) - response = _utils.make_request("GET", endpoint, experiment_run._conn) - _utils.raise_for_http_error(response) - generated_histograms = response.json() - - assert generated_histograms == histograms - class TestHistogram: @staticmethod @@ -739,6 +699,55 @@ def test_float(self): ) self.assert_histograms_match_dataframe(histograms, df) + def test_integration(self, experiment_run): + np = pytest.importorskip("numpy") + pd = pytest.importorskip("pandas") + + binary_col_name = 'binary col' + discrete_col_name = 'discrete col' + float_col_name = 'float col' + df = pd.concat( + objs=[ + pd.Series([True]*10 + [False]*20, name=binary_col_name), + pd.Series([0]*5 + [1]*10 + [2]*15, name=discrete_col_name), + pd.Series(range(30), name=float_col_name), + ], + axis='columns', + ) + histograms = _histogram_utils.calculate_histograms(df) + + experiment_run.log_training_data(df[[binary_col_name, discrete_col_name]], df[float_col_name]) + endpoint = "{}://{}/api/v1/monitoring/data/references/{}".format( + experiment_run._conn.scheme, + experiment_run._conn.socket, + experiment_run.id, + ) + response = _utils.make_request("GET", endpoint, experiment_run._conn) + _utils.raise_for_http_error(response) + retrieved_histograms = response.json() + + # features match + features = histograms['features'] + retrieved_features = retrieved_histograms['features'] + assert set(features.keys()) == set(retrieved_features.keys()) + + # binary matches + binary_hist = histograms['features'][binary_col_name]['histogram']['binary'] + retrieved_binary_hist = retrieved_histograms['features'][binary_col_name]['histogram']['binary'] + assert binary_hist['count'] == retrieved_binary_hist['count'] + + # discrete matches + discrete_hist = histograms['features'][discrete_col_name]['histogram']['discrete'] + retrieved_discrete_hist = retrieved_histograms['features'][discrete_col_name]['histogram']['discrete'] + assert discrete_hist['bucket_values'] == retrieved_discrete_hist['bucket_values'] + assert discrete_hist['count'] == retrieved_discrete_hist['count'] + + # float matches + float_hist = histograms['features'][float_col_name]['histogram']['float'] + retrieved_float_hist = retrieved_histograms['features'][float_col_name]['histogram']['float'] + assert all(np.isclose(float_hist['bucket_limits'], retrieved_float_hist['bucket_limits'])) + assert float_hist['count'] == retrieved_float_hist['count'] + @pytest.mark.not_oss class TestDeploy: From e2cecdf2d8f8fda8936bb8624759ae2080bedf0a Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Tue, 28 Apr 2020 10:12:38 -0700 Subject: [PATCH 24/28] Clarify variables --- client/verta/tests/test_deployment.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/client/verta/tests/test_deployment.py b/client/verta/tests/test_deployment.py index 4f7332bb46..06ef32a4fd 100644 --- a/client/verta/tests/test_deployment.py +++ b/client/verta/tests/test_deployment.py @@ -595,7 +595,8 @@ def assert_histograms_match_dataframe(histograms, df): histogram_data = histogram['histogram'][histogram_type] # all data points counted - assert sum(histogram_data['count']) == len(series) + counts = histogram_data['count'] + assert sum(counts) == len(series) if histogram_type == "binary": num_false = sum(~series) @@ -604,7 +605,6 @@ def assert_histograms_match_dataframe(histograms, df): assert histogram_data['count'] == [num_false, num_true] elif histogram_type == "discrete": buckets = histogram_data['bucket_values'] - counts = histogram_data['count'] # buckets in ascending order assert buckets == list(sorted(buckets)) @@ -617,21 +617,20 @@ def assert_histograms_match_dataframe(histograms, df): for value, count in zip(buckets, counts): assert sum(series == value) == count elif histogram_type == "float": - buckets = histogram_data['bucket_limits'] - counts = histogram_data['count'] + limits = histogram_data['bucket_limits'] - # buckets in ascending order - assert buckets == list(sorted(buckets)) + # limits in ascending order + assert limits == list(sorted(limits)) - # data within buckets - assert all(buckets[0] <= series) - assert all(series <= buckets[-1]) + # data within limits + assert all(limits[0] <= series) + assert all(series <= limits[-1]) # counts correct - bin_windows = list(zip(buckets[:-1], buckets[1:])) + bin_windows = list(zip(limits[:-1], limits[1:])) for i, (l, r) in enumerate(bin_windows[:-1]): assert sum((l <= series) & (series < r)) == counts[i] - assert sum(buckets[-2] <= series) == counts[-1] + assert sum(limits[-2] <= series) == counts[-1] def test_binary(self): np = pytest.importorskip("numpy") From 4bb299474697e44a35a1dd9c046e6f389ac42356 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Tue, 28 Apr 2020 10:19:17 -0700 Subject: [PATCH 25/28] Check leftmost and rightmost buckets --- client/verta/tests/test_deployment.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/client/verta/tests/test_deployment.py b/client/verta/tests/test_deployment.py index 06ef32a4fd..72481855d7 100644 --- a/client/verta/tests/test_deployment.py +++ b/client/verta/tests/test_deployment.py @@ -584,6 +584,8 @@ class TestHistogram: @staticmethod def assert_histograms_match_dataframe(histograms, df): """Common assertions for this test suite.""" + np = pytest.importorskip("numpy") + # features match assert set(histograms['features'].keys()) == set(df.columns) # all rows counted @@ -613,6 +615,10 @@ def assert_histograms_match_dataframe(histograms, df): assert all(buckets[0] <= series) assert all(series <= buckets[-1]) + # appropriate leftmost and rightmost buckets + assert buckets[0] == series.min() + assert buckets[-1] == series.max() + # counts correct for value, count in zip(buckets, counts): assert sum(series == value) == count @@ -626,6 +632,10 @@ def assert_histograms_match_dataframe(histograms, df): assert all(limits[0] <= series) assert all(series <= limits[-1]) + # appropriate leftmost and rightmost limits + assert np.isclose(limits[0], series.min()) + assert np.isclose(limits[-1], series.max()) + # counts correct bin_windows = list(zip(limits[:-1], limits[1:])) for i, (l, r) in enumerate(bin_windows[:-1]): From 53eb7f9d978c934261a6e9116abb8ce6f4371378 Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Tue, 28 Apr 2020 10:26:19 -0700 Subject: [PATCH 26/28] Finish e2cecdf2 --- client/verta/tests/test_deployment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/test_deployment.py b/client/verta/tests/test_deployment.py index 72481855d7..04fbb4ead0 100644 --- a/client/verta/tests/test_deployment.py +++ b/client/verta/tests/test_deployment.py @@ -604,7 +604,7 @@ def assert_histograms_match_dataframe(histograms, df): num_false = sum(~series) num_true = sum(series) - assert histogram_data['count'] == [num_false, num_true] + assert counts == [num_false, num_true] elif histogram_type == "discrete": buckets = histogram_data['bucket_values'] From 964cd07c9fdb6d02859ebc76121cb07cfcc4ba5e Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Tue, 28 Apr 2020 12:05:31 -0700 Subject: [PATCH 27/28] Add additional bucket checks --- client/verta/tests/test_deployment.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/client/verta/tests/test_deployment.py b/client/verta/tests/test_deployment.py index 04fbb4ead0..77c10e4e32 100644 --- a/client/verta/tests/test_deployment.py +++ b/client/verta/tests/test_deployment.py @@ -619,6 +619,10 @@ def assert_histograms_match_dataframe(histograms, df): assert buckets[0] == series.min() assert buckets[-1] == series.max() + # all buckets have data + # NOTE: this might not be behavior that we want in the future + assert all(counts) + # counts correct for value, count in zip(buckets, counts): assert sum(series == value) == count @@ -636,6 +640,13 @@ def assert_histograms_match_dataframe(histograms, df): assert np.isclose(limits[0], series.min()) assert np.isclose(limits[-1], series.max()) + # buckets equal in size + bucket_sizes = np.diff(limits) + assert np.allclose(bucket_sizes, bucket_sizes[0]) + + # correct number of buckets + assert len(limits) == 11 + # counts correct bin_windows = list(zip(limits[:-1], limits[1:])) for i, (l, r) in enumerate(bin_windows[:-1]): From 6549648332229a9f177d9619b6062fe53fef8fff Mon Sep 17 00:00:00 2001 From: Michael Liu Date: Tue, 28 Apr 2020 15:17:34 -0700 Subject: [PATCH 28/28] Clarify variable --- client/verta/tests/test_deployment.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/client/verta/tests/test_deployment.py b/client/verta/tests/test_deployment.py index 77c10e4e32..5bc27668d0 100644 --- a/client/verta/tests/test_deployment.py +++ b/client/verta/tests/test_deployment.py @@ -640,9 +640,9 @@ def assert_histograms_match_dataframe(histograms, df): assert np.isclose(limits[0], series.min()) assert np.isclose(limits[-1], series.max()) - # buckets equal in size - bucket_sizes = np.diff(limits) - assert np.allclose(bucket_sizes, bucket_sizes[0]) + # buckets equal in width + bucket_widths = np.diff(limits) + assert np.allclose(bucket_widths, bucket_widths[0]) # correct number of buckets assert len(limits) == 11