docs update 0.10.2 (#978)

capitalone · Jul 28, 2023 · 0420368 · 0420368
1 parent 8a7f9cc
commit 0420368
Show file tree

Hide file tree

Showing 297 changed files with 73,700 additions and 1 deletion.
diff --git a/docs/0.10.2/doctrees/API.doctree b/docs/0.10.2/doctrees/API.doctree
diff --git a/docs/0.10.2/doctrees/add_new_model_to_data_labeler.doctree b/docs/0.10.2/doctrees/add_new_model_to_data_labeler.doctree
diff --git a/docs/0.10.2/doctrees/column_name_labeler_example.doctree b/docs/0.10.2/doctrees/column_name_labeler_example.doctree
diff --git a/docs/0.10.2/doctrees/data_labeling.doctree b/docs/0.10.2/doctrees/data_labeling.doctree
diff --git a/docs/0.10.2/doctrees/data_reader.doctree b/docs/0.10.2/doctrees/data_reader.doctree
diff --git a/docs/0.10.2/doctrees/data_readers.doctree b/docs/0.10.2/doctrees/data_readers.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.data_readers.avro_data.doctree b/docs/0.10.2/doctrees/dataprofiler.data_readers.avro_data.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.data_readers.base_data.doctree b/docs/0.10.2/doctrees/dataprofiler.data_readers.base_data.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.data_readers.csv_data.doctree b/docs/0.10.2/doctrees/dataprofiler.data_readers.csv_data.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.data_readers.data.doctree b/docs/0.10.2/doctrees/dataprofiler.data_readers.data.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.data_readers.data_utils.doctree b/docs/0.10.2/doctrees/dataprofiler.data_readers.data_utils.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.data_readers.doctree b/docs/0.10.2/doctrees/dataprofiler.data_readers.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.data_readers.filepath_or_buffer.doctree b/docs/0.10.2/doctrees/dataprofiler.data_readers.filepath_or_buffer.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.data_readers.graph_data.doctree b/docs/0.10.2/doctrees/dataprofiler.data_readers.graph_data.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.data_readers.json_data.doctree b/docs/0.10.2/doctrees/dataprofiler.data_readers.json_data.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.data_readers.parquet_data.doctree b/docs/0.10.2/doctrees/dataprofiler.data_readers.parquet_data.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.data_readers.structured_mixins.doctree b/docs/0.10.2/doctrees/dataprofiler.data_readers.structured_mixins.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.data_readers.text_data.doctree b/docs/0.10.2/doctrees/dataprofiler.data_readers.text_data.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.doctree b/docs/0.10.2/doctrees/dataprofiler.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.dp_logging.doctree b/docs/0.10.2/doctrees/dataprofiler.dp_logging.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.labelers.base_data_labeler.doctree b/docs/0.10.2/doctrees/dataprofiler.labelers.base_data_labeler.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.labelers.base_model.doctree b/docs/0.10.2/doctrees/dataprofiler.labelers.base_model.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.labelers.char_load_tf_model.doctree b/docs/0.10.2/doctrees/dataprofiler.labelers.char_load_tf_model.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.labelers.character_level_cnn_model.doctree b/docs/0.10.2/doctrees/dataprofiler.labelers.character_level_cnn_model.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.labelers.classification_report_utils.doctree b/docs/0.10.2/doctrees/dataprofiler.labelers.classification_report_utils.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.labelers.column_name_model.doctree b/docs/0.10.2/doctrees/dataprofiler.labelers.column_name_model.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.labelers.data_labelers.doctree b/docs/0.10.2/doctrees/dataprofiler.labelers.data_labelers.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.labelers.data_processing.doctree b/docs/0.10.2/doctrees/dataprofiler.labelers.data_processing.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.labelers.doctree b/docs/0.10.2/doctrees/dataprofiler.labelers.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.labelers.labeler_utils.doctree b/docs/0.10.2/doctrees/dataprofiler.labelers.labeler_utils.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.labelers.regex_model.doctree b/docs/0.10.2/doctrees/dataprofiler.labelers.regex_model.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.labelers.utils.doctree b/docs/0.10.2/doctrees/dataprofiler.labelers.utils.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.base_column_profilers.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.base_column_profilers.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.categorical_column_profile.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.categorical_column_profile.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.column_profile_compilers.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.column_profile_compilers.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.data_labeler_column_profile.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.data_labeler_column_profile.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.datetime_column_profile.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.datetime_column_profile.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.float_column_profile.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.float_column_profile.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.graph_profiler.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.graph_profiler.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.helpers.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.helpers.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.helpers.report_helpers.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.helpers.report_helpers.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.histogram_utils.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.histogram_utils.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.int_column_profile.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.int_column_profile.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.json_decoder.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.json_decoder.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.json_encoder.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.json_encoder.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.numerical_column_stats.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.numerical_column_stats.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.order_column_profile.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.order_column_profile.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.profile_builder.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.profile_builder.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.profiler_options.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.profiler_options.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.text_column_profile.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.text_column_profile.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.unstructured_labeler_profile.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.unstructured_labeler_profile.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.unstructured_text_profile.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.unstructured_text_profile.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.profilers.utils.doctree b/docs/0.10.2/doctrees/dataprofiler.profilers.utils.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.reports.doctree b/docs/0.10.2/doctrees/dataprofiler.reports.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.reports.graphs.doctree b/docs/0.10.2/doctrees/dataprofiler.reports.graphs.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.reports.utils.doctree b/docs/0.10.2/doctrees/dataprofiler.reports.utils.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.settings.doctree b/docs/0.10.2/doctrees/dataprofiler.settings.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.validators.base_validators.doctree b/docs/0.10.2/doctrees/dataprofiler.validators.base_validators.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.validators.doctree b/docs/0.10.2/doctrees/dataprofiler.validators.doctree
diff --git a/docs/0.10.2/doctrees/dataprofiler.version.doctree b/docs/0.10.2/doctrees/dataprofiler.version.doctree
diff --git a/docs/0.10.2/doctrees/environment.pickle b/docs/0.10.2/doctrees/environment.pickle
diff --git a/docs/0.10.2/doctrees/examples.doctree b/docs/0.10.2/doctrees/examples.doctree
diff --git a/docs/0.10.2/doctrees/graph_data_demo.doctree b/docs/0.10.2/doctrees/graph_data_demo.doctree
diff --git a/docs/0.10.2/doctrees/graphs.doctree b/docs/0.10.2/doctrees/graphs.doctree
diff --git a/docs/0.10.2/doctrees/index.doctree b/docs/0.10.2/doctrees/index.doctree
diff --git a/docs/0.10.2/doctrees/install.doctree b/docs/0.10.2/doctrees/install.doctree
diff --git a/docs/0.10.2/doctrees/labeler.doctree b/docs/0.10.2/doctrees/labeler.doctree
diff --git a/docs/0.10.2/doctrees/merge_profile_list.doctree b/docs/0.10.2/doctrees/merge_profile_list.doctree
diff --git a/docs/0.10.2/doctrees/modules.doctree b/docs/0.10.2/doctrees/modules.doctree
diff --git a/docs/0.10.2/doctrees/nbsphinx/add_new_model_to_data_labeler.ipynb b/docs/0.10.2/doctrees/nbsphinx/add_new_model_to_data_labeler.ipynb
diff --git a/docs/0.10.2/doctrees/nbsphinx/column_name_labeler_example.ipynb b/docs/0.10.2/doctrees/nbsphinx/column_name_labeler_example.ipynb
@@ -0,0 +1,364 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "e04c382a-7c49-452b-b9bf-e448951c64fe",
+   "metadata": {},
+   "source": [
+    "# ColumnName Labeler Tutorial"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6fb3ecb9-bc51-4c18-93d5-7991bbee5165",
+   "metadata": {},
+   "source": [
+    "This notebook teaches how to use the existing `ColumnNameModel`:\n",
+    "\n",
+    "1. Loading and utilizing the pre-existing `ColumnNameModel`\n",
+    "2. Run the labeler\n",
+    "\n",
+    "First, let's import the libraries needed for this example."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a67c197b-d3ee-4896-a96f-cc3d043601d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import json\n",
+    "from pprint import pprint\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "try:\n",
+    "    import dataprofiler as dp\n",
+    "except ImportError:\n",
+    "    sys.path.insert(0, '../..')\n",
+    "    import dataprofiler as dp"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "35841215",
+   "metadata": {},
+   "source": [
+    "## Loading and predicting using a pre-existing model using `load_from_library`\n",
+    "\n",
+    "The easiest option for users is to `load_from_library` by specifying the name for the labeler in the `resources/` folder. Quickly import and start predicting with any model from the Data Profiler's library of models available."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "46e36dd6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "labeler_from_library = dp.DataLabeler.load_from_library('column_name_labeler')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dfa94868",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "labeler_from_library.predict(data=[\"ssn\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c71356f4-9020-4862-a1e1-816effbb5443",
+   "metadata": {},
+   "source": [
+    "## Loading and using the pre-existing column name labeler using `load_with_components`\n",
+    "\n",
+    "For example purposes here, we will import the exsting `ColumnName` labeler via the `load_with_components` command from the `dp.DataLabeler`. This shows a bit more of the details of the data labeler's flow."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "818c5b88",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "parameters = {\n",
+    "            \"true_positive_dict\": [\n",
+    "                {\"attribute\": \"ssn\", \"label\": \"ssn\"},\n",
+    "                {\"attribute\": \"suffix\", \"label\": \"name\"},\n",
+    "                {\"attribute\": \"my_home_address\", \"label\": \"address\"},\n",
+    "            ],\n",
+    "            \"false_positive_dict\": [\n",
+    "                {\n",
+    "                    \"attribute\": \"contract_number\",\n",
+    "                    \"label\": \"ssn\",\n",
+    "                },\n",
+    "                {\n",
+    "                    \"attribute\": \"role\",\n",
+    "                    \"label\": \"name\",\n",
+    "                },\n",
+    "                {\n",
+    "                    \"attribute\": \"send_address\",\n",
+    "                    \"label\": \"address\",\n",
+    "                },\n",
+    "            ],\n",
+    "            \"negative_threshold_config\": 50,\n",
+    "            \"positive_threshold_config\": 85,\n",
+    "            \"include_label\": True,\n",
+    "        }\n",
+    "\n",
+    "label_mapping = {\"ssn\": 1, \"name\": 2, \"address\": 3}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9098329e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# pre processor \n",
+    "preprocessor = dp.labelers.data_processing.DirectPassPreprocessor()\n",
+    "\n",
+    "# model\n",
+    "from dataprofiler.labelers.column_name_model import ColumnNameModel\n",
+    "model = ColumnNameModel(\n",
+    "    parameters=parameters,\n",
+    "    label_mapping=label_mapping,\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# post processor\n",
+    "postprocessor = dp.labelers.data_processing.ColumnNameModelPostprocessor()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "113d6655-4bca-4d8e-9e6f-b972e29d5684",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_labeler = dp.DataLabeler.load_with_components(\n",
+    "    preprocessor=preprocessor,\n",
+    "    model=model,\n",
+    "    postprocessor=postprocessor,\n",
+    ")\n",
+    "data_labeler.model.help()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b405887-2b92-44ca-b8d7-29c384f6dd9c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pprint(data_labeler.label_mapping)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "11916a48-098c-4056-ac6c-b9542d85fa86",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pprint(data_labeler.model._parameters)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "da0e97ee-8d6d-4631-9b55-78ed904d5f41",
+   "metadata": {},
+   "source": [
+    "### Predicting with the ColumnName labeler\n",
+    "\n",
+    "In the prediction below, the data will be passed into to stages in the background\n",
+    "- 1) `compare_negative`: The idea behind the `compare_negative` is to first filter out any possibility of flagging a false positive in the model prediction. In this step, the confidence value is checked and if the similarity is too close to being a false positive, that particular string in the `data` is removed and not returned to the `compare_positive`.\n",
+    "- 2) `compare_positive`: Finally the `data` is passed to the `compare_positive` step and checked for similarity with the the `true_positive_dict` values. Again, during this stage the `positive_threshold_config` is used to filter the results to only those `data` values that are greater than or equal to the `positive_threshold_config` provided by the user."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fe519e65-36a7-4f42-8314-5369de8635c7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# evaluate a prediction using the default parameters\n",
+    "data_labeler.predict(data=[\"ssn\", \"name\", \"address\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b41d834d-e47b-45a6-8970-d2d2033e2ade",
+   "metadata": {},
+   "source": [
+    "## Replacing the parameters in the existing labeler\n",
+    "\n",
+    "We can achieve this by:\n",
+    "1. Setting the label mapping to the new labels\n",
+    "2. Setting the model parameters which include: `true_positive_dict`, `false_positive_dict`, `negative_threshold_config`, `positive_threshold_config`, and `include_label`\n",
+    "\n",
+    "where `true_positive_dict` and `false_positive_dict` are `lists` of `dicts`, `negative_threshold_config` and `positive_threshold_config` are integer values between `0` and `100`, and `include_label` is a `boolean` value that determines if the output should include the prediction labels or only the confidence values."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c6bb010a-406f-4fd8-abd0-3355a5ad0ded",
+   "metadata": {},
+   "source": [
+    "Below, we created 4 labels where `other` is the `default_label`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f86584cf-a7af-4bae-bf44-d87caa68833a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_labeler.set_labels({'other': 0, \"funky_one\": 1, \"funky_two\": 2, \"funky_three\": 3})\n",
+    "data_labeler.model.set_params(\n",
+    "    true_positive_dict= [\n",
+    "                {\"attribute\": \"ssn\", \"label\": \"funky_one\"},\n",
+    "                {\"attribute\": \"suffix\", \"label\": \"funky_two\"},\n",
+    "                {\"attribute\": \"my_home_address\", \"label\": \"funky_three\"},\n",
+    "            ],\n",
+    "    false_positive_dict=[\n",
+    "                {\n",
+    "                    \"attribute\": \"contract_number\",\n",
+    "                    \"label\": \"ssn\",\n",
+    "                },\n",
+    "                {\n",
+    "                    \"attribute\": \"role\",\n",
+    "                    \"label\": \"name\",\n",
+    "                },\n",
+    "                {\n",
+    "                    \"attribute\": \"not_my_address\",\n",
+    "                    \"label\": \"address\",\n",
+    "                },\n",
+    "            ],\n",
+    "    negative_threshold_config=50,\n",
+    "    positive_threshold_config=85,\n",
+    "    include_label=True,\n",
+    ")\n",
+    "data_labeler.label_mapping"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1ece1c8c-18a5-46fc-b563-6458e6e71e53",
+   "metadata": {},
+   "source": [
+    "### Predicting with the new labels\n",
+    "\n",
+    "Here we are testing the `predict()` method with brand new labels for label_mapping. As we can see the new labels flow throught to the output of the data labeler."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "92842e14-2ea6-4879-b58c-c52b607dc94c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_labeler.predict(data=[\"ssn\", \"suffix\"], predict_options=dict(show_confidences=True))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "261b903f-8f4c-403f-839b-ab8813f850e9",
+   "metadata": {},
+   "source": [
+    "## Saving the Data Labeler for future use"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b6ffbaf2-9400-486a-ba83-5fc9ba9334d7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if not os.path.isdir('new_column_name_labeler'):\n",
+    "    os.mkdir('new_column_name_labeler')\n",
+    "data_labeler.save_to_disk('new_column_name_labeler')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "09e40cb6-9d89-41c4-ae28-3dca498f8c68",
+   "metadata": {},
+   "source": [
+    "## Loading the saved Data Labeler"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "52615b25-70a6-4ebb-8a32-14aaf1e747d9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "saved_labeler = dp.DataLabeler.load_from_disk('new_column_name_labeler')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d1ccc0b3-1dc2-4847-95c2-d6b8769b1590",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ensuring the parametesr are what we saved.\n",
+    "print(\"label_mapping:\")\n",
+    "pprint(saved_labeler.label_mapping)\n",
+    "print(\"\\nmodel parameters:\")\n",
+    "pprint(saved_labeler.model._parameters)\n",
+    "print()\n",
+    "print(\"postprocessor: \" + saved_labeler.postprocessor.__class__.__name__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c827f2ae-4af6-4f3f-9651-9ee9ebea9fa0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# predicting with the loaded labeler.\n",
+    "saved_labeler.predict([\"ssn\", \"name\", \"address\"])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}