# ID/Primary Key

In [1]:
import pandas as pd

In [7]:
import great_expectations as ge
from great_expectations.core import (
    ExpectationConfiguration,
    ExpectationValidationResult,
)
from great_expectations.core.batch import Batch, BatchRequest
from great_expectations.core.batch_spec import SqlAlchemyDatasourceBatchSpec
from great_expectations.core.util import convert_to_json_serializable
from great_expectations.data_context.util import file_relative_path
from great_expectations.datasource.data_connector import ConfiguredAssetSqlDataConnector
from great_expectations.execution_engine import (
    PandasExecutionEngine,
    SparkDFExecutionEngine,
    SqlAlchemyExecutionEngine,
)
from great_expectations.expectations.core import ExpectColumnValuesToBeInSet
from great_expectations.expectations.metrics import (
    ColumnMax,
    ColumnValuesNonNull,
    CompoundColumnsUnique,
)
from great_expectations.expectations.metrics.map_metric_provider import (
    ColumnMapMetricProvider,
    MapMetricProvider,
)
from great_expectations.validator.validation_graph import MetricConfiguration
from great_expectations.validator.validator import Validator


# Dataframe with 3 columns

The `animals` column contains six animal names: three domestic animals (`cat`, `fish`, `dog`) and three that are not (`giraffe`, `lion`, `zebra`). The DataFrame also has two columns that can be used as primary-key columns (`pk_1` and `pk_2`).

In [31]:
# Demo data: `pk_1` / `pk_2` are the candidate key columns, `animals` is the
# column the expectation below is run against.
my_df: pd.DataFrame = pd.DataFrame(
    {
        "pk_1": list(range(6)),
        "pk_2": ["zero", "one", "two", "three", "four", "five"],
        "animals": ["cat", "fish", "dog", "giraffe", "lion", "zebra"],
    }
)

In [32]:
# Display the whole frame (only six rows) so the index, key columns and
# `animals` values can be compared with the validation result further down.
my_df

Unnamed: 0,pk_1,pk_2,animals
0,0,zero,cat
1,1,one,fish
2,2,two,dog
3,3,three,giraffe
4,4,four,lion
5,5,five,zebra


### Configurations

In [39]:
# Expect every value in `animals` to be one of the three listed animals, and
# ask for the COMPLETE result format.
expectation_configuration = ExpectationConfiguration(
    expectation_type="expect_column_values_to_be_in_set",
    kwargs={
        "column": "animals",
        "value_set": ["cat", "fish", "dog"],
        "result_format": {
            "result_format": "COMPLETE",
            # Disabled option: "include_unexpected_rows": True  (for all the rows)
        },
    },
)


* `result_format`: `COMPLETE`
* This will output `partial_unexpected_index_list` and `unexpected_index_list`

In [24]:
# Same expectation on `animals`, but asking for the SUMMARY result format.
# Note: this rebinds `expectation_configuration`, replacing any earlier value.
expectation_configuration = ExpectationConfiguration(
    expectation_type="expect_column_values_to_be_in_set",
    kwargs={
        "column": "animals",
        "value_set": ["cat", "fish", "dog"],
        "result_format": {
            "result_format": "SUMMARY",
            # Disabled option: "include_unexpected_rows": True  (for all the rows)
        },
    },
)

* `result_format`: `SUMMARY`
* This will output `partial_unexpected_index_list`

In [34]:
# Same expectation on `animals` with the COMPLETE result format, additionally
# passing both key columns as `unexpected_index_column_names`.
# Note: this rebinds `expectation_configuration`, replacing any earlier value.
expectation_configuration = ExpectationConfiguration(
    expectation_type="expect_column_values_to_be_in_set",
    kwargs={
        "column": "animals",
        "value_set": ["cat", "fish", "dog"],
        "result_format": {
            "result_format": "COMPLETE",
            # Two index columns are listed here: pk_1 and pk_2.
            "unexpected_index_column_names": ["pk_1", "pk_2"],
        },
    },
)

* `result_format`: `COMPLETE`
* `unexpected_index_column_names`: `pk_1`, `pk_2`

### Validation Code

In [40]:
# Wrap `my_df` in a Batch, build a Validator on a pandas execution engine, and
# validate the expectation built from `expectation_configuration`.
# NOTE(review): `expectation_configuration` is whichever configuration cell ran
# last. The execution counts (In [39] -> In [40]) suggest the saved output below
# came from the first COMPLETE configuration; a top-to-bottom run would use the
# last configuration cell instead -- confirm which one is intended.
engine = PandasExecutionEngine()
batch: Batch = Batch(data=my_df)
validator = Validator(execution_engine=engine, batches=[batch])
expectation = ExpectColumnValuesToBeInSet(expectation_configuration)
result = expectation.validate(validator)

Calculating Metrics:   0%|          | 0/9 [00:00<?, ?it/s]

In [41]:
# Show the `result` payload of the validation result (counts, unexpected
# values and, depending on the result format, their index lists).
result.result

{'element_count': 6,
 'unexpected_count': 3,
 'unexpected_percent': 50.0,
 'partial_unexpected_list': ['giraffe', 'lion', 'zebra'],
 'missing_count': 0,
 'missing_percent': 0.0,
 'unexpected_percent_total': 50.0,
 'unexpected_percent_nonmissing': 50.0,
 'partial_unexpected_index_list': [3, 4, 5],
 'partial_unexpected_counts': [{'value': 'giraffe', 'count': 1},
  {'value': 'lion', 'count': 1},
  {'value': 'zebra', 'count': 1}],
 'unexpected_list': ['giraffe', 'lion', 'zebra'],
 'unexpected_index_list': [3, 4, 5]}

# Relevant PRs


* [SQL - under review](https://github.com/great-expectations/great_expectations/pull/6448)
* [Pandas - merged](https://github.com/great-expectations/great_expectations/pull/6329)

In [None]:
# Look up the rows whose `pk_1` value is one of the unexpected indices reported
# above. Membership needs `.isin`: comparing the 6-row column with `==` against
# a 3-item list raises a length-mismatch error rather than filtering.
my_df[my_df["pk_1"].isin([3, 4, 5])]