In [2]:
!pip install great_expectations==1.3.0 pandas opentelemetry-api==1.28.2 opentelemetry-distro==0.49b2 opentelemetry-exporter-otlp==1.28.2 opentelemetry-exporter-otlp-proto-grpc==1.28.2 opentelemetry-sdk==1.28.2 opentelemetry-semantic-conventions==0.49b2 opentelemetry-util-http==0.49b2 wrapt==1.16.0


Collecting great_expectations==1.3.0
  Downloading great_expectations-1.3.0-py3-none-any.whl.metadata (8.5 kB)
Collecting pandas
  Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting opentelemetry-api==1.28.2
  Downloading opentelemetry_api-1.28.2-py3-none-any.whl.metadata (1.4 kB)
Collecting opentelemetry-distro==0.49b2
  Downloading opentelemetry_distro-0.49b2-py3-none-any.whl.metadata (1.6 kB)
Collecting opentelemetry-exporter-otlp==1.28.2
  Downloading opentelemetry_exporter_otlp-1.28.2-py3-none-any.whl.metadata (2.3 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc==1.28.2
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.28.2-py3-none-any.whl.metadata (2.2 kB)
Collecting opentelemetry-sdk==1.28.2
  Downloading opentelemetry_sdk-1.28.2-py3-none-any.whl.metadata (1.5 kB)
C

In [3]:
def data_source_definition(context, data_source_name):
    """Add a pandas data source called ``data_source_name`` to ``context``.

    Returns whatever ``context.data_sources.add_pandas`` returns.
    """
    source_registry = context.data_sources
    pandas_source = source_registry.add_pandas(data_source_name)
    return pandas_source

def data_asset_definition(data_source, data_asset_name):
    """Add a dataframe asset named ``data_asset_name`` to ``data_source``.

    Returns whatever ``data_source.add_dataframe_asset`` returns.
    """
    dataframe_asset = data_source.add_dataframe_asset(name=data_asset_name)
    return dataframe_asset

def whole_batch_definition(data_asset, batch_definition_name):
    """Add a whole-dataframe batch definition to ``data_asset``.

    ``batch_definition_name`` is forwarded positionally to
    ``data_asset.add_batch_definition_whole_dataframe``; its result is returned.
    """
    add_whole_dataframe = data_asset.add_batch_definition_whole_dataframe
    return add_whole_dataframe(batch_definition_name)

def suite_definition(context, data_product_name, suite_name, suite_expectations):
    """Create an expectation suite, register it and fill it with expectations.

    Args:
        context: Great Expectations context; the suite is registered through
            ``context.suites.add``.
        data_product_name: stored in each expectation's ``meta`` under
            ``"data_product_name"``.
        suite_name: name given to the new ``gx.ExpectationSuite``.
        suite_expectations: iterable of mappings, each providing the keys
            ``"expectation_type"``, ``"kwargs"`` and ``"expectation_name"``.

    Returns:
        The suite returned by ``context.suites.add`` after every expectation
        has been added and ``suite.save()`` has been called.

    Raises:
        KeyError: if an entry of ``suite_expectations`` lacks a required key.
    """
    # Imported locally: no visible cell brings ExpectationConfiguration into
    # scope, so referencing it as a bare global raised NameError.
    from great_expectations.expectations.expectation_configuration import (
        ExpectationConfiguration,
    )

    suite = gx.ExpectationSuite(name=suite_name)
    suite = context.suites.add(suite)
    for exp in suite_expectations:
        ec = ExpectationConfiguration(
            type=exp["expectation_type"],
            kwargs=exp["kwargs"],
            meta={
                "expectation_name": exp["expectation_name"],
                "data_product_name": data_product_name,
            },
        )
        suite.add_expectation_configuration(ec)
    suite.save()

    return suite

def validation_definition(context, batch_definition, suite):
    """Create a validation definition for ``suite`` and register it.

    The definition binds ``batch_definition`` to ``suite`` and reuses
    ``suite.name`` as its own name.

    Returns:
        Whatever ``context.validation_definitions.add`` returns.
    """
    # The stray debugging ``print()`` that used to sit here was removed; it
    # only wrote an empty line to stdout on every call.
    validation = gx.ValidationDefinition(data=batch_definition, suite=suite, name=suite.name)
    return context.validation_definitions.add(validation)

def validation_run(df, validation_definition):
    """Run ``validation_definition`` against ``df``.

    The dataframe is handed over as the ``"dataframe"`` batch parameter and
    the result of ``validation_definition.run`` is returned unchanged.
    """
    batch_parameters = {"dataframe": df}
    return validation_definition.run(batch_parameters=batch_parameters)

In [4]:
import random
import time
from datetime import datetime
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, PeriodicExportingMetricReader
from opentelemetry.metrics import get_meter, Observation
from opentelemetry.sdk.resources import Resource
import logging
import pandas as pd
import great_expectations as gx
#from great_expectations_setup.gx_dataframe import *
#from great_expectations_setup.expectations import *
import logging
from opentelemetry import metrics

# Log at INFO level so the logging.info() messages emitted at start-up and
# shutdown are not filtered out.
logging.basicConfig(level=logging.INFO)
# Great Expectations context used by setup_gx() when registering data
# sources, suites and validation definitions.
context = gx.get_context()

# Disabled: explicit SDK wiring that built a MeterProvider with a console
# exporter and forced it into the API module's globals.
# NOTE(review): if this is ever re-enabled, the public
# `metrics.set_meter_provider(provider)` call is presumably the intended way
# to install the provider instead of patching `_METER_PROVIDER` — confirm.
#sidecar_name = data_product_name + "-quality_sidecar"
# resource = Resource.create({"service.name": sidecar_name})
# exporter = ConsoleMetricExporter()
# metric_reader = PeriodicExportingMetricReader(exporter)
# provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
# get_meter.__globals__["_METER_PROVIDER"] = provider 
# meter = get_meter(sidecar_name, "1.0.0")

# Meter used by setup_gx() to register one observable gauge per suite.
# NOTE(review): no MeterProvider is configured in this cell; unless one is
# installed elsewhere (e.g. by auto-instrumentation), the gauge callbacks may
# never be invoked — confirm.
meter = metrics.get_meter(__name__)

def setup_gx(data_product_suites):
    """Register Great Expectations objects and one observable gauge per suite.

    For each entry of ``data_product_suites`` this creates, in order, a pandas
    data source, a dataframe asset, a whole-dataframe batch definition, an
    expectation suite and a validation definition. It then reads the entry's
    CSV file and registers an observable gauge whose callback re-runs the
    validation against that dataframe.

    Relies on the module-level names ``context``, ``meter`` and
    ``data_product_name`` being defined before the call.
    NOTE(review): ``data_product_name`` is not assigned in any visible cell —
    confirm it is defined before this function runs.

    Args:
        data_product_suites: iterable of mappings, each with an
            ``"expectations"`` entry and a ``"physical_informations"`` mapping
            holding ``"data_source_name"``, ``"data_asset_name"`` and
            ``"dataframe"`` (the latter is passed to ``pd.read_csv``).

    Returns:
        list: the validation definitions, in input order.
    """
    validation_defs = []

    for data_product_suite in data_product_suites:
        physical_informations = data_product_suite["physical_informations"]
        data_source_name = physical_informations["data_source_name"]
        data_asset_name = physical_informations["data_asset_name"]
        suite_name = data_product_name + "-" + data_source_name + "-" + data_asset_name

        data_source = data_source_definition(context, data_source_name)
        data_asset = data_asset_definition(data_source, data_asset_name)
        batch_definition = whole_batch_definition(data_asset, suite_name)

        suite = suite_definition(context, data_product_name, suite_name, data_product_suite["expectations"])
        validation_def = validation_definition(context, batch_definition, suite)
        validation_defs.append(validation_def)

        # The CSV is loaded once here; every callback invocation validates
        # this same in-memory dataframe.
        df = pd.read_csv(physical_informations["dataframe"])
        callback = run_validation_callback(
            validation_def, suite_name, data_source_name, data_asset_name, df
        )

        # Register the gauge; its return value is not needed afterwards, so
        # it is no longer bound to an unused local.
        meter.create_observable_gauge(
            name=suite_name,
            description=f"Validation results for suite: {suite_name}",
            unit="string",
            callbacks=[callback],
        )

    return validation_defs

def run_validation_callback(validation_def, suite_name, data_source_name, data_asset_name, df):
    """Build an observable-gauge callback that validates ``df`` on each call.

    Args:
        validation_def: validation definition handed to ``validation_run``.
        suite_name: attached to every observation as ``"suite_name"``.
        data_source_name: attached as ``"data_source_name"``.
        data_asset_name: attached as ``"data_asset_name"``.
        df: dataframe validated on every invocation.

    Returns:
        A function taking one ``options`` argument (unused) and returning a
        list with one ``Observation`` per expectation result.
    """
    # Result statistics copied into the attributes when the result has them.
    count_keys = ("element_count", "unexpected_count")

    def callback(options):
        validation_results = validation_run(df=df, validation_definition=validation_def)

        observations = []

        for validation_result in validation_results["results"]:
            # Some results carry no row-level counts; indexing them directly
            # raised KeyError and aborted the whole callback.
            result = validation_result["result"] or {}
            meta = validation_result["expectation_config"]["meta"]

            attributes = {}
            for key in count_keys:
                count = result.get(key)
                if count is not None:
                    attributes[key] = count
            attributes.update(
                {
                    "expectation_name": meta["expectation_name"],
                    "suite_name": suite_name,
                    "data_source_name": data_source_name,
                    "data_asset_name": data_asset_name,
                }
            )

            observations.append(
                Observation(
                    # Emit a plain 0/1 number instead of a bool.
                    value=int(bool(validation_result["success"])),
                    attributes=attributes,
                )
            )

        return observations

    return callback

if __name__ == "__main__":
    logging.info("Starting the application...")
    logging.info("Setting up GreatExpectations...")
    validation_defs = setup_gx(data_product_suites)

    try:
        # Idle forever in 5-second sleeps; presumably this keeps the process
        # alive so the registered gauge callbacks can keep being collected.
        while True:
            time.sleep(5)
    except KeyboardInterrupt:
        # Only a user/kernel interrupt is treated as a clean shutdown; the
        # previous bare ``except:`` also swallowed SystemExit and real errors.
        logging.info("Shutting down the application...")


ModuleNotFoundError: No module named 'great_expectations_setup'