# Packages

In [None]:
import json

# Dummy Data

The ingest data should be a JSON-formatted string. Future additions should increase flexibility, including support for nested dictionaries and arrays, automatic conversion of other null-like values ("nan", "NaN", "", ...), and potentially strings containing multiple JSON objects ("[{},{}]", "{}{}" or "{},{}").

In [None]:
# Sample JSON payload used throughout the examples. Compared with dataModel:
# "expected_incorrectType" holds a string where the model declares int,
# "unexpected" is not declared by the model, and the model's
# "expected_missing" key is absent from this payload.
ingestData = '''{
    "expected_str": "A",
    "expected_int": 1,
    "expected_float": 1.0,
    "expected_none": null,
    "expected_incorrectType": "AA",
    "unexpected": "B"
    }'''

# Dummy Data Model

The data model defines the fields that are expected to exist in the JSON string, together with the type expected for each field

In [None]:
# Expected field name -> expected Python type of the parsed JSON value
dataModel = dict(
    expected_str=str,
    expected_int=int,
    expected_float=float,
    # The sample payload carries null for this field
    expected_none=str,
    # The sample payload carries a string for this field
    expected_incorrectType=int,
    # The sample payload does not contain this field
    expected_missing=str,
)

# Data Ingestion Class

The data ingestion class uses custom exceptions to surface incorrect usage during development, but it should not raise exceptions for deviations between the data and the data model; those deviations are recorded in the validation feedback instead

In [None]:
class ingestDataTypeError(TypeError):
    """
    Incorrect ingest data type received for the ingestData parameter

    Derives from TypeError, which is itself an Exception subclass, so
    existing ``except Exception`` handlers keep working while the error can
    also be handled as a standard type error.

    Parameters
    ----------
    typeReceived:
        The type that was received instead of a string; it is interpolated
        into the error message
    """
    def __init__(self, typeReceived):
        super().__init__(
            f"The ingestData of the JSONDataIngestion must be a string, but received a {typeReceived}"
        )

class dataModelTypeError(TypeError):
    """
    Incorrect data model type received for the dataModel parameter

    Derives from TypeError, which is itself an Exception subclass, so
    existing ``except Exception`` handlers keep working while the error can
    also be handled as a standard type error.

    Parameters
    ----------
    typeReceived:
        The type that was received instead of a dictionary; it is
        interpolated into the error message
    """
    def __init__(self, typeReceived):
        super().__init__(
            f"The dataModel of the JSONDataIngestion must be a dictionary, but received a {typeReceived}"
        )

The data ingestion class parses and formats the incoming data, validates the data when compared to the data model, and outputs any deviations between the data and the data model

In [None]:
class JSONDataIngestion:
    """
    The JSON data ingestion class

    Parses a raw JSON formatted string, compares the parsed fields with a
    data model and records every deviation in the validation feedback
    instead of raising an exception.
    """

    def __init__(self, ingestData: str, dataModel: dict):
        """
        JSON data ingestion constructor

        Parameters
        ----------
        ingestData: str
            The raw JSON formatted string
        dataModel: dict
            The data model, mapping each expected field name to its
            expected type

        Raises
        ------
        ingestDataTypeError
            If ingestData is not a string
        dataModelTypeError
            If dataModel is not a dictionary

        Notes
        -----
        ex. ingestData: '{"field1": "A", "field2": 1, "field3": 1.0}'
        ex. dataModel: {'field1': str, 'field2': int, 'field3': float, ...}
        """
        # Validating input data types (subclasses of str / dict are accepted)
        if not isinstance(ingestData, str):
            raise ingestDataTypeError(type(ingestData))
        if not isinstance(dataModel, dict):
            raise dataModelTypeError(type(dataModel))
        # Storing raw ingested data
        self._ingestData = ingestData
        # Storing data model
        self._dataModel = dataModel
        # Ingestion validation feedback
        self._validation = self._empty_validation()
        # Formatted data output
        self._data = {}
        # Ingest data
        self.ingest()

    @staticmethod
    def _empty_validation() -> dict:
        """
        Build a fresh validation feedback structure with no deviations
        """
        return {
            'incorrectJSONFormat': False,
            'incorrectDataType': [],
            'unexpectedData': [],
            'expectedData': []
        }

    @property
    def ingestData(self):
        """The raw JSON formatted string"""
        return self._ingestData

    @property
    def dataModel(self):
        """The data model"""
        return self._dataModel

    @property
    def data(self):
        """The formatted data"""
        return self._data

    @property
    def validation(self):
        """The ingestion validation feedback"""
        return self._validation

    def ingest(self):
        """
        JSON data ingestion method

        Parses the stored JSON string and fills the ``data`` and
        ``validation`` properties:

        - If the string is not valid JSON, or its top-level value is not a
          JSON object, ``validation['incorrectJSONFormat']`` is set to True
          and ``data`` is set to None.
        - Every received field is copied to ``data``.
        - A received field that is declared in the model is listed in
          ``validation['incorrectDataType']`` (as ``{field: received type}``)
          when its value is not None and its exact type differs from the
          type declared in the model.
        - A received field that is not declared in the model is listed in
          ``validation['unexpectedData']``.
        - A model field that was not received is set to None in ``data`` and
          listed in ``validation['expectedData']``.

        The results are rebuilt from scratch on every call, so calling this
        method again does not duplicate validation entries.
        """
        # ___ Reset results so repeated calls do not accumulate feedback ___
        self._validation = self._empty_validation()
        self._data = {}
        model = self._dataModel
        feedback = self._validation
        # ___ Convert raw JSON string to object ___
        try:
            parsed = json.loads(self._ingestData)
        except json.JSONDecodeError:
            # Malformed JSON is handled together with non-object JSON below;
            # any other exception propagates to the caller
            parsed = None
        # Only a JSON object can be compared with the model field by field;
        # arrays, scalars and null are reported like malformed input
        if not isinstance(parsed, dict):
            feedback['incorrectJSONFormat'] = True
            # No data will be returned
            self._data = None
            return
        # ___ Validate data in ingested data ___
        for key, value in parsed.items():
            # The received value is always kept in the output data
            self._data[key] = value
            if key not in model:
                feedback['unexpectedData'].append(key)
            # Exact type comparison; None is accepted for every model type
            elif type(value) not in (model[key], type(None)):
                feedback['incorrectDataType'].append({key: type(value)})
        # ___ Validate data expected from model ___
        for key in model:
            if key not in self._data:
                # Set the output data to None since it was not received
                self._data[key] = None
                feedback['expectedData'].append(key)

# Example Usage

## Creating data model object

In [None]:
# Ingestion runs inside the constructor, so the results are ready immediately
dummyModel = JSONDataIngestion(ingestData=ingestData, dataModel=dataModel)

## Accessing private properties

Private fields in the model can be accessed through the public properties

In [None]:
# Read the raw JSON string back through the read-only ingestData property
print(dummyModel.ingestData)

## Setting private properties

Public properties cannot be modified after the data is ingested, which prevents data corruption. Python does not prevent the alteration of private fields, but best practice is not to modify them.

In [None]:
try:
    # The ingestData property defines no setter, so assignment is rejected
    dummyModel.ingestData = "{'field1': 'A'}"
except AttributeError:
    print("Private property can not be overwritten")
except Exception as error:
    # Anything other than the expected AttributeError is just reported
    print(error)

## Data output and validation

The formatted data and validation can be accessed through the properties and are available immediately after ingestion

In [None]:
# Heading and value on separate lines, followed by one blank line
print("Output Data:", dummyModel.data, sep="\n", end="\n\n")
# Heading and value on separate lines
print("Data Validation:", dummyModel.validation, sep="\n")

# Error Catching

## Incorrect JSON format

Any errors that exist due to the format of the data should not be raised as exceptions, but should be logged in the validation

In [None]:
# The closing brace is missing (and the quotes are single), so parsing fails
poorFormatJSON = JSONDataIngestion(
    ingestData="{'field1': 'A'",
    dataModel={'field1': str},
)
print("Data Output:", poorFormatJSON.data, sep="\n", end="\n\n")
print("Validation Output:", poorFormatJSON.validation, sep="\n")

## Incorrect input data types

Incorrect use of the data ingestion class, such as passing arguments of the wrong type, should raise exceptions, as these mistakes should be caught during development

In [None]:
try:
    # ingestData is a dictionary instead of a string
    poorIngestDataType = JSONDataIngestion(
        ingestData={'A': 'a'},
        dataModel={'A': str},
    )
except ingestDataTypeError as error:
    print(error)
except Exception as error:
    print(error)

In [None]:
try:
    # dataModel is a string instead of a dictionary, while ingestData is a
    # string as required, so the data model check is the one that fails
    poorDataModel = JSONDataIngestion("{'A': 'a'}", "{'A': str}")
except dataModelTypeError as e:
    # Catch the exception this cell demonstrates
    print(e)
except Exception as e:
    print(e)