In [1]:
import pyspark as ps
from pyspark.sql import SparkSession 

In [2]:
'''
"usepool": false indicates that connection pooling is turned off, which is a sub-optimal deployment.
"cacheEnabled": false indicates that caching has been disabled, another sub-optimal configuration.
Finally, the rules section must contain a rule that enables read/write (r/w) split.
When pooling and caching are both enabled and such a rule is present, the proxy is configured
to support each of those features: connection pooling, caching, and r/w split.
Then the performance of each of those features can be examined.
'''

'\n"usepool": false, indicates that Connection Pooling is turned off. A sub-optimal deployment.\n"cacheEnabled": false, = caching has been disabled. another sub-optimal configuration.\nand finally the rules section must have a rule to enable r/w split;\n    {\n      "enabled": true,\n      "type": "V",\n      "patterns": [\n        "(?i)^select"\n      ],\n      "rowPatterns": [],\n      "operator": "AND",\n      "columnNameOperator": "AND",\n      "intrans": false,\n      "properties": {}\n    },\n\nall three of these being present confirms the proxy is configured \nto support each of those features: connection pooling, caching, and r/w split.\nThen the performance of each of those features can be examined.\n'

In [32]:
class configHandler:
    """Carries the results of the configuration sanity checks.

    Every constructor argument is interpreted by truthiness: a falsy value
    means the corresponding check failed and adds one line to ``get_msg``.

    Attributes:
        ups: connection pooling flag (from ``usepool_status``).
        cs: caching flag (from ``cacheStatus``).
        rws: read/write split flag (from ``rwStatus``).
        am: authentication mode flag (from ``authMode``).
    """

    def __init__(
        self, usepool_status=True, cacheStatus=True, rwStatus=True, authMode=True
    ) -> None:
        self.ups, self.cs = usepool_status, cacheStatus
        self.rws, self.am = rwStatus, authMode

    def get_msg(self) -> str:
        """Build the sanity check report.

        Returns:
            One warning per failed check, joined with newlines, in the fixed
            order pooling, caching, r/w split, auth mode. An empty string
            means every check passed.
        """
        # (flag, warning emitted when the flag is falsy), in report order.
        findings = (
            (
                self.ups,
                'Connection Pooling is turned off. ("usepool": false) in config.',
            ),
            (
                self.cs,
                'Caching is disabled. ("cacheEnabled": false) in config.',
            ),
            (
                self.rws,
                "Read/write split is disabled. Check configuration file rule section.",
            ),
            (
                self.am,
                "Authentication mode is not 'passthrough', username and password need to be separately configured in the vdb.",
            ),
        )
        return "\n".join(warning for passed, warning in findings if not passed)
        

In [30]:
# function to perform basic sanity checks on a configuration file
def sanity_check(conf: ps.sql.DataFrame) -> configHandler:
    """Run the basic sanity checks on a parsed configuration.

    Only the first row of ``conf`` is inspected, and within it the first
    entry of ``sources``, the first entry of ``vdbs`` and the first entry of
    ``rules``.

    Args:
        conf: DataFrame exposing the ``sources``, ``vdbs`` and ``rules``
            columns.

    Returns:
        A ``configHandler`` built from the four check results: the
        ``usepool`` value, the ``cacheEnabled`` value, whether any rule lists
        the ``(?i)^select`` pattern, and whether ``authMode`` equals
        ``'passthrough'``.

    Raises:
        IndexError: if ``conf`` has no rows or one of the inspected arrays
            is empty.
    """
    # Collect once: each collect() is a separate Spark action, so fetching
    # the three columns together avoids re-evaluating the DataFrame per check.
    first_row = conf.select("sources", "vdbs", "rules").collect()[0]
    first_vdb = first_row["vdbs"][0]

    # check usepool
    usepool_status = first_row["sources"][0]["usepool"]
    # check cache
    cacheStatus = first_vdb["cacheEnabled"]
    # check for proper r/w rules; a null rule list or a rule whose "patterns"
    # is null counts as "no match" instead of raising TypeError.
    # NOTE(review): a rule's "enabled" flag is not consulted here - confirm
    # whether a disabled select rule should still count as r/w split.
    rw_rules = first_row["rules"][0]["rules"] or []
    rwStatus = any("(?i)^select" in (rule["patterns"] or []) for rule in rw_rules)
    # check whether authMode is passthrough
    authMode = first_vdb["authMode"] == 'passthrough'
    return configHandler(usepool_status, cacheStatus, rwStatus, authMode)

In [13]:
# Create (or reuse) the Spark session used to read the configuration file.
spark = (
    SparkSession.builder
    .appName('Config Checker')
    .getOrCreate()
)
# Configuration file to inspect, relative to the notebook's working directory.
path = '../../data/sample_log_folders/talview-heimdall-logs-202309181309/core-01-vdb_1.conf'
# multiLine lets the reader parse a JSON document that spans several lines.
config = (
    spark.read
    .option('multiLine', 'true')
    .json(path)
)

In [33]:
# Run the sanity checks against the loaded configuration.
config_handler = sanity_check(config)
# Last expression of the cell, so the report is displayed as the cell output;
# an empty string means every check passed.
config_handler.get_msg()

False


"Authentication mode is not 'passthrough', username and password need to be separately configured in the vdb."

In [34]:
# Stop the Spark session created earlier in the notebook.
spark.stop()