# Load conf files

## Input API

In [1]:
import tomli
import re
from uuid import uuid4

inputs_conf = 'config/inputs.conf'
connections_conf = 'config/connections.conf'

with open(inputs_conf) as f:
    tmp = f.read()
    # .conf files aren't exactly TOML, so we need to format it a bit
    # first before we can load it
    tmp_formatted = re.sub(r"(\s*=\s*)(.*)", r"\1'\2'", tmp)
    tomli_inputs = tomli.loads(tmp_formatted).items()

with open(connections_conf) as f:
    tmp = f.read()
    tmp_formatted = re.sub(r"(\s*=\s*)(.*)", r"\1'\2'", tmp)
    tomli_connections = tomli.loads(tmp_formatted).items()

In [30]:
tomli_inputs

dict_items([('assignmentgroups', {'connection': 'TOOLX_P', 'disabled': '0', 'host': 'host1', 'index': 'indexx', 'index_time_mode': 'current', 'interval': '0 6 * * *', 'max_rows': '100000', 'mode': 'batch', 'query': 'SELECT * FROM "OWNER"."ASSIGNMENT_GROUPS_SPLUNK"', 'source': 'toolx', 'sourcetype': 'assignmentgroups'}), ('CI_relations', {'connection': 'TOOLX_P', 'disabled': '0', 'host': 'host1', 'index': 'indexx', 'index_time_mode': 'current', 'interval': '0 5 * * *', 'max_rows': '1000000', 'mode': 'batch', 'query': 'SELECT * FROM "OWNER"."RELATIONSHIP_SPLUNK"', 'source': 'toolx', 'sourcetype': 'ci_relations'}), ('config_full', {'connection': 'TOOLY_P', 'description': 'Get all config daily', 'disabled': '0', 'host': 'host2', 'index': 'indexy', 'index_time_mode': 'current', 'interval': '0 4 * * *', 'max_rows': '1000000', 'mode': 'batch', 'query': 'SELECT * FROM "OWNER"."CONFIG_SPLUNK"', 'query_timeout': '300', 'source': 'tooly', 'sourcetype': 'tooly'})])

In [12]:
dict(tomli_inputs)#['assignmentgroups']['connection']

{'assignmentgroups': {'connection': 'TOOLX_P',
  'disabled': '0',
  'host': 'host1',
  'index': 'indexx',
  'index_time_mode': 'current',
  'interval': '0 6 * * *',
  'max_rows': '100000',
  'mode': 'batch',
  'query': 'SELECT * FROM "OWNER"."ASSIGNMENT_GROUPS_SPLUNK"',
  'source': 'toolx',
  'sourcetype': 'assignmentgroups'},
 'CI_relations': {'connection': 'TOOLX_P',
  'disabled': '0',
  'host': 'host1',
  'index': 'indexx',
  'index_time_mode': 'current',
  'interval': '0 5 * * *',
  'max_rows': '1000000',
  'mode': 'batch',
  'query': 'SELECT * FROM "OWNER"."RELATIONSHIP_SPLUNK"',
  'source': 'toolx',
  'sourcetype': 'ci_relations'},
 'config_full': {'connection': 'TOOLY_P',
  'description': 'Get all config daily',
  'disabled': '0',
  'host': 'host2',
  'index': 'indexy',
  'index_time_mode': 'current',
  'interval': '0 4 * * *',
  'max_rows': '1000000',
  'mode': 'batch',
  'query': 'SELECT * FROM "OWNER"."CONFIG_SPLUNK"',
  'query_timeout': '300',
  'source': 'tooly',
  'sourcet

In [13]:
dict(tomli_connections)#.keys()

{'TOOLX_P': {'connection_type': 'oracle_service',
  'customizedJdbcUrl': 'jdbc:oracle:thin:@(DESCRIPTION_LIST=(LOAD_BALANCE=off)(FAILOVER=on)(DESCRIPTION=(CONNECT_TIMEOUT=5)(TRANSPORT_CONNECT_TIMEOUT=3)(RETRY_COUNT=3)(ADDRESS_LIST=(LOAD_BALANCE=on)(ADDRESS=(PROTOCOL=TCP)(HOST=host.sample.com)(PORT=39000)))(CONNECT_DATA=(SERVICE_NAME=DATABASESAMPLENAME)))(DESCRIPTION=(CONNECT_TIMEOUT=5)(TRANSPORT_CONNECT_TIMEOUT=3)(RETRY_COUNT=3)(ADDRESS_LIST=(LOAD_BALANCE=on)(ADDRESS=(PROTOCOL=TCP)(HOST=host.sample.com)(PORT=39000)))(CONNECT_DATA=(SERVICE_NAME=DATABASESAMPLENAME))))',
  'database': 'DATABASESAMPLENAME',
  'disabled': '0',
  'host': 'host.sample.com',
  'identity': 'TOOLX_P',
  'jdbcUseSSL': 'false',
  'localTimezoneConversionEnabled': 'false',
  'port': '39000',
  'readonly': 'true',
  'timezone': 'Europe/Amsterdam'},
 'TOOLY_P': {'connection_type': 'oracle_service',
  'database': 'DATABASESAMPLENAMEY',
  'disabled': '0',
  'host': 'host2.cinqict.nl',
  'identity': 'TOOLY_P',
  'jdbcUs

Inputs{connection} -> connections[...]

# Re organize data to make 2 batches, one for the Input API and another for Connections API

## Input API

In [2]:
merged_data = {}
dict_inputs = dict(tomli_inputs)
dict_connections = dict(tomli_connections)

for key, input_value in dict_inputs.items():
    connection_key = input_value.get("connection")
    connection_data = dict_connections.get(connection_key, {})
    merged_data[key] = {**input_value, **connection_data}

# Convert values

## schedule.enabled -> disabled

Because Splunk talks about disabled and Cribl about enabled the states need to be transformed to the opposite value. Cribl only allows true or false booleans where Splunk allows 0 and 1 numbers as well; these need to be transformed.

| From | To |
| :---- | :---- |
| true | false |
| false | true |
| 1 | false |
| 0 | true |


In [3]:
def transform_disabled_values(data):
    for key, sub_dict in data.items():
        if 'disabled' in sub_dict:
            value = sub_dict['disabled']
            if isinstance(value, str):
                value = value.lower()
            if value in ['true', 1, True, '1']:
                sub_dict['disabled'] = False
            elif value in ['false', 0, False, '0']:
                sub_dict['disabled'] = True
    return data

transformed_merged_data = transform_disabled_values(merged_data)


In [4]:
tomli_input = transformed_merged_data.items()

In [5]:
tomli_input

dict_items([('assignmentgroups', {'connection': 'TOOLX_P', 'disabled': True, 'host': 'host.sample.com', 'index': 'indexx', 'index_time_mode': 'current', 'interval': '0 6 * * *', 'max_rows': '100000', 'mode': 'batch', 'query': 'SELECT * FROM "OWNER"."ASSIGNMENT_GROUPS_SPLUNK"', 'source': 'toolx', 'sourcetype': 'assignmentgroups', 'connection_type': 'oracle_service', 'customizedJdbcUrl': 'jdbc:oracle:thin:@(DESCRIPTION_LIST=(LOAD_BALANCE=off)(FAILOVER=on)(DESCRIPTION=(CONNECT_TIMEOUT=5)(TRANSPORT_CONNECT_TIMEOUT=3)(RETRY_COUNT=3)(ADDRESS_LIST=(LOAD_BALANCE=on)(ADDRESS=(PROTOCOL=TCP)(HOST=host.sample.com)(PORT=39000)))(CONNECT_DATA=(SERVICE_NAME=DATABASESAMPLENAME)))(DESCRIPTION=(CONNECT_TIMEOUT=5)(TRANSPORT_CONNECT_TIMEOUT=3)(RETRY_COUNT=3)(ADDRESS_LIST=(LOAD_BALANCE=on)(ADDRESS=(PROTOCOL=TCP)(HOST=host.sample.com)(PORT=39000)))(CONNECT_DATA=(SERVICE_NAME=DATABASESAMPLENAME))))', 'database': 'DATABASESAMPLENAME', 'identity': 'TOOLX_P', 'jdbcUseSSL': 'false', 'localTimezoneConversionEnabl

# Schemas that include aliases

## Inputs API Schema

In [6]:
from pydantic import BaseModel, Field, AliasChoices
from typing import List, Optional
schema = BaseModel

get the right input schema according to curl. Include default values. Include aliases.

In [None]:
class Metadata(BaseModel):
    name: str
    value: str 
class Run(BaseModel):
    mode: str
    
class Schedule(BaseModel):
    cronSchedule: str = Field(alias='interval') 
    run: Run
    enabled: bool = Field(alias ='disabled')

class CollectorConf(BaseModel):
    connectionId: str = Field(alias='connection')
    query: str

class Collector(BaseModel):
    conf: CollectorConf
    type: str = "database"

class InputType(BaseModel):
    type: str = "collection"
    metadata: List[Metadata] = [
        Metadata(name="host", value=Field("default_host", alias="host")),
        Metadata(name="index", value=Field("default_index", alias="index")),
        Metadata(name="source", value=Field("default_source", alias="source")),
        Metadata(name="sourcetype", value=Field("default_sourcetype", alias="sourcetype"))
    ]

class InputSchema(BaseModel):
    type: Optional[str] =  "collection"
    schedule: Schedule
    collector: Collector
    input: InputType
    id: Optional[str]

result = [InputSchema(**row, id=f"{key}-{uuid4()}") for key, row in tomli_input]

In [7]:
from pydantic import BaseModel, Field
from typing import List, Optional
from uuid import uuid4

class Metadata(BaseModel):
    name: str
    value: str

class Run(BaseModel):
    mode: str

class Schedule(BaseModel):
    cronSchedule: str = Field(alias='interval')
    run: Run
    enabled: bool = Field(alias='disabled')

class CollectorConf(BaseModel):
    connectionId: str = Field(alias='connection')
    query: str

class Collector(BaseModel):
    conf: CollectorConf
    type: str = "database"

class InputType(BaseModel):
    type: str = "collection"
    metadata: List[Metadata]

class InputSchema(BaseModel):
    type: Optional[str] = "collection"
    schedule: Schedule
    collector: Collector
    input: InputType
    id: Optional[str]

# Transform `tomli_input` data to match `metadata` requirements
def create_metadata(data):
    return [
        Metadata(name="host", value=data["host"]),
        Metadata(name="index", value=data["index"]),
        Metadata(name="source", value=data["source"]),
        Metadata(name="sourcetype", value=data["sourcetype"])
    ]

# Parsing tomli_input to create InputSchema instances
result = [
    InputSchema(
        schedule=Schedule(
            interval=row['interval'],
            run=Run(mode=row['mode']),
            disabled=row['disabled']
        ),
        collector=Collector(
            conf=CollectorConf(
                connection=row['connection'],
                query=row['query']
            )
        ),
        input=InputType(
            metadata=create_metadata(row)
        ),
        id=f"{key}-{uuid4()}"
    )
    for key, row in tomli_input
]


In [61]:
result[0]

InputSchema(type='collection', schedule=Schedule(cronSchedule='0 6 * * *', run=Run(mode='batch'), enabled=True), collector=Collector(conf=CollectorConf(connectionId='TOOLX_P', query='SELECT * FROM "OWNER"."ASSIGNMENT_GROUPS_SPLUNK"'), type='database'), input=InputType(type='collection', metadata=[Metadata(name='host', value='host.sample.com'), Metadata(name='index', value='indexx'), Metadata(name='source', value='toolx'), Metadata(name='sourcetype', value='assignmentgroups')]), id='assignmentgroups-a6d1d721-d100-4e9d-81b3-908cef17bd20')

In [18]:
def to_dict(obj):
    """Recursively converts an object to a dictionary."""
    if isinstance(obj, list):
        return [to_dict(item) for item in obj]
    elif hasattr(obj, "__dict__"):
        result = {key: to_dict(value) for key, value in vars(obj).items()}
        return result
    else:
        return obj


input_schema_dict = to_dict(result[0])
input_schema_dict['input']['metadata'] = [
    {'name': meta.name, 'value': meta.value} for meta in result[0].input.metadata
]
input_schema_dict

{'type': 'collection',
 'schedule': {'cronSchedule': '0 6 * * *',
  'run': {'mode': 'batch'},
  'enabled': True},
 'collector': {'conf': {'connectionId': 'TOOLX_P',
   'query': 'SELECT * FROM "OWNER"."ASSIGNMENT_GROUPS_SPLUNK"'},
  'type': 'database'},
 'input': {'type': 'collection',
  'metadata': [{'name': 'host', 'value': 'host.sample.com'},
   {'name': 'index', 'value': 'indexx'},
   {'name': 'source', 'value': 'toolx'},
   {'name': 'sourcetype', 'value': 'assignmentgroups'}]},
 'id': 'assignmentgroups-b2482a16-d719-445d-ba78-4d0936e14474'}

In [8]:
payload = [{'items': [{'type': 'collection', 'schedule': {'cronSchedule': '0 6 * * *', 'run': {'mode': 'batch'}, 'enabled': True}, 'collector': {'conf': {'connectionId': 'default_connection', 'query': '"SELECT * FROM OWNER.CONFIG_SPLUNK"'}, 'type': 'database'}, 'input': {'type': 'collection', 'metadata': [{'name': 'index', 'value': 'default_index'}, {'name': 'sourcetype', 'value': 'default_sourcetype'}, {'name': 'source', 'value': 'default_source'}]}, 'connection': 'TOOLX_P', 'disabled': 0, 'host': 'host1', 'index': 'indexx', 'index_time_mode': 'current', 'interval': '0 6 * * *', 'max_rows': 100000, 'mode': 'batch', 'query': 'SELECT * FROM "OWNER"."ASSIGNMENT_GROUPS_SPLUNK"', 'source': 'toolx', 'sourcetype': 'assignmentgroups', 'id': 'assignmentgroups-d4d48e50-c394-45fa-a226-2ff5585f8a0c', 'history': [], 'savedState': {}}], 'count': 1}, {'items': [{'type': 'collection', 'schedule': {'cronSchedule': '0 6 * * *', 'run': {'mode': 'batch'}, 'enabled': True}, 'collector': {'conf': {'connectionId': 'default_connection', 'query': '"SELECT * FROM OWNER.CONFIG_SPLUNK"'}, 'type': 'database'}, 'input': {'type': 'collection', 'metadata': [{'name': 'index', 'value': 'default_index'}, {'name': 'sourcetype', 'value': 'default_sourcetype'}, {'name': 'source', 'value': 'default_source'}]}, 'connection': 'TOOLX_P', 'disabled': 0, 'host': 'host1', 'index': 'indexx', 'index_time_mode': 'current', 'interval': '0 5 * * *', 'max_rows': 1000000, 'mode': 'batch', 'query': 'SELECT * FROM "OWNER"."RELATIONSHIP_SPLUNK"', 'source': 'toolx', 'sourcetype': 'ci_relations', 'id': 'CI_relations-ecdb0b99-e8ec-4a60-9c03-b6042c84671d', 'history': [], 'savedState': {}}], 'count': 1}, {'items': [{'type': 'collection', 'schedule': {'cronSchedule': '0 6 * * *', 'run': {'mode': 'batch'}, 'enabled': True}, 'collector': {'conf': {'connectionId': 'default_connection', 'query': '"SELECT * FROM OWNER.CONFIG_SPLUNK"'}, 'type': 'database'}, 'input': {'type': 'collection', 'metadata': [{'name': 'index', 'value': 'default_index'}, {'name': 'sourcetype', 'value': 'default_sourcetype'}, {'name': 'source', 'value': 'default_source'}]}, 'connection': 'TOOLY_P', 'disabled': 0, 'host': 'host2', 'index': 'indexy', 'index_time_mode': 'current', 'interval': '0 4 * * *', 'max_rows': 1000000, 'mode': 'batch', 'query': 'SELECT * FROM "OWNER"."CONFIG_SPLUNK"', 'source': 'tooly', 'sourcetype': 'tooly', 'id': 'config_full-e90ca558-3782-4c74-89fe-4b758b257f4f', 'history': [], 'savedState': {}}], 'count': 1}]


In [10]:
payload[1]

{'items': [{'type': 'collection',
   'schedule': {'cronSchedule': '0 6 * * *',
    'run': {'mode': 'batch'},
    'enabled': True},
   'collector': {'conf': {'connectionId': 'default_connection',
     'query': '"SELECT * FROM OWNER.CONFIG_SPLUNK"'},
    'type': 'database'},
   'input': {'type': 'collection',
    'metadata': [{'name': 'index', 'value': 'default_index'},
     {'name': 'sourcetype', 'value': 'default_sourcetype'},
     {'name': 'source', 'value': 'default_source'}]},
   'connection': 'TOOLX_P',
   'disabled': 0,
   'host': 'host1',
   'index': 'indexx',
   'index_time_mode': 'current',
   'interval': '0 5 * * *',
   'max_rows': 1000000,
   'mode': 'batch',
   'query': 'SELECT * FROM "OWNER"."RELATIONSHIP_SPLUNK"',
   'source': 'toolx',
   'sourcetype': 'ci_relations',
   'id': 'CI_relations-ecdb0b99-e8ec-4a60-9c03-b6042c84671d',
   'history': [],
   'savedState': {}}],
 'count': 1}

In [None]:
class ConnectionSchema(Basemodel):
    id: str
    databaseType: str
    username: str
    password: str
    connectionString: str | None = None
    database: str
    disabled: int
    host: str
    identity: str
    jdbcUseSSL: bool
    localTimezoneConversionEnabled: bool
    port: int
    readonly: bool
    timezone: str | None = None
    

# Two load functions, one for the Iputs and one for Connections