## Import adapter options 

In [None]:
from connection_options import Connection_options
from copy_options import Copy_options
from target_options import Target_options
from transformation_options import Transformation_options

## Define helpers to get new options lists

In [None]:
def prepare_docs(docs):
    """Normalize a pasted block of documentation options into a list.

    Args:
        docs: Multi-line string with one option name per line, as pasted
            from the documentation.

    Returns:
        The option names lower-cased and stripped of surrounding
        whitespace, in their original order. Blank lines are dropped so
        that a stray empty line in the pasted text is not mistaken for an
        option named ``''``.
    """
    stripped = (line.strip() for line in docs.lower().splitlines())
    return [option for option in stripped if option]

def get_new_options(adapter, docs, options_type):
    """Print how the adapter's options compare with the documented ones.

    Args:
        adapter: Iterable of option names known to the adapter (the callers
            in this notebook pass ``dict.keys()``).
        docs: Multi-line string of documented option names, one per line.
        options_type: Label printed in the report header.

    The documented names are lower-cased by ``prepare_docs`` and compared
    as-is with the adapter names.
    NOTE(review): this assumes the adapter names are lower case -- confirm.
    """
    print(f'Check {options_type}:')
    documented = set(prepare_docs(docs))
    known = set(adapter)

    # Options the adapter still carries but the documentation no longer lists.
    missing = sorted(known - documented)
    if not missing:
        print('All options from the adapter are still in the documentation')
    else:
        print('!!! Not all options from the adapter are still in the documentation')
        print(f'Options missing from the documentation: {missing}')

    # Sorted so the report is identical from run to run.
    new_options = sorted(documented - known)
    if new_options:
        print(f'New options to add: {new_options}')
    else:
        print("No new options")
        
def get_new_source(adapter, docs):
    """Print the sources found in the documentation but not in the adapter.

    Args:
        adapter: Mapping keyed by the source names the adapter knows.
        docs: Mapping keyed by the source names found in the documentation.
    """
    # Sorted so the printed list is stable between runs instead of
    # following set iteration order.
    new_sources = sorted(set(docs.keys()) - set(adapter.keys()))
    if new_sources:
        print(f'New sources to add: {new_sources}')
    else:
        print("No new source detected")

## Paste copy options from the Upsolver docs
### Copy all options from:
#### https://docs.upsolver.com/sqlake/sql-command-reference/sql-jobs/create-job
### 23.08.2023

In [None]:
# Copy-job options pasted from the documentation, keyed by source name.
# Each value is a multi-line string with one option name per line; the
# surrounding whitespace is part of the string and is stripped later by
# prepare_docs when the value is passed to get_new_options.
docs_copy_options = {
    
    "s3": """COMMIT_INTERVAL
             COMPRESSION
             CONTENT_TYPE
             DATE_PATTERN
             DEDUPLICATE_WITH
             DELETE_FILES_AFTER_LOAD
             FILE_PATTERN
             INITIAL_LOAD_PATTERN
             INITIAL_LOAD_PREFIX
             RUN_PARALLELISM
             SKIP_ALL_VALIDATIONS
             SKIP_VALIDATIONS
             START_FROM
             COLUMN_TRANSFORMATIONS
             COMMENT
             COMPUTE_CLUSTER
             END_AT
             EXCLUDE_COLUMNS""",

    "kafka": """COMMIT_INTERVAL
                COMPRESSION
                CONSUMER_PROPERTIES
                CONTENT_TYPE
                DEDUPLICATE_WITH
                READER_SHARDS
                RUN_PARALLELISM
                START_FROM
                STORE_RAW_DATA
                COLUMN_TRANSFORMATIONS 
                COMMENT
                SKIP_ALL_VALIDATIONS
                SKIP_VALIDATIONS
                COMPUTE_CLUSTER 
                END_AT 
                EXCLUDE_COLUMNS""",

    "mysql": """DDL_FILTERS
                SKIP_SNAPSHOTS
                SNAPSHOT_PARALLELISM
                COLUMN_TRANSFORMATIONS 
                COMMENT 
                COMPUTE_CLUSTER 
                END_AT
                EXCLUDE_COLUMNS""",

    "postgres": """HEARTBEAT_TABLE
                   PARSE_JSON_COLUMNS
                   PUBLICATION_NAME
                   SKIP_SNAPSHOTS
                   SNAPSHOT_PARALLELISM
                   START_FROM
                   COLUMN_TRANSFORMATIONS 
                   COMMENT 
                   COMPUTE_CLUSTER 
                   END_AT 
                   EXCLUDE_COLUMNS""",

    "kinesis": """COMMIT_INTERVAL
                  COMPRESSION
                  CONTENT_TYPE
                  DEDUPLICATE_WITH
                  READER_SHARDS
                  RUN_PARALLELISM
                  SKIP_ALL_VALIDATIONS
                  SKIP_VALIDATIONS
                  STORE_RAW_DATA
                  START_FROM
                  COLUMN_TRANSFORMATIONS
                  COMMENT 
                  COMPUTE_CLUSTER
                  END_AT 
                  EXCLUDE_COLUMNS""",
    
    "mongodb": """SKIP_SNAPSHOTS
                  SNAPSHOT_PARALLELISM
                  START_FROM
                  COLUMN_TRANSFORMATIONS 
                  COMMENT 
                  COMPUTE_CLUSTER 
                  END_AT 
                  EXCLUDE_COLUMNS""",
    
    "mssql": """PARSE_JSON_COLUMNS
                SKIP_SNAPSHOTS
                SNAPSHOT_PARALLELISM
                START_FROM
                EXCLUDE_COLUMNS
                COLUMN_TRANSFORMATIONS 
                COMMENT 
                COMPUTE_CLUSTER 
                END_AT """
    
}

## Get new create job sources and new copy options

### log 
#### 24.08.2023
### New sources to add: ['mssql', 'mongodb']
### Kafka New options to add: ['commit_interval', 'column_transformations']
### mysql New options to add: ['ddl_filters', 'snapshot_parallelism']
### postgres New options to add: ['snapshot_parallelism']
### s3 New options to add: ['commit_interval', 'skip_validations', 'skip_all_validations']
### kinesis New options to add: ['commit_interval', 'skip_validations', 'skip_all_validations']

In [None]:
# Report sources that appear in the pasted copy docs but not in the adapter.
get_new_source(adapter=Copy_options, docs=docs_copy_options)

In [None]:
# Compare every adapter source's copy job options with the documented ones.
for source in Copy_options.keys():
    if source not in docs_copy_options:
        # Indexing the docs with an unknown source would raise KeyError and
        # abort the remaining checks, so report it and carry on.
        print(f'!!! Copy {source}: source is not in the documentation')
        print('***************')
        continue
    get_new_options(Copy_options[source]['job_options'].keys(), docs_copy_options[source], f"Copy {source}")
    print('***************')

## Paste connection options from the Upsolver docs
### Copy all options from:
#### https://docs.upsolver.com/sqlake/sql-command-reference/sql-connections/create-connection
### 23.08.2023

In [None]:
# Connection options pasted from the documentation, keyed by connection
# type. Each value is a multi-line string with one option name per line;
# the indentation inside the strings is stripped later by prepare_docs
# when the value is passed to get_new_options.
docs_connections_options = {
    "s3": """AWS_ROLE
             EXTERNAL_ID
             AWS_ACCESS_KEY_ID
             AWS_SECRET_ACCESS_KEY
             PATH_DISPLAY_FILTER
             PATH_DISPLAY_FILTERS
             READ_ONLY
             ENCRYPTION_KMS_KEY
             ENCRYPTION_CUSTOMER_MANAGED_KEY
             COMMENT""",

    "kafka": """HOST
                HOSTS
                CONSUMER_PROPERTIES
                VERSION
                REQUIRE_STATIC_IP
                SSL
                TOPIC_DISPLAY_FILTER
                TOPIC_DISPLAY_FILTERS
                COMMENT""",

    "glue_catalog": """AWS_ROLE
                 EXTERNAL_ID
                 AWS_ACCESS_KEY_ID
                 AWS_SECRET_ACCESS_KEY
                 DEFAULT_STORAGE_CONNECTION
                 DEFAULT_STORAGE_LOCATION
                 REGION
                 DATABASE_DISPLAY_FILTER
                 DATABASE_DISPLAY_FILTERS
                 COMMENT""",

    "kinesis": """AWS_ROLE
                  EXTERNAL_ID
                  AWS_ACCESS_KEY_ID
                  AWS_SECRET_ACCESS_KEY
                  REGION
                  READ_ONLY
                  MAX_WRITERS
                  STREAM_DISPLAY_FILTER
                  STREAM_DISPLAY_FILTERS
                  COMMENT""",

    "snowflake":"""CONNECTION_STRING
                   USER_NAME
                   PASSWORD
                   MAX_CONCURRENT_CONNECTIONS
                   COMMENT""",

    "redshift": """CONNECTION_STRING
                   USER_NAME
                   PASSWORD
                   MAX_CONCURRENT_CONNECTIONS
                   COMMENT""",

    "mysql": """CONNECTION_STRING
                USER_NAME
                PASSWORD
                COMMENT""",

    "postgres": """CONNECTION_STRING
                   USER_NAME
                   PASSWORD
                   COMMENT""",

    "elasticsearch": """CONNECTION_STRING
                        USER_NAME
                        PASSWORD
                        COMMENT""",

    "mongodb": """CONNECTION_STRING
                  USER_NAME
                  PASSWORD
                  TIMEOUT
                  COMMENT""",

    "mssql": """CONNECTION_STRING
                USER_NAME
                PASSWORD
                COMMENT"""
    }

## Get new connection sources and new connection options

### log 
#### 24.08.2023
### New sources to add: ['mssql', 'mongodb']

In [None]:
# Report connection types that appear in the pasted docs but not in the adapter.
get_new_source(adapter=Connection_options, docs=docs_connections_options)

In [None]:
# Compare every adapter connection type's options with the documented ones.
for source in Connection_options.keys():
    if source not in docs_connections_options:
        # Indexing the docs with an unknown source would raise KeyError and
        # abort the remaining checks, so report it and carry on.
        print(f'!!! Connection {source}: source is not in the documentation')
        print('***************')
        continue
    get_new_options(Connection_options[source].keys(), docs_connections_options[source], f"Connection {source}")
    print('***************')

## Paste transformation options from the Upsolver docs
### Copy all options from:
#### https://docs.upsolver.com/sqlake/sql-command-reference/sql-jobs/create-job/sql-transformation-jobs/merge#job_options
### 23.08.2023

In [None]:
# Transformation-job options pasted from the documentation, keyed by target
# name. Each value is a multi-line string with one option name per line;
# the indentation inside the strings is stripped later by prepare_docs
# when the value is passed to get_new_options.
docs_transformation_options = {
    "s3": """COMPRESSION
             DATE_PATTERN
             FILE_FORMAT
             OUTPUT_OFFSET
             AGGREGATION_PARALLELISM
             ALLOW_CARTESIAN_PRODUCTS
             COMMENT
             COMPUTE_CLUSTER
             END_AT
             RUN_INTERVAL
             RUN_PARALLELISM
             START_FROM""",
    
    "elasticsearch": """BULK_MAX_SIZE_BYTES
                        INDEX_PARTITION_SIZE
                        ROUTING_FIELD_NAME
                        AGGREGATION_PARALLELISM
                        ALLOW_CARTESIAN_PRODUCTS
                        COMMENT
                        COMPUTE_CLUSTER
                        END_AT
                        RUN_INTERVAL
                        RUN_PARALLELISM
                        START_FROM""",
    
    "snowflake": """ADD_MISSING_COLUMNS
                    COMMIT_INTERVAL
                    CUSTOM_INSERT_EXPRESSIONS
                    CUSTOM_UPDATE_EXPRESSIONS
                    KEEP_EXISTING_VALUES_WHEN_NULL
                    AGGREGATION_PARALLELISM
                    ALLOW_CARTESIAN_PRODUCTS
                    COMMENT
                    COMPUTE_CLUSTER
                    END_AT
                    RUN_INTERVAL
                    RUN_PARALLELISM
                    START_FROM""",
    
    "datalake": """ADD_MISSING_COLUMNS
                   AGGREGATION_PARALLELISM
                   ALLOW_CARTESIAN_PRODUCTS
                   COMMENT
                   COMPUTE_CLUSTER
                   END_AT
                   RUN_INTERVAL
                   RUN_PARALLELISM
                   START_FROM""",
    
    "redshift": """FAIL_ON_WRITE_ERROR
                   SKIP_FAILED_FILES
                   AGGREGATION_PARALLELISM
                   ALLOW_CARTESIAN_PRODUCTS
                   COMMENT
                   COMPUTE_CLUSTER
                   END_AT
                   RUN_INTERVAL
                   RUN_PARALLELISM
                   START_FROM""",
    
    "postgres": """AGGREGATION_PARALLELISM
                   ALLOW_CARTESIAN_PRODUCTS
                   COMMENT
                   COMPUTE_CLUSTER
                   END_AT
                   RUN_INTERVAL
                   RUN_PARALLELISM
                   START_FROM"""
}




## Get new transformation sources and new transformation options

### log 
#### 24.08.2023
### New sources to add: ['postgres']
### elasticsearch New options to add: ['routing_field_name'] new
### snowflake New options to add: ['commit_interval'] new

In [None]:
# Report targets that appear in the pasted transformation docs but not in the adapter.
get_new_source(adapter=Transformation_options, docs=docs_transformation_options)

In [None]:
# Compare every adapter target's transformation options with the documented ones.
for source in Transformation_options.keys():
    if source not in docs_transformation_options:
        # Indexing the docs with an unknown source would raise KeyError and
        # abort the remaining checks, so report it and carry on.
        print(f'!!! Transformation {source}: source is not in the documentation')
        print('***************')
        continue
    get_new_options(Transformation_options[source].keys(), docs_transformation_options[source], f"Transformation {source}")
    print('***************')