In [105]:
import glob
import os
import pandas as pd
from pathlib import Path
from pandas.core.common import flatten
from itertools import product
from abc import ABC, abstractmethod

# Root folders for the SQL templates (input) and the generated files (output).
# Both can be overridden through environment variables so the notebook is not
# tied to a single machine; when the variables are unset, the original
# locations are used unchanged.
# NOTE: each value must end with a path separator, because the glob pattern for
# the input files is built by plain string concatenation onto input_root_path.
input_root_path = os.environ.get(
    'QUERY_AGENT_INPUT_ROOT',
    'C:\\Users\\alexd\\OneDrive\\Dokumente\\Python Scripts\\query-agent\\input\\',
)
output_root_path = os.environ.get(
    'QUERY_AGENT_OUTPUT_ROOT',
    'C:\\Users\\alexd\\OneDrive\\Dokumente\\Python Scripts\\query-agent\\output\\',
)

In [163]:
class Parameter():
    """Placeholder table loaded from a semicolon-separated CSV file.

    The file must provide a ``key`` column and a ``value`` column. Each
    ``value`` cell is a comma-separated list; the cell is split on commas and
    every blank character is removed from each item.

    After construction ``param_dict`` maps each key to its list of strings,
    e.g. ``{'YEAR': ['2020', '2021']}``.

    Parameters
    ----------
    file : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.

    Raises
    ------
    KeyError
        If the ``key`` or ``value`` column is missing.
    ValueError
        If the same key occurs more than once (raised by
        ``DataFrame.to_dict('index')`` for a non-unique index).
    """
    def __init__(self, file):
        # keep_default_na=False keeps empty cells and literals such as
        # "NULL", "NA" or "None" as plain strings. With the pandas default they
        # are parsed to float NaN even under dtype=str, and the split below
        # would fail with AttributeError.
        table = pd.read_csv(file, sep=';', dtype=str, keep_default_na=False)
        rows = table.set_index('key').to_dict('index')

        self.param_dict = {
            key: [item.replace(' ', '') for item in row['value'].split(',')]
            for key, row in rows.items()
        }

In [210]:
class Query():
    """A SQL template file together with the queries and output paths derived from it.

    On construction the template text is read from ``input_path``, the
    placeholders are substituted, and the result is expanded once per
    combination of matching multiplier values.

    Parameters
    ----------
    input_path : str or path-like
        Location of the SQL template file.
    output_root_path : str or path-like
        Root folder the output paths are built from.
    query_params, file_params, multi_params : object
        Objects exposing a ``param_dict`` attribute that maps a placeholder
        string to a list of replacement strings.
        ``query_params`` is applied to the query text (first value only),
        ``file_params`` to the file name (first value only), and
        ``multi_params`` produces one output per combination of values for
        every key that occurs in the file name.

    After construction ``output_path`` and ``modified_query`` are parallel
    lists (one entry per generated file), even when no multiplier applies.
    """
    def __init__(self, input_path, output_root_path, query_params, file_params, multi_params):

        # where can the query be found?
        self.input_path = input_path
        self.output_root_path = output_root_path

        # set parameter dictionaries
        self.query_params = query_params
        self.file_params = file_params
        self.multi_params = multi_params
        self.original_query = Path(self.input_path).read_text()

        # derive new properties
        # NOTE(review): the folder of the input file is expressed relative to
        # the *output* root, so it usually starts with '..' segments that lead
        # from the output root to the input folder. Confirm this is intended;
        # mirroring the input tree below the output root would require the
        # input root, which this class does not receive.
        self.relative_path = Path(os.path.relpath(Path(self.input_path), self.output_root_path)).parents[0]
        self.output_path = self.__set_output_path()
        self.modified_query = self.__modify_query()
        # From here on both attributes are lists, not single values.
        self.output_path, self.modified_query = self.__multiply()

    def __set_output_path(self):
        """Return the output path with the file-name placeholders substituted."""
        # modify file_name (only the first value of each parameter is used)
        file_name = Path(self.input_path).name
        for placeholder, values in self.file_params.param_dict.items():
            file_name = file_name.replace(placeholder, values[0])

        # Use the root handed to this instance rather than a notebook global.
        return os.path.join(self.output_root_path, self.relative_path, file_name)

    def __modify_query(self):
        """Return the query text with the query placeholders substituted."""
        query = self.original_query
        # Use the parameters handed to this instance rather than a notebook
        # global; only the first value of each parameter is used.
        for placeholder, values in self.query_params.param_dict.items():
            query = query.replace(placeholder, values[0])
        return query

    def __multiply(self):
        """Expand the query once per combination of matching multiplier values.

        A multiplier key matches when it occurs in the input file name. In the
        file name the bare key is replaced; in the query text the key wrapped
        in ``@`` (``@key@``) is replaced.

        Returns
        -------
        tuple of (list of str, list of str)
            Output paths and the corresponding query texts. Without a matching
            multiplier both lists hold the single, already computed value.
        """
        file = Path(self.input_path).name
        multipliers = self.multi_params.param_dict
        multiplier_matches = [key for key in multipliers if key in file]

        if not multiplier_matches:
            return [self.output_path], [self.modified_query]

        new_files = []
        new_queries = []
        multiplier_values = [multipliers[key] for key in multiplier_matches]
        for combination in product(*multiplier_values):
            # NOTE(review): the expansion starts from the original file name,
            # so substitutions from file_params are not carried over into
            # multiplied file names - confirm whether that is intended.
            new_file = file
            new_query = self.modified_query
            for key, value in zip(multiplier_matches, combination):
                new_file = new_file.replace(key, value)
                new_query = new_query.replace('@' + key + '@', value)

            new_files.append(os.path.join(self.output_root_path, self.relative_path, new_file))
            new_queries.append(new_query)

        return new_files, new_queries

In [211]:
# Load the three placeholder tables (query text, file names, file multipliers)
# from their CSV files, in that order.
query_params, file_params, multi_params = (
    Parameter(csv_file)
    for csv_file in (
        'params/query-params.csv',
        'params/file-params.csv',
        'params/file-multiplier.csv',
    )
)

In [212]:
# Collect every .sql file below the input root (recursively) and build one
# Query object per file.
in_files = glob.glob(input_root_path + '**/*.sql', recursive=True)
queries = [
    Query(sql_file, output_root_path, query_params, file_params, multi_params)
    for sql_file in in_files
]

In [None]:
# TODO: write file proliferator
# TODO: write db connector
# TODO: write query executor
# TODO: write csv file writer
# TODO: write logger