In [167]:
import glob
import os
import pandas as pd
import sqlite3
from pathlib import Path
from pandas.core.common import flatten
from itertools import product
from abc import ABC, abstractmethod

# Root folders for the .sql inputs and the generated .csv outputs. Both keep
# their trailing backslash because the glob pattern is concatenated onto them.
input_root_path, output_root_path = (
    'C:\\Users\\alexd\\OneDrive\\Dokumente\\Python Scripts\\query-agent\\' + leaf + '\\'
    for leaf in ('input', 'output')
)

In [168]:
class Parameter():
    """Key/value parameters loaded from a ``.csv`` file or a ``.sql`` query.

    After construction ``param_dict`` maps every key to

    * a list of strings for a ``.csv`` source: the ``value`` cell split on
      commas, with all spaces removed from each item;
    * the raw value of the second result column for a ``.sql`` source.

    Args:
        file: Path to the parameter file. The extension (case-insensitive)
            selects the loader.
        data_file: Excel workbook exposed to ``.sql`` parameter queries as the
            table ``df``. Ignored for ``.csv`` sources.

    Raises:
        ValueError: If ``file`` is neither a ``.csv`` nor a ``.sql`` file.
            Previously this produced an object without ``param_dict``, which
            only failed later with an ``AttributeError``.
    """

    def __init__(self, file, data_file='test-data.xlsx'):
        lowered = file.lower()
        if lowered.endswith('.csv'):
            self.param_dict = self._load_csv(file)
        elif lowered.endswith('.sql'):
            self.param_dict = self._load_sql(file, data_file)
        else:
            raise ValueError(
                f"Unsupported parameter file {file!r}: expected a .csv or .sql file"
            )

    @staticmethod
    def _load_csv(file):
        """Read a ';'-separated file with ``key`` and ``value`` columns."""
        rows = pd.read_csv(file, sep=';', dtype=str).set_index('key').to_dict('index')
        return {
            key: [item.replace(' ', '') for item in row['value'].split(',')]
            for key, row in rows.items()
        }

    @staticmethod
    def _load_sql(file, data_file):
        """Run the query stored in ``file`` against ``data_file`` loaded as table ``df``."""
        with open(file, 'r') as handle:
            statement = handle.read()

        db = sqlite3.connect(':memory:')
        try:
            pd.read_excel(data_file).to_sql("df", db, if_exists="replace", index=False)
            result = pd.read_sql(statement, db)
        finally:
            # The result is fully materialised, so the connection can be released.
            db.close()

        # The query must return exactly two columns: key first, value second.
        result.columns = ['key', 'value']
        rows = result.set_index('key').to_dict('index')
        return {key: row['value'] for key, row in rows.items()}

In [178]:
class Query():
    """One input ``.sql`` file expanded into one or more executable queries.

    Parameter keys are substituted into the file name and the query text. If
    the file name contains multiplier keys, one output is produced for every
    combination of the multiplier values.

    Args:
        input_root_path: Root folder that ``input_path`` lives under.
        input_path: Path of the ``.sql`` file to read.
        output_root_path: Root folder for generated ``.csv`` files. The
            sub-folder of ``input_path`` relative to ``input_root_path`` is
            mirrored below it.
        params: Object whose ``param_dict`` maps placeholder -> value. Each
            value is inserted as ``str(value)``.
        multi_params: Object whose ``param_dict`` maps multiplier key -> list
            of string values.

    Attributes:
        After construction ``input_path``, ``output_path``, ``input_query``
        and ``output_query`` are parallel lists with one entry per generated
        query, including when nothing was multiplied.
    """

    def __init__(self, input_root_path, input_path, output_root_path, params, multi_params):

        # where can the query be found?
        self.input_root_path = input_root_path
        self.input_path = input_path
        self.output_root_path = output_root_path

        # set parameter dictionaries
        self.params = params
        self.multi_params = multi_params
        self.input_query = Path(self.input_path).read_text()

        # derive new properties
        self.relative_path = Path(self.input_path).relative_to(self.input_root_path).parents[0]
        self.output_path = self.__set_output_path()
        self.output_query = self.__modify_query()
        # From here on the four attributes are lists (see class docstring).
        self.input_path, self.output_path, self.input_query, self.output_query = self.__multiply()

    @staticmethod
    def __csv_name(file_name):
        """Return ``file_name`` with its final extension replaced by ``.csv``."""
        return str(Path(file_name).with_suffix('.csv'))

    def __build_output_path(self, file_name):
        """Place ``file_name`` in the mirrored sub-folder below the output root."""
        return str(Path(self.output_root_path) / self.relative_path / file_name)

    def __set_output_path(self):
        """Output path for the non-multiplied case, with parameters applied to the name."""
        file_name = Path(self.input_path).name
        for key, value in self.params.param_dict.items():
            file_name = file_name.replace(key, str(value))

        # Only the file name is rewritten, so a '.sql' inside a folder name
        # is left untouched.
        return self.__build_output_path(self.__csv_name(file_name))

    def __modify_query(self):
        """Return the query text with every parameter key replaced by its value."""
        query = self.input_query
        for key, value in self.params.param_dict.items():
            query = query.replace(key, str(value))
        return query

    def __multiply(self):
        """Expand the query once per combination of matching multiplier values.

        A multiplier applies when its key occurs in the file name. The key is
        replaced as-is in the file name and as ``@key@`` in the query text.

        Returns:
            Four parallel lists: input paths, output paths, original queries
            and modified queries.
        """
        file_name = self.__csv_name(Path(self.input_path).name)
        multipliers = self.multi_params.param_dict
        multiplier_matches = [key for key in multipliers if key in file_name]

        if not multiplier_matches:
            return [self.input_path], [self.output_path], [self.input_query], [self.output_query]

        input_paths = []
        output_paths = []
        original_queries = []
        modified_queries = []
        for combination in product(*(multipliers[key] for key in multiplier_matches)):
            output_file = file_name
            output_query = self.output_query
            for key, value in zip(multiplier_matches, combination):
                output_file = output_file.replace(key, value)
                output_query = output_query.replace('@' + key + '@', value)

            input_paths.append(self.input_path)
            # Built from path components rather than by string replacement on
            # the full input path, which could also rewrite folder names.
            output_paths.append(self.__build_output_path(output_file))
            original_queries.append(self.input_query)
            modified_queries.append(output_query)

        return input_paths, output_paths, original_queries, modified_queries

In [179]:
# Substitution values come from a SQL query result; multiplier values come
# from a ';'-separated CSV file.
params, multi_params = (
    Parameter('params/params.sql'),
    Parameter('params/file-multiplier.csv'),
)

In [180]:
# Find every .sql file below the input root (recursively) and wrap each one
# in a Query.
input_files = glob.glob(input_root_path + '**/*.sql', recursive=True)
query_objects = [
    Query(input_root_path, query_path, output_root_path, params, multi_params)
    for query_path in input_files
]

In [181]:
# Every Query holds parallel lists (one entry per generated query); flatten
# them into four notebook-level lists that stay index-aligned.
input_paths = [path for query in query_objects for path in query.input_path]
output_paths = [path for query in query_objects for path in query.output_path]
output_queries = [sql for query in query_objects for sql in query.output_query]
input_queries = [sql for query in query_objects for sql in query.input_query]

In [182]:
class Executor():
    """Run one prepared query against the workbook data and write the result as CSV.

    Constructing an instance does all the work: the workbook is loaded into an
    in-memory SQLite table named ``df``, ``output_query`` is executed against
    it and the result is written to ``output_path`` as a ';'-separated file
    (parent folders are created as needed).

    Args:
        input_path: Path of the originating ``.sql`` file (stored only).
        output_path: Destination path of the CSV file.
        input_query: Original query text (stored only).
        output_query: Query text that is actually executed.
        data_file: Excel workbook loaded as table ``df``.
    """

    def __init__(self, input_path, output_path, input_query, output_query,
                 data_file='test-data.xlsx'):
        self.input_path = input_path
        self.output_path = output_path
        self.input_query = input_query
        self.output_query = output_query
        self.data_file = data_file
        self.db = self.__create_db()
        try:
            self.__run_query()
        finally:
            # The connection is only needed for this one query; release it
            # even when the query or the file write fails.
            self.db.close()

    def __create_db(self):
        """Return an in-memory SQLite connection holding the workbook as table ``df``."""
        db = sqlite3.connect(':memory:')
        df = pd.read_excel(self.data_file)
        df.to_sql("df", db, if_exists="replace", index=False)
        return db

    def __run_query(self):
        """Execute ``output_query`` and write the result to ``output_path``."""
        # Use this instance's connection, not a notebook-level ``db`` variable.
        df = pd.read_sql_query(self.output_query, self.db)
        os.makedirs(Path(self.output_path).parents[0], exist_ok=True)
        df.to_csv(self.output_path, sep=';', chunksize=1, index=False)

In [183]:
# The four lists are index-aligned, so walk them in lockstep. Constructing an
# Executor runs the query and writes its CSV file.
for input_path, output_path, input_query, output_query in zip(
        input_paths, output_paths, input_queries, output_queries):
    Executor(input_path, output_path, input_query, output_query)

In [184]:
# TODO: write a database connector
# TODO: write a query executor
# TODO: write a CSV file writer
# TODO: write a logger

In [185]:
# Display the flattened list of CSV output paths for a visual check.
output_paths

['C:\\Users\\alexd\\OneDrive\\Dokumente\\Python Scripts\\query-agent\\output\\folder-1\\subfolder-1\\test-query-1.csv',
 'C:\\Users\\alexd\\OneDrive\\Dokumente\\Python Scripts\\query-agent\\output\\folder-1\\subfolder-2\\another-test-query.csv',
 'C:\\Users\\alexd\\OneDrive\\Dokumente\\Python Scripts\\query-agent\\output\\folder-1\\subfolder-2\\20190101-NESN-yet-another-query.csv',
 'C:\\Users\\alexd\\OneDrive\\Dokumente\\Python Scripts\\query-agent\\output\\folder-1\\subfolder-2\\20190101-UBSN-yet-another-query.csv',
 'C:\\Users\\alexd\\OneDrive\\Dokumente\\Python Scripts\\query-agent\\output\\folder-1\\subfolder-2\\20190101-ABBN-yet-another-query.csv',
 'C:\\Users\\alexd\\OneDrive\\Dokumente\\Python Scripts\\query-agent\\output\\folder-1\\subfolder-2\\20190102-NESN-yet-another-query.csv',
 'C:\\Users\\alexd\\OneDrive\\Dokumente\\Python Scripts\\query-agent\\output\\folder-1\\subfolder-2\\20190102-UBSN-yet-another-query.csv',
 'C:\\Users\\alexd\\OneDrive\\Dokumente\\Python Scripts\\qu