In [30]:
import pandas as pd
from db_connect import EngineConnect as DatabaseConnect
import config
import pickle

In [31]:
class DimProjectModel:
    """Hold the attributes of one project record as plain instance attributes.

    Every named parameter is keyword-only because it follows the
    var-positional ``*kwargs`` parameter. Parameters without a default are
    required; the remaining ones default to ``None``.
    """

    def __init__(
        self,
        *kwargs,
        project_id: str,
        project_code: str,
        project_name: str,
        project_type: str,
        project_description: str = None,
        project_date_end: str = None,
        project_date_start: str = None,
        project_term: str = None,
        project_complexity: str = None,
        active_status: bool,
        contract_type: str = None,
        contract_expire_date: str = None,
        contact_from_customer: str,
        other_services: str = None,
        security_delivery: str = None,
        security_operaction: str = None,
        security_communication: str = None,
        security_deleting_data: str = None,
        auto_delete_type: str = None,
        auto_delete_logs_path: str = None,
        credit_term: str = None,
        digi_pay_activated: bool,
        frequency: str = None,
        planned_fte: float = None,
        bill_unit: float = None,
        min_charge_activated: bool,
        batch_amount: float = None,
        forecast: str = None,
        tat: str = None,
        connection_method: str = None,
        offline_app: str = None,
        input_format: str = None,
        output_format: str = None,
        type_of_form: float = None,
        other: str = None,
        charge_rate_ot: str = None,
        charge_rate_holiday: str = None,
        charge_rate_weekend: str = None,
        volume: float = None,
        shifts: str = None,
        qc_schedule: str = None,
        qc_unit: str = None,
        qa_critical: str = None,
        qa_non_critical: str = None,
        test_date_start: str = None,
        test_date_end: str = None,
        setup_date_start: str = None,
        setup_date_end: str = None
    ):
        """Store each keyword argument on the instance under the same name.

        ``*kwargs`` collects any positional arguments and is never read; it
        is kept only so existing call sites keep working.

        NOTE(review): the names ``security_operaction`` and ``other`` are
        kept as-is because callers pass them by keyword; they look like
        typos of ``security_operation`` / ``others`` -- confirm before
        renaming.
        """
        # Plain assignments, no trailing commas: a trailing comma after the
        # right-hand side would wrap the value in a 1-tuple.
        self.project_id = project_id
        self.project_code = project_code
        self.project_name = project_name
        self.project_type = project_type
        self.project_description = project_description
        self.project_date_end = project_date_end
        self.project_date_start = project_date_start
        self.project_term = project_term
        self.project_complexity = project_complexity
        self.active_status = active_status
        self.contract_type = contract_type
        self.contract_expire_date = contract_expire_date
        self.contact_from_customer = contact_from_customer
        self.other_services = other_services
        self.security_delivery = security_delivery
        self.security_operaction = security_operaction
        self.security_communication = security_communication
        self.security_deleting_data = security_deleting_data
        self.auto_delete_type = auto_delete_type
        self.auto_delete_logs_path = auto_delete_logs_path
        self.credit_term = credit_term
        self.digi_pay_activated = digi_pay_activated
        self.frequency = frequency
        self.planned_fte = planned_fte
        self.bill_unit = bill_unit
        self.min_charge_activated = min_charge_activated
        self.batch_amount = batch_amount
        self.forecast = forecast
        self.tat = tat
        self.connection_method = connection_method
        self.offline_app = offline_app
        self.input_format = input_format
        self.output_format = output_format
        self.type_of_form = type_of_form
        self.other = other
        self.charge_rate_ot = charge_rate_ot
        self.charge_rate_holiday = charge_rate_holiday
        self.charge_rate_weekend = charge_rate_weekend
        self.volume = volume
        self.shifts = shifts
        self.qc_schedule = qc_schedule
        self.qc_unit = qc_unit
        self.qa_critical = qa_critical
        self.qa_non_critical = qa_non_critical
        self.test_date_start = test_date_start
        self.test_date_end = test_date_end
        self.setup_date_start = setup_date_start
        self.setup_date_end = setup_date_end

In [32]:
class DimProjectExecutor:
    """Load project documents, reshape them into a DataFrame and hand the
    frame to the ``db`` object for writing to ``schema.table``.

    In the ``'development'`` environment the documents come from the local
    file ``project.pickle``; otherwise they are read from the MongoDB
    collection ``database_name.collection_name`` reachable at ``uri``.
    """

    # Output column -> source document field used in the ``$project`` stage.
    # Insertion order is the order of the projected fields.
    _SOURCE_FIELDS = {
        "project_code": "code",
        "project_name": "name",
        "project_type": "type",
        "project_description": "description",
        "project_date_end": "date_end",
        "project_date_start": "date_start",
        "project_term": "term",
        "project_complexity": "complexity",
        "active_status": "active",
        "contract_type": "contract_type",
        "contract_expire_date": "expired_date",
        "contact_from_customer": "customer_name",  # Not sure !!
        "other_services": "other_services",
        "security_delivery": "security_delivery",
        "security_operation": "security_operation",
        "security_communication": "security_communication",
        "security_deleting_data": "security_data",
        "auto_delete_type": "auto_delete_type",
        "auto_delete_logs_path": "logs_path",
        "credit_term": "credit_term",
        "digi_pay_activated": "digi_pay_activated",
        "frequency": "frequency",
        "planned_fte": "planned_fte",
        "bill_unit": "bill_unit",
        "min_charge_activated": "min_charge_activated",
        "batch_amount": "batch_amount",
        "forecast": "forecast",
        "tat": "tat",
        "connection_method": "connection_method",
        "offline_app": "offline_app",
        "input_format": "format_input",
        "output_format": "format_output",
        "type_of_form": "form_type",
        # The original expression used the literal string "others" (no "$"),
        # which made every row carry the constant "others".
        # NOTE(review): confirm the source field is really named "others".
        "others": "others",
        "charge_rate_ot": "charge_rate_ot",
        "charge_rate_holiday": "charge_rate_holiday",
        "charge_rate_weekend": "charge_rate_weekend",
        "volume": "volume",
        "shifts": "shifts",
        "qc_schedule": "qc_schedule",
        "qc_unit": "qc_unit",
        "qa_critical": "qc_quality_critical",
        "qa_non_critical": "qc_quality_non_critical",
        "test_date_start": "test_date_start",
        "test_date_end": "test_date_end",
        "setup_date_end": "prep_date_end",  # Not sure
        "setup_date_start": "prep_date_start",  # Not sure
        # TODO: not mapped yet -- billing_price (split to another table ??),
        # approve_status, min_charge_type, min_charge_amount, other_agreement.
    }

    def __init__(
        self,
        *kwargs,
        environment: str,
        uri: str,
        database_name: str,
        collection_name: str,
        schema: str,
        table: str,
        index: list = None,
        db: "DatabaseConnect"
    ):
        """Store the job configuration; no I/O happens here.

        All named parameters are keyword-only (they follow ``*kwargs``,
        which collects positional arguments and is never read).

        Args:
            environment: ``'development'`` selects the local pickle file;
                any other value selects MongoDB.
            uri: Connection string passed to ``MongoClient``.
            database_name: MongoDB database to read from.
            collection_name: MongoDB collection to aggregate over.
            schema: Target schema forwarded to ``db``.
            table: Target table forwarded to ``db``.
            index: Optional index columns; when given,
                ``db.update_df_with_index`` is used instead of
                ``db.update_df``.
            db: Object providing ``update_df`` and ``update_df_with_index``.
        """
        self.environment = environment
        self.uri = uri
        self.database_name = database_name
        self.collection_name = collection_name
        self.schema = schema
        self.table = table
        self.index = index
        self.db = db

    def _build_pipeline(self):
        """Return the aggregation pipeline: one ``$project`` stage mapping
        each source field to its output name, with ``None`` as the
        ``$ifNull`` fallback."""
        projection = {
            target: {"$ifNull": ["$" + source, None]}
            for target, source in self._SOURCE_FIELDS.items()
        }
        return [{"$project": projection}]

    def get_data(self):
        """Return the project documents as a list.

        Returns:
            A list built from the unpickled object (development) or from
            the aggregation cursor (any other environment).
        """
        if self.environment == 'development':
            # NOTE: unpickling can execute arbitrary code; only load a
            # fixture file you trust.
            with open('project.pickle', 'rb') as fixture:
                return list(pickle.load(fixture))

        # NOTE(review): MongoClient is not imported in the visible part of
        # this file; it must already be in scope (pymongo) for this branch.
        client = MongoClient(self.uri)
        try:
            collection = client[self.database_name][self.collection_name]
            return list(collection.aggregate(self._build_pipeline()))
        finally:
            # Close the connection even when the aggregation raises.
            client.close()

    def fix_empty(self, data):
        """Normalise one cell value before it is written out.

        Returns:
            ``None`` for an empty list/dict and for missing values
            (``None`` / NaN); ``str(data)`` for a non-empty list/dict; any
            other value unchanged.
        """
        if isinstance(data, (list, dict)):
            return str(data) if data else None
        if isinstance(data, float) and data != data:
            # NaN is the only float that is not equal to itself.
            return None
        return data

    def transform_data(self):
        """Fetch the documents, build the DataFrame and pass it to ``db``.

        ``_id`` is stringified into ``project_id`` and dropped; the columns
        listed below are run through ``fix_empty``; the resulting column
        index is printed.
        """
        df = pd.json_normalize(self.get_data())
        df['project_id'] = [str(item) for item in df['_id']]
        df = df.drop(['_id'], axis=1)

        for column in (
            'project_complexity',
            'contract_type',
            'shifts',
            'input_format',
            'output_format',
        ):
            df[column] = [self.fix_empty(item) for item in df[column]]

        print(df.columns)
        if self.index is None:
            self.db.update_df(df, self.schema, self.table)
        else:
            self.db.update_df_with_index(df, self.schema, self.table, self.index)

In [33]:
# Run the project-dimension ETL against the warehouse connection.
db_connect = DatabaseConnect(uri=config.DWH_SQLALCHEMY_URI)
try:
    etl = DimProjectExecutor(
        environment=config.ENVIRONMENT,
        uri=config.ELROND_URI,
        database_name=config.ELROND_DATABASE,
        collection_name=config.ELROND_PROJECT_COLLECTION,
        schema=config.DWH_ANALYTIC_SCHEMA,
        table=config.DWH_DIM_PROJECT_TABLE,
        db=db_connect,
        index=['project_id'],
    )
    etl.transform_data()
finally:
    # Close the connection even when the ETL step raises.
    db_connect.close()

Index(['project_code', 'project_name', 'project_type', 'project_description',
       'project_date_end', 'project_date_start', 'project_term',
       'project_complexity', 'active_status', 'contract_type',
       'contract_expire_date', 'contact_from_customer', 'other_services',
       'security_delivery', 'security_operation', 'security_communication',
       'security_deleting_data', 'auto_delete_type', 'auto_delete_logs_path',
       'credit_term', 'digi_pay_activated', 'frequency', 'planned_fte',
       'bill_unit', 'min_charge_activated', 'batch_amount', 'forecast', 'tat',
       'connection_method', 'offline_app', 'input_format', 'output_format',
       'type_of_form', 'others', 'charge_rate_ot', 'charge_rate_holiday',
       'charge_rate_weekend', 'volume', 'shifts', 'qc_schedule', 'qc_unit',
       'qa_critical', 'qa_non_critical', 'test_date_start', 'test_date_end',
       'setup_date_end', 'setup_date_start', 'project_id'],
      dtype='object')
