In [4]:
from pymongo import MongoClient
import pickle
from pprint import pprint
import config
import pandas as pd
import function as func
import datetime
from typing import List, Tuple
from db_connect import EngineConnect as DatabaseConnect
from sqlalchemy.sql import text

In [5]:
class DimProjectManagerModel:
    """A single (project, manager) pair destined for the project-manager table.

    Instances are plain attribute holders; the executor's ``etl`` serialises
    them through ``__dict__``, so the attribute names here are the keys that
    get handed to the database layer.

    Args:
        *kwargs: Stray positional arguments are collected and ignored. The
            practical effect is that every named field is keyword-only.
        project_id: Identifier of the project.
        project_name: Name of the project.
        user_name: One project manager of that project.
    """

    def __init__(
        self,
        *kwargs,
        project_id: str,
        project_name: str,
        user_name: str,
    ):
        # Plain assignment on purpose: a trailing comma here would wrap the
        # value in a 1-tuple and that tuple would end up in the row dict.
        self.project_id = project_id
        self.project_name = project_name
        self.user_name = user_name
        

class DimProjectManagerHandle:
    """Expands one project document into one model row per project manager.

    Args:
        *kwargs: Stray positional arguments are collected and ignored; the
            named fields below are therefore keyword-only.
        project_id: Identifier of the project.
        project_managers: Managers of the project. May be ``None`` when the
            source document has no ``project_managers`` field (the Mongo
            pipeline in the executor substitutes ``None`` in that case).
        project_name: Name of the project.
    """

    def __init__(
        self,
        *kwargs,
        project_id: str,
        project_managers: List[str],
        project_name: str,
    ):
        self.project_id = project_id
        self.project_managers = project_managers
        self.project_name = project_name

    def run(self):
        """Build the rows for this project.

        Returns:
            A list with one ``DimProjectManagerModel`` per entry in
            ``project_managers``; an empty list when there are no managers
            or the field is ``None``.
        """
        # A project without managers contributes no rows instead of raising
        # TypeError when iterating over None.
        if self.project_managers is None:
            return []

        return [
            DimProjectManagerModel(
                project_id=self.project_id,
                project_name=self.project_name,
                user_name=manager,
            )
            for manager in self.project_managers
        ]
    
    
class DimProjectManagerExecutor:
    """Reads project documents and reloads the project-manager table from them.

    In the ``'development'`` environment the documents come from a local
    pickle file; in any other environment they are read from MongoDB.

    Args:
        *kwargs: Stray positional arguments are collected and ignored; the
            named fields below are therefore keyword-only.
        environment: ``'development'`` selects the pickle fixture, anything
            else selects MongoDB.
        uri: MongoDB connection URI.
        database_name: MongoDB database holding the project collection.
        collection_name: MongoDB collection with the project documents.
        schema: Target schema of the table to reload.
        table: Target table to reload.
        db: Database helper exposing ``raw_sql``, ``update`` and ``close``.
            ``etl`` closes it before returning.
    """

    def __init__(
        self,
        *kwargs,
        environment: str,
        uri: str,
        database_name: str,
        collection_name: str,
        schema: str,
        table: str,
        db: "DatabaseConnect"
    ):
        self.environment = environment
        self.uri = uri
        self.database_name = database_name
        self.collection_name = collection_name
        self.schema = schema
        self.table = table
        self.db = db

    def get_data(self):
        """Return the source project documents as a list.

        Returns:
            A new list of documents. From MongoDB each document carries
            ``_id``, ``name`` and ``project_managers``; the latter two are
            ``None`` when absent in the stored document.
        """
        if self.environment == 'development':
            # NOTE(review): pickle.load can execute arbitrary code embedded in
            # the file; only point this at a trusted, locally produced fixture.
            with open('project_manager.pickle', 'rb') as fixture:
                data_load = pickle.load(fixture)
            return list(data_load)

        client = MongoClient(self.uri)
        try:
            cursor = client[self.database_name][self.collection_name].aggregate([
                {
                    "$project": {
                        # $ifNull guarantees both keys exist in every result.
                        "name": { "$ifNull": [ "$name", None ] },
                        "project_managers": { "$ifNull": [ "$project_managers", None ] },
                    }
                }
            ])
            # Drain the cursor before the client is closed.
            return list(cursor)
        finally:
            # Release the connection even if the aggregation fails.
            client.close()

    def etl(self):
        """Extract the documents, expand them to rows and reload the table.

        All rows are built before the table is truncated, so a failure while
        reading or transforming leaves the existing table contents in place.
        ``self.db`` is closed on both success and failure.
        """
        try:
            rows = []
            for item in self.get_data():
                payload = {
                    'project_id': str(item['_id']),
                    'project_name': item['name'],
                    'project_managers': item['project_managers'],
                }
                rows.extend(DimProjectManagerHandle(**payload).run())

            # schema/table come from the constructor arguments; identifiers
            # cannot be bound as SQL parameters, so they are formatted in.
            sql = "TRUNCATE TABLE {schema}.{table};".format(schema=self.schema, table=self.table)
            self.db.raw_sql(sql)
            # NOTE(review): presumably inserts the row dicts into
            # schema.table - confirm against the db helper.
            self.db.update([row.__dict__ for row in rows], self.schema, self.table)
        finally:
            self.db.close()

In [7]:
# Warehouse connection handed to the executor; etl() closes it once the load
# has finished, so it is not reusable afterwards.
db_connect = DatabaseConnect(uri=config.DWH_SQLALCHEMY_URI)

# Every source (MongoDB) and target (warehouse) setting is read from `config`.
dim_project_manager = DimProjectManagerExecutor(
    environment=config.ENVIRONMENT,
    uri=config.ELROND_URI,
    database_name=config.ELROND_DATABASE,
    collection_name=config.ELROND_PROJECT_COLLECTION,
    schema=config.DWH_ANALYTIC_SCHEMA,
    table=config.DWH_DIM_PROJECT_MANAGER_TABLE,
    db=db_connect,
)

# Truncates the target table and reloads it from the source documents.
dim_project_manager.etl()