In [1]:
from db_connect import EngineConnect as DatabaseConnect
import config
from pprint import pprint
import function as func

import pickle
import datetime
import pandas as pd
from pymongo import MongoClient

In [2]:
class FactPerformanceModel:
    """Value holder for one performance fact row.

    Every named constructor argument is stored unchanged on the instance
    under the same attribute name, so ``instance.__dict__`` is a mapping of
    field name to value in the order the fields are declared.
    """

    def __init__(
        self,
        *kwargs,
        ori_id: str,
        project_id: str,
        group_id: str = None,
        document_id: str = None,
        reworked: bool = False,
        work_type_id: int,
        process_id: int,
        number_of_record: int,
        user_name: str,
        ip: str = None,
        captured_date_timestamp: str,
        captured_date_key: int,
        captured_time_key: int,
        total_time_second: int
    ):
        """Store the given field values on the instance.

        ``*kwargs`` collects any positional arguments and they are not used;
        all named parameters are therefore keyword-only.
        """
        # (attribute name, value) pairs, listed in declaration order so the
        # resulting ``__dict__`` keeps the same key order.
        field_values = (
            ('ori_id', ori_id),
            ('project_id', project_id),
            ('group_id', group_id),
            ('document_id', document_id),
            ('reworked', reworked),
            ('work_type_id', work_type_id),
            ('process_id', process_id),
            ('number_of_record', number_of_record),
            ('user_name', user_name),
            ('ip', ip),
            ('captured_date_timestamp', captured_date_timestamp),
            ('captured_date_key', captured_date_key),
            ('captured_time_key', captured_time_key),
            ('total_time_second', total_time_second),
        )
        for attr_name, attr_value in field_values:
            setattr(self, attr_name, attr_value)

In [7]:
class FactPerformanceExecutor:
    """Load raw performance records for one project and store them as fact rows.

    Records come from a local pickle file when ``environment`` is
    ``'development'`` and from a MongoDB collection otherwise. Each record is
    flattened by :meth:`data_handle`, wrapped in a ``FactPerformanceModel``
    and handed to ``db.create`` together with ``schema`` and ``table``.
    """

    # The only project id whose records transform_data currently processes.
    SUPPORTED_PROJECT_ID = '5db144de27f919001f5f25e5'

    # Names of the values returned by data_handle, in return order; they are
    # also the keyword arguments of FactPerformanceModel.
    _MODEL_FIELDS = (
        'ori_id',
        'project_id',
        'group_id',
        'document_id',
        'reworked',
        'work_type_id',
        'process_id',
        'number_of_record',
        'user_name',
        'ip',
        'captured_date_timestamp',
        'captured_date_key',
        'captured_time_key',
        'total_time_second',
    )

    def __init__(
        self,
        *kwargs,
        environment: str,
        uri: str,
        database_name: str,
        collection_name: str,
        project_id: str,
        schema: str,
        table: str,
        index: list = None,
        db: "DatabaseConnect"
    ):
        """Remember the source and destination settings.

        ``*kwargs`` collects any positional arguments and they are not used;
        all named parameters are keyword-only.
        """
        self.environment = environment
        self.uri = uri
        self.database_name = database_name
        self.collection_name = collection_name
        self.schema = schema
        self.table = table
        self.db = db
        self.index = index
        self.project_id = project_id

    def get_data(self):
        """Return the raw performance records.

        In ``'development'`` the records are unpickled from
        ``performance/<project_id>.pickle``; otherwise up to 100 documents are
        read from the configured MongoDB collection and returned as a list.
        """
        if self.environment == 'development':
            # NOTE: pickle executes arbitrary code on load; only use this
            # path with trusted, locally produced fixture files.
            with open('performance/' + self.project_id + '.pickle', 'rb') as fixture:
                return pickle.load(fixture)

        client = MongoClient(self.uri)
        try:
            collection = client[self.database_name][self.collection_name]
            # NOTE(review): the query is empty, so it is not filtered by
            # project_id - confirm this is intended.
            query = {}
            return list(collection.find(query).limit(100))
        finally:
            # Close the connection even when the query raises.
            client.close()

    @staticmethod
    def _resolve_document_id(performance, performance_keys):
        """Return the document id from 'documentId' or 'doc_id', else None."""
        if 'documentId' in performance_keys:
            return performance['documentId']
        if 'doc_id' in performance_keys:
            return performance['doc_id']
        return None

    @staticmethod
    def _resolve_reworked(performance, performance_keys):
        """Return the rework flag from 'rework_count' or 'has_rework', else False."""
        if 'rework_count' in performance_keys:
            # Written as a negated "<= 0" so that any value which is not
            # "<= 0" counts as reworked.
            return not (performance['rework_count'] <= 0)
        if 'has_rework' in performance_keys:
            return performance['has_rework']
        return False

    def data_handle(self, performance, performance_keys):
        """Flatten one raw record into the tuple of fact-row values.

        Args:
            performance: mapping holding one raw performance record.
            performance_keys: collection of key names used to decide which
                optional fields of ``performance`` are read.

        Returns:
            A 14-tuple ordered as ``_MODEL_FIELDS``.

        Raises:
            KeyError: if a mandatory key is missing, including when the
                record has neither ``'captured_date'`` nor ``'time'``.
        """
        ori_id = func.bson_object_to_string(performance['_id'])
        project_id = self.project_id
        group_id = performance['group_id']
        document_id = self._resolve_document_id(performance, performance_keys)
        reworked = self._resolve_reworked(performance, performance_keys)
        work_type_id = func.get_working_type_id_by_name(performance['work_type'])
        user_name = performance['username']
        ip = performance['ip'] if 'ip' in performance_keys else None

        if 'captured_date' in performance_keys:
            # Date-only string, so the time key is fixed at 0.
            captured_date_timestamp = datetime.datetime.strptime(
                performance['captured_date'], '%d/%m/%Y'
            )
            captured_date_key = func.time_to_date_key(captured_date_timestamp)
            captured_time_key = 0
        elif 'time' in performance_keys:
            captured_date_timestamp = performance['time']
            captured_date_key = func.time_to_date_key(captured_date_timestamp)
            captured_time_key = func.time_to_time_key(captured_date_timestamp)
        else:
            raise KeyError(
                "performance record has neither 'captured_date' nor 'time'"
            )

        if self.project_id == self.SUPPORTED_PROJECT_ID:
            process_id = func.get_process_id_performance(performance['type'])
        else:
            process_id = 0

        number_of_record = performance['records']
        # NOTE(review): total_time is divided by 100 - confirm the unit of
        # the source value.
        total_time_second = performance['total_time'] / 100
        return (
            ori_id,
            project_id,
            group_id,
            document_id,
            reworked,
            work_type_id,
            process_id,
            number_of_record,
            user_name,
            ip,
            captured_date_timestamp,
            captured_date_key,
            captured_time_key,
            total_time_second,
        )

    def transform_data(self):
        """Load, convert and store the records of this project.

        Prints ``'404 not found'`` when no records were loaded and
        ``'not handle yet'`` when the project is not the supported one.
        Otherwise all records are converted, passed to ``self.db.create`` in
        one call, and the first converted row is pretty-printed.
        """
        data_performance = self.get_data()
        if len(data_performance) == 0:
            print('404 not found')
            return
        if self.project_id != self.SUPPORTED_PROJECT_ID:
            print('not handle yet')
            return

        results = []
        for performance in data_performance:
            # Use each record's own keys: records are not guaranteed to share
            # the key set of the first one.
            values = self.data_handle(performance, performance.keys())
            results.append(
                FactPerformanceModel(**dict(zip(self._MODEL_FIELDS, values)))
            )
        self.db.create([item.__dict__ for item in results], self.schema, self.table)
        pprint(results[0].__dict__)

In [8]:
# Warehouse connection shared by every executor created below.
db_connect = DatabaseConnect(uri=config.DWH_SQLALCHEMY_URI)

# Project ids to run the performance fact load for.
list_t = [
     '5e1d3c314109de001b9b4465',
     '5f06d14f4497e9001bb64fdf',
     '5e0dce517c655b59c53f8e79',
     '5f194f5a4497e9001bb65222',
     '5e69edb94109de001b9b459b',
     '5e86fb444109de001b9b4653',
     '5eb8aea7c3715b001bbb3d6b',
     '5f0ffa504497e9001bb65125',
     '5d89f91c66ba1f001fcc11e3',
     '5d1efa6345d56500143bb348',
     '5f768d4389f56b001bd5b971',
     '5ecdc6ada246060014d07f25',
     '5f43a4b680ec50001c246ecf',
     '5f962f1d4ebd14001c7e843a',
     '5ef95f2835497a001b114bab',
     '5dc4f4856c4d6a001ecad9c9',
     '5f22951d4497e9001bb656c8',
     '5ee323e7324b38001b566660',
     '5fb493d64ebd14001c7e84e6',
     '5e0dce4f7c655b59c53f8e0b',
     '5f5ec11c4496d7001b6846c4',
     '5e8d9c8d98d753001b7efd8f',
     '5f56eb7e4496d7001b6846b2',
     '5f3c8faf80ec50001c246ec9',
     '5db5c87345052400142992e9',
     '5e0dce517c655b59c53f8e7b',
     '5ed862865b721b00142ebaaa',
     '5e21a6504109de001b9b4490',
     '5e0dce507c655b59c53f8e5c',
     '5d4432edd26b07001e0439f0',
     '5ec1f01bc3715b001bbb3d83',
     '5e7826754109de001b9b45fd',
     '5e5334134109de001b9b451d',
     '5e0dce4e7c655b59c53f8da5',
     '5f7d10ae59249a001bee1e33',
     '5f03ffc44497e9001bb64f18',
     '5ee32316324b38001b56665e',
     '5e9e7ec598d753001b7efe6b',
     '5db144de27f919001f5f25e5',
     '5f6c5a6e89f56b001bd5b941',
     '5e8475b84109de001b9b4650',
     '5f5ed1f24496d7001b6846cb',
     '5eafeecec3715b001bbb3d62',
     '5f3a052980ec50001c246e33',
     '5da6f6ef18b669001ea8c424',
     '5e0dce507c655b59c53f8e43',
     '5e9da59b98d753001b7efe67',
     '5f2bc8ea4497e9001bb6593d',
     '5f50c19580ec50001c246edf',
     '5e7d568e4109de001b9b4636',
     '5ea8e9e4c3715b001bbb3d4d',
     '5e44c57f4109de001b9b44de',
     '5ef4115e35497a001b114999',
     '5e0dce517c655b59c53f8e8a',
     '5d7a266cbbda49001e74d249',
     '5f9b6a5dd0106d001fb825dd',
     '5ed8ced15b721b00142ebaae',
     '5dd4b868642c7a001e7a3e99'
]

# Settings that are identical for every project; only project_id varies.
executor_settings = dict(
    environment=config.ENVIRONMENT,
    uri=config.ELROND_URI,
    database_name=config.ELROND_DATABASE,
    collection_name=config.ELROND_ECLAIMS_PERFORMANCE_REPORT_COLLECTION,
    schema=config.DWH_ANALYTIC_SCHEMA,
    table=config.DWH_FACT_PERFORMANCE_TABLE,
    db=db_connect,
)

# Run the load once per project id, in list order.
for project_id in list_t:
    fact = FactPerformanceExecutor(project_id=project_id, **executor_settings)
    fact.transform_data()

not handle yet
not handle yet
not handle yet
not handle yet
not handle yet
404 not found
not handle yet
not handle yet
404 not found
not handle yet
not handle yet
404 not found
not handle yet
not handle yet
not handle yet
404 not found
not handle yet
404 not found
not handle yet
not handle yet
not handle yet
404 not found
not handle yet
not handle yet
not handle yet
not handle yet
not handle yet
not handle yet
not handle yet
404 not found
404 not found
not handle yet
404 not found
not handle yet
not handle yet
not handle yet
404 not found
not handle yet
{'captured_date_key': 20191203,
 'captured_date_timestamp': datetime.datetime(2019, 12, 3, 0, 0),
 'captured_time_key': 0,
 'document_id': '5de6246d62ca2e0011908f4e',
 'group_id': None,
 'ip': None,
 'number_of_record': 1,
 'ori_id': '5de6688d52439f0014f786d9',
 'process_id': 3,
 'project_id': '5db144de27f919001f5f25e5',
 'reworked': False,
 'total_time_second': 1562.89,
 'user_name': 'nhattta',
 'work_type_id': 1}
not handle yet
404 no