In [1]:
import numpy as np
import pandas as pd
import os
import gc
import sqlalchemy as db
from sqlalchemy import create_engine
import pymysql
from datetime import date, datetime

import datacompy

In [2]:
# Folder holding the validation input extracts.
# Both CSV files are read from this same directory, so the second path reuses the first.
data_path1 = 'E:\\EASi\\5-Operations\\50-Management\\500-DataCentral\\2-Static Dashboard\\WIP\\data\\input\\validation\\'
data_path2 = data_path1

# Full paths of the two extracts: 'tc.csv' and 'vital.csv'.
file_name1, file_name2 = (
    os.path.join(folder, csv_name)
    for folder, csv_name in ((data_path1, 'tc.csv'), (data_path2, 'vital.csv'))
)

In [3]:
# Load the two CSV extracts into DataFrames using pandas' default parsing options.
# df1 <- tc.csv, df2 <- vital.csv (paths are built in the previous cell).
df1 = pd.read_csv(filepath_or_buffer=file_name1)
df2 = pd.read_csv(filepath_or_buffer=file_name2)

In [4]:
# Source column -> standardized column name for the tc extract.
# Only the columns listed here are kept, in this order.
tc_column_map = {
    'DETAILDATE': 'trans_date',
    'USERPSEMPLID': 'easi_id',
    'CUST_ID': 'client_id',
    'JOB_REQ_NBR': 'job_req',
    'PROJECTID': 'proj_id',
    'ACTIVITYID': 'activity_id',
    'HOURS': 'hours',
    'COMMENTS': 'comments',
}

# Source column -> standardized column name for the vital extract.
# Shared concepts (client_id, easi_id, proj_id, hours, trans_date) get the same
# names as in tc_df so the two frames can be joined on them later.
vital_column_map = {
    'Customer Id': 'client_id',
    'Employee Name': 'employee_name',
    'Employee ID': 'easi_id',
    'Project Id': 'proj_id',
    'WBS_Activity Id': 'wbs_id',
    'Activity Descr': 'act_desc',
    'Resource Type': 'res_type',
    'Accounting Status': 'acc_status',
    'Quantity_Unit': 'hours',
    'Transaction Date': 'trans_date',
}

# Select the required attributes and rename them in one pass per frame.
tc_df = df1.loc[:, list(tc_column_map)].rename(columns=tc_column_map)
init_vital_df = df2.loc[:, list(vital_column_map)].rename(columns=vital_column_map)

# Cast the transaction dates on both sides to datetime64[ns] so they compare as dates.
tc_df['trans_date'] = tc_df['trans_date'].astype('datetime64[ns]')
init_vital_df['trans_date'] = init_vital_df['trans_date'].astype('datetime64[ns]')

In [5]:
# Align the dtypes of the columns shared by the two frames.
# proj_id is converted to Python strings (object dtype) on both sides.
for frame in (tc_df, init_vital_df):
    frame['proj_id'] = frame['proj_id'].astype(str)

# hours is forced to float64 on the tc side; the vital side is left as read.
tc_df['hours'] = tc_df['hours'].astype('float64')

# Display the resulting dtypes of both frames for a visual check.
tc_df.dtypes, init_vital_df.dtypes


(trans_date     datetime64[ns]
 easi_id                 int64
 client_id               int64
 job_req                 int64
 proj_id                object
 activity_id            object
 hours                 float64
 comments               object
 dtype: object, client_id                 int64
 employee_name            object
 easi_id                   int64
 proj_id                  object
 wbs_id                   object
 act_desc                 object
 res_type                 object
 acc_status               object
 hours                   float64
 trans_date       datetime64[ns]
 dtype: object)

In [6]:
# Filter the vital dataset down to the rows used in the comparison:
# only the listed accounting statuses and resource types are kept.
# (No date / week-ending filter is applied in this cell.)
acc_filter = ['BIL','BLD','XRV']
res_filter = ['LABOR','SUBCN']

# Step 1: keep rows whose accounting status is one of acc_filter.
temp_df1 = init_vital_df.loc[init_vital_df['acc_status'].isin(acc_filter)]
# Step 2: of those, keep rows whose resource type is one of res_filter.
vital_df = temp_df1.loc[temp_df1['res_type'].isin(res_filter)]
# The filter columns and the employee name are not needed past this point.
vital_df = vital_df.drop(columns=['acc_status','res_type','employee_name'])

# Drop the free-text comments from the tc frame.
# errors='ignore' keeps this cell re-runnable: tc_df is rebound in place, so on a
# second run the column is already gone and a plain drop would raise KeyError.
tc_df = tc_df.drop(columns=['comments'], errors='ignore')


In [28]:
# Add Resource connectivity
# The connection URL embeds the database password, so it is read from the
# DATACENTRAL_DB_URL environment variable instead of being stored in the notebook.
# Expected form: mysql+pymysql://<user>:<password>@<host>:<port>/<database>
db_url = os.environ.get('DATACENTRAL_DB_URL')
if not db_url:
    raise RuntimeError(
        'DATACENTRAL_DB_URL is not set; export the SQLAlchemy connection URL '
        '(mysql+pymysql://<user>:<password>@<host>:<port>/<database>) before running this cell.'
    )

# echo=True makes SQLAlchemy log every statement it sends to the server.
engine = db.create_engine(db_url, echo=True)

# Look up name and department per EASi id from personnelt, limited to rows flagged
# Active='Yes' and excluding the client codes selected by the subquery ('' and 'Internal').
resource_df = pd.read_sql_query("SELECT EASiID as easi_id, FullName as resource, Department as department FROM personnelt WHERE Active='Yes' AND ClientCode NOT IN(SELECT clientCode from personnelt WHERE clientcode IN('','Internal'))",engine)

2020-01-16 15:14:48,891 INFO sqlalchemy.engine.base.Engine SHOW VARIABLES LIKE 'sql_mode'
2020-01-16 15:14:48,894 INFO sqlalchemy.engine.base.Engine {}
2020-01-16 15:14:48,905 INFO sqlalchemy.engine.base.Engine SHOW VARIABLES LIKE 'lower_case_table_names'
2020-01-16 15:14:48,907 INFO sqlalchemy.engine.base.Engine {}
2020-01-16 15:14:48,921 INFO sqlalchemy.engine.base.Engine SELECT DATABASE()
2020-01-16 15:14:48,923 INFO sqlalchemy.engine.base.Engine {}
2020-01-16 15:14:48,932 INFO sqlalchemy.engine.base.Engine show collation where `Charset` = 'utf8mb4' and `Collation` = 'utf8mb4_bin'
2020-01-16 15:14:48,934 INFO sqlalchemy.engine.base.Engine {}
2020-01-16 15:14:48,941 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS CHAR(60)) AS anon_1
2020-01-16 15:14:48,943 INFO sqlalchemy.engine.base.Engine {}
2020-01-16 15:14:48,947 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS CHAR(60)) AS anon_1
2020-01-16 15:14:48,952 INFO sqlalchemy.engine.base.E

In [29]:
# Quick size check before merging: (rows, columns) of each frame,
# followed by the total hours on the tc side and on the vital side.
(
    tc_df.shape,
    vital_df.shape,
    tc_df['hours'].sum(),
    vital_df['hours'].sum(),
)

((491, 7), (518, 7), 2960.0, 3150.0)

In [40]:
# Columns that must all be equal for a tc row and a vital row to be paired.
match_keys = ['easi_id','proj_id','trans_date','client_id','hours']

# Inner join: only rows present in both frames survive.
# NOTE(review): with how='inner' the '_merge' indicator column is always 'both'.
temp_df = tc_df.merge(
    vital_df,
    how='inner',
    on=match_keys,
    indicator=True,
)

# Attach resource name and department; a left join keeps matched rows that have no personnel record.
temp_df2 = temp_df.merge(resource_df, how='left', on='easi_id')

# Total matched hours per project.
temp_df3 = temp_df2.groupby('proj_id')['hours'].sum()

# Preview the enriched matched rows.
temp_df2.head()

Unnamed: 0,trans_date,easi_id,client_id,job_req,proj_id,activity_id,hours,wbs_id,act_desc,_merge,resource,department
0,2019-11-25,4563307,266644,5371058,EM1-0063-SIE,P03-370364-ASP,2.0,P03-370364-ASP,RD341PMP-02-04-OUT,both,"Haq,Sohail",MEC
1,2019-11-25,4563307,266644,5371058,EM1-0063-SIE,P01-370364-AM,6.0,P01-370364-AM,RD341PMP-02-02-OUT,both,"Haq,Sohail",MEC
2,2019-11-25,5518599,266644,5301971,E1B100500639,M01-500639,2.0,M01-500639,RD341MFG-01-08,both,"Mohan,Inder",MEC
3,2019-11-26,5518599,266644,5301971,E1B100500639,M01-500639,8.0,M01-500639,RD341MFG-01-08,both,"Mohan,Inder",MEC
4,2019-11-27,5518599,266644,5301971,E1B100500639,M01-500639,9.0,M01-500639,RD341MFG-01-08,both,"Mohan,Inder",MEC
