In [1]:
import numpy as np
import pandas as pd
import os
import gc
import sqlalchemy as db
from sqlalchemy import create_engine
import pymysql
from datetime import date, datetime

In [4]:
# Identify Path for bill dataset
data_path1 = 'E:\\EASi\\5-Operations\\50-Management\\500-DataCentral\\2-Static Dashboard\\WIP\\data\\input\\'
file_name1 = os.path.join(data_path1,'bill.csv')
# Identify path for scl dataset
data_path2 = 'E:\\EASi\\5-Operations\\50-Management\\500-DataCentral\\2-Static Dashboard\\WIP\\data\\input\\'
file_name2 = os.path.join(data_path2,'scl.csv')

In [5]:
# Call dataset
df1 = pd.read_csv(file_name1)
df2 = pd.read_csv(file_name2)

In [6]:
# identify all required attributes
bill_df = df1.loc[:,['Customer Id',
                    'Employee ID',
                    'Employee Name',
                    'Proj Invoice Header Value1',
                    'WBS_Activity Id',
                    'Activity Descr',
                    'Resource Type',
                    'Accounting Status',
                    'Quantity_Unit',
                    'Bill Rate',
                    'Total Amount',
                    'Pay End Date'
                    ]]
scl_df = df2.loc[:,['Customer Id',
                    'Employee ID',
                    'Employee Name',
                    'Resource Type',
                    'Accounting Status',
                    'Quantity_Unit',
                    'WBS_Activity Id',
                    'Activity Descr',
                    'Pay End Date'
                    ]]

# Modify column name
bill_df.rename({'Customer Id':'client_id',
                'Employee ID':'easi_id',
               'Employee Name':'name',
               'Proj Invoice Header Value1':'sow_no',
               'WBS_Activity Id':'wbs_id',
               'Activity Descr':'description',
               'Resource Type':'res_type',
               'Accounting Status':'acc_status',
               'Quantity_Unit':'hours',
               'Bill Rate':'bill_rate',
               'Total Amount':'amount', 
               'Pay End Date':'weekending'},axis=1,inplace=True)

scl_df.rename({'Customer Id':'client_id',
               'Employee ID':'easi_id', 
               'Employee Name':'name', 
               'Resource Type':'res_type', 
               'Accounting Status':'acc_status',
               'Quantity_Unit':'hours',
               'WBS_Activity Id':'wbs_id',
               'Activity Descr':'act_desc',
               'Pay End Date':'weekending'},axis=1,inplace=True)

# Change data type as required
bill_df['weekending'] = bill_df['weekending'].astype('datetime64[ns]')
scl_df['weekending'] = scl_df['weekending'].astype('datetime64[ns]')

In [7]:
# filtering if required
res_type_hours  = ['LABOR','SUBCN']
acc_status_hours  = ['IEH','PAY']

res_type_amount  = ['LABOR','SUBCN','EQUIP']
acc_status_amount  = ['BIL','BLD','XRV']

hours_df = bill_df.loc[bill_df['res_type'].isin(res_type_hours)]
hours_df = bill_df.loc[bill_df['acc_status'].isin(acc_status_hours)]

scl_hours_df = scl_df.loc[scl_df['res_type'].isin(res_type_hours)]
scl_hours_df = scl_df[scl_df['acc_status'].isin(acc_status_hours)]

amount_df = bill_df.loc[bill_df['res_type'].isin(res_type_amount)]
amount_df = bill_df.loc[bill_df['acc_status'].isin(acc_status_amount)]


In [8]:
# drop internal people
scl_hours_df = scl_hours_df.drop(scl_hours_df[(scl_hours_df['easi_id'] == 6037624)|
                                              (scl_hours_df['easi_id'] == 6142088)|
                                              (scl_hours_df['easi_id'] == 6271423)|
                                              (scl_hours_df['easi_id'] == 6733494)|
                                              (scl_hours_df['easi_id'] == 6760845)].index)

In [67]:
# link dataset with data central
engine = db.create_engine('mysql+pymysql://admin:password@10.140.9.159:3306/datacentralserver', echo=True)
client_df = pd.read_sql_query("SELECT * FROM client",engine)
resource_df = pd.read_sql_query("SELECT EASiID as easi_id, Clientcode as client_code, Department as department FROM personnelt WHERE Active='Yes' AND ClientCode NOT IN(SELECT clientCode from personnelt WHERE clientcode IN('','Internal'))",engine)


2019-11-12 10:04:14,734 INFO sqlalchemy.engine.base.Engine SHOW VARIABLES LIKE 'sql_mode'
2019-11-12 10:04:14,736 INFO sqlalchemy.engine.base.Engine {}
2019-11-12 10:04:14,743 INFO sqlalchemy.engine.base.Engine SHOW VARIABLES LIKE 'lower_case_table_names'
2019-11-12 10:04:14,745 INFO sqlalchemy.engine.base.Engine {}
2019-11-12 10:04:14,751 INFO sqlalchemy.engine.base.Engine SELECT DATABASE()
2019-11-12 10:04:14,752 INFO sqlalchemy.engine.base.Engine {}
2019-11-12 10:04:14,758 INFO sqlalchemy.engine.base.Engine show collation where `Charset` = 'utf8mb4' and `Collation` = 'utf8mb4_bin'
2019-11-12 10:04:14,760 INFO sqlalchemy.engine.base.Engine {}
2019-11-12 10:04:14,765 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS CHAR(60)) AS anon_1
2019-11-12 10:04:14,768 INFO sqlalchemy.engine.base.Engine {}
2019-11-12 10:04:14,771 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS CHAR(60)) AS anon_1
2019-11-12 10:04:14,773 INFO sqlalchemy.engine.base.E

In [86]:
# Join with client table in DC
combined_hours_df = hours_df.merge(client_df,on='client_id',how='left')
combined_amount_df = amount_df.merge(client_df,on='client_id',how='left')
combined_scl_df = scl_hours_df.merge(resource_df,on='easi_id',how='left')

# Group it by client ID - Provide quick summary based on hours or amount
hours_client_df = combined_hours_df.groupby('client_code')['hours'].sum()
amount_client_df = combined_amount_df.groupby('client_code')['amount'].sum() 
scl_client_act_df = combined_scl_df.groupby(('act_desc'))['hours'].sum()
scl_client_res_df = combined_scl_df.groupby(('client_code'))['hours'].sum()

temp_df = hours_client_df.reset_index().merge(amount_client_df.reset_index(), on='client_code', how='inner')
temp_df.rename({'hours':'bill'},axis=1, inplace=True)
td_df = temp_df.merge(scl_client_res_df.reset_index(), on='client_code', how='left')
td_df.rename({'hours':'nonbill', 
              'client_code':'client',
              'amount':'revenue'},axis=1, inplace=True)
td_df.fillna(0,inplace=True)
td_df.insert(4,'fring',0) 
td_df.insert(5,'weekending','2019-11-09') #-----Change to current weekending-----#

td_df
# once validated, insert to mysql with code below
# td_df.to_sql('time_dimension',con=engine, if_exists='append',index=False)

2019-11-12 10:16:20,076 INFO sqlalchemy.engine.base.Engine DESCRIBE `time_dimension`
2019-11-12 10:16:20,079 INFO sqlalchemy.engine.base.Engine {}
2019-11-12 10:16:20,089 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2019-11-12 10:16:20,091 INFO sqlalchemy.engine.base.Engine INSERT INTO time_dimension (client, bill, revenue, nonbill, fring, weekending) VALUES (%(client)s, %(bill)s, %(revenue)s, %(nonbill)s, %(fring)s, %(weekending)s)
2019-11-12 10:16:20,092 INFO sqlalchemy.engine.base.Engine ({'client': 'CAT', 'bill': 730.75, 'revenue': 63450.25, 'nonbill': 48.25, 'fring': 0, 'weekending': '2019-11-09'}, {'client': 'HAL', 'bill': 123.0, 'revenue': 11685.000000000004, 'nonbill': 0.0, 'fring': 0, 'weekending': '2019-11-09'}, {'client': 'HUT', 'bill': 57.0, 'revenue': 3375.0, 'nonbill': 3.0, 'fring': 0, 'weekending': '2019-11-09'}, {'client': 'PAR', 'bill': 82.0, 'revenue': 8920.0, 'nonbill': 0.0, 'fring': 0, 'weekending': '2019-11-09'}, {'client': 'SIE', 'bill': 2064.0, 'revenue': 

In [138]:
# perpare dataset to be inputed in DC
bill_info = combined_hours_df.loc[:,['easi_id',
                                     'sow_no',
                                     'wbs_id',
                                     'description',
                                     'client_code',
                                     'hours',
                                     'weekending']]

# once validated, insert to mysql with code below
bill_info.to_sql('bill',con=engine, if_exists='append',index=False)

2019-11-12 11:22:33,717 INFO sqlalchemy.engine.base.Engine DESCRIBE `bill`
2019-11-12 11:22:33,719 INFO sqlalchemy.engine.base.Engine {}
2019-11-12 11:22:33,727 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2019-11-12 11:22:33,734 INFO sqlalchemy.engine.base.Engine INSERT INTO bill (easi_id, sow_no, wbs_id, description, client_code, hours, weekending) VALUES (%(easi_id)s, %(sow_no)s, %(wbs_id)s, %(description)s, %(client_code)s, %(hours)s, %(weekending)s)
2019-11-12 11:22:33,735 INFO sqlalchemy.engine.base.Engine ({'easi_id': 5499555, 'sow_no': '19006', 'wbs_id': 'P02-19006-QA', 'description': 'QA & Automated Test', 'client_code': 'CAT', 'hours': 8.0, 'weekending': datetime.datetime(2019, 11, 2, 0, 0)}, {'easi_id': 5499555, 'sow_no': '19006', 'wbs_id': 'P02-19006-QA', 'description': 'QA & Automated Test', 'client_code': 'CAT', 'hours': 8.0, 'weekending': datetime.datetime(2019, 11, 2, 0, 0)}, {'easi_id': 5668372, 'sow_no': '19006', 'wbs_id': 'P04-19006-TEC', 'description': 'Techi

In [114]:
scl_info = combined_scl_df.loc[:,['easi_id', 
                                 'hours', 
                                 'wbs_id', 
                                 'act_desc',
                                 'weekending']]
scl_info.shape
scl_info.head()

# once validated, insert to mysql with code below
# scl_info.to_sql('scl',con=engine, if_exists='append',index=False)


Unnamed: 0,easi_id,hours,wbs_id,act_desc,weekending
0,4563307,4.0,SCL-16-2,Training,2019-11-02
1,4847893,0.5,SCL-16-2,Training,2019-11-02
2,4847893,0.75,SCL-16-2,Training,2019-11-02
3,5342274,4.0,SCL-16-3,Bench/Idle Time,2019-11-02
4,5518599,1.5,SCL-16-2,Training,2019-11-02
