In [1]:
# Import Library
import numpy as np
import pandas as pd
import os
import sqlalchemy as db
import mysql.connector
from mysql.connector import errorcode
from sqlalchemy import create_engine
import pymysql
import xlrd
from datetime import date, datetime

In [2]:
# Map the dataset
# Folder that holds the MSP export. The optional MSP_DATA_DIR environment
# variable overrides the original hard-coded location, so the notebook can be
# run from another machine without editing this cell. When the variable is not
# set, the path is exactly the one used before.
data_path1 = os.environ.get(
    'MSP_DATA_DIR',
    'E:\\EASi\\5-Operations\\50-Management\\500-DataCentral\\3-Super User\\Development\\data_repository\\msp_data\\',
)

# Full path of the workbook read by the loading cell.
file_name1 = os.path.join(data_path1, 'msp_data.xlsx')

In [3]:
def _load_sheet(sheet_name):
    """Read one tab of the MSP workbook into a DataFrame.

    The first column of the sheet becomes the index and cells containing
    'NA' are treated as missing values.

    The previous calls also passed ``usercols="A"``. That is not a
    ``read_excel`` parameter (the real one is ``usecols``), so current pandas
    raises ``TypeError`` on it. It is dropped rather than corrected: limiting
    the read to column A would remove every column the later cells select.
    """
    return pd.read_excel(file_name1, sheet_name=sheet_name, index_col=0, na_values=['NA'])


df1 = _load_sheet('Resource')            # resource tab
df2 = _load_sheet('Assignment')          # assignment tab
df3 = _load_sheet('Project')             # project tab
df4 = _load_sheet('Task')                # task tab
df5 = _load_sheet('AssignmentBaseline')  # Assignment Baseline tab

In [6]:
#-------------------- Data prep RESOURCES --------------------#
# Source column -> short name used by the later cells (order = column order).
resource_columns = {
    'ResourceGroup': 'res_group',
    'TypeName': 'res_type',
    'ResourceDepartments': 'res_dept',
    'DataCentralID': 'easi_id',
    'ResourceNature': 'res_nature',
}

res_df = df1.loc[:, list(resource_columns)].rename(columns=resource_columns)

# Resource filter
res_filter = ['AMI','PDE','MMG','DVV','PROC','SES']
in_wanted_group = res_df['res_group'].isin(res_filter)
# NOTE(review): the comparison is against the string '0'; a numeric 0 in this
# column would not be excluded -- confirm the dtype of DataCentralID.
has_easi_id = res_df['easi_id'] != '0'

# Keep rows passing both conditions; the sheet index becomes a regular column.
resource_df = res_df.loc[in_wanted_group & has_easi_id].reset_index()

resource_df.shape

(99, 6)

In [7]:
#-------------------- Data prep ASSIGNMENT --------------------#
# Source column -> short name used by the later cells (order = column order).
assignment_columns = {
    'AssignmentId': 'ass_id',
    'ResourceId': 'res_id',
    'TaskId': 'task_id',
}

assignment_df1 = df2.loc[:, list(assignment_columns)].rename(columns=assignment_columns)

# Turn the sheet index into a regular column.
assignment_df = assignment_df1.reset_index()
assignment_df.shape

(1770, 4)

In [10]:
#-------------------- Data prep PROJECTS --------------------#
# Columns taken from the Project tab, in output order.
project_source_columns = ['ProjectName',
                          'SOWNb',
                          'ProjectActualFinishDate',
                          'year',
                          'CarryOverProject']

# Only entries that match a selected column are listed. The earlier map also
# carried 'ResourceId' and 'TaskId', which are not part of this selection and
# therefore never renamed anything.
project_renames = {'ProjectName': 'proj_name',
                   'SOWNb': 'sow_no',
                   'ProjectActualFinishDate': 'end_date'}

# NOTE(review): the filter value is the string '2020'; it only matches if the
# 'year' column holds strings -- confirm the dtype.
project_filter = ['2020']

# Build the typed frame in one chain so no column is assigned onto a frame
# that was itself derived from a selection.
proj_typed_df = (
    df3.loc[:, project_source_columns]
    .rename(columns=project_renames)
    .assign(end_date=lambda frame: frame['end_date'].astype('datetime64[ns]'))
)

# Filter to project that has null ProjectActualFinishDate
proj_df = proj_typed_df[proj_typed_df['end_date'].isnull()].reset_index()
project_df = proj_df.loc[proj_df['year'].isin(project_filter)]

project_df.shape

(48, 6)

In [11]:
#-------------------- Data prep TASKS --------------------#

# Source column -> short name used by the later cells (order = column order).
task_columns = {
    'ParentTaskId': 'parent_task_id',
    'TaskId': 'task_id',
    'ParentTaskName': 'parent_task_name',
    'TaskName': 'task_name',
    'TaskOutlineLevel': 'outline_level',
}

task_df1 = df4.loc[:, list(task_columns)].rename(columns=task_columns)

# Turn the sheet index into a regular column.
task_df = task_df1.reset_index()
task_df.shape

(1709, 6)

In [12]:
#-------------------- Data prep Assignment Baseline --------------------#
# Columns kept from the AssignmentBaseline tab, in output order.
baseline_columns = ["AssignmentId",
                    "AssignmentBaselineCost",
                    "AssignmentBaselineWork",
                    "TaskId"]

ass_base_df1 = df5[baseline_columns]

# Turn the sheet index into a regular column.
ass_base_df = ass_base_df1.reset_index()



(1273, 5)

In [13]:
# Combine task, project, assignment and resource data into a single frame
# Tasks keep all their rows; project columns are attached where ProjectId matches.
proj_task_df = pd.merge(task_df, project_df, how='left', on='ProjectId')  # one to many
# Only tasks that have at least one assignment survive this step.
proj_task_assignment_df = pd.merge(proj_task_df, assignment_df, how='inner', on='task_id')
# Resource details are attached where available; rows without a match are kept.
all_df = pd.merge(
    proj_task_assignment_df,
    resource_df,
    how='left',
    left_on='res_id',
    right_on='ResourceId',
)



((1709, 6), (48, 6), (1709, 11))

In [None]:
# Columns of the combined frame that make up the final data set.
export_columns = ['sow_no',
                  'outline_level',
                  'proj_name',
                  'parent_task_name',
                  'task_name',
                  'easi_id']

# NOTE(review): the levels are strings; if outline_level holds integers,
# isin() matches no rows -- confirm the dtype.
level_filter = ['1','2','3']

temp1_df = all_df.loc[:, export_columns]

# A row is kept only when it has a resource id, a SOW number and an outline
# level listed in level_filter.
keep_row = (
    temp1_df['easi_id'].notnull()
    & temp1_df['sow_no'].notnull()
    & temp1_df['outline_level'].isin(level_filter)
)

# Sorting and renaming return new frames here. The in-place versions mutated
# a frame obtained by filtering another one, which is the pattern pandas
# warns about with SettingWithCopyWarning.
msp_data = (
    temp1_df.loc[keep_row]
    .sort_values(by=['sow_no'])
    .rename(columns={'sow_no': 'project_no',
                     'outline_level': 'level',
                     'proj_name': 'project_title'})
)

msp_data.shape


In [None]:
# NOTE: every line of this cell is commented out, so nothing here runs.
# When enabled it connects to MySQL and deletes all rows from the msp_data table.
# NOTE(review): the connection arguments below embed a host, user and password
#   in the notebook; read them from the environment or a prompt before
#   re-enabling this cell.
# NOTE(review): if the connect call fails, `connection` is never bound, yet the
#   second try block and its finally clause still use it.
# NOTE(review): the bare `except:` hides the actual error, and the except
#   branches close the connection that the finally clause closes again.
# # DB data replacement protocol
# try:
#     connection = mysql.connector.connect(host='10.140.9.93',
#                                          database='datacentralserver',
#                                          user='admin',
#                                          password='password')
    
# except mysql.connector.Error as err:
#     if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
#         print('Invalid credential. Unable to access database.')
#     elif err.errno == errorcode.ER_BAD_DB_ERROR:
#         print('Database does not exists')
#     else:
#         print('Failed to connect to database')

# try:
#     cursor = connection.cursor()
#     delete_query = "DELETE FROM msp_data"
#     cursor.execute(delete_query)
#     connection.commit()
#     print("Total rows deleted: %d" % cursor.rowcount)
    
    
# except mysql.connector.Error as err:
#     print("Error:", err.message)
#     connection.close()
    
# except:
#     print("Unknown error occured!")
#     connection.close()

# finally:
# #   cursor.close()
#     connection.close()

In [None]:
# NOTE: this cell is commented out, so nothing here runs.
# When enabled it appends the rows of msp_data to the msp_data table through
# a SQLAlchemy engine.
# NOTE(review): the connection URL embeds a user and password; build it from
#   environment variables or a prompt before re-enabling this cell.
# # DB new dataset
# engine = db.create_engine('mysql+pymysql://admin:password@10.140.9.93:3306/datacentralserver', echo=True)
# msp_data.to_sql('msp_data',con=engine, if_exists='append',index=False)