In [1]:
# Import library
import numpy as np 
import pandas as pd 
import os 
import glob 
import sys 
from sqlalchemy import create_engine 
import sqlalchemy as db 

from fetcher.async_fetcher import FetchMSPDataAsync 

In [2]:
# Connection settings for MSP Project Online.
# Credentials are read from the environment so that no secret is stored in the
# notebook (or in its version history). Export MSP_USERNAME and MSP_PASS before
# starting the kernel; a missing variable raises KeyError here, before any
# network call is attempted.
MSP_ROOT_URL = os.environ.get("MSP_ROOT_URL", "https://allegiscloud.sharepoint.com")
MSP_USERNAME = os.environ["MSP_USERNAME"]
MSP_PASS = os.environ["MSP_PASS"]

# Tables to download. The following cells look every one of these names up in
# `table_df_dict`, so all of them have to be requested.
MSP_TABLES = [
    "Projects",
    "Tasks",
    "Assignments",
    "Resources",
    "ResourceTimephasedDataSet",
    "ResourceDemandTimephasedDataSet",
    "AssignmentTimephasedDataSet",
    "AssignmentBaselineTimephasedDataSet",
    "ProjectBaselines",
]

if __name__ == "__main__":
    # Result is indexed by table name in the cells below.
    table_df_dict = FetchMSPDataAsync(
        [MSP_ROOT_URL, MSP_USERNAME, MSP_PASS],
        MSP_TABLES,
    )

------- Getting available fields in MSP Project Online -------


UnboundLocalError: local variable 'l' referenced before assignment

In [None]:
# Pull every downloaded table out of the fetch result into its own dataframe.

# Master tables: projects, tasks, assignments, resources
prj_df, tsk_df = table_df_dict['Projects'], table_df_dict['Tasks']
ass_df, res_df = table_df_dict['Assignments'], table_df_dict['Resources']

# Time-phased tables (the demand table may turn out not to be needed)
rt_df, rdt_df = (
    table_df_dict['ResourceTimephasedDataSet'],
    table_df_dict['ResourceDemandTimephasedDataSet'],
)
ast_df, asbt_df = (
    table_df_dict['AssignmentTimephasedDataSet'],
    table_df_dict['AssignmentBaselineTimephasedDataSet'],
)

# Project baselines
prjb_df = table_df_dict['ProjectBaselines']

In [None]:
# (rows, columns) of the main tables: projects, resources, assignments, tasks,
# resource demand, resource time-phased, assignment time-phased
tuple(frame.shape for frame in (prj_df, res_df, ass_df, tsk_df, rdt_df, rt_df, ast_df))

In [None]:
# Keep only projects that have no actual finish date recorded (active vs. inactive)
has_no_finish_date = prj_df['ProjectActualFinishDate'].isnull()
prj_df = prj_df[has_no_finish_date]

# Keep only resources whose DataCentralID is exactly 8 characters long as text
has_8_char_id = res_df['DataCentralID'].apply(lambda value: len(str(value)) == 8)
res_df = res_df[has_8_char_id]

In [None]:
# MAIN DATAFRAME
# Build a (ResourceId, work_nature) lookup from projects -> tasks -> assignments.

# Right join: every task is kept, with its project columns where the project
# is present in prj_df.
temp1_df = prj_df.merge(tsk_df, on='ProjectId', how='right')
# Inner join: keep only tasks that have at least one assignment.
temp2_df = temp1_df.merge(ass_df, on='TaskId', how='inner')
# Restrict to resources that are present in res_df.
temp3_df = temp2_df.loc[temp2_df['ResourceId'].isin(res_df['ResourceId'])]

# Only these project natures are reported on.
nature_filter = ['Actual Work', 'Potential Work', 'At risk Work']
temp4_df = temp3_df.loc[temp3_df['ProjectNature'].isin(nature_filter)]

# Chained rename (instead of inplace=True on a sliced frame) returns a new
# frame and avoids SettingWithCopyWarning.
# NOTE(review): this frame has one row per matching assignment, so a resource
# can appear many times; merging it on ResourceId later repeats that resource's
# rows once per assignment - confirm this fan-out is intended.
temp5_df = (
    temp4_df
    .loc[:, ['ResourceId', 'ProjectNature']]
    .rename(columns={'ProjectNature': 'work_nature'})
)

In [None]:
# CAPACITY DATASET
# Time-phased resource rows for the resources kept in res_df, joined with the
# resource attributes.
rt_temp_df = rt_df.loc[rt_df['ResourceId'].isin(res_df['ResourceId'])]
res_capacity_df = rt_temp_df.merge(
    res_df,
    on=['ResourceId', 'ResourceName', 'ResourceModifiedDate'],
    how='inner',
)

# DEMAND DATASET
# Same idea for the time-phased demand rows.
rdt_temp_df = rdt_df.loc[rdt_df['ResourceId'].isin(res_df['ResourceId'])]
res_demand_df = rdt_temp_df.merge(
    res_df,
    on=['ResourceId', 'ResourceName'],
    how='inner',
)

# MERGE LOAD & CAPACITY
# One row per (resource, day) present in both datasets. Columns that exist on
# both sides get the _x (capacity side) / _y (demand side) suffixes.
combined_df = res_capacity_df.merge(res_demand_df, on=['ResourceId', 'TimeByDay'], how='inner')

# DATA LOC for processing
# Select and rename the working columns, then drop incomplete rows.
# The result of dropna() is assigned back: calling it without assignment
# returns a new frame and leaves the original untouched, so nulls would
# otherwise never be removed.
dem_cap_df = (
    combined_df
    .loc[:, ['ResourceId',
             'DataCentralID_x',
             'TimeByDay',
             'Capacity',
             'ResourceDemand']]
    .rename(columns={'DataCentralID_x': 'easi_id',
                     'TimeByDay': 'time',
                     'Capacity': 'capacity',
                     'ResourceDemand': 'demand'})
    .dropna()
)
dem_cap_df.shape

In [None]:
# COMBINED MAIN DATAFRAME & LOAD CAP DATAFRAME
# Attach the work nature of each resource to its demand/capacity rows.
x = dem_cap_df.merge(temp5_df, on='ResourceId', how='inner')

# Change datatypes for optimization.
# float32 is used rather than float16: half precision keeps only ~3 significant
# digits and overflows above 65504, which is not safe for values that are
# summed in later cells.
x = x.astype({
    'easi_id': 'int64',
    'demand': 'float32',
    'capacity': 'float32',
    'time': 'datetime64[ns]',
})

In [None]:
# CALL DATASET FROM DB
# The connection URL contains the DB user and password, so it is read from the
# environment instead of being stored in the notebook. Expected form:
#   mysql+pymysql://<user>:<password>@<host>:<port>/<database>
# A missing DC_DB_URL raises KeyError before any connection is attempted.
engine = db.create_engine(os.environ['DC_DB_URL'], echo=False)
res_db_df = pd.read_sql_query("SELECT easi_id, first_name, last_name, team, base FROM resources", engine)
team_db_df = pd.read_sql_query("SELECT * FROM teams", engine)
dep_db_df = pd.read_sql_query("SELECT * FROM departments", engine)

# Data prep
# resources -> teams (resources.team = teams.id) -> departments
# (teams.department = departments.id). Left joins keep every resource.
db1_df = res_db_df.merge(team_db_df, left_on='team', right_on='id', how='left')
db2_df = db1_df.merge(dep_db_df, left_on='department', right_on='id', how='left')
db2_df['full_name'] = db2_df['last_name'] + "," + db2_df['first_name']

# After the second merge 'name_x' is the name column already present in db1_df
# and 'name_y' the one from departments; they are selected and renamed to
# team / department. Chained rename (instead of inplace=True on a sliced frame)
# avoids SettingWithCopyWarning.
db3_df = (
    db2_df
    .loc[:, ['easi_id',
             'full_name',
             'name_x',
             'name_y']]
    .rename(columns={'name_x': 'team',
                     'name_y': 'department'})
)

# Only resources of these departments are reported on.
res_db_fil = ['MEC', 'SES']
db4_df = db3_df.loc[db3_df['department'].isin(res_db_fil)]

In [None]:
# JOIN WITH DB DATAFRAME
# Attach name / team / department from the DB to each demand/capacity row;
# rows whose easi_id is not present in db4_df are dropped by the inner join.
x1 = x.merge(db4_df, on='easi_id', how='inner')

# Columns carried forward into the report
report_columns = [
    'work_nature',
    'demand',
    'capacity',
    'time',
    'full_name',
    'team',
    'department',
]
x2 = x1.loc[:, report_columns]

In [None]:
# FILTER OUT EXPIRED DATASET
# Keep only rows dated after 2019-12-31. The explicit .copy() gives end_df its
# own data, so later cells can overwrite its columns without pandas raising
# SettingWithCopyWarning for writing into a filtered slice of x2.
end_df = x2.loc[x2['time'] > '2019-12-31'].copy()

In [None]:
# Split into 3 dataframes depending on work nature

# Replace the timestamp by a '<year>-<month>' label. assign() builds a new
# frame instead of writing into end_df (which is a filtered slice), so no
# SettingWithCopyWarning is raised.
# NOTE(review): the month is not zero-padded ('2020-1', '2020-10'), so these
# labels do not sort chronologically as strings - confirm this format is the
# one expected downstream.
end_df = end_df.assign(
    time=pd.to_datetime(end_df['time']).apply(
        lambda ts: '{year}-{month}'.format(year=ts.year, month=ts.month)
    )
)

act_filter = ['Actual Work']
pot_filter = ['Potential Work']
ris_filter = ['At risk Work']

# One frame per work nature
act_df = end_df.loc[end_df['work_nature'].isin(act_filter)]
pot_df = end_df.loc[end_df['work_nature'].isin(pot_filter)]
ris_df = end_df.loc[end_df['work_nature'].isin(ris_filter)]


In [None]:
# Group by the time and name: total demand and capacity per person per month.
# The columns are selected with a list ([[...]]); selecting them with a bare
# tuple of keys is deprecated and raises in pandas 2.x.
value_columns = ['demand', 'capacity']
act_agg_df = act_df.groupby(['full_name', 'time'])[value_columns].sum().reset_index()
pot_agg_df = pot_df.groupby(['full_name', 'time'])[value_columns].sum().reset_index()
ris_agg_df = ris_df.groupby(['full_name', 'time'])[value_columns].sum().reset_index()

In [None]:
# (rows, columns) of the aggregated 'Actual Work' frame
act_agg_df.shape

In [None]:
# Aggregate demand and capacity per person per month over all work natures.

# Normalise 'time' to the '<year>-<month>' label. If an earlier cell already
# converted it, the labels are parsed back to dates and formatted again.
# assign() builds a new frame rather than writing into a filtered slice.
end_df = end_df.assign(
    time=pd.to_datetime(end_df['time']).apply(
        lambda ts: '{year}-{month}'.format(year=ts.year, month=ts.month)
    )
)
# Columns are selected with a list; a bare tuple of keys is deprecated and
# raises in pandas 2.x.
test = end_df.groupby(['full_name', 'time'])[['demand', 'capacity']].sum().reset_index()
test.head()

In [None]:
# INJECT TO DB (currently disabled: uncomment the line below to append end_df to the `timephases` table)
# end_df.to_sql('timephases',con=engine, if_exists='append', index=False)

In [None]:
#---------------------- TO BE TESTED----------------------#
#------------- TEST METHOD SPLIT per dataset -------------#
#----------------------TO BE TESTED----------------------#
# Capacity-only view: select the capacity columns from the capacity dataset and
# give them the working names. Chained rename (instead of inplace=True on a
# sliced frame) returns a new frame and avoids SettingWithCopyWarning.
res_cap_df = (
    res_capacity_df
    .loc[:, ['ResourceId',
             'Capacity',
             'TimeByDay',
             'DataCentralID']]
    .rename(columns={'Capacity': 'capacity',
                     'TimeByDay': 'time',
                     'DataCentralID': 'easi_id'})
)

res_cap_df.shape

In [None]:
# Spot check: all work-nature rows of one specific resource
temp5_df.loc[temp5_df['ResourceId'].eq('ee4cf8ec-b0de-e811-9640-d4258bdae0ca')]


In [None]:
# merge with main df
# Attach the work nature of each resource to its capacity rows.
z = res_cap_df.merge(temp5_df, on='ResourceId', how='inner')

# convert dataset
# float32 rather than float16: half precision keeps only ~3 significant digits
# and overflows above 65504.
z = z.astype({
    'easi_id': 'int64',
    'capacity': 'float32',
    'time': 'datetime64[ns]',
})
z['capacity'] = z['capacity'].round(1)

# JOIN WITH DB DATAFRAME
# Rows whose easi_id is not present in db4_df are dropped by the inner join.
z1 = z.merge(db4_df, on='easi_id', how='inner')

z2 = z1.loc[:, ['work_nature',
                'capacity',
                'time',
                'full_name',
                'team',
                'department']]

# Replace the timestamp by a '<year>-<month>' label.
# The label is built from z2's own 'time' column. Building it from z['time']
# would be aligned on the index, and z2 carries the new index produced by the
# merge above, so rows would receive the time of unrelated rows of z.
# assign() is used so that nothing is written into a sliced frame.
z2 = z2.assign(
    time=pd.to_datetime(z2['time']).apply(
        lambda ts: '{year}-{month}'.format(year=ts.year, month=ts.month)
    )
)

z2_act_filter = ['Actual Work']
z2_pot_filter = ['Potential Work']
z2_ris_filter = ['At risk Work']

# One frame per work nature
z2_act_df = z2.loc[z2['work_nature'].isin(z2_act_filter)]
z2_pot_df = z2.loc[z2['work_nature'].isin(z2_pot_filter)]
z2_ris_df = z2.loc[z2['work_nature'].isin(z2_ris_filter)]

# String comparison on the '<year>-<month>' label: keeps labels that sort
# after '2020-00'.
z2_act_df = z2_act_df.loc[z2_act_df['time'] > '2020-00']


In [None]:
# 'Actual Work' rows whose '<year>-<month>' label sorts after '2020-00'
label_after_2020_00 = z2_act_df['time'] > '2020-00'
z3 = z2_act_df.loc[label_after_2020_00]
