In [14]:
import pandas as pd
import numpy as np
import json
import os
import urllib3
import csv
import logging
import re
import uuid
from pathlib import Path
from idmc_utils import generate_taskflow, package_import
from urllib.parse import quote

# Show every column when a data frame is displayed (None removes the column cap)
pd.options.display.max_columns = None

# Initialise the job

In [15]:
# Initialise the log file
# basicConfig opens the file immediately and does not create missing parent
# directories, so make sure the log directory exists first.
Path('logs').mkdir(parents=True, exist_ok=True)
logging.basicConfig(
    filename='logs/console.log',
    level=logging.DEBUG,
    format='%(asctime)s:%(levelname)s:%(message)s',
)

In [16]:
# Read the config file
# The encoding is given explicitly: JSON is UTF-8, while open() would otherwise
# fall back to the platform default and could mis-decode non-ASCII values.
logging.info('Reading the config file')
with open('config/config.json', 'r', encoding='utf-8') as infile:
    config = json.load(infile)


# Read the Inputs

In [17]:
# Load the execution plans: every column is read as text (blank cells stay as
# empty strings), then the step order is converted to an integer.
logging.info('Reading the execution plans')
dfPlans = (
    pd.read_csv('in/plans.csv', dtype='str', encoding='utf-8', na_filter=False)
    .assign(plan_step_order=lambda plans: plans['plan_step_order'].astype(int))
)

# Look up the Converted Mapping Task IDs

In [18]:
# Login to IDMC
logging.info('Logging into IDMC')
http = urllib3.PoolManager()

# Serialise the credentials with json.dumps so that quotes, backslashes and
# non-ASCII characters in the user name or password are escaped correctly.
data = json.dumps({
    'username': config['idmc']['user'],
    'password': config['idmc']['password'],
}).encode('utf-8')

url = 'https://' + config['idmc']['host'] + '/saas/public/core/v3/login'
# NOTE(review): urllib3 timeouts are in seconds, so 3000 is 50 minutes - confirm this is intended
r = http.request(
    'POST',
    url,
    timeout=3000,
    body=data,
    headers={
            'Accept': 'application/json',
            'Content-Type': 'application/json'
        }
    )

# Stop with a clear message when the login is rejected, rather than failing on
# a missing key below. The response body is deliberately not logged.
if not 200 <= r.status < 300:
    logging.error(f'IDMC login failed with HTTP status {r.status}')
    raise RuntimeError(f'IDMC login failed with HTTP status {r.status}')

# Extract the session ID used to authenticate the later requests
result = json.loads(r.data.decode('utf-8'))
sessionID = result['userInfo']['sessionId']


In [19]:
# Get the secure agent ID
logging.info('Getting the secure agent ID')
url = 'https://' + config['idmc']['pod'] + '.' + config['idmc']['host'] + '/saas/api/v2/runtimeEnvironment/name/' + quote(config['idmc']['agentGroupName'])
r = http.request(
    'GET',
    url,
    timeout=3000,
    headers={
            'Accept': 'application/json',
            'icSessionId': sessionID
        }
    )

# Stop with a clear message when the lookup fails, rather than failing on a
# missing key below.
if not 200 <= r.status < 300:
    logging.error(f'Runtime environment lookup failed with HTTP status {r.status}')
    raise RuntimeError(f'Runtime environment lookup failed with HTTP status {r.status}')

# Read the agent group identifiers from the JSON response
result = json.loads(r.data.decode('utf-8'))
agentGroupID = result['id']
agentGroupGUID = result['federatedId']
agentGroupName = config['idmc']['agentGroupName']

In [20]:
# Fetch every mapping task (type 'MTT') from IDMC, one page at a time
logging.info('Getting a list of the mapping tasks')
skip = 0
limit = 200
taskPages = []

# Page through mapping task queries
while True:

    # Request the page that starts at the current offset
    url = 'https://' + config['idmc']['pod'] + '.' + config['idmc']['host'] + '/saas/public/core/v3/objects?q=type==%27MTT%27&limit=' + str(limit) + '&skip=' + str(skip)
    r = http.request(
        'GET',
        url,
        timeout=3000,
        headers={
                'Accept': 'application/json',
                'INFA-SESSION-ID': sessionID
            }
        )

    # Stop with a clear message when the query is rejected
    if not 200 <= r.status < 300:
        logging.error(f'Mapping task query failed with HTTP status {r.status}')
        raise RuntimeError(f'Mapping task query failed with HTTP status {r.status}')

    # Flatten the objects of this page into one row per task
    result = json.loads(r.data.decode('utf-8'))
    # 'count' is treated as the total number of matching objects, as the
    # original loop did - NOTE(review): confirm against the IDMC API reference
    taskCount = int(result.get('count', 0))
    pageObjects = result.get('objects') or []
    if pageObjects:
        taskPages.append(pd.json_normalize(pageObjects))

    # Advance the offset so the next request returns the next page; stop once
    # the offset has passed the reported total or the page came back empty
    skip = skip + limit
    if not pageObjects or skip >= taskCount:
        break

# Combine the pages once, instead of growing the frame inside the loop
dfTasks = pd.concat(taskPages, ignore_index=True) if taskPages else pd.DataFrame()

# Keep dfResp bound to the complete result set, since it is a notebook-level
# name that other cells may read
dfResp = dfTasks.copy()



In [21]:
# Join the IDMC info onto the plans
# Build the lookup from dfTasks (all pages) rather than from the frame of a
# single page, and key it on the last path component, which is compared with
# the plan's step_name.
if dfTasks.empty:
    dfTaskLookup = pd.DataFrame(columns=['step_name', 'infa_id', 'infa_path'])
else:
    dfTaskLookup = (
        dfTasks
        .assign(step_name=dfTasks['path'].apply(os.path.basename))
        [['step_name', 'id', 'path']]
        .rename(columns={'id': 'infa_id', 'path': 'infa_path'})
        # A task listed more than once must not multiply the plan rows in the merge
        .drop_duplicates(subset='infa_id')
    )

# Drop the results of a previous run of this cell so that re-running it does
# not produce suffixed duplicates (infa_id_x / infa_id_y)
dfPlans = (
    dfPlans
    .drop(columns=['infa_id', 'infa_path'], errors='ignore')
    .merge(dfTaskLookup, how='left', on='step_name')
)

# Attach the agent and script settings, which are the same for every step
dfPlans['agent_id'] = agentGroupID
dfPlans['agent_guid'] = agentGroupGUID
dfPlans['agent_name'] = agentGroupName
dfPlans['script_dir'] = config['local']['scriptsDir']
dfPlans['script_args'] = '' # TODO placeholder for any args that need to be passed to the step script

In [22]:
# Generate the unique identifiers
# One 32-character hex id per step. A list is used instead of
# DataFrame.apply(axis=1), which does not return a Series on an empty frame.
dfPlans['dac2idmc_step_id'] = [uuid.uuid4().hex for _ in range(len(dfPlans.index))]

# One id per (plan, step order) pair: steps with the same order inside a plan
# share a group id, while different plans no longer reuse each other's ids.
groupKeys = list(zip(dfPlans['plan_wid'], dfPlans['plan_step_order']))
map_order_uuid = { groupKey: uuid.uuid4().hex for groupKey in dict.fromkeys(groupKeys) }
dfPlans['dac2idmc_group_id'] = [map_order_uuid[groupKey] for groupKey in groupKeys]


In [23]:
# Preview the first five rows of the enriched plans
dfPlans.head(n=5)

Unnamed: 0,plan_wid,plan_name,plan_inactive_flag,plan_step_wid,plan_step_order,plan_step_type,step_guid,step_wid,step_cmd,step_name,infa_id,infa_path,agent_id,agent_guid,agent_name,script_dir,script_args,dac2idmc_step_id,dac2idmc_group_id
0,201C9CC7C59D167A79A2E247C6189A67,Echo Employee Snapshot Oracle R12.1.3,N,4F418CAB76FCD515C5A7F3C94D552CF,0,REGULAR,4F418CAB76FCD515C5A7F3C94D552CF,1911f67f35d9a487283f503fc7ab2ac,SDE_ORA_EmployeeDailySnapshotFact_2,SDE_ORA_EmployeeDailySnapshotFact_2,a0Ks8uNXYKLg38LRNzw6gv,Default/SDE_ORA_EmployeeDailySnapshotFact_2,010SU125000000000002,cPbb2XLzpoweqfBP9W6kOJ,AUW487V7S3-AAD,C:\Informatica\scripts,,e485876319d848c59df09187c89b40fd,b4871a946a6344129827e45c9d8b4157
1,201C9CC7C59D167A79A2E247C6189A67,Echo Employee Snapshot Oracle R12.1.3,N,D1A6418BA9B721A3282ECC5948E8866D,0,REGULAR,,15cf56791e1279889d61b836271199a,SDE_ORA_EmployeeDailySnapshotFact_3,SDE_ORA_EmployeeDailySnapshotFact_3,8gORkCCxfM8hk1VxABRRqd,Default/SDE_ORA_EmployeeDailySnapshotFact_3,010SU125000000000002,cPbb2XLzpoweqfBP9W6kOJ,AUW487V7S3-AAD,C:\Informatica\scripts,,94b84ec8dfbf439082e2f9b7b0e8806c,b4871a946a6344129827e45c9d8b4157
2,201C9CC7C59D167A79A2E247C6189A67,Echo Employee Snapshot Oracle R12.1.3,N,C4FA37D5638DC49425B9A3E8572EF1,0,REGULAR,,17d561355624ced1967afc8fb7a1836e,SDE_ORA_EmployeeDailySnapshotFact_4,SDE_ORA_EmployeeDailySnapshotFact_4,6vPlMGJ7sw6juVvUtKpxYq,Default/SDE_ORA_EmployeeDailySnapshotFact_4,010SU125000000000002,cPbb2XLzpoweqfBP9W6kOJ,AUW487V7S3-AAD,C:\Informatica\scripts,,b47346a9bb4043d2835baa7beffcb05c,b4871a946a6344129827e45c9d8b4157
3,201C9CC7C59D167A79A2E247C6189A67,Echo Employee Snapshot Oracle R12.1.3,N,C1CB82CC89178BE3424CEE1896ED,0,REGULAR,,88818EC577E3BDEA5FAF5D56ED6E8442,,TASK_GROUP_Extract_EmployeeDailySnapshotFact_P...,7PjDO1zxryekytQ7Oh1zEP,Default/TASK_GROUP_Extract_EmployeeDailySnapsh...,010SU125000000000002,cPbb2XLzpoweqfBP9W6kOJ,AUW487V7S3-AAD,C:\Informatica\scripts,,ca6c4589c49844019d1c1c16fea5285d,b4871a946a6344129827e45c9d8b4157
4,201C9CC7C59D167A79A2E247C6189A67,Echo Employee Snapshot Oracle R12.1.3,N,C9EA5C489D4F8CE8B019A694E906B6B,1,REGULAR,,6bcdbdd6812a9ea517908ca566436fb,SDE_ORA_EmployeeDailySnapshotFact_1,SDE_ORA_EmployeeDailySnapshotFact_1,aeOQ6VugmCxipbYyKlDRUr,Default/SDE_ORA_EmployeeDailySnapshotFact_1,010SU125000000000002,cPbb2XLzpoweqfBP9W6kOJ,AUW487V7S3-AAD,C:\Informatica\scripts,,e39c5cb55faa4b049019d848968e28f4,98bbaa99986540bbadbb8194a49a8853


In [24]:
# Log an error if any plans did not find an existing matching task
# A step counts as missing when the left join found no task (NaN) or the id is blank.
dfMissing = dfPlans[(dfPlans['infa_id'] == '') | (dfPlans['infa_id'].isna())].copy()
if not dfMissing.empty:
    # to_csv does not create missing parent directories, so create it here
    Path('out').mkdir(parents=True, exist_ok=True)
    # Write the report before logging, so the message points at a file that exists
    dfMissing.to_csv('out/missing_tasks.csv', index=False, quoting=csv.QUOTE_ALL)
    logging.error('Some plans are missing a converted mapping task. Please see "out/missing_tasks.csv" for more details')

# Generate the Taskflows

In [None]:
#TODO Delete this after initial testing is finished
# While testing, only the first TEST_ROW_LIMIT plan steps are kept.
# Set TEST_ROW_LIMIT to None to process every step.
# Note that this overwrites dfPlans, so all later cells see the reduced frame.
TEST_ROW_LIMIT = 1
if TEST_ROW_LIMIT is not None:
    dfPlans = dfPlans.head(TEST_ROW_LIMIT)
dfPlans

plan_wid                         201C9CC7C59D167A79A2E247C6189A67
plan_name                   Echo Employee Snapshot Oracle R12.1.3
plan_inactive_flag                                              N
plan_step_wid                     4F418CAB76FCD515C5A7F3C94D552CF
plan_step_order                                                 0
plan_step_type                                            REGULAR
step_guid                         4F418CAB76FCD515C5A7F3C94D552CF
step_wid                          1911f67f35d9a487283f503fc7ab2ac
step_cmd                      SDE_ORA_EmployeeDailySnapshotFact_2
step_name                     SDE_ORA_EmployeeDailySnapshotFact_2
infa_id                                    a0Ks8uNXYKLg38LRNzw6gv
infa_path             Default/SDE_ORA_EmployeeDailySnapshotFact_2
agent_id                                     010SU125000000000002
agent_guid                                 cPbb2XLzpoweqfBP9W6kOJ
agent_name                                         AUW487V7S3-AAD
script_dir

In [26]:
# Generate and package one taskflow per execution plan
for planId in dfPlans['plan_wid'].unique():

    # Keep only the steps that belong to the current plan
    dfPlan = dfPlans.loc[dfPlans['plan_wid'] == planId].copy()

    # New 32-character hex identifier for the taskflow
    taskflowID = uuid.uuid4().hex

    # Name the taskflow after the plan; each run of non-word characters
    # becomes a single underscore
    taskflowName = re.sub(r'\W+', '_', dfPlan.iloc[0]['plan_name'])

    # Create the workspace directories for this taskflow
    logging.info(f'Create workspace tree "tmp/{ taskflowName }/Explore/Default"')
    workspaceDir = Path(f'tmp/{ taskflowName }/Explore/Default')
    workspaceDir.mkdir(parents=True, exist_ok=True)

    # Build the taskflow from the plan steps
    logging.info(f'Generating taskflow "{ taskflowName }"')
    generate_taskflow.generate_taskflow(taskflowID, taskflowName, dfPlan)

    # Package the generated taskflow
    logging.info(f'Packaging taskflow "{ taskflowName }"')
    package_import.package_import(taskflowID, taskflowName)

    logging.info('Done!')

AttributeError: 'str' object has no attribute 'unique'

In [None]:
# Show the plan steps ordered by step order (returns a sorted copy; dfPlans is unchanged)
dfPlans.sort_values(by='plan_step_order')

Unnamed: 0,plan_wid,plan_name,plan_inactive_flag,plan_step_wid,plan_step_order,plan_step_type,step_guid,step_wid,step_cmd,step_name,infa_id,infa_path,agent_id,agent_guid,agent_name,script_dir,script_args,dac2idmc_step_id,dac2idmc_group_id
0,201C9CC7C59D167A79A2E247C6189A67,Echo Employee Snapshot Oracle R12.1.3,N,4F418CAB76FCD515C5A7F3C94D552CF,0,REGULAR,4F418CAB76FCD515C5A7F3C94D552CF,1911f67f35d9a487283f503fc7ab2ac,SDE_ORA_EmployeeDailySnapshotFact_2,SDE_ORA_EmployeeDailySnapshotFact_2,a0Ks8uNXYKLg38LRNzw6gv,Default/SDE_ORA_EmployeeDailySnapshotFact_2,010SU125000000000002,cPbb2XLzpoweqfBP9W6kOJ,AUW487V7S3-AAD,C:\Informatica\scripts,,0031048a41b7438499fd483631446e41,69d57dbd8b5847c9871804fc647a0b80
1,201C9CC7C59D167A79A2E247C6189A67,Echo Employee Snapshot Oracle R12.1.3,N,D1A6418BA9B721A3282ECC5948E8866D,0,REGULAR,,15cf56791e1279889d61b836271199a,SDE_ORA_EmployeeDailySnapshotFact_3,SDE_ORA_EmployeeDailySnapshotFact_3,8gORkCCxfM8hk1VxABRRqd,Default/SDE_ORA_EmployeeDailySnapshotFact_3,010SU125000000000002,cPbb2XLzpoweqfBP9W6kOJ,AUW487V7S3-AAD,C:\Informatica\scripts,,20fbaa4d0b714e48ab5bbe573409d9a2,69d57dbd8b5847c9871804fc647a0b80
2,201C9CC7C59D167A79A2E247C6189A67,Echo Employee Snapshot Oracle R12.1.3,N,C4FA37D5638DC49425B9A3E8572EF1,0,REGULAR,,17d561355624ced1967afc8fb7a1836e,SDE_ORA_EmployeeDailySnapshotFact_4,SDE_ORA_EmployeeDailySnapshotFact_4,6vPlMGJ7sw6juVvUtKpxYq,Default/SDE_ORA_EmployeeDailySnapshotFact_4,010SU125000000000002,cPbb2XLzpoweqfBP9W6kOJ,AUW487V7S3-AAD,C:\Informatica\scripts,,8fd391fbdb4e4446923ba049883de427,69d57dbd8b5847c9871804fc647a0b80
3,201C9CC7C59D167A79A2E247C6189A67,Echo Employee Snapshot Oracle R12.1.3,N,C1CB82CC89178BE3424CEE1896ED,0,REGULAR,,88818EC577E3BDEA5FAF5D56ED6E8442,,TASK_GROUP_Extract_EmployeeDailySnapshotFact_P...,7PjDO1zxryekytQ7Oh1zEP,Default/TASK_GROUP_Extract_EmployeeDailySnapsh...,010SU125000000000002,cPbb2XLzpoweqfBP9W6kOJ,AUW487V7S3-AAD,C:\Informatica\scripts,,7908441a5fc2442cb4e6f79e30c1c9fe,69d57dbd8b5847c9871804fc647a0b80
4,201C9CC7C59D167A79A2E247C6189A67,Echo Employee Snapshot Oracle R12.1.3,N,C9EA5C489D4F8CE8B019A694E906B6B,1,REGULAR,,6bcdbdd6812a9ea517908ca566436fb,SDE_ORA_EmployeeDailySnapshotFact_1,SDE_ORA_EmployeeDailySnapshotFact_1,aeOQ6VugmCxipbYyKlDRUr,Default/SDE_ORA_EmployeeDailySnapshotFact_1,010SU125000000000002,cPbb2XLzpoweqfBP9W6kOJ,AUW487V7S3-AAD,C:\Informatica\scripts,,55c830e738154a92bcbdb9a34fd65e84,eec04ab5f1d64e6bbab2f5eeaddf6094
5,201C9CC7C59D167A79A2E247C6189A67,Echo Employee Snapshot Oracle R12.1.3,N,743C7FED4E93373BF741E35ACCC2FB,1,REGULAR,,3e589d7525743f1159f9aef6d26dfb6d,SDE_ORA_EmployeeDailySnapshotFact_PerformanceR...,SDE_ORA_EmployeeDailySnapshotFact_PerformanceR...,bspgWgKqNAAkdFCD8X1EGw,Default/SDE_ORA_EmployeeDailySnapshotFact_Perf...,010SU125000000000002,cPbb2XLzpoweqfBP9W6kOJ,AUW487V7S3-AAD,C:\Informatica\scripts,,c9e6b652ceee406e95e4c6aa0da36cf4,eec04ab5f1d64e6bbab2f5eeaddf6094
6,201C9CC7C59D167A79A2E247C6189A67,Echo Employee Snapshot Oracle R12.1.3,N,352497EEA2C3B9CC108AA6D1C1D461,2,REGULAR,,cddaf55424eff460187a8840ba81641,SDE_ORA_EmployeeDailySnapshotFact_1_Update,SDE_ORA_EmployeeDailySnapshotFact_1_Update,980nZ1ZyECqlJELIoT6seu,Default/SDE_ORA_EmployeeDailySnapshotFact_1_Up...,010SU125000000000002,cPbb2XLzpoweqfBP9W6kOJ,AUW487V7S3-AAD,C:\Informatica\scripts,,de50f7b102f74ee2843fe8b12cd7d484,12d8edf413814618bb25852f13c5d146
7,201C9CC7C59D167A79A2E247C6189A67,Echo Employee Snapshot Oracle R12.1.3,N,453CB6AF9425B92F9830964A1BD6B2F8,3,REGULAR,,8049dfe7391b73f4b03f91685796a525,SDE_ORA_EmployeeDailySnapshotFact,SDE_ORA_EmployeeDailySnapshotFact,48EkOr6iGA3jPFoSNlYhmt,Default/SDE_ORA_EmployeeDailySnapshotFact,010SU125000000000002,cPbb2XLzpoweqfBP9W6kOJ,AUW487V7S3-AAD,C:\Informatica\scripts,,1215773c75e14064a092d0d2e58506ec,b7844c90b25140439a02be0c217907e5
8,201C9CC7C59D167A79A2E247C6189A67,Echo Employee Snapshot Oracle R12.1.3,N,9228D527E27845B78E5B48C8AEAE7DFE,4,REGULAR,,66477cb62a1282f18536b1d9f122ee,SIL_EmployeeDailySnapshotFact,SIL_EmployeeDailySnapshotFact,hrqZyoLnJWAbb4e5QoamgT,Default/SIL_EmployeeDailySnapshotFact,010SU125000000000002,cPbb2XLzpoweqfBP9W6kOJ,AUW487V7S3-AAD,C:\Informatica\scripts,,1112787d61b749479410c0673835cd00,1cc966acaace49e1b692ca5656f0d27c
9,201C9CC7C59D167A79A2E247C6189A67,Echo Employee Snapshot Oracle R12.1.3,N,5A2C48922673BCBD13A6AC9FD9D5706E,5,REGULAR,,bbaaf446a81097abce5777bc8e2b3758,PLP_EmployeeDailySnapshot_Trim,PLP_EmployeeDailySnapshot_Trim,bacDD1nRpeZcyoy0ts7ZqA,Default/PLP_EmployeeDailySnapshot_Trim,010SU125000000000002,cPbb2XLzpoweqfBP9W6kOJ,AUW487V7S3-AAD,C:\Informatica\scripts,,980de111f312458aaf6ad427fb67522c,7a36bab5ef154f1d931e5ef8534c1591
