# GSO Comparison

In [1]:
#!pip install lxml
# datetime ships with the Python standard library, so it needs no pip install.
from lxml import etree as ET
from datetime import datetime
import pandas as pd
import os

# Folder (relative to the current working directory) that holds the .gso
# files to compare; the resolved path is printed so the reader can verify it.
workspace_folder = 'workspace_GSO_Comparison'
dir_workspace = os.path.join(os.getcwd(), workspace_folder)
print(dir_workspace)

D:\GIT_Repositories\DevFactory\JupyterNoteBook_Dev\workspace_GSO_Comparison


#### Function : get_single_element_dictionary  ####
* Builds a dictionary of every element with the given tag name, keyed by the element's `id` attribute; each value is that element's attribute mapping.

In [2]:
def get_single_element_dictionary(tree, main_element):
    """Index every ``main_element`` node in ``tree`` by its ``id`` attribute.

    Args:
        tree: Parsed XML tree (anything exposing ``findall``).
        main_element: Tag name to search for anywhere below the root.

    Returns:
        dict mapping each node's ``id`` attribute value to that node's
        attribute mapping. Nodes without an ``id`` attribute are stored under
        the key ``None``; when several nodes share a key, the last one found
        wins.
    """
    # ".//" makes the search recursive, so nested occurrences are included.
    search_path = ".//" + main_element
    attribute_maps = (node.attrib for node in tree.findall(search_path))
    return {attributes.get("id"): attributes for attributes in attribute_maps}

#### Function : get_gso_dictionary  ####
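* Parses a GSO file and builds a dictionary keyed by element tag name; each value is the ID-keyed dictionary returned by `get_single_element_dictionary`. An empty dictionary is returned when the root tag is not `entities`.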

In [3]:
def get_gso_dictionary(filename, list_elements):
    """Parse a GSO XML file and index the requested element types.

    Args:
        filename: Path of the XML file to parse.
        list_elements: Iterable of element tag names to collect.

    Returns:
        dict mapping each tag name in ``list_elements`` to the ID-keyed
        dictionary produced by ``get_single_element_dictionary``. An empty
        dict is returned when the document's root tag is not ``entities``.
    """
    tree = ET.parse(filename)

    # Only documents rooted at <entities> are indexed; anything else yields {}.
    if tree.getroot().tag != "entities":
        return {}

    return {
        element_name: get_single_element_dictionary(tree, element_name)
        for element_name in list_elements
    }

In [4]:
def convert_lxml_to_dict(lxml_dict):
    """Copy a mapping-like object into a plain ``dict``.

    Args:
        lxml_dict: Object that yields its keys when iterated and supports
            ``obj[key]`` lookup (used in this notebook for element attributes).

    Returns:
        A new ``dict`` holding the same key/value pairs.
    """
    return {name: lxml_dict[name] for name in lxml_dict}

#### Function : compare  ####
* Compares the two GSO dictionaries and reports, for each element, the entries whose IDs are present in only one of them (left-only and right-only), together with a summary of the counts

In [5]:
def compare(dict_gso_1, dict_gso_2):
    """Report, per element, the entries whose IDs exist on only one side.

    Elements are matched by their key (element name), not by their position
    in the two dictionaries, so differing key order or an element that is
    missing from one side cannot cause unrelated elements to be diffed
    against each other or silently dropped. An element missing from one side
    is treated as having no entries on that side.

    Args:
        dict_gso_1: "Left" mapping of element name -> {id: attributes}.
        dict_gso_2: "Right" mapping with the same structure.

    Returns:
        Tuple ``(list_summary, list_result)``:
          * ``list_summary`` - one dict per element with the keys ``ELEMENT``,
            ``LEFT_EXTRA_COUNT`` and ``RIGHT_EXTRA_COUNT``.
          * ``list_result`` - one dict per element with the keys ``ELEMENT``,
            ``LEFT_EXTRA`` and ``RIGHT_EXTRA``; the latter two are lists of
            plain-dict copies of the entries found on that side only, in the
            order they appear in the source mapping.
    """
    list_result = []
    list_summary = []

    # Left-hand element names first (original order), then any names that
    # only the right-hand dictionary knows about.
    element_names = list(dict_gso_1)
    element_names.extend(name for name in dict_gso_2 if name not in dict_gso_1)

    for element_name in element_names:
        left_entries = dict_gso_1.get(element_name, {})
        right_entries = dict_gso_2.get(element_name, {})

        # Walk the source mapping rather than a set difference so the output
        # order is deterministic.
        list_left_extra = [
            convert_lxml_to_dict(left_entries[entry_id])
            for entry_id in left_entries
            if entry_id not in right_entries
        ]
        list_right_extra = [
            convert_lxml_to_dict(right_entries[entry_id])
            for entry_id in right_entries
            if entry_id not in left_entries
        ]

        list_result.append({
            'ELEMENT': element_name,
            'LEFT_EXTRA': list_left_extra,
            'RIGHT_EXTRA': list_right_extra,
        })
        list_summary.append({
            'ELEMENT': element_name,
            'LEFT_EXTRA_COUNT': len(list_left_extra),
            'RIGHT_EXTRA_COUNT': len(list_right_extra),
        })

    return list_summary, list_result

#### Main ####

In [6]:
# Element tag names to index and compare. NOTE(review): 'entiyType' is kept
# exactly as written; the recorded run reports entries under that name, so it
# presumably matches the tag used in the .gso files - confirm before renaming.
list_elements = [
    'entiyType', 'beField', 'dataGroup', 'relation', 'occurrence', 'mapping',
    'businessEntity', 'completeness', 'entityRelation', 'naturalKey',
    'orderByParticipant',
]
file_name_1 = os.path.join(dir_workspace, 'GSO Repository.gso')
file_name_2 = os.path.join(dir_workspace, 'GSO Repository1.gso')

print("## INFO : List of Elements :- ")
print(", ".join(list_elements))

# Index the first (left) file and report how long the parse took.
print("## INFO : Generating dictionary for " + file_name_1)
before = datetime.now()
dict_gso_1 = get_gso_dictionary(file_name_1, list_elements)
elapsed_seconds = (datetime.now() - before).total_seconds()
print("## INFO : Successfully generated dictionary in " + str(elapsed_seconds) + " seconds")

# Index the second (right) file the same way.
print("## INFO : Generating dictionary for " + file_name_2)
before = datetime.now()
dict_gso_2 = get_gso_dictionary(file_name_2, list_elements)
elapsed_seconds = (datetime.now() - before).total_seconds()
print("## INFO : Successfully generated dictionary in " + str(elapsed_seconds) + " seconds")



## INFO : List of Elements :- 
entiyType, beField, dataGroup, relation, occurrence, mapping, businessEntity, completeness, entityRelation, naturalKey, orderByParticipant
## INFO : Generating dictionary for D:\GIT_Repositories\DevFactory\JupyterNoteBook_Dev\workspace_GSO_Comparison\GSO Repository.gso
## INFO : Successfully generated dictionary in 2.184968 seconds
## INFO : Generating dictionary for D:\GIT_Repositories\DevFactory\JupyterNoteBook_Dev\workspace_GSO_Comparison\GSO Repository1.gso
## INFO : Successfully generated dictionary in 2.124806 seconds


In [7]:
# Diff the two indexed files, then show the per-element counts as a table.
list_summary, list_result = compare(dict_gso_1, dict_gso_2)
df = pd.DataFrame(data=list_summary)
df

Unnamed: 0,ELEMENT,LEFT_EXTRA_COUNT,RIGHT_EXTRA_COUNT
0,entiyType,1,1
1,beField,0,1
2,dataGroup,0,0
3,relation,0,0
4,occurrence,0,0
5,mapping,1,0
6,businessEntity,0,0
7,completeness,0,0
8,entityRelation,0,0
9,naturalKey,0,0


In [8]:
# Take the detailed result for the 'entiyType' element and list the entries
# that exist only in the first (left) file.
entity_type = [record for record in list_result if record['ELEMENT'] == 'entiyType'][0]
df1 = pd.DataFrame(data=entity_type['LEFT_EXTRA'])
df1


Unnamed: 0,desc,id,mainTable,name,nameSpace
0,Generated default type for table FT_T_ACCT,BGACCT000000000X,FT_T_ACCT,Account,GSC


In [9]:
# Entries of the same element that exist only in the second (right) file.
df2 = pd.DataFrame(data=entity_type['RIGHT_EXTRA'])
df2

Unnamed: 0,desc,id,mainTable,name,nameSpace
0,Generated default type for table FT_T_ACCT,BGACCT000000000Y,FT_T_ACCT,Account,GSC
