In [24]:

import pandas as pd
import datacompy
import numpy as np
import os
from itertools import combinations
import re


# Maps Cognos export column names (keys) to the Looker column names (values).
# It is passed to DataFrame.rename on the Cognos frame so that both frames
# share the same headers before they are merged.
COGNOS_TO_LOOKER = {
    'Service Scope Group': 'Service Scope Group Code',
    'Dominant Leg Flag': 'Is Dominant (Yes / No)',
    'POR Location Code': 'POR Location Code',
    'SC/RFA Number': 'Contract Number',
    'SC/RFA Contract Type': 'Contract Type Name',
    'SC/RFA Group Customer Code': 'Contract Group Customer Code',
    'SC/RFA Group Customer Name': 'Contract Group Customer Name',
    'TEU Quantity':'Total TEU'
}

# Translates the single-letter flag values found in the Cognos data into the
# 'Yes' / 'No' wording (presumably the wording Looker uses -- confirm).
# Values not listed here become NaN when applied with Series.map.
yes_no_map = {
    'N': 'No',
    'Y': 'Yes'
}

# Name (after renaming) of the Cognos column that yes_no_map is applied to.
IS_DOMINANT_TEXT='Is Dominant (Yes / No)'

def _fill_missing_with_marker(frame):
    """Return a copy of ``frame`` with every null cell replaced by 'Not exists'."""
    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1; pick
    # whichever this pandas version provides.
    method_name = "map" if hasattr(pd.DataFrame, "map") else "applymap"
    return getattr(frame, method_name)(lambda x: 'Not exists' if pd.isnull(x) else x)


def create_diff_df(df_merged, columns):
    """Split a merged frame into rows that agree and rows that differ.

    For every name in ``columns`` the pair ``<name>_cognos`` / ``<name>_looker``
    is compared according to the dtype of the Cognos column:

    * float64  -- both sides rounded to 2 decimals, equal within ``atol=0.1``;
    * datetime -- both sides rounded to the second, then compared exactly;
    * object   -- case-insensitive string comparison;
    * any other dtype -- the column is skipped (not compared).

    A row is "matching" only when every compared column agrees. Null values
    never compare equal, so a null on either side makes the row a difference.

    Args:
        df_merged: DataFrame holding the ``_cognos`` / ``_looker`` column pairs.
        columns: Iterable of base column names (without suffix) to compare.

    Returns:
        ``(matching_df, diff_df)`` -- copies of the matching and differing
        rows, with every null cell replaced by the string ``'Not exists'``.
        When no column can be compared, every row is reported as matching.
    """
    conditions = []
    for column in columns:
        data_cognos = df_merged[f"{column}_cognos"]
        data_looker = df_merged[f"{column}_looker"]
        if data_cognos.dtype == np.float64:
            condition = np.isclose(data_cognos.round(2), data_looker.round(2), atol=0.1)
        elif pd.api.types.is_datetime64_any_dtype(data_cognos.dtype):
            # `dtype == np.datetime64` is never true for datetime64[ns] columns,
            # so the dtype has to be tested with the pandas helper instead.
            # Lower-case 's' is the non-deprecated alias for seconds.
            condition = data_cognos.dt.round('s') == data_looker.dt.round('s')
        elif data_cognos.dtype == object:
            condition = data_cognos.str.upper() == data_looker.str.upper()
        else:
            continue
        conditions.append(np.asarray(condition, dtype=bool))

    if conditions:
        combined_condition = np.all(conditions, axis=0)
    else:
        # np.all([]) collapses to a scalar, which cannot be used as a row mask;
        # with nothing to compare every row is (vacuously) a match.
        combined_condition = np.ones(len(df_merged), dtype=bool)

    diff_df = _fill_missing_with_marker(df_merged[~combined_condition].copy())
    matching_df = _fill_missing_with_marker(df_merged[combined_condition].copy())
    return matching_df, diff_df

def filter_and_return_dataframes(df):
    """Separate rows that are entirely missing on one side from real differences.

    A row counts as "missing in Cognos" when every column whose name contains
    ``_cognos`` holds the marker string ``'Not exists'``; likewise for
    ``_looker``. A row that is missing on both sides is returned in both
    "missing" frames and excluded from the remaining differences.

    Args:
        df: DataFrame whose null cells were already replaced by ``'Not exists'``.

    Returns:
        ``(df_missing_cognos, df_missing_looker, df_remaining_diff)``. The first
        two keep the original index; the third is returned after
        ``reset_index()``, so the old index appears as an ``index`` column.
    """
    missing_in_cognos = df.filter(like='_cognos').eq('Not exists').all(axis=1)
    missing_in_looker = df.filter(like='_looker').eq('Not exists').all(axis=1)

    df_missing_cognos = df.loc[missing_in_cognos]
    df_missing_looker = df.loc[missing_in_looker]
    # Boolean masks instead of chained .drop(): dropping by label raised
    # KeyError when the same row was missing on both sides, because the label
    # was already gone by the time of the second drop.
    df_remaining_diff = df.loc[~(missing_in_cognos | missing_in_looker)]
    return df_missing_cognos, df_missing_looker, df_remaining_diff.reset_index()

def split_dataframe(df):
    """Split ``df`` into a Cognos-side frame and a Looker-side frame.

    The first frame keeps every column whose name does not end with
    ``_looker``; the second keeps every column whose name does not end with
    ``_cognos``. Columns carrying neither suffix appear in both.

    Returns:
        ``(df_cognos_new, df_looker_new)``
    """
    def without_suffix(suffix):
        # Collect, in original order, the columns that do not carry `suffix`.
        kept = []
        for name in df.columns:
            if name.endswith(suffix):
                continue
            kept.append(name)
        return df[kept]

    return without_suffix("_looker"), without_suffix("_cognos")

def compare_and_style_data(diff_df, keys, suffix_cognos, suffix_looker):
    def highlight_cells(val):
        if val == 'Same':
            return ''
        else:
            return 'background-color: red; color: white;'
    common_prefixes = set(col.split(suffix_cognos)[0] for col in diff_df.columns[2:])
    for prefix in common_prefixes:
        cognos_cols = [col for col in diff_df.columns if col.startswith(f'{prefix}{suffix_cognos}')]
        looker_cols = [col for col in diff_df.columns if col.startswith(f'{prefix}{suffix_looker}')]
        for cognos_col, looker_col in zip(cognos_cols, looker_cols):
            column_name = f'Compare_{prefix}_(Cognos <> Looker)'
            data_type = diff_df[cognos_col].dtype
            if data_type == np.float64:
                diff_df[cognos_col] = diff_df[cognos_col].round(2)
                diff_df[looker_col] = diff_df[looker_col].round(2)
                condition = np.isclose(diff_df[cognos_col], diff_df[looker_col], atol=0.1)
            elif data_type == np.datetime64:
                rounded_cognos = diff_df[cognos_col].dt.round('S')
                rounded_looker = diff_df[looker_col].dt.round('S')
                condition = (rounded_cognos == rounded_looker)
            elif data_type == object:
                condition = (diff_df[cognos_col].str.upper() == diff_df[looker_col].str.upper())
            else:
                continue
            diff_df[column_name] = diff_df.apply(lambda row: f'({row[cognos_col]} <> {row[looker_col]})'
                                                 if not condition[row.name]
                                                 else 'Same', axis=1)
    selected_keys = keys + [col for col in diff_df.columns if col.startswith('Compare')]
    styled_df = diff_df[selected_keys].style.applymap(highlight_cells, subset=pd.IndexSlice[:, diff_df[selected_keys].columns.str.startswith('Compare_')])
    return styled_df

# Locations of the two Excel exports to compare (machine-specific absolute paths).
COGNOS_PATH = r"C:\Users\hoa.nd\Desktop\WAP\compare\20230516_LA_Eagle-X_Account\Copy of 20230516_LA_Eagle-X_Account_cognos_limit5000.xlsx"
LOOKER_PATH = r"C:\Users\hoa.nd\Desktop\WAP\compare\20230516_LA_Eagle-X_Account\Copy of 20230516_LA_Eagle-X_Account-looker_limit5000.xlsx"

# Folder path; not used anywhere in the visible code.
folder_path=r'C:\Users\hoa.nd\Desktop\WAP\compare\20230516_LA_Eagle-X_Account\child'

# Read every cell as a string so both sources compare as text.
df_cognos = pd.read_excel(COGNOS_PATH, dtype=str)
# Cognos column excluded from the comparison.
colum_cognos_drops=['Trunk VVD Service Lane Code']
df_cognos=df_cognos.drop(colum_cognos_drops,axis=1)

df_looker = pd.read_excel(LOOKER_PATH,dtype=str)

# Looker columns excluded from the comparison.
colum_looker_drops=['CM applicable','COA Trunk Service Code']

df_looker=df_looker.drop(colum_looker_drops,axis=1)

# Align the Cognos headers with the Looker headers, then convert the Cognos
# flag values through yes_no_map. This must happen after the rename, because
# IS_DOMINANT_TEXT is the renamed column name.
df_cognos.rename(columns=COGNOS_TO_LOOKER, inplace=True)
df_cognos[IS_DOMINANT_TEXT] = df_cognos[IS_DOMINANT_TEXT].map(yes_no_map)


# Column names available at this point, kept for reference:
# ['Sales Month', 'Sales Week', 'Service Scope Group Code',
#        'Service Scope Code', 'Revenue Lane Code', 'Is Dominant (Yes / No)',
#        'POR Location Code', 'Trunk POL Location Code',
#        'Trunk POD Location Code', 'DEL Location Code', 'Contract Number',
#        'Contract Type Name', 'Contract Group Customer Code',
#        'Contract Group Customer Name', 'Total TEU']
# Alternative key selection (disabled):
# keys_list= ['Contract Group Customer Code', 'Contract Group Customer Name']
# not_keys=  [item for item in list(df_cognos.columns) if item not in keys_list]

# Columns that are compared rather than joined on.
not_keys= ['Trunk POL Location Code','Trunk POD Location Code','Total TEU','Contract Type Name']

# Every remaining Cognos column is used as a join key.
keys_list = [x for x in list(df_cognos.columns) if x not in not_keys]

# compare = datacompy.Compare(df_cognos, df_looker, join_columns=keys_list, df1_name='Cognos', df2_name='Looker')
# print(compare.report())
# Inner join: only rows whose key columns match in both sources are kept.
# The non-key columns from each side get the '_cognos' / '_looker' suffix.
df_merged = pd.merge(df_cognos, df_looker, on=keys_list, how='inner', suffixes=('_cognos', '_looker'))
# Bare expression: as the last expression of a notebook cell it displays the frame.
df_merged
# Disabled duplicate-key diagnostics and de-duplicated merge:
# print("Number of duplicate values in df_cognos:", df_cognos.duplicated(subset=keys_list).sum())
# print("Number of duplicate values in df_looker:", df_looker.duplicated(subset=keys_list).sum())
# df_cognos = df_cognos.drop_duplicates(subset=keys_list)
# df_looker = df_looker.drop_duplicates(subset=keys_list)
# df_merged = pd.merge(df_cognos, df_looker, on=keys_list, how='inner', suffixes=('_cognos', '_looker'), indicator=True)
# print("Number of rows after performing the join:", len(df_merged))


# Disabled datacompy-based mismatch export (one Excel file per compared column):
# for column in not_keys:
#     mismatch_data = compare.sample_mismatch(column.lower(), compare.all_mismatch().shape[0], for_display=True)
#     file_name=f'{column.replace('/', '')}.xlsx'
#     mismatch_data.to_excel(file_name,index=False)
# #     # Append the mismatch data to the report DataFrame
# #     mismatch_report = pd.concat([mismatch_report, mismatch_data])

# # # Write the report DataFrame to an Excel file
# # report_file_path = 'mismatch_report.xlsx'
# # mismatch_report.to_excel(report_file_path, index=False, engine='openpyxl')

# print(compare.report())





    




Unnamed: 0,Sales Month,Sales Week,Service Scope Group Code,Service Scope Code,Revenue Lane Code,Is Dominant (Yes / No),POR Location Code,Trunk POL Location Code_cognos,Trunk POD Location Code_cognos,DEL Location Code,Contract Number,Contract Type Name_cognos,Contract Group Customer Code,Contract Group Customer Name,Total TEU_cognos,Trunk POL Location Code_looker,Trunk POD Location Code_looker,Contract Type Name_looker,Total TEU_looker
0,2023/M02,2023/W07,EA,IAA,JT1EA,No,THLKR,THLCH,JPTYO,JPTYO,TTYOB00064A,Medium,G-JP107763,NIPPON SUISAN,8,THLCH,JPTYO,Medium,8
1,2023/M01,2023/W01,EA,IAA,JV2EA,No,THLKR,THLCH,JPUKB,JPUKB,TTYOB00064A,Medium,G-JP107763,NIPPON SUISAN,4,THLCH,JPUKB,Medium,4
2,2023/M03,2023/W13,EA,IAA,JT1EA,No,THLKR,THLCH,JPTYO,JPTYO,TTYOB00064A,Medium,G-JP107763,NIPPON SUISAN,6,THLCH,JPTYO,Medium,6
3,2023/M03,2023/W11,EA,IAA,JT1EA,No,THLKR,THLCH,JPTYO,JPTYO,TTYOB00064A,Medium,G-JP107763,NIPPON SUISAN,6,THLCH,JPTYO,Medium,6
4,2022/M11,2022/W44,LE,LEE,SX1LE,No,BRRIG,BRRIG,HKHKG,HKHKG,ITJB00103A,Medium,G-BR100108,AGRA AGROINDUSTRIAL DE ALIMENTOS SA,6,BRRIG,HKHKG,Medium,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3159,2022/M11,2022/W44,LW,LWW,AX3LW,No,MXUMN,MXZLO,JPYOK,KRYNG,TMEXB00496A,Long,G-MX105876,GRUPO PORCICOLA MEXICANO S A C V,10,MXZLO,JPYOK,Long,10
3160,2022/M06,2022/W24,LW,LWW,AX1LW,No,CLVAP,CLVAP,KRPUS,KRPUS,SCLB00911A,Long,G-CL100079,AGROSUPER COMERCIALIZADORA,16,CLVAP,KRPUS,Long,16
3161,2022/M06,2022/W22,LW,LWW,AX1LW,No,CLVAP,CLVAP,CNYTN,CNNSA,SCLB00911A,Long,G-CL100079,AGROSUPER COMERCIALIZADORA,4,CLVAP,CNYTN,Long,4
3162,2022/M06,2022/W23,LW,LWW,AX1LW,No,CLCNL,CLCNL,HKHKG,THLKR,SCLB00911A,Long,G-CL100079,AGROSUPER COMERCIALIZADORA,2,CLCNL,HKHKG,Long,2
