In [1]:
import cx_Oracle
import pandas as pd
import os
import pyarrow.parquet as pq

In [37]:
%%time
# Source database connection
source_conn = cx_Oracle.connect(
    user="apps",
    password="apps",
    dsn="csebsd2db.cswg.com:1521/csebsd2_int"
)
source_cursor = source_conn.cursor()


# Read the query from the file
with open('CM_full_cursor.sql', 'r') as file:
    query = file.read()

source_cursor.execute(query)
df = source_cursor.fetchall()

# Create a DataFrame from the fetched data
columns_info = [(desc[0], desc[1]) for desc in source_cursor.description]
columns_multiindex = pd.MultiIndex.from_tuples(columns_info, names=['ColumnName', 'DataType'])
cust_df = pd.DataFrame(df, columns=columns_multiindex)


# Display the data
print("The DataFrame of GTT:")
print(cust_df)

# Write the DataFrame to a Parquet file
# cust_df.to_parquet('CM_full_cursor.parquet', index=False)
# # [2397 rows x 316 columns]
# # Wall time: 1min 15s

# # Read data from the Parquet file
# df_gtt = pd.read_parquet('CM_full_cursor.parquet')
# print(df_gtt)

The DataFrame of GTT:
ColumnName                     PARTY_SITE_ID  \
DataType   <cx_Oracle.DbType DB_TYPE_NUMBER>   
0                                      25275   
1                                     376902   
2                                     376998   
3                                     376999   
4                                     377000   
...                                      ...   
2392                                 3467192   
2393                                 3470193   
2394                                 3470194   
2395                                 3470195   
2396                                 3470196   

ColumnName                          STORE_NBR  \
DataType   <cx_Oracle.DbType DB_TYPE_VARCHAR>   
0                                       20054   
1                                       25317   
2                                       25401   
3                                       25403   
4                                       25415   
...       

In [17]:
# Accessing column names and data types
column_names = cust_df.columns.get_level_values('ColumnName')
data_types = cust_df.columns.get_level_values('DataType')

# Printing column names and data types
print("Column Names:", list(column_names))
print("Data Types:", list(data_types))


Column Names: ['PARTY_SITE_ID', 'STORE_NBR', 'STORE_NAME', 'STORE_STATUS', 'STORE_BILL_TO_FLAG', 'STORE_SHIP_TO_FLAG', 'ACCT_ID', 'ACCT_NBR', 'ACCT_NAME', 'ACCT_STATUS', 'ACCT_CUST_CLASS_CODE', 'ACCT_CREATION_DATE', 'CHAIN_NBR', 'CHAIN_NAME', 'CHAIN_SHORT_NAME', 'CHAIN_INACTIVE_DATE', 'START_CUST_NBR', 'STRT_CUST_NBR2', 'END_CUST_NBR', 'END_CUST_NBR2', 'PRIVATE_LABEL_OFFSET', 'SUB_AUTH_OFFST', 'AD_START_DAY_OF_WEEK', 'COMPANY_CODE', 'HQ_CUST_NBR', 'DIVISION_NBR', 'DIVISION_NAME', 'EBS_GL_CUST_GROUP_NBR', 'EBS_GL_CUST_GROUP_NAME', 'STORE_GROUP_NBR', 'STORE_GROUP_NAME', 'GROUP_BUY_NBR', 'STORE_CREATION_DATE', 'STORE_INACTIVE_DATE', 'STORE_LAST_UPDATE_DATE', 'LOCATION_ID', 'STORE_ADDRESS1', 'STORE_ADDRESS2', 'STORE_ADDRESS3', 'STORE_ADDRESS4', 'STORE_CITY', 'STORE_POSTAL_CODE', 'STORE_STATE', 'STORE_PROVINCE', 'STORE_COUNTY', 'STORE_COUNTRY', 'STORE_TELEPHONE_NBR', 'CUST_STORE_NBR', 'CUST_ACCT_PROFILE_ID', 'PAY_TERMS', 'PAY_TERM_DESC', 'PAY_TERM_START_DATE', 'PAY_TERM_END_DATE', 'COLLECTO

In [20]:
# Example: Iterate over columns and perform some operation
for column_name, data_type in zip(column_names, data_types):
    # Your logic here
    print(f"Column Name: {column_name}, Data Type: {data_type}")


Column Name: PARTY_SITE_ID, Data Type: <cx_Oracle.DbType DB_TYPE_NUMBER>
Column Name: STORE_NBR, Data Type: <cx_Oracle.DbType DB_TYPE_VARCHAR>
Column Name: STORE_NAME, Data Type: <cx_Oracle.DbType DB_TYPE_VARCHAR>
Column Name: STORE_STATUS, Data Type: <cx_Oracle.DbType DB_TYPE_VARCHAR>
Column Name: STORE_BILL_TO_FLAG, Data Type: <cx_Oracle.DbType DB_TYPE_CHAR>
Column Name: STORE_SHIP_TO_FLAG, Data Type: <cx_Oracle.DbType DB_TYPE_CHAR>
Column Name: ACCT_ID, Data Type: <cx_Oracle.DbType DB_TYPE_NUMBER>
Column Name: ACCT_NBR, Data Type: <cx_Oracle.DbType DB_TYPE_VARCHAR>
Column Name: ACCT_NAME, Data Type: <cx_Oracle.DbType DB_TYPE_VARCHAR>
Column Name: ACCT_STATUS, Data Type: <cx_Oracle.DbType DB_TYPE_VARCHAR>
Column Name: ACCT_CUST_CLASS_CODE, Data Type: <cx_Oracle.DbType DB_TYPE_VARCHAR>
Column Name: ACCT_CREATION_DATE, Data Type: <cx_Oracle.DbType DB_TYPE_DATE>
Column Name: CHAIN_NBR, Data Type: <cx_Oracle.DbType DB_TYPE_NUMBER>
Column Name: CHAIN_NAME, Data Type: <cx_Oracle.DbType DB_

In [21]:
# Example: Iterate over columns and perform some operation
for i, (column_name, data_type) in enumerate(zip(column_names, data_types), start=1):
    # Your logic here
    print(f"{i} Column Name: {column_name}, Data Type: {data_type}")


1 Column Name: PARTY_SITE_ID, Data Type: <cx_Oracle.DbType DB_TYPE_NUMBER>
2 Column Name: STORE_NBR, Data Type: <cx_Oracle.DbType DB_TYPE_VARCHAR>
3 Column Name: STORE_NAME, Data Type: <cx_Oracle.DbType DB_TYPE_VARCHAR>
4 Column Name: STORE_STATUS, Data Type: <cx_Oracle.DbType DB_TYPE_VARCHAR>
5 Column Name: STORE_BILL_TO_FLAG, Data Type: <cx_Oracle.DbType DB_TYPE_CHAR>
6 Column Name: STORE_SHIP_TO_FLAG, Data Type: <cx_Oracle.DbType DB_TYPE_CHAR>
7 Column Name: ACCT_ID, Data Type: <cx_Oracle.DbType DB_TYPE_NUMBER>
8 Column Name: ACCT_NBR, Data Type: <cx_Oracle.DbType DB_TYPE_VARCHAR>
9 Column Name: ACCT_NAME, Data Type: <cx_Oracle.DbType DB_TYPE_VARCHAR>
10 Column Name: ACCT_STATUS, Data Type: <cx_Oracle.DbType DB_TYPE_VARCHAR>
11 Column Name: ACCT_CUST_CLASS_CODE, Data Type: <cx_Oracle.DbType DB_TYPE_VARCHAR>
12 Column Name: ACCT_CREATION_DATE, Data Type: <cx_Oracle.DbType DB_TYPE_DATE>
13 Column Name: CHAIN_NBR, Data Type: <cx_Oracle.DbType DB_TYPE_NUMBER>
14 Column Name: CHAIN_NAME,

In [30]:
# Assuming you have a DataFrame named cust_df
column_names_only = cust_df.columns.get_level_values('ColumnName').tolist()

# Display the list of column names
print(len(column_names_only),column_names_only)


316 ['PARTY_SITE_ID', 'STORE_NBR', 'STORE_NAME', 'STORE_STATUS', 'STORE_BILL_TO_FLAG', 'STORE_SHIP_TO_FLAG', 'ACCT_ID', 'ACCT_NBR', 'ACCT_NAME', 'ACCT_STATUS', 'ACCT_CUST_CLASS_CODE', 'ACCT_CREATION_DATE', 'CHAIN_NBR', 'CHAIN_NAME', 'CHAIN_SHORT_NAME', 'CHAIN_INACTIVE_DATE', 'START_CUST_NBR', 'STRT_CUST_NBR2', 'END_CUST_NBR', 'END_CUST_NBR2', 'PRIVATE_LABEL_OFFSET', 'SUB_AUTH_OFFST', 'AD_START_DAY_OF_WEEK', 'COMPANY_CODE', 'HQ_CUST_NBR', 'DIVISION_NBR', 'DIVISION_NAME', 'EBS_GL_CUST_GROUP_NBR', 'EBS_GL_CUST_GROUP_NAME', 'STORE_GROUP_NBR', 'STORE_GROUP_NAME', 'GROUP_BUY_NBR', 'STORE_CREATION_DATE', 'STORE_INACTIVE_DATE', 'STORE_LAST_UPDATE_DATE', 'LOCATION_ID', 'STORE_ADDRESS1', 'STORE_ADDRESS2', 'STORE_ADDRESS3', 'STORE_ADDRESS4', 'STORE_CITY', 'STORE_POSTAL_CODE', 'STORE_STATE', 'STORE_PROVINCE', 'STORE_COUNTY', 'STORE_COUNTRY', 'STORE_TELEPHONE_NBR', 'CUST_STORE_NBR', 'CUST_ACCT_PROFILE_ID', 'PAY_TERMS', 'PAY_TERM_DESC', 'PAY_TERM_START_DATE', 'PAY_TERM_END_DATE', 'COLLECTOR_NAME', '

In [7]:
%%time
# Get all the party_site_ids from the records of cust_df
party_site_ids = cust_df['PARTY_SITE_ID'].tolist()
print('len(party_site_ids) :',len(party_site_ids))
# Define the batch size (e.g., 1000)
batch_size = 1000

# Calculate the number of batches
num_batches = (len(party_site_ids) + batch_size - 1) // batch_size
print(num_batches)
# Assuming you have a database connection and a cursor named 'source_cursor'
# Adjust the SQL query based on your database and table structure
with open('SelectPSCM.sql', 'r') as file:
    query_template = file.read()
query_template = f"SELECT  FROM xxfin.xxar_customer_master WHERE party_site_id IN ({})"

# Create an empty list to store DataFrames from each batch
all_dfs = []

# Iterate through batches
for i in range(num_batches):
    # Get the start and end indices for the current batch
    start_idx = i * batch_size
    end_idx = (i + 1) * batch_size

    # Extract the party_site_ids for the current batch
    current_batch = party_site_ids[start_idx:end_idx]

    # Generate the SQL query for the current batch
    query = query_template.format(','.join(map(str, current_batch)))

    # Execute the query and fetch the results
    source_cursor.execute(query)
    results = source_cursor.fetchall()

    # Create a DataFrame from the results
    columns = [desc[0] for desc in source_cursor.description]
    current_df = pd.DataFrame(results, columns=columns)

    # Append the current DataFrame to the list
    all_dfs.append(current_df)

# Concatenate all DataFrames into a single DataFrame
final_df = pd.concat(all_dfs, ignore_index=True)

# Display the final DataFrame
print(final_df)

# Get the number of rows in the final DataFrame
print(f"Number of rows in the final DataFrame: {final_df.shape[0]}")

len(party_site_ids) : 2397
3
      PARTY_SITE_ID  STORE_NBR                   STORE_NAME STORE_STATUS  \
0             25275      20054    BLACKIE'S FARM FRESH PROD       Active   
1            376902      25317           FAIRWAY E-COMMERCE       Active   
2            376998      25401       IMPERIAL BAGS PAPER CO       Active   
3            376999      25403       N & P FOOD SERVICES IN       Active   
4            377000      25415       PLAINFIELD TOB & CANDY       Active   
...             ...        ...                          ...          ...   
2387        2678359    9854083                FRED'S MARKET       Active   
2388        2859183      30115  CEDAR KNOLLS FARMERS MARKET       Active   
2389        3040128      30124                       C-TOWN       Active   
2390        3048123      30125                TUSCANY FOODS       Active   
2391        3460192  999999994               TEST 999999994       Active   

     STORE_BILL_TO_FLAG STORE_SHIP_TO_FLAG  ACCT_ID   ACCT

In [11]:
# Identify party_site_ids not in the final DataFrame

missing_party_site_ids = set(party_site_ids) - set(final_df['PARTY_SITE_ID'])
missing_party_site_ids_len = len(set(party_site_ids)) - len(set(final_df['PARTY_SITE_ID']))
print('Missing party_site_ids:', missing_party_site_ids)
print('missing_party_site_ids_len',missing_party_site_ids_len)
print(2397-2392)

Missing party_site_ids: {3470193, 3470194, 3470195, 3470196, 3467192}
missing_party_site_ids_len 5
5


In [15]:
import pandas as pd

# Your comma-separated values as a string
csv_values = "PARTY_SITE_ID, STORE_NBR, STORE_NAME, STORE_STATUS, STORE_BILL_TO_FLAG, STORE_SHIP_TO_FLAG, ACCT_ID, ACCT_NBR, ACCT_NAME, ACCT_STATUS, ACCT_CUST_CLASS_CODE, ACCT_CREATION_DATE, CHAIN_NBR, CHAIN_NAME, CHAIN_SHORT_NAME, CHAIN_INACTIVE_DATE, START_CUST_NBR, STRT_CUST_NBR2, END_CUST_NBR, END_CUST_NBR2, PRIVATE_LABEL_OFFSET, SUB_AUTH_OFFST, AD_START_DAY_OF_WEEK, COMPANY_CODE, HQ_CUST_NBR, DIVISION_NBR, DIVISION_NAME, EBS_GL_CUST_GROUP_NBR, EBS_GL_CUST_GROUP_NAME, STORE_GROUP_NBR, STORE_GROUP_NAME, GROUP_BUY_NBR, STORE_CREATION_DATE, STORE_INACTIVE_DATE, STORE_LAST_UPDATE_DATE, LOCATION_ID, STORE_ADDRESS1, STORE_ADDRESS2, STORE_ADDRESS3, STORE_ADDRESS4, STORE_CITY, STORE_POSTAL_CODE, STORE_STATE, STORE_PROVINCE, STORE_COUNTY, STORE_COUNTRY, STORE_TELEPHONE_NBR, CUST_STORE_NBR, CUST_ACCT_PROFILE_ID, PAY_TERMS, PAY_TERM_DESC, PAY_TERM_START_DATE, PAY_TERM_END_DATE, COLLECTOR_NAME, STATEMENT_CYCLE_NAME, ALT_CUST_NBR, BASE_CUST_NBR, CUST_STATEMENT_FORMAT, PACA_FLAG, TOBACCO_BILLABLE_FLAG, TOBACCO_REBATE, ZONE_NBR, MILITARY_DODAAC, SALES_REPRESENTATIVE, BILL_TO_STORE_NBR, BILL_TO_STORE_NAME, REQUEST_ID, CREATION_DATE, CREATED_BY, LAST_UPDATE_DATE, LAST_UPDATED_BY, LAST_UPDATE_LOGIN, PSUEDOEPHEDRINE_FLAG, CALL_FOR_CUSTOMER, CIGARETTE_LICENSE, SMALL_PACK, CIGARETTE_VALIDATION, AWI_CDH_CUSTOMER_CLASS, GAS_REWARDS, MILES, PRINT_STMTS, AWI_CDH_RACK_SERVICE, RETAIL_TYPE, TAX_EXEMPT, ADV_FLAG_7, ADV_FLAG6, BB_AD_PLAN, AD_COMMENTS, ADV_FLAG5, CONFO_AD_PLAN, DEFAULT_TRIP, CATALOG10, AWI_PRINT_BULLETIN, AWI_SIGN_KIT, SURVEYS, ADV_FLAG8, AWI_AD_OFFSET, AGNE_CUSTOMER, CONFO, HBA_GM_FLAG, HBA_GM_DISC_OVERIDE, HBA_GM_DISC, ITEM_ALLOWANCE, COD, SERV_CHARGE, SINGLE_PICK, AWI_CUSTOMER_TYPE, MULTI_STORE_NUMBER, EIGHT_WEEK_BB, BB_MOD_PROGRAM, KPC_END, KPC_RETAIL_ZONE, KPC_START, AWI_PRODUCT, SPECIALTY_FOODS, D_COST_PLUS_BRACKET_FLAG, D_COST_PLUS_BRACKET_NUMBER, D_DEPARTMENT, D_MERCHANDISER, D_PRICING_FLAG, D_SALESMAN, D_SKID_ROUNDING_FLAG, SY_COST_PLUS_BRACKET_FLAG, SY_COST_PLUS_BRACKET_NUMBER, SY_DEPARTMENT, SY_MERCHANDISER, SY_PRICING_FLAG, SY_SALESMAN, SY_SKID_ROUNDING_FLAG, M_COST_PLUS_BRACKET_FLAG, M_COST_PLUS_BRACKET_NUMBER, M_DEPARTMENT, M_MERCHANDISER, M_PRICING_FLAG, M_SALESMAN, M_SKID_ROUNDING_FLAG, P_COST_PLUS_BRACKET_FLAG, P_COST_PLUS_BRACKET_NUMBER, P_DEPARTMENT, P_MERCHANDISER, P_PRICING_FLAG, P_SALESMAN, P_SKID_ROUNDING_FLAG, GM_COST_PLUS_BRACKET_FLAG, GM_COST_PLUS_BRACKET_NUMBER, GM_DEPARTMENT, GM_MERCHANDISER, GM_PRICING_FLAG, GM_SALESMAN, GM_SKID_ROUNDING_FLAG, C_COST_PLUS_BRACKET_FLAG, C_COST_PLUS_BRACKET_NUMBER, C_DEPARTMENT, C_MERCHANDISER, C_PRICING_FLAG, C_SALESMAN, C_SKID_ROUNDING_FLAG, F_COST_PLUS_BRACKET_FLAG, F_COST_PLUS_BRACKET_NUMBER, F_DEPARTMENT, F_MERCHANDISER, F_PRICING_FLAG, F_SALESMAN, F_SKID_ROUNDING_FLAG, RG_COST_PLUS_BRACKET_FLAG, RG_COST_PLUS_BRACKET_NUMBER, RG_DEPARTMENT, RG_MERCHANDISER, RG_PRICING_FLAG, RG_SALESMAN, RG_SKID_ROUNDING_FLAG, DSD_VENDOR, PRIMARY_CONTACT_NAME, PRIMARY_CONTACT_PHONE_NBR, PRIMARY_CONTACT_EMAIL_ADDR, CUSTOMER_GROUP, STORE_GROUP, FINANCE_CHARGES, INTEREST_RATE, PAYMENT_GRACE_DAYS, INTEREST_PERIOD_DAYS, STATEMENT_ADDRESS1, STATEMENT_CITY, STATEMENT_STATE, STATEMENT_ZIP, FINANCE_CHARGE_EFFECTIVE_DATE, SUMMARY_STMT_FORMAT, DELINQUENT_START_DATE, AR_SEGMENT_GROUP, AR_GROUP_NAME, IMPORTED_SOURCE, REMITTANCE_ADDR, STATEMENT_TOLRNCE, INVOICE_TOLRNCE, SALES_REP, RISK_TIER, GS_RETAIL_ACCTNG_REPORT, STATEMENT_LEVEL, RELATED_CUST_LOCKBOX_APPLLY, SEND_ACH_NOTIFTN, STOCK_NAME, MULTI_STORE, EFT, PICKUP_DAY, PAY_DUE_DAY, CREDIT_HOLD, PAYMENT_TERMS, CREDIT_GROUP, BLANK_CHECK_CUSTOMER, AD_PLAN_GROUP, AWI_PARTY_SITE_ID, AWI_MULTI_STORE_NBR, PATRONAGE_NBR, PRINT_STATEMENTS, STATEMENT_INSTRUCTIONS, SEND_TO_AWI_CDH_FLG, CM_BILLING_CYCLE, CM_TERMS, CM_NP_XFER_FLAG, CM_PRINT_STMT, CM_RET_ACCT_STMT, CM_STMT_COPIES, CM_GROUP_CODE, CM_PAYMENT_METHOD, CM_STMT_LABELS, CM_PAYMENT_DUE_DAYS, CM_SUB_STORE, CM_CUST_XREF_NO, STMT_ADJ_GROUP_CODE, CUST_CODE_XREF, SITE_PICK_SLIP_REQ, SITE_PICK_SLIP_REQ_DISP, SITE_RANK, ACC_SUBS, REJ_REASON, MAX_VOL_PALL, ITEM_PAL_ROUND, LOG_ORD_QTY, MIN_ORD_QTY, ENFORCE_DEPT, MAX_ORD_QTY, PO_REQ, PO_REQ_DISP, RET_PRC_REQ, RET_PRC_REQ_DISP, CUST_XREF, CUST_XREF_DISP, LEGACY_CUST_NBR, LEGACY_COMPANY, SHIP_TO_SITE_USE_ID, BILL_TO_SITE_USE_ID, HOSTING_DEPT, RETAILER_DEPT, CONTACT_INFO, STORE_MANAGER, STORE_OWNER, STORE_TYPE, ACCEPT_SUBS, INT_PO_REQ, LAST_BILL_DATE, DIVISION_ID, REGION, DELEGATE_AUTRIZTN_FLG, AISLE_LIMIT, ACCT_TYPE, OLD_STORE_NBR, GS_DASH_0, GS_DASH_0_DESCRIPTION, GS_DASH_1, GS_DASH_1_DESCRIPTION, GS_DASH_2, GS_DASH_2_DESCRIPTION, GS_DASH_3, GS_DASH_3_DESCRIPTION, GS_DASH_4, GS_DASH_4_DESCRIPTION, GS_DASH_5, GS_DASH_5_DESCRIPTION, GS_DASH_6, GS_DASH_6_DESCRIPTION, GS_DASH_7, GS_DASH_7_DESCRIPTION, GS_DASH_8, GS_DASH_8_DESCRIPTION, GS_DASH_9, GS_DASH_9_DESCRIPTION, PLATINUM_DEPT, EXCLUDE_PDF_BKP, GS_ADV_CRD_GROC_RPK_CAYU_PCT, GS_ADV_CRD_MEAT_BULK_PCT, GS_ADV_CRD_CORDER_REPACK_PCT, GS_ADV_CRD_FROZEN_PCT, GS_ADV_CRD_BAKERY_PCT, GS_ADV_CRD_REFRIG_PCT, GS_ADV_CRD_ICREAM_PCT, GS_SALESMAN_NBR, GS_SALESMAN_NAME, GS_CM_DROP_SHIP_FLAG, GS_CM_PAYMENT_METHOD, GS_SELECTION_SERVICE_FEE_FLAG, GS_FRZ_RBT_FLAG, GS_STMT_FROZ_ADJ_FACTOR, GS_STMT_MEAT_ADJ_FACTOR, GS_STMT_GROC_ADJ_FACTOR, GS_NOTE_PAYOFF_CREDIT_RATE, GS_SALES_ADJ_REBATE_RATE, GS_TRANSFER_BAL_DASH_NBR, GS_CUST_CROSS_REF_DASH_NBR, GS_REBATE_CHECK, GS_GROUP_CODE_DESC, GS_PORTAL_FOLDER_NBR, STORE_FAX_NBR, GS_AUD_GROUP, CREDIT_LIMIT, GS_AD_GROUP, GS_TPR_GROUP, GS_GBY_GROUP, GS_PRICING_ZONE, GS_MAP_CD, GS_MAP_CD_LTR, GS_VENDOR_PACK_IND, GS_SUPER_SERVE, GS_NABISCO, GS_WILL_CALL, SWELL, HQ_FLAG, GS_PRIMARY_SHIP_FLAG, GS_FUEL_ZONE, GS_FUEL_ZONE_DESC, GS_AD_GROUP_DESC, GS_PRICING_ZONE_DESC"  # Replace ... with the remaining values

# Split the string into a list of values
values_list = csv_values.split(', ')

# Create a DataFrame with one column containing the values
df = pd.DataFrame({'Columns in CM': values_list})

# Export DataFrame to Excel
df.to_excel('output_file.xlsx', index=False)


In [23]:
pip install openpyxl


Note: you may need to restart the kernel to use updated packages.


DEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063


In [26]:
# import pandas as pd
from openpyxl import load_workbook

# Load the existing Excel file
existing_file_path = 'output_file.xlsx'
book = load_workbook(existing_file_path)
writer = pd.ExcelWriter(existing_file_path, engine='openpyxl') 
writer.book = book

# Your comma-separated values as a string
# csv_values = "PARTY_SITE_ID, STORE_NBR, STORE_NAME, ..."  # Replace ... with the remaining values

# Split the string into a list of values
# values_list = csv_values.split(', ')

# Create a DataFrame with one column containing the values
df = pd.DataFrame({'Columns in df': column_names_only})

# Write the DataFrame to the Excel file in the second column
df.to_excel(writer, sheet_name='Sheet1', startcol=1, index=False, header=False)

# Save the changes
writer.save()
writer.close()


In [28]:
import pandas as pd
from openpyxl import load_workbook

# Load the existing Excel file
existing_file_path = 'output_file.xlsx'
book = load_workbook(existing_file_path)


# Create a DataFrame with one column containing the values
df = pd.DataFrame({'Columns in df': column_names_only})

# Open the existing sheet in the workbook
writer = pd.ExcelWriter(existing_file_path, engine='openpyxl') 
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)

# Write the DataFrame to the Excel file starting from the second column
start_col = writer.sheets['Sheet1'].max_column + 1 if 'Sheet1' in writer.sheets else 1
df.to_excel(writer, sheet_name='Sheet1', startcol=start_col, index=False, header=False)

# Save the changes
writer.save()
writer.close()


In [32]:

from openpyxl import load_workbook

# Load the existing Excel file
existing_file_path = 'output_file.xlsx'
book = load_workbook(existing_file_path)

# Create a DataFrame with one column containing the values
df = pd.DataFrame({'Columns from df': column_names_only})

# Open the existing sheet in the workbook
writer = pd.ExcelWriter(existing_file_path, engine='openpyxl') 
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)

# Write the DataFrame to the Excel file starting from the second column
start_col = writer.sheets['Sheet1'].max_column + 1 if 'Sheet1' in writer.sheets else 1
df.to_excel(writer, sheet_name='Sheet1', startcol=start_col, index=False, header=False)

# Save the changes
writer.save()
writer.close()


In [34]:
import pandas as pd
from openpyxl import load_workbook

# Load the existing Excel file
existing_file_path = 'output_file.xlsx'
book = load_workbook(existing_file_path)

# Your list of column names from cust_df
# column_names_only = ["PARTY_SITE_ID", "STORE_NBR", "STORE_NAME", ...]  # Replace ... with the remaining values

# Create a DataFrame with one column containing the values
df = pd.DataFrame({'Columns from df': column_names_only})

# Open the existing sheet in the workbook
writer = pd.ExcelWriter(existing_file_path, engine='openpyxl') 
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)

# Check if 'Sheet1' exists in the workbook
if 'Sheet1' in writer.sheets:
    sheet = writer.sheets['Sheet1']

    # Write the header 'Columns from df' in the first row of the second column
    sheet.cell(row=1, column=sheet.max_column + 1, value='Columns from df')

    # Iterate through each column name from the DataFrame
    for i, column_name in enumerate(column_names_only, start=2):
        # Check if the column name is already in the first column
        if column_name in sheet['A']:
            # If found, find the row number and write the column name in the adjacent column
            row_number = sheet['A'].index(column_name) + 1
            sheet.cell(row=row_number, column=sheet.max_column, value=column_name)
        else:
            # If not found, write the column name in the last row of the first column
            sheet.cell(row=sheet.max_row + 1, column=1, value=column_name)

# Save the changes
writer.save()
writer.close()


In [35]:
import pandas as pd
from openpyxl import load_workbook

# Load the existing Excel file
existing_file_path = 'output_file.xlsx'
book = load_workbook(existing_file_path)

# Your list of column names from cust_df
# column_names_only = ["PARTY_SITE_ID", "STORE_NBR", "STORE_NAME", ...]  # Replace ... with the remaining values

# Open the existing sheet in the workbook
writer = pd.ExcelWriter(existing_file_path, engine='openpyxl') 
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)

# Check if 'Sheet1' exists in the workbook
if 'Sheet1' in writer.sheets:
    sheet = writer.sheets['Sheet1']

    # Write the header 'Columns from df' in the first row of the second column
    sheet.cell(row=1, column=sheet.max_column + 1, value='Columns from df')

    # Iterate through each column name from the DataFrame
    for i, column_name in enumerate(column_names_only, start=2):
        # Write the column name in the second column
        sheet.cell(row=i, column=sheet.max_column, value=column_name)

# Save the changes
writer.save()
writer.close()


In [45]:
# Given string of column names
Audit_columns = "PARTY_SITE_ID, STORE_NBR, STORE_NAME, STORE_STATUS, STORE_BILL_TO_FLAG, STORE_SHIP_TO_FLAG, ACCT_ID, ACCT_NBR, ACCT_NAME, ACCT_STATUS, ACCT_CUST_CLASS_CODE, ACCT_CREATION_DATE, CHAIN_NBR, CHAIN_NAME, CHAIN_SHORT_NAME, CHAIN_INACTIVE_DATE, START_CUST_NBR, STRT_CUST_NBR2, END_CUST_NBR, END_CUST_NBR2, PRIVATE_LABEL_OFFSET, SUB_AUTH_OFFST, AD_START_DAY_OF_WEEK, COMPANY_CODE, HQ_CUST_NBR, DIVISION_NBR, DIVISION_NAME, EBS_GL_CUST_GROUP_NBR, EBS_GL_CUST_GROUP_NAME, STORE_GROUP_NBR, STORE_GROUP_NAME, GROUP_BUY_NBR, STORE_CREATION_DATE, STORE_INACTIVE_DATE, STORE_LAST_UPDATE_DATE, LOCATION_ID, STORE_ADDRESS1, STORE_ADDRESS2, STORE_ADDRESS3, STORE_ADDRESS4, STORE_CITY, STORE_POSTAL_CODE, STORE_STATE, STORE_PROVINCE, STORE_COUNTY, STORE_COUNTRY, STORE_TELEPHONE_NBR, CUST_STORE_NBR, CUST_ACCT_PROFILE_ID, PAY_TERMS, PAY_TERM_DESC, PAY_TERM_START_DATE, PAY_TERM_END_DATE, COLLECTOR_NAME, STATEMENT_CYCLE_NAME, ALT_CUST_NBR, BASE_CUST_NBR, CUST_STATEMENT_FORMAT, PACA_FLAG, TOBACCO_BILLABLE_FLAG, TOBACCO_REBATE, ZONE_NBR, MILITARY_DODAAC, SALES_REPRESENTATIVE, REQUEST_ID, CREATION_DATE, CREATED_BY, LAST_UPDATE_DATE, LAST_UPDATED_BY, LAST_UPDATE_LOGIN, BILL_TO_STORE_NBR, BILL_TO_STORE_NAME, PSUEDOEPHEDRINE_FLAG, CALL_FOR_CUSTOMER, CIGARETTE_LICENSE, SMALL_PACK, CIGARETTE_VALIDATION, AWI_CDH_CUSTOMER_CLASS, GAS_REWARDS, MILES, PRINT_STMTS, AWI_CDH_RACK_SERVICE, RETAIL_TYPE, TAX_EXEMPT, ADV_FLAG_7, ADV_FLAG6, BB_AD_PLAN, AD_COMMENTS, ADV_FLAG5, CONFO_AD_PLAN, DEFAULT_TRIP, CATALOG10, AWI_PRINT_BULLETIN, AWI_SIGN_KIT, SURVEYS, ADV_FLAG8, AWI_AD_OFFSET, AGNE_CUSTOMER, CONFO, HBA_GM_FLAG, HBA_GM_DISC_OVERIDE, HBA_GM_DISC, ITEM_ALLOWANCE, COD, SERV_CHARGE, SINGLE_PICK, AWI_CUSTOMER_TYPE, MULTI_STORE_NUMBER, EIGHT_WEEK_BB, BB_MOD_PROGRAM, KPC_END, KPC_RETAIL_ZONE, KPC_START, AWI_PRODUCT, SPECIALTY_FOODS, D_COST_PLUS_BRACKET_FLAG, D_COST_PLUS_BRACKET_NUMBER, D_DEPARTMENT, D_MERCHANDISER, D_PRICING_FLAG, D_SALESMAN, D_SKID_ROUNDING_FLAG, SY_COST_PLUS_BRACKET_FLAG, SY_COST_PLUS_BRACKET_NUMBER, SY_DEPARTMENT, SY_MERCHANDISER, SY_PRICING_FLAG, SY_SALESMAN, SY_SKID_ROUNDING_FLAG, M_COST_PLUS_BRACKET_FLAG, M_COST_PLUS_BRACKET_NUMBER, M_DEPARTMENT, M_MERCHANDISER, M_PRICING_FLAG, M_SALESMAN, M_SKID_ROUNDING_FLAG, P_COST_PLUS_BRACKET_FLAG, P_COST_PLUS_BRACKET_NUMBER, P_DEPARTMENT, P_MERCHANDISER, P_PRICING_FLAG, P_SALESMAN, P_SKID_ROUNDING_FLAG, GM_COST_PLUS_BRACKET_FLAG, GM_COST_PLUS_BRACKET_NUMBER, GM_DEPARTMENT, GM_MERCHANDISER, GM_PRICING_FLAG, GM_SALESMAN, GM_SKID_ROUNDING_FLAG, C_COST_PLUS_BRACKET_FLAG, C_COST_PLUS_BRACKET_NUMBER, C_DEPARTMENT, C_MERCHANDISER, C_PRICING_FLAG, C_SALESMAN, C_SKID_ROUNDING_FLAG, F_COST_PLUS_BRACKET_FLAG, F_COST_PLUS_BRACKET_NUMBER, F_DEPARTMENT, F_MERCHANDISER, F_PRICING_FLAG, F_SALESMAN, F_SKID_ROUNDING_FLAG, RG_COST_PLUS_BRACKET_FLAG, RG_COST_PLUS_BRACKET_NUMBER, RG_DEPARTMENT, RG_MERCHANDISER, RG_PRICING_FLAG, RG_SALESMAN, RG_SKID_ROUNDING_FLAG, DSD_VENDOR, PRIMARY_CONTACT_NAME, PRIMARY_CONTACT_PHONE_NBR, PRIMARY_CONTACT_EMAIL_ADDR, CUSTOMER_GROUP, STORE_GROUP, FINANCE_CHARGES, INTEREST_RATE, PAYMENT_GRACE_DAYS, INTEREST_PERIOD_DAYS, STATEMENT_ADDRESS1, STATEMENT_CITY, STATEMENT_STATE, STATEMENT_ZIP, FINANCE_CHARGE_EFFECTIVE_DATE, SUMMARY_STMT_FORMAT, DELINQUENT_START_DATE, AR_SEGMENT_GROUP, AR_GROUP_NAME, IMPORTED_SOURCE, REMITTANCE_ADDR, STATEMENT_TOLRNCE, INVOICE_TOLRNCE, SALES_REP, RISK_TIER, GS_RETAIL_ACCTNG_REPORT, STATEMENT_LEVEL, RELATED_CUST_LOCKBOX_APPLLY, SEND_ACH_NOTIFTN, STOCK_NAME, MULTI_STORE, EFT, PICKUP_DAY, PAY_DUE_DAY, CREDIT_HOLD, PAYMENT_TERMS, CREDIT_GROUP, BLANK_CHECK_CUSTOMER, AD_PLAN_GROUP, AWI_PARTY_SITE_ID, AWI_MULTI_STORE_NBR, PATRONAGE_NBR, PRINT_STATEMENTS, STATEMENT_INSTRUCTIONS, SEND_TO_AWI_CDH_FLG, CM_BILLING_CYCLE, CM_TERMS, CM_NP_XFER_FLAG, CM_PRINT_STMT, CM_RET_ACCT_STMT, CM_STMT_COPIES, CM_GROUP_CODE, CM_PAYMENT_METHOD, CM_STMT_LABELS, CM_PAYMENT_DUE_DAYS, CM_SUB_STORE, CM_CUST_XREF_NO, STMT_ADJ_GROUP_CODE, CUST_CODE_XREF, SITE_PICK_SLIP_REQ, SITE_PICK_SLIP_REQ_DISP, SITE_RANK, ACC_SUBS, REJ_REASON, MAX_VOL_PALL, ITEM_PAL_ROUND, LOG_ORD_QTY, MIN_ORD_QTY, ENFORCE_DEPT, MAX_ORD_QTY, PO_REQ, PO_REQ_DISP, RET_PRC_REQ, RET_PRC_REQ_DISP, CUST_XREF, CUST_XREF_DISP, LEGACY_CUST_NBR, LEGACY_COMPANY, SHIP_TO_SITE_USE_ID, BILL_TO_SITE_USE_ID, HOSTING_DEPT, RETAILER_DEPT, CONTACT_INFO, STORE_MANAGER, STORE_OWNER, STORE_TYPE, ACCEPT_SUBS, INT_PO_REQ, LAST_BILL_DATE, DIVISION_ID, REGION, DELEGATE_AUTRIZTN_FLG, AISLE_LIMIT, ACCT_TYPE, OLD_STORE_NBR, GS_DASH_0, GS_DASH_0_DESCRIPTION, GS_DASH_1, GS_DASH_1_DESCRIPTION, GS_DASH_2, GS_DASH_2_DESCRIPTION, GS_DASH_3, GS_DASH_3_DESCRIPTION, GS_DASH_4, GS_DASH_4_DESCRIPTION, GS_DASH_5, GS_DASH_5_DESCRIPTION, GS_DASH_6, GS_DASH_6_DESCRIPTION, GS_DASH_7, GS_DASH_7_DESCRIPTION, GS_DASH_8, GS_DASH_8_DESCRIPTION, GS_DASH_9, GS_DASH_9_DESCRIPTION, PLATINUM_DEPT, EXCLUDE_PDF_BKP, GS_ADV_CRD_GROC_RPK_CAYU_PCT, GS_ADV_CRD_MEAT_BULK_PCT, GS_ADV_CRD_CORDER_REPACK_PCT, GS_ADV_CRD_FROZEN_PCT, GS_ADV_CRD_BAKERY_PCT, GS_ADV_CRD_REFRIG_PCT, GS_ADV_CRD_ICREAM_PCT, GS_SALESMAN_NBR, GS_SALESMAN_NAME, GS_CM_DROP_SHIP_FLAG, GS_CM_PAYMENT_METHOD, GS_SELECTION_SERVICE_FEE_FLAG, GS_FRZ_RBT_FLAG, GS_STMT_FROZ_ADJ_FACTOR, GS_STMT_MEAT_ADJ_FACTOR, GS_STMT_GROC_ADJ_FACTOR, GS_NOTE_PAYOFF_CREDIT_RATE, GS_SALES_ADJ_REBATE_RATE, GS_TRANSFER_BAL_DASH_NBR, GS_CUST_CROSS_REF_DASH_NBR, GS_REBATE_CHECK, GS_GROUP_CODE_DESC, GS_PORTAL_FOLDER_NBR, STORE_FAX_NBR, GS_AUD_GROUP, CREDIT_LIMIT, GS_AD_GROUP, GS_TPR_GROUP, GS_GBY_GROUP, GS_PRICING_ZONE, GS_MAP_CD, GS_MAP_CD_LTR, GS_VENDOR_PACK_IND, GS_SUPER_SERVE, GS_NABISCO, GS_WILL_CALL, SWELL, HQ_FLAG, GS_PRIMARY_SHIP_FLAG, GS_FUEL_ZONE, GS_FUEL_ZONE_DESC, GS_AD_GROUP_DESC, GS_PRICING_ZONE_DESC"
# Split the string into a list of column names
Audit_columns_list = Audit_columns.split(', ')

# Display the list of column names
print(len(Audit_columns_list),Audit_columns_list)


331 ['PARTY_SITE_ID', 'STORE_NBR', 'STORE_NAME', 'STORE_STATUS', 'STORE_BILL_TO_FLAG', 'STORE_SHIP_TO_FLAG', 'ACCT_ID', 'ACCT_NBR', 'ACCT_NAME', 'ACCT_STATUS', 'ACCT_CUST_CLASS_CODE', 'ACCT_CREATION_DATE', 'CHAIN_NBR', 'CHAIN_NAME', 'CHAIN_SHORT_NAME', 'CHAIN_INACTIVE_DATE', 'START_CUST_NBR', 'STRT_CUST_NBR2', 'END_CUST_NBR', 'END_CUST_NBR2', 'PRIVATE_LABEL_OFFSET', 'SUB_AUTH_OFFST', 'AD_START_DAY_OF_WEEK', 'COMPANY_CODE', 'HQ_CUST_NBR', 'DIVISION_NBR', 'DIVISION_NAME', 'EBS_GL_CUST_GROUP_NBR', 'EBS_GL_CUST_GROUP_NAME', 'STORE_GROUP_NBR', 'STORE_GROUP_NAME', 'GROUP_BUY_NBR', 'STORE_CREATION_DATE', 'STORE_INACTIVE_DATE', 'STORE_LAST_UPDATE_DATE', 'LOCATION_ID', 'STORE_ADDRESS1', 'STORE_ADDRESS2', 'STORE_ADDRESS3', 'STORE_ADDRESS4', 'STORE_CITY', 'STORE_POSTAL_CODE', 'STORE_STATE', 'STORE_PROVINCE', 'STORE_COUNTY', 'STORE_COUNTRY', 'STORE_TELEPHONE_NBR', 'CUST_STORE_NBR', 'CUST_ACCT_PROFILE_ID', 'PAY_TERMS', 'PAY_TERM_DESC', 'PAY_TERM_START_DATE', 'PAY_TERM_END_DATE', 'COLLECTOR_NAME', '

In [46]:
Customer_master_columns = "PARTY_SITE_ID, STORE_NBR, STORE_NAME, STORE_STATUS, STORE_BILL_TO_FLAG, STORE_SHIP_TO_FLAG, ACCT_ID, ACCT_NBR, ACCT_NAME, ACCT_STATUS, ACCT_CUST_CLASS_CODE, ACCT_CREATION_DATE, CHAIN_NBR, CHAIN_NAME, CHAIN_SHORT_NAME, CHAIN_INACTIVE_DATE, START_CUST_NBR, STRT_CUST_NBR2, END_CUST_NBR, END_CUST_NBR2, PRIVATE_LABEL_OFFSET, SUB_AUTH_OFFST, AD_START_DAY_OF_WEEK, COMPANY_CODE, HQ_CUST_NBR, DIVISION_NBR, DIVISION_NAME, EBS_GL_CUST_GROUP_NBR, EBS_GL_CUST_GROUP_NAME, STORE_GROUP_NBR, STORE_GROUP_NAME, GROUP_BUY_NBR, STORE_CREATION_DATE, STORE_INACTIVE_DATE, STORE_LAST_UPDATE_DATE, LOCATION_ID, STORE_ADDRESS1, STORE_ADDRESS2, STORE_ADDRESS3, STORE_ADDRESS4, STORE_CITY, STORE_POSTAL_CODE, STORE_STATE, STORE_PROVINCE, STORE_COUNTY, STORE_COUNTRY, STORE_TELEPHONE_NBR, CUST_STORE_NBR, CUST_ACCT_PROFILE_ID, PAY_TERMS, PAY_TERM_DESC, PAY_TERM_START_DATE, PAY_TERM_END_DATE, COLLECTOR_NAME, STATEMENT_CYCLE_NAME, ALT_CUST_NBR, BASE_CUST_NBR, CUST_STATEMENT_FORMAT, PACA_FLAG, TOBACCO_BILLABLE_FLAG, TOBACCO_REBATE, ZONE_NBR, MILITARY_DODAAC, SALES_REPRESENTATIVE, BILL_TO_STORE_NBR, BILL_TO_STORE_NAME, REQUEST_ID, CREATION_DATE, CREATED_BY, LAST_UPDATE_DATE, LAST_UPDATED_BY, LAST_UPDATE_LOGIN, PSUEDOEPHEDRINE_FLAG, CALL_FOR_CUSTOMER, CIGARETTE_LICENSE, SMALL_PACK, CIGARETTE_VALIDATION, AWI_CDH_CUSTOMER_CLASS, GAS_REWARDS, MILES, PRINT_STMTS, AWI_CDH_RACK_SERVICE, RETAIL_TYPE, TAX_EXEMPT, ADV_FLAG_7, ADV_FLAG6, BB_AD_PLAN, AD_COMMENTS, ADV_FLAG5, CONFO_AD_PLAN, DEFAULT_TRIP, CATALOG10, AWI_PRINT_BULLETIN, AWI_SIGN_KIT, SURVEYS, ADV_FLAG8, AWI_AD_OFFSET, AGNE_CUSTOMER, CONFO, HBA_GM_FLAG, HBA_GM_DISC_OVERIDE, HBA_GM_DISC, ITEM_ALLOWANCE, COD, SERV_CHARGE, SINGLE_PICK, AWI_CUSTOMER_TYPE, MULTI_STORE_NUMBER, EIGHT_WEEK_BB, BB_MOD_PROGRAM, KPC_END, KPC_RETAIL_ZONE, KPC_START, AWI_PRODUCT, SPECIALTY_FOODS, D_COST_PLUS_BRACKET_FLAG, D_COST_PLUS_BRACKET_NUMBER, D_DEPARTMENT, D_MERCHANDISER, D_PRICING_FLAG, D_SALESMAN, D_SKID_ROUNDING_FLAG, SY_COST_PLUS_BRACKET_FLAG, SY_COST_PLUS_BRACKET_NUMBER, SY_DEPARTMENT, SY_MERCHANDISER, SY_PRICING_FLAG, SY_SALESMAN, SY_SKID_ROUNDING_FLAG, M_COST_PLUS_BRACKET_FLAG, M_COST_PLUS_BRACKET_NUMBER, M_DEPARTMENT, M_MERCHANDISER, M_PRICING_FLAG, M_SALESMAN, M_SKID_ROUNDING_FLAG, P_COST_PLUS_BRACKET_FLAG, P_COST_PLUS_BRACKET_NUMBER, P_DEPARTMENT, P_MERCHANDISER, P_PRICING_FLAG, P_SALESMAN, P_SKID_ROUNDING_FLAG, GM_COST_PLUS_BRACKET_FLAG, GM_COST_PLUS_BRACKET_NUMBER, GM_DEPARTMENT, GM_MERCHANDISER, GM_PRICING_FLAG, GM_SALESMAN, GM_SKID_ROUNDING_FLAG, C_COST_PLUS_BRACKET_FLAG, C_COST_PLUS_BRACKET_NUMBER, C_DEPARTMENT, C_MERCHANDISER, C_PRICING_FLAG, C_SALESMAN, C_SKID_ROUNDING_FLAG, F_COST_PLUS_BRACKET_FLAG, F_COST_PLUS_BRACKET_NUMBER, F_DEPARTMENT, F_MERCHANDISER, F_PRICING_FLAG, F_SALESMAN, F_SKID_ROUNDING_FLAG, RG_COST_PLUS_BRACKET_FLAG, RG_COST_PLUS_BRACKET_NUMBER, RG_DEPARTMENT, RG_MERCHANDISER, RG_PRICING_FLAG, RG_SALESMAN, RG_SKID_ROUNDING_FLAG, DSD_VENDOR, PRIMARY_CONTACT_NAME, PRIMARY_CONTACT_PHONE_NBR, PRIMARY_CONTACT_EMAIL_ADDR, CUSTOMER_GROUP, STORE_GROUP, FINANCE_CHARGES, INTEREST_RATE, PAYMENT_GRACE_DAYS, INTEREST_PERIOD_DAYS, STATEMENT_ADDRESS1, STATEMENT_CITY, STATEMENT_STATE, STATEMENT_ZIP, FINANCE_CHARGE_EFFECTIVE_DATE, SUMMARY_STMT_FORMAT, DELINQUENT_START_DATE, AR_SEGMENT_GROUP, AR_GROUP_NAME, IMPORTED_SOURCE, REMITTANCE_ADDR, STATEMENT_TOLRNCE, INVOICE_TOLRNCE, SALES_REP, RISK_TIER, GS_RETAIL_ACCTNG_REPORT, STATEMENT_LEVEL, RELATED_CUST_LOCKBOX_APPLLY, SEND_ACH_NOTIFTN, STOCK_NAME, MULTI_STORE, EFT, PICKUP_DAY, PAY_DUE_DAY, CREDIT_HOLD, PAYMENT_TERMS, CREDIT_GROUP, BLANK_CHECK_CUSTOMER, AD_PLAN_GROUP, AWI_PARTY_SITE_ID, AWI_MULTI_STORE_NBR, PATRONAGE_NBR, PRINT_STATEMENTS, STATEMENT_INSTRUCTIONS, SEND_TO_AWI_CDH_FLG, CM_BILLING_CYCLE, CM_TERMS, CM_NP_XFER_FLAG, CM_PRINT_STMT, CM_RET_ACCT_STMT, CM_STMT_COPIES, CM_GROUP_CODE, CM_PAYMENT_METHOD, CM_STMT_LABELS, CM_PAYMENT_DUE_DAYS, CM_SUB_STORE, CM_CUST_XREF_NO, STMT_ADJ_GROUP_CODE, CUST_CODE_XREF, SITE_PICK_SLIP_REQ, SITE_PICK_SLIP_REQ_DISP, SITE_RANK, ACC_SUBS, REJ_REASON, MAX_VOL_PALL, ITEM_PAL_ROUND, LOG_ORD_QTY, MIN_ORD_QTY, ENFORCE_DEPT, MAX_ORD_QTY, PO_REQ, PO_REQ_DISP, RET_PRC_REQ, RET_PRC_REQ_DISP, CUST_XREF, CUST_XREF_DISP, LEGACY_CUST_NBR, LEGACY_COMPANY, SHIP_TO_SITE_USE_ID, BILL_TO_SITE_USE_ID, HOSTING_DEPT, RETAILER_DEPT, CONTACT_INFO, STORE_MANAGER, STORE_OWNER, STORE_TYPE, ACCEPT_SUBS, INT_PO_REQ, LAST_BILL_DATE, DIVISION_ID, REGION, DELEGATE_AUTRIZTN_FLG, AISLE_LIMIT, ACCT_TYPE, OLD_STORE_NBR, GS_DASH_0, GS_DASH_0_DESCRIPTION, GS_DASH_1, GS_DASH_1_DESCRIPTION, GS_DASH_2, GS_DASH_2_DESCRIPTION, GS_DASH_3, GS_DASH_3_DESCRIPTION, GS_DASH_4, GS_DASH_4_DESCRIPTION, GS_DASH_5, GS_DASH_5_DESCRIPTION, GS_DASH_6, GS_DASH_6_DESCRIPTION, GS_DASH_7, GS_DASH_7_DESCRIPTION, GS_DASH_8, GS_DASH_8_DESCRIPTION, GS_DASH_9, GS_DASH_9_DESCRIPTION, PLATINUM_DEPT, EXCLUDE_PDF_BKP, GS_ADV_CRD_GROC_RPK_CAYU_PCT, GS_ADV_CRD_MEAT_BULK_PCT, GS_ADV_CRD_CORDER_REPACK_PCT, GS_ADV_CRD_FROZEN_PCT, GS_ADV_CRD_BAKERY_PCT, GS_ADV_CRD_REFRIG_PCT, GS_ADV_CRD_ICREAM_PCT, GS_SALESMAN_NBR, GS_SALESMAN_NAME, GS_CM_DROP_SHIP_FLAG, GS_CM_PAYMENT_METHOD, GS_SELECTION_SERVICE_FEE_FLAG, GS_FRZ_RBT_FLAG, GS_STMT_FROZ_ADJ_FACTOR, GS_STMT_MEAT_ADJ_FACTOR, GS_STMT_GROC_ADJ_FACTOR, GS_NOTE_PAYOFF_CREDIT_RATE, GS_SALES_ADJ_REBATE_RATE, GS_TRANSFER_BAL_DASH_NBR, GS_CUST_CROSS_REF_DASH_NBR, GS_REBATE_CHECK, GS_GROUP_CODE_DESC, GS_PORTAL_FOLDER_NBR, STORE_FAX_NBR, GS_AUD_GROUP, CREDIT_LIMIT, GS_AD_GROUP, GS_TPR_GROUP, GS_GBY_GROUP, GS_PRICING_ZONE, GS_MAP_CD, GS_MAP_CD_LTR, GS_VENDOR_PACK_IND, GS_SUPER_SERVE, GS_NABISCO, GS_WILL_CALL, SWELL, HQ_FLAG, GS_PRIMARY_SHIP_FLAG, GS_FUEL_ZONE, GS_FUEL_ZONE_DESC, GS_AD_GROUP_DESC, GS_PRICING_ZONE_DESC"
# Split the string into a list of column names
Customer_master_list = Customer_master_columns.split(', ')

# Display the list of column names
print(len(Customer_master_list),Customer_master_list)


331 ['PARTY_SITE_ID', 'STORE_NBR', 'STORE_NAME', 'STORE_STATUS', 'STORE_BILL_TO_FLAG', 'STORE_SHIP_TO_FLAG', 'ACCT_ID', 'ACCT_NBR', 'ACCT_NAME', 'ACCT_STATUS', 'ACCT_CUST_CLASS_CODE', 'ACCT_CREATION_DATE', 'CHAIN_NBR', 'CHAIN_NAME', 'CHAIN_SHORT_NAME', 'CHAIN_INACTIVE_DATE', 'START_CUST_NBR', 'STRT_CUST_NBR2', 'END_CUST_NBR', 'END_CUST_NBR2', 'PRIVATE_LABEL_OFFSET', 'SUB_AUTH_OFFST', 'AD_START_DAY_OF_WEEK', 'COMPANY_CODE', 'HQ_CUST_NBR', 'DIVISION_NBR', 'DIVISION_NAME', 'EBS_GL_CUST_GROUP_NBR', 'EBS_GL_CUST_GROUP_NAME', 'STORE_GROUP_NBR', 'STORE_GROUP_NAME', 'GROUP_BUY_NBR', 'STORE_CREATION_DATE', 'STORE_INACTIVE_DATE', 'STORE_LAST_UPDATE_DATE', 'LOCATION_ID', 'STORE_ADDRESS1', 'STORE_ADDRESS2', 'STORE_ADDRESS3', 'STORE_ADDRESS4', 'STORE_CITY', 'STORE_POSTAL_CODE', 'STORE_STATE', 'STORE_PROVINCE', 'STORE_COUNTY', 'STORE_COUNTRY', 'STORE_TELEPHONE_NBR', 'CUST_STORE_NBR', 'CUST_ACCT_PROFILE_ID', 'PAY_TERMS', 'PAY_TERM_DESC', 'PAY_TERM_START_DATE', 'PAY_TERM_END_DATE', 'COLLECTOR_NAME', '

In [47]:
# Convert lists to sets
audit_columns_set = set(Audit_columns_list)
customer_master_set = set(Customer_master_list)

# Find columns present in Audit_columns_list but not in Customer_master_list
columns_only_in_audit = audit_columns_set - customer_master_set

# Find columns present in Customer_master_list but not in Audit_columns_list
columns_only_in_customer = customer_master_set - audit_columns_set

# Display the results
print("Columns present only in Audit_columns_list:", columns_only_in_audit)
print("Columns present only in Customer_master_list:", columns_only_in_customer)
#This will output the columns that are unique to each list. The - operator is used to find the set difference, and the result is then printed.



Columns present only in Audit_columns_list: set()
Columns present only in Customer_master_list: set()


In [43]:
audit_columns_set

{'ACCEPT_SUBS',
 'ACCT_CREATION_DATE',
 'ACCT_CUST_CLASS_CODE',
 'ACCT_ID',
 'ACCT_NAME',
 'ACCT_NBR',
 'ACCT_STATUS',
 'ACCT_TYPE',
 'ACC_SUBS',
 'ADV_FLAG5',
 'ADV_FLAG6',
 'ADV_FLAG8',
 'ADV_FLAG_7',
 'AD_COMMENTS',
 'AD_PLAN_GROUP',
 'AD_START_DAY_OF_WEEK',
 'AGNE_CUSTOMER',
 'AISLE_LIMIT',
 'ALT_CUST_NBR',
 'AR_GROUP_NAME',
 'AR_SEGMENT_GROUP',
 'AWI_AD_OFFSET',
 'AWI_CDH_CUSTOMER_CLASS',
 'AWI_CDH_RACK_SERVICE',
 'AWI_CUSTOMER_TYPE',
 'AWI_MULTI_STORE_NBR',
 'AWI_PARTY_SITE_ID',
 'AWI_PRINT_BULLETIN',
 'AWI_PRODUCT',
 'AWI_SIGN_KIT',
 'BASE_CUST_NBR',
 'BB_AD_PLAN',
 'BB_MOD_PROGRAM',
 'BILL_TO_SITE_USE_ID',
 'BILL_TO_STORE_NAME',
 'BILL_TO_STORE_NBR',
 'BLANK_CHECK_CUSTOMER',
 'CALL_FOR_CUSTOMER',
 'CATALOG10',
 'CHAIN_INACTIVE_DATE',
 'CHAIN_NAME',
 'CHAIN_NBR',
 'CHAIN_SHORT_NAME',
 'CIGARETTE_LICENSE',
 'CIGARETTE_VALIDATION',
 'CM_BILLING_CYCLE',
 'CM_CUST_XREF_NO',
 'CM_GROUP_CODE',
 'CM_NP_XFER_FLAG',
 'CM_PAYMENT_DUE_DAYS',
 'CM_PAYMENT_METHOD',
 'CM_PRINT_STMT',
 'CM_RET