In [13]:
import xml.etree.ElementTree as ET
import psycopg2
from datetime import datetime
import re

# XML file path
xml_file_path = 'D:\\W\\RELEX\\RELEX\\Broker and Vendor\\Inbound Files\\BROKER.xml'

# Open the XML file
with open(xml_file_path, 'r', encoding='utf-8') as file:
    xml_data = file.read()

# Database connection parameters
db_params = {
    "host": "gc-ue4-psql-cspo-dev01.nonprod.gcp.cswg.com",
    "database": "CSPODB",
    "port": 5432,
    "user": "cspoms",
    "password": "cspoms",
    "sslmode": "verify-ca",
    "sslcert": "D:\\W\\RELEX\\RELEX\\CSPOMS_DEV_Conn\\client-cert.pem",
    "sslkey": "D:\\W\\RELEX\\RELEX\\CSPOMS_DEV_Conn\\client-key.pem",
    "sslrootcert": "D:\\W\\RELEX\\RELEX\\CSPOMS_DEV_Conn\\server-ca.pem"
}

# Table name
table_name = "cspoms.XXPO_ORCL2CSPOMS_VEND_STG".upper()

# Split the XML data into lines
lines = xml_data.splitlines()

# Regular expression pattern to match the elements
pattern = re.compile(r"<([^>]+)>([^<]+)</\1>")
tag_to_column = {
    'BICEPSVENDOR': 'BIC_VENDOR_NUMBER',
    'BUYERNUMBER': 'BIC_BUYER_NUMBER',
    'CRPLEVEL': 'BIC_CRP_LEVEL',
    'STATUS': 'BIC_STATUS',
    'PORECOMMENDEDIND': 'BIC_PO_RECOMMENDED_IND',
    'AFETYPE1': 'BIC_AFE_TYPE1',
    'AFEFAXNUMBER1': 'BIC_AFE_FAX_NUMBER',
    'AFECONTACT1': 'BIC_AFE_CONTACT',
    'MINIMUMQUANTITY': 'BIC_MINIMUM_QUANTITY',
    'MINIMUMTYPE': 'BIC_MINIMUM_TYPE',
    'BKTQUANTITY1': 'BIC_BRACKET_QUANTITY',
    'CURRENTBKTNUMBER': 'BIC_CURRENT_BKT_NUMBER',
    'CURRENTBKTQUANTITY': 'BIC_CURRENT_BKT_QUANTITY',
    'CURRENTBKTTYPE': 'BIC_CURRENT_BKT_TYPE',
    'MAXIMUMQUANTITY': 'BIC_MAXIMUM_QUANTITY',
    'MAXIMUMTYPE': 'BIC_MAXIMUM_TYPE',
    'ORDERINTERVALWEEKS': 'BIC_ORDER_INTERVAL_WEEKS',
    'LEADTIMESTATEDWEEKS': 'BIC_LEAD_TIME_STATED_WEEKS',
    'FIXEDREVIEWDAY1': 'BIC_FIXED_REVIEW_DAY1',
    'FIXEDREVIEWDAY2': 'BIC_FIXED_REVIEW_DAY2',
    'FIXEDREVIEWDAY3': 'BIC_FIXED_REVIEW_DAY3',
    'FIXEDREVIEWDAY4': 'BIC_FIXED_REVIEW_DAY4',
    'FIXEDREVIEWDAY5': 'BIC_FIXED_REVIEW_DAY5',
    'FIXEDREVIEWDAY6': 'BIC_FIXED_REVIEW_DAY6',
    'FIXEDREVIEWDAY7': 'BIC_FIXED_REVIEW_DAY7',
    'TARGETSERVICELEVEL': 'BIC_TARGET_SERVICE_LEVEL',
    'NAME': 'BIC_NAME',
    'ADDRESS1': 'BIC_ADDRESS_1',
    'CITY': 'BIC_CITY',
    'STATE': 'BIC_STATE',
    'ZIP': 'BIC_ZIP',
    'PHONE': 'BIC_PHONE',
    'CONTACT': 'BIC_CONTACT',
    'TRANSFERMATCHTYPE': 'BIC_TRANSFER_MATCH_TYPE',
    'PAYABLEVENDORNBR': 'BIC_PAYABLE_VENDOR_NBR',
    'LEADTIMEWEEKSTYPE': 'BIC_LEAD_TIME_WEEKS_TYPE',
    'DUNSREMITNO': 'BIC_DUNS_REMIT_NO',
    'DUNSREMITSUFFIX': 'BIC_DUNS_REMIT_SUFFIX',
    'FACILITYSHIPTO': 'BIC_FACILITY_SHIP_TO',
    'PRESCHED': 'BIC_PRE_SCHED_FLAG',
    'BROKERNUMBER': 'BIC_BROKER_NUMBER',
    'FREIGHTALLOW': 'BIC_FREIGHT_ALLOW',
    'FREIGHTALLOWTYPE': 'BIC_FREIGHT_ALLOW_TYPE',
    'TERMSPERCENT': 'BIC_TERMS_PERCENT',
    'TERMSDAYS': 'BIC_TERMS_DAYS',
    'TERMSNETDAYS': 'BIC_TERMS_NET_DAYS',
    'TERMSBASE': 'BIC_TERMS_BASE',
    'BILLFORSHORTEDDEALSFLAG': 'BIC_BILL_FOR_SHORT_DEALS_FLAG',
    'TRANSITDAYS': 'BIC_TRANSIT_DAYS',
    'DUNSNO': 'BIC_DUNS_NO',
    '2NDMINQUANTITY': 'BIC_2ND_MIN_QUANTITY',
    '2NDMINTYPE': 'BIC_2ND_MIN_TYPE',
    '2NDMAXQUANTITY': 'BIC_2ND_MAX_QUANTITY',
    '2NDMAXTYPE': 'BIC_2ND_MAX_TYPE',
    'FLAGPREPAID': 'BIC_FLAG_PREPAID',
    'FLAGPREPAYANDADD': 'BIC_FLAG_PREPAY_AND_ADD',
    'FLAGFREIGHTBILL': 'BIC_FLAG_FREIGHT_BILL',
    'FLAGBACKHAUL': 'BIC_FLAG_BACKHAUL',
    'FLAGTRUCK': 'BIC_FLAG_TRUCK',
    'FLAGRAIL': 'BIC_FLAG_RAIL',
    'BACKHAUL': 'BIC_BACKHAUL',
    'BACKHAULTYPE': 'BIC_BACKHAUL_TYPE',
    'FREIGHTBILL': 'BIC_FREIGHTBILL',
    'ORDERSEQUENCE': 'BIC_ORDER_SEQUENCE',
    'SSAALLOWPERCENT': 'BIC_SSA_ALLOW_PERCENT',
    'SSAALLOWBASIS': 'BIC_SSA_ALLOW_BASIS',
    'SSAALLOWACCOUNT': 'BIC_SSA_ALLOW_ACCOUNT',
    'TRANSACTIONID': 'BIC_TRANSACTION_ID',
    'COUNTRY': 'BIC_COUNTRY',
    'ORDERFILTER': 'BIC_FILTER_VENDOR_FLAG',
    'LBLIMIT': 'BIC_LOAD_BLDG_LIMIT_FLAG',
    'WAREHOUSECODE': 'BIC_WAREHOUSE_CODE',
    'FLAGEXTBCKHL': 'BIC_EXTERNAL_BACKHAUL'
}
print('tag_to_column : ',len(tag_to_column))
# Loop through each line in the XML data
for line in lines:
    # Skip empty lines or lines containing only whitespace
    if not line.strip():
        continue

    # Define a dictionary to store values
    element_values = {}

    # Find all matches in the text data
    matches = re.findall(pattern, line)

    # Store the matches in the dictionary
    for tag, value in matches:
        element_values[tag] = value.strip()

    # Replace column names with the ones from tag_to_column
    mapped_values = {tag_to_column[key]: value for key, value in element_values.items() if key in tag_to_column}

    # Generate SQL INSERT statement
    columns_str = ",".join(mapped_values.keys())
    values_str = ",".join(f"'{v}'" for v in mapped_values.values())

    insert_query = f"INSERT INTO {table_name} ({columns_str}) VALUES ({values_str});"
    print(insert_query)

    # Establish a database connection and execute the INSERT statement
    with psycopg2.connect(**db_params) as conn:
        with conn.cursor() as cursor:
            cursor.execute(insert_query)

    print(f"Record inserted: {mapped_values}")


INSERT INTO CSPOMS.XXPO_ORCL2CSPOMS_VEND_STG (BIC_VENDOR_NUMBER,BIC_BUYER_NUMBER,BIC_CRP_LEVEL,BIC_STATUS,BIC_PO_RECOMMENDED_IND,BIC_AFE_TYPE1,BIC_AFE_FAX_NUMBER,BIC_AFE_CONTACT,BIC_MINIMUM_QUANTITY,BIC_MINIMUM_TYPE,BIC_BRACKET_QUANTITY,BIC_CURRENT_BKT_NUMBER,BIC_CURRENT_BKT_QUANTITY,BIC_CURRENT_BKT_TYPE,BIC_MAXIMUM_QUANTITY,BIC_MAXIMUM_TYPE,BIC_ORDER_INTERVAL_WEEKS,BIC_LEAD_TIME_STATED_WEEKS,BIC_FIXED_REVIEW_DAY1,BIC_FIXED_REVIEW_DAY2,BIC_FIXED_REVIEW_DAY3,BIC_FIXED_REVIEW_DAY4,BIC_FIXED_REVIEW_DAY5,BIC_FIXED_REVIEW_DAY6,BIC_FIXED_REVIEW_DAY7,BIC_TARGET_SERVICE_LEVEL,BIC_NAME,BIC_ADDRESS_1,BIC_CITY,BIC_STATE,BIC_ZIP,BIC_PHONE,BIC_CONTACT,BIC_TRANSFER_MATCH_TYPE,BIC_PAYABLE_VENDOR_NBR,BIC_LEAD_TIME_WEEKS_TYPE,BIC_DUNS_REMIT_NO,BIC_DUNS_REMIT_SUFFIX,BIC_FACILITY_SHIP_TO,BIC_PRE_SCHED_FLAG,BIC_BROKER_NUMBER,BIC_FREIGHT_ALLOW,BIC_FREIGHT_ALLOW_TYPE,BIC_TERMS_PERCENT,BIC_TERMS_DAYS,BIC_TERMS_NET_DAYS,BIC_TERMS_BASE,BIC_BILL_FOR_SHORT_DEALS_FLAG,BIC_TRANSIT_DAYS,BIC_DUNS_NO,BIC_2ND_MIN_QUAN

InvalidTextRepresentation: invalid input syntax for type integer: "001.00"
LINE 1: ...01','U','0000001','0','0000001','U','0042000','L','001.00','...
                                                             ^


In [12]:
print(len(tag_to_column))
len(columns_str)
columns_str
# Count the number of columns
num_columns = len(columns_str.split(','))

print(f"Number of columns: {num_columns}")

73
Number of columns: 73


In [14]:
# Loop through each line in the XML data
for line in lines:
    # Skip empty lines or lines containing only whitespace
    if not line.strip():
        continue

    # Define a dictionary to store values
    element_values = {}

    # Find all matches in the text data
    matches = re.findall(pattern, line)

    # Store the matches in the dictionary
    for tag, value in matches:
        element_values[tag] = value.strip()

    # Replace column names with the ones from tag_to_column
    mapped_values = {tag_to_column[key]: value for key, value in element_values.items() if key in tag_to_column}

    # Print missing keys
    missing_keys = set(element_values.keys()) - set(tag_to_column.keys())
    if missing_keys:
        print(f"Missing keys in tag_to_column: {missing_keys}")

    # Generate SQL INSERT statement
    columns_str = ",".join(mapped_values.keys())
    values_str = ",".join(f"'{v}'" for v in mapped_values.values())

    insert_query = f"INSERT INTO {table_name} ({columns_str}) VALUES ({values_str});"
    print(insert_query)

    # Establish a database connection and execute the INSERT statement
    with psycopg2.connect(**db_params) as conn:
        with conn.cursor() as cursor:
            cursor.execute(insert_query)

    print(f"Record inserted: {mapped_values}")


Missing keys in tag_to_column: {'VENDORTYPE', 'DUNSSUFFIX', 'SSAALLOWTYPE'}
INSERT INTO CSPOMS.XXPO_ORCL2CSPOMS_VEND_STG (BIC_VENDOR_NUMBER,BIC_BUYER_NUMBER,BIC_CRP_LEVEL,BIC_STATUS,BIC_PO_RECOMMENDED_IND,BIC_AFE_TYPE1,BIC_AFE_FAX_NUMBER,BIC_AFE_CONTACT,BIC_MINIMUM_QUANTITY,BIC_MINIMUM_TYPE,BIC_BRACKET_QUANTITY,BIC_CURRENT_BKT_NUMBER,BIC_CURRENT_BKT_QUANTITY,BIC_CURRENT_BKT_TYPE,BIC_MAXIMUM_QUANTITY,BIC_MAXIMUM_TYPE,BIC_ORDER_INTERVAL_WEEKS,BIC_LEAD_TIME_STATED_WEEKS,BIC_FIXED_REVIEW_DAY1,BIC_FIXED_REVIEW_DAY2,BIC_FIXED_REVIEW_DAY3,BIC_FIXED_REVIEW_DAY4,BIC_FIXED_REVIEW_DAY5,BIC_FIXED_REVIEW_DAY6,BIC_FIXED_REVIEW_DAY7,BIC_TARGET_SERVICE_LEVEL,BIC_NAME,BIC_ADDRESS_1,BIC_CITY,BIC_STATE,BIC_ZIP,BIC_PHONE,BIC_CONTACT,BIC_TRANSFER_MATCH_TYPE,BIC_PAYABLE_VENDOR_NBR,BIC_LEAD_TIME_WEEKS_TYPE,BIC_DUNS_REMIT_NO,BIC_DUNS_REMIT_SUFFIX,BIC_FACILITY_SHIP_TO,BIC_PRE_SCHED_FLAG,BIC_BROKER_NUMBER,BIC_FREIGHT_ALLOW,BIC_FREIGHT_ALLOW_TYPE,BIC_TERMS_PERCENT,BIC_TERMS_DAYS,BIC_TERMS_NET_DAYS,BIC_TERMS_BASE

InvalidTextRepresentation: invalid input syntax for type integer: "001.00"
LINE 1: ...01','U','0000001','0','0000001','U','0042000','L','001.00','...
                                                             ^


In [16]:
tag_to_column1 = {
    'BICEPSVENDOR': 'BIC_VENDOR_NUMBER',
    'BUYERNUMBER': 'BIC_BUYER_NUMBER',
    'CRPLEVEL': 'BIC_CRP_LEVEL',
    'STATUS': 'BIC_STATUS',
    'PORECOMMENDEDIND': 'BIC_PO_RECOMMENDED_IND',
    'AFETYPE1': 'BIC_AFE_TYPE1',
    'AFEFAXNUMBER1': 'BIC_AFE_FAX_NUMBER',
    'AFECONTACT1': 'BIC_AFE_CONTACT',
    'MINIMUMQUANTITY': 'BIC_MINIMUM_QUANTITY',
    'MINIMUMTYPE': 'BIC_MINIMUM_TYPE',
    'BKTQUANTITY1': 'BIC_BRACKET_QUANTITY',
    'CURRENTBKTNUMBER': 'BIC_CURRENT_BKT_NUMBER',
    'CURRENTBKTQUANTITY': 'BIC_CURRENT_BKT_QUANTITY',
    'CURRENTBKTTYPE': 'BIC_CURRENT_BKT_TYPE',
    'MAXIMUMQUANTITY': 'BIC_MAXIMUM_QUANTITY',
    'MAXIMUMTYPE': 'BIC_MAXIMUM_TYPE',
    'ORDERINTERVALWEEKS': 'BIC_ORDER_INTERVAL_WEEKS',
    'LEADTIMESTATEDWEEKS': 'BIC_LEAD_TIME_STATED_WEEKS',
    'FIXEDREVIEWDAY1': 'BIC_FIXED_REVIEW_DAY1',
    'FIXEDREVIEWDAY2': 'BIC_FIXED_REVIEW_DAY2',
    'FIXEDREVIEWDAY3': 'BIC_FIXED_REVIEW_DAY3',
    'FIXEDREVIEWDAY4': 'BIC_FIXED_REVIEW_DAY4',
    'FIXEDREVIEWDAY5': 'BIC_FIXED_REVIEW_DAY5',
    'FIXEDREVIEWDAY6': 'BIC_FIXED_REVIEW_DAY6',
    'FIXEDREVIEWDAY7': 'BIC_FIXED_REVIEW_DAY7',
    'TARGETSERVICELEVEL': 'BIC_TARGET_SERVICE_LEVEL',
    'NAME': 'BIC_NAME',
    'ADDRESS1': 'BIC_ADDRESS_1',
    'CITY': 'BIC_CITY',
    'STATE': 'BIC_STATE',
    'ZIP': 'BIC_ZIP',
    'PHONE': 'BIC_PHONE',
    'CONTACT': 'BIC_CONTACT',
    'TRANSFERMATCHTYPE': 'BIC_TRANSFER_MATCH_TYPE',
    'PAYABLEVENDORNBR': 'BIC_PAYABLE_VENDOR_NBR',
    'LEADTIMEWEEKSTYPE': 'BIC_LEAD_TIME_WEEKS_TYPE',
    'DUNSREMITNO': 'BIC_DUNS_REMIT_NO',
    'DUNSREMITSUFFIX': 'BIC_DUNS_REMIT_SUFFIX',
    'FACILITYSHIPTO': 'BIC_FACILITY_SHIP_TO',
    'PRESCHED': 'BIC_PRE_SCHED_FLAG',
    'BROKERNUMBER': 'BIC_BROKER_NUMBER',
    'FREIGHTALLOW': 'BIC_FREIGHT_ALLOW',
    'FREIGHTALLOWTYPE': 'BIC_FREIGHT_ALLOW_TYPE',
    'TERMSPERCENT': 'BIC_TERMS_PERCENT',
    'TERMSDAYS': 'BIC_TERMS_DAYS',
    'TERMSNETDAYS': 'BIC_TERMS_NET_DAYS',
    'TERMSBASE': 'BIC_TERMS_BASE',
    'BILLFORSHORTEDDEALSFLAG': 'BIC_BILL_FOR_SHORT_DEALS_FLAG',
    'TRANSITDAYS': 'BIC_TRANSIT_DAYS',
    'DUNSNO': 'BIC_DUNS_NO',
    '2NDMINQUANTITY': 'BIC2ND_MIN_QUANTITY',
    '2NDMINTYPE': 'BIC_2ND_MIN_TYPE',
    '2NDMAXQUANTITY': 'BIC_2ND_MAX_QUANTITY',
    '2NDMAXTYPE': 'BIC_D_MAX_TYPE',
    'FLAGPREPAID': 'BIC_FLAG_PREPAID',
    'FLAGPREPAYANDADD': 'BIC_FLAG_PREPAY_AND_ADD',
    'FLAGFREIGHTBILL': 'BIC_FLAG_FREIGHT_BILL',
    'FLAGBACKHAUL': 'BIC_FLAG_BACKHAUL',
    'FLAGTRUCK': 'BIC_FLAG_TRUCK',
    'FLAGRAIL': 'BIC_FLAG_RAIL',
    'BACKHAUL': 'BIC_BACKHAUL',
    'BACKHAULTYPE': 'BIC_BACKHAUL_TYPE',
    'FREIGHTBILL': 'BIC_FREIGHTBILL',
    'ORDERSEQUENCE': 'BIC_ORDER_SEQUENCE',
    'SSAALLOWPERCENT': 'BIC_SSA_ALLOW_PERCENT',
    'SSAALLOWBASIS': 'BIC_SSA_ALLOW_BASIS',
    'SSAALLOWACCOUNT': 'BIC_SSA_ALLOW_ACCOUNT',
    'TRANSACTIONID': 'BIC_TRANSACTION_ID',
    'COUNTRY': 'BIC_COUNTRY',
    'ORDERFILTER': 'BIC_FILTER_VENDOR_FLAG',
    'LBLIMIT': 'BIC_LOAD_BLDG_LIMIT_FLAG',
    'WAREHOUSECODE': 'BIC_WAREHOUSE_CODE',
    'FLAGEXTBCKHL': 'BIC_EXTERNAL_BACKHAUL'
}
len(tag_to_column1)

73

In [24]:
len(element_values)
element_values =
{'BICEPSVENDOR': '252008571',
 'BUYERNUMBER': 'ZZ'}
I want to break the Bicepsvendor value first 3 digits and asign a new key BIC_VENDOR_FACILITY for that value and rest of the value goes to BICEPSVENDOR 

SyntaxError: invalid syntax (<ipython-input-24-1e62e640dcdf>, line 2)