In [14]:
import getpass
import os

import cx_Oracle
import pandas as pd
import pyarrow.parquet as pq

In [16]:
# import pandas as pd
# import cx_Oracle

# Source database connection
source_conn = cx_Oracle.connect(
    user="apps",
    password="apps",
    dsn="csebsd2db.cswg.com:1521/csebsd2_int"
)
source_cursor = source_conn.cursor()

# Read the query from the file
with open('CM_full_cursor.sql', 'r') as file:
    query = file.read()

source_cursor.execute(query)
df = source_cursor.fetchall()

# Create a DataFrame from the fetched data
columns_info = [(desc[0], desc[1]) for desc in source_cursor.description]
columns = [desc[0] for desc in source_cursor.description]
cust_df = pd.DataFrame(df, columns=columns)

# Display the data
print("The DataFrame of GTT:")
print(cust_df)

# Access 'PARTY_SITE_ID' column and convert it to a list
party_site_ids = cust_df['PARTY_SITE_ID'].tolist()

# Display the list of party_site_ids
print("Party Site IDs:")
print(party_site_ids)

print('len(party_site_ids):', len(party_site_ids))

# Define the batch size (e.g., 1000)
batch_size = 1000

# Calculate the number of batches
num_batches = (len(party_site_ids) + batch_size - 1) // batch_size
print(num_batches)

# Assuming you have a database connection and a cursor named 'source_cursor'
# Adjust the SQL query based on your database and table structure
query_template = "SELECT * FROM xxfin.xxar_customer_master WHERE party_site_id IN ({})"

# Create an empty list to store DataFrames from each batch
all_dfs = []

# Iterate through batches
for i in range(num_batches):
    # Get the start and end indices for the current batch
    start_idx = i * batch_size
    end_idx = (i + 1) * batch_size

    # Extract the party_site_ids for the current batch
    current_batch = party_site_ids[start_idx:end_idx]

    # Generate the SQL query for the current batch
    current_query = query_template.format(','.join(map(str, current_batch)))

    # Execute the query and fetch the results
    source_cursor.execute(current_query)
    results = source_cursor.fetchall()

    # Create a DataFrame from the results
    current_columns = [desc[0] for desc in source_cursor.description]
    current_df = pd.DataFrame(results, columns=current_columns)

    # Append the current DataFrame to the list
    all_dfs.append(current_df)

# Concatenate all DataFrames into a single DataFrame
audit_df = pd.concat(all_dfs, ignore_index=True)

# Display the final DataFrame
print(audit_df)

# Get the number of rows in the final DataFrame
print(f"Number of rows in the final DataFrame: {audit_df.shape[0]}")


The DataFrame of GTT:
      PARTY_SITE_ID  STORE_NBR                 STORE_NAME STORE_STATUS  \
0             25275      20054  BLACKIE'S FARM FRESH PROD       Active   
1            376902      25317         FAIRWAY E-COMMERCE       Active   
2            376998      25401     IMPERIAL BAGS PAPER CO       Active   
3            376999      25403     N & P FOOD SERVICES IN       Active   
4            377000      25415     PLAINFIELD TOB & CANDY       Active   
...             ...        ...                        ...          ...   
2392        3467192      45018      JIB #8203- UMI STREET       Active   
2393        3470193  947230-MX      SEVEN UP CO DSD 13200       Active   
2394        3470194  947244-MX    FRITO LAY INC DSD 80024       Active   
2395        3470195  947291-MX      BERRES BROTHER COFFEE       Active   
2396        3470196  947886-MX             ANHEUSER BUSCH       Active   

     STORE_BILL_TO_FLAG STORE_SHIP_TO_FLAG  ACCT_ID ACCT_NBR  \
0                     Y  

      PARTY_SITE_ID  STORE_NBR                   STORE_NAME STORE_STATUS  \
0             25275      20054    BLACKIE'S FARM FRESH PROD       Active   
1            376902      25317           FAIRWAY E-COMMERCE       Active   
2            376998      25401       IMPERIAL BAGS PAPER CO       Active   
3            376999      25403       N & P FOOD SERVICES IN       Active   
4            377000      25415       PLAINFIELD TOB & CANDY       Active   
...             ...        ...                          ...          ...   
2387        2678359    9854083                FRED'S MARKET       Active   
2388        2859183      30115  CEDAR KNOLLS FARMERS MARKET       Active   
2389        3040128      30124                       C-TOWN       Active   
2390        3048123      30125                TUSCANY FOODS       Active   
2391        3460192  999999994               TEST 999999994       Active   

     STORE_BILL_TO_FLAG STORE_SHIP_TO_FLAG  ACCT_ID   ACCT_NBR  \
0                    

In [21]:
# Get the table name
table_name = 'xxfin.xxar_customer_master_audit'

# Define the custom columns with hardcoded values
custom_columns = {
    'request_id': 777,  # Your hardcoded value
    'creation_date': 'SYSDATE',  # Your hardcoded value
    'created_by': '-999',  # Your hardcoded value
    'last_update_date': 'SYSDATE',  # Your hardcoded value
    'last_updated_by': '-999',  # Your hardcoded value
    'last_update_login': '-999'  # Your hardcoded value
}

# Add custom columns to the DataFrame
for col, value in custom_columns.items():
    cust_df[col] = value

# Create a SQL INSERT query dynamically based on DataFrame columns
columns_sql = ', '.join(cust_df.columns)
print(len(cust_df.columns))
values_placeholder = ', '.join([':' + col for col in cust_df.columns])
print(len(values_placeholder))
insert_query = f"INSERT INTO {table_name} ({columns_sql}) VALUES ({values_placeholder})"
print(insert_query)
# Use cx_Oracle to execute the INSERT query for each row in cust_df
# with source_conn.cursor() as cursor:
#     for index, row in cust_df.iterrows():
#         cursor.execute(insert_query, row.to_dict())
with source_conn.cursor() as cursor:
    for index, row in cust_df.iterrows():
        print(f"Inserting: {row.to_dict()}")
        cursor.execute(insert_query, row.to_dict())


# Commit the changes
source_conn.commit()

322
5867
INSERT INTO xxfin.xxar_customer_master_audit (PARTY_SITE_ID, STORE_NBR, STORE_NAME, STORE_STATUS, STORE_BILL_TO_FLAG, STORE_SHIP_TO_FLAG, ACCT_ID, ACCT_NBR, ACCT_NAME, ACCT_STATUS, ACCT_CUST_CLASS_CODE, ACCT_CREATION_DATE, CHAIN_NBR, CHAIN_NAME, CHAIN_SHORT_NAME, CHAIN_INACTIVE_DATE, START_CUST_NBR, STRT_CUST_NBR2, END_CUST_NBR, END_CUST_NBR2, PRIVATE_LABEL_OFFSET, SUB_AUTH_OFFST, AD_START_DAY_OF_WEEK, COMPANY_CODE, HQ_CUST_NBR, DIVISION_NBR, DIVISION_NAME, EBS_GL_CUST_GROUP_NBR, EBS_GL_CUST_GROUP_NAME, STORE_GROUP_NBR, STORE_GROUP_NAME, GROUP_BUY_NBR, STORE_CREATION_DATE, STORE_INACTIVE_DATE, STORE_LAST_UPDATE_DATE, LOCATION_ID, STORE_ADDRESS1, STORE_ADDRESS2, STORE_ADDRESS3, STORE_ADDRESS4, STORE_CITY, STORE_POSTAL_CODE, STORE_STATE, STORE_PROVINCE, STORE_COUNTY, STORE_COUNTRY, STORE_TELEPHONE_NBR, CUST_STORE_NBR, CUST_ACCT_PROFILE_ID, PAY_TERMS, PAY_TERM_DESC, PAY_TERM_START_DATE, PAY_TERM_END_DATE, COLLECTOR_NAME, STATEMENT_CYCLE_NAME, ALT_CUST_NBR, BASE_CUST_NBR, CUST_STA

DatabaseError: DPI-1043: invalid number

In [22]:
# Replace None values with an appropriate default value (e.g., 0 for numeric columns)
cust_df.fillna(0, inplace=True)

# Create a SQL INSERT query dynamically based on DataFrame columns
columns_sql = ', '.join(cust_df.columns)
values_placeholder = ', '.join([':' + col for col in cust_df.columns])
insert_query = f"INSERT INTO {table_name} ({columns_sql}) VALUES ({values_placeholder})"

# Use cx_Oracle to execute the INSERT query for each row in cust_df
with source_conn.cursor() as cursor:
    for index, row in cust_df.iterrows():
        cursor.execute(insert_query, row.to_dict())

# Commit the changes
source_conn.commit()


DatabaseError: ORA-00932: inconsistent datatypes: expected DATE got NUMBER

In [38]:
import datetime
# Use cx_Oracle to execute the INSERT query for each row in cust_df
# Use cx_Oracle to execute the INSERT query for each row in cust_df
# Use cx_Oracle to execute the INSERT query for each row in cust_df
# Use cx_Oracle to execute the INSERT query for each row in cust_df
# Use cx_Oracle to execute the INSERT query for each row in cust_df
# Use cx_Oracle to execute the INSERT query for each row in cust_df
with source_conn.cursor() as cursor:
    for index, row in cust_df.iterrows():
        # Apply the format_value function to each value in the row
        bind_variables = [format_value(row[col]) for col in cust_df.columns]

        # Create a SQL INSERT query dynamically with bind variables
        insert_query = f"INSERT INTO {table_name} ({columns_sql}) VALUES ({', '.join([':' + str(i) for i in range(1, len(bind_variables) + 1)])})"

        # Execute the query with bind variables
        cursor.execute(insert_query, bind_variables)

# Commit the changes
source_conn.commit()




DatabaseError: ORA-12899: value too large for column "XXFIN"."XXAR_CUSTOMER_MASTER_AUDIT"."STORE_BILL_TO_FLAG" (actual: 3, maximum: 1)

In [36]:
# Use cx_Oracle to execute the INSERT query for each row in cust_df
with source_conn.cursor() as cursor:
    for index, row in cust_df.iterrows():
        # Apply the format_value function to each value in the row
        bind_variables = [format_value(row[col]) for col in cust_df.columns]

        # Create a SQL INSERT query dynamically with bind variables
        insert_query = f"INSERT INTO {table_name} ({columns_sql}) VALUES ({', '.join([':' + str(i) for i in range(1, len(bind_variables) + 1)])})"
        
        print(f"Insert Query: {insert_query}")
        print(f"Bind Variables: {bind_variables}")

        # Execute the query with bind variables
        cursor.execute(insert_query, bind_variables)

# Commit the changes
source_conn.commit()


Insert Query: INSERT INTO xxfin.xxar_customer_master_audit (PARTY_SITE_ID, STORE_NBR, STORE_NAME, STORE_STATUS, STORE_BILL_TO_FLAG, STORE_SHIP_TO_FLAG, ACCT_ID, ACCT_NBR, ACCT_NAME, ACCT_STATUS, ACCT_CUST_CLASS_CODE, ACCT_CREATION_DATE, CHAIN_NBR, CHAIN_NAME, CHAIN_SHORT_NAME, CHAIN_INACTIVE_DATE, START_CUST_NBR, STRT_CUST_NBR2, END_CUST_NBR, END_CUST_NBR2, PRIVATE_LABEL_OFFSET, SUB_AUTH_OFFST, AD_START_DAY_OF_WEEK, COMPANY_CODE, HQ_CUST_NBR, DIVISION_NBR, DIVISION_NAME, EBS_GL_CUST_GROUP_NBR, EBS_GL_CUST_GROUP_NAME, STORE_GROUP_NBR, STORE_GROUP_NAME, GROUP_BUY_NBR, STORE_CREATION_DATE, STORE_INACTIVE_DATE, STORE_LAST_UPDATE_DATE, LOCATION_ID, STORE_ADDRESS1, STORE_ADDRESS2, STORE_ADDRESS3, STORE_ADDRESS4, STORE_CITY, STORE_POSTAL_CODE, STORE_STATE, STORE_PROVINCE, STORE_COUNTY, STORE_COUNTRY, STORE_TELEPHONE_NBR, CUST_STORE_NBR, CUST_ACCT_PROFILE_ID, PAY_TERMS, PAY_TERM_DESC, PAY_TERM_START_DATE, PAY_TERM_END_DATE, COLLECTOR_NAME, STATEMENT_CYCLE_NAME, ALT_CUST_NBR, BASE_CUST_NBR, CUS

DatabaseError: ORA-00932: inconsistent datatypes: expected DATE got NUMBER

In [37]:
def format_value(value):
    if value is None or pd.isna(value):
        return 'NULL'
    elif isinstance(value, str):
        return f"'{value}'"
    elif isinstance(value, datetime.date):
        formatted_date = value.strftime("%Y-%m-%d %H:%M:%S")
        return f"TO_DATE('{formatted_date}', 'YYYY-MM-DD HH24:MI:SS')"
    else:
        return str(value)
