In [11]:
try:
    import pyodbc
except ImportError:
    print("pyodbc module not found. Install it with 'pip install pyodbc'")
    exit(1)

# List all available ODBC drivers
drivers = pyodbc.drivers()

if drivers:
    print("ODBC drivers installed on your system:")
    for driver in drivers:
        print(driver)
else:
    print("No ODBC drivers found on your system.")


ODBC drivers installed on your system:
SQL Server


In [16]:
import pandas as pd
from sqlalchemy import create_engine

# Replace the connection string with your actual SQL Server details
connection_string = (
    'mssql+pyodbc://pos:pos@10.102.30.116/xstore?driver=SQL+Server'
)

# Create an SQLAlchemy engine
engine = create_engine(connection_string)

# Read the mapping CSV file
mapping_df = pd.read_csv(r'C:\Users\trieu.pham\Downloads\Mapping - 1.csv')

# Extract the list of tables from the "v7" column
tables = mapping_df['v7'].tolist()

template = "SELECT * FROM {table} WHERE employee_id between 100 and 106;"
template2 = "SELECT * FROM {table} ;"

# Location to export csv files
output_folder = r'C:\Users\trieu.pham\Downloads\Data Compare'
# Clear all csv files in the output folder
import os
import glob
files = glob.glob(output_folder + '/*.csv')
for f in files:
    os.remove(f)

for table in tables:
    # Try to check using the first template
    try:
        sql_query = template.format(table=table)
        df = pd.read_sql_query(sql_query, engine)
    except Exception as e:
        print(f"Failed to execute the query with the first template for table '{table}': {e}")
        # Export fail table to a CSV file in the output_folder, then skip to the next table
        csv_filename = f"{output_folder}/{table}_fail.csv"
        print("Trying the second template...")

        # Try to check using the second template
        try:
            sql_query = template2.format(table=table)
            df = pd.read_sql_query(sql_query, engine)
        except Exception as e:
            print(f"Failed to execute the query with the second template for table '{table}': {e}")
            continue

    # Export the DataFrame to a CSV file in the output_folder
    csv_filename = f"{output_folder}/{table}_v7.csv"
    df.to_csv(csv_filename, index=False)
    print(f"Exported {table} to {csv_filename}")

# Close the database connection
engine.dispose()

print("All operations completed.")


Exported hrs_employee to C:\Users\trieu.pham\Downloads\Data Compare/hrs_employee_v7.csv
Failed to execute the query with the first template for table 'sec_privilege': (pyodbc.ProgrammingError) ('42S22', "[42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'employee_id'. (207) (SQLExecDirectW); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'employee_id'. (207)")
[SQL: SELECT * FROM sec_privilege WHERE employee_id between 100 and 106;]
(Background on this error at: https://sqlalche.me/e/20/f405)
Trying the second template...
Exported sec_privilege to C:\Users\trieu.pham\Downloads\Data Compare/sec_privilege_v7.csv
All operations completed.


In [3]:
import pandas as pd
from sqlalchemy import create_engine

# Replace the connection string with your actual SQL Server details
connection_string = (
    'mssql+pyodbc://dtv:dtv@10.102.30.11/xstore?driver=SQL+Server'
)

# Create an SQLAlchemy engine
engine = create_engine(connection_string)

# Read the mapping CSV file
mapping_df = pd.read_csv(r'C:\Users\trieu.pham\Downloads\Mapping - 1.csv')

# Extract the list of tables from the "v7" column
tables = mapping_df['v23'].tolist()

template = "SELECT * FROM {table} WHERE employee_id between 100 and 106;"
template2 = "SELECT * FROM {table} ;"

# Location to export csv files
output_folder = r'C:\Users\trieu.pham\Downloads\Data Compare'

for table in tables:
    # Try to check using the first template
    try:
        sql_query = template.format(table=table)
        df = pd.read_sql_query(sql_query, engine)
    except Exception as e:
        print(f"Failed to execute the query with the first template for table '{table}': {e}")
        # Export fail table to a CSV file in the output_folder, then skip to the next table
        csv_filename = f"{output_folder}/{table}_fail.csv"
        print("Trying the second template...")

        # Try to check using the second template
        try:
            sql_query = template2.format(table=table)
            df = pd.read_sql_query(sql_query, engine)
        except Exception as e:
            print(f"Failed to execute the query with the second template for table '{table}': {e}")
            continue

    # Export the DataFrame to a CSV file in the output_folder
    csv_filename = f"{output_folder}/{table}_v23.csv"
    df.to_csv(csv_filename, index=False)
    print(f"Exported {table} to {csv_filename}")

# Close the database connection
engine.dispose()

print("All operations completed.")


Failed to execute the query with the first template for table 'cat_cust_acct': (pyodbc.ProgrammingError) ('42S22', "[42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'employee_id'. (207) (SQLExecDirectW); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'employee_id'. (207)")
[SQL: SELECT * FROM cat_cust_acct WHERE employee_id between 100 and 106;]
(Background on this error at: https://sqlalche.me/e/20/f405)
Trying the second template...
Exported cat_cust_acct to C:\Users\trieu.pham\Downloads\Data Compare/cat_cust_acct_v23.csv
Failed to execute the query with the first template for table 'cat_cust_acct_journal': (pyodbc.ProgrammingError) ('42S22', "[42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'employee_id'. (207) (SQLExecDirectW); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'employee_id'. (207)")
[SQL: SELECT * FROM cat_cust_acct_journal WHERE employee_id between 100 and 106;

In [17]:
import pandas as pd

# Read the CSV files into DataFrames
df_v7 = pd.read_csv(r'C:\Users\trieu.pham\Downloads\Data Compare\sec_privilege_v7.csv')
df_v23 = pd.read_csv(r'C:\Users\trieu.pham\Downloads\Data Compare\sec_privilege_v23.csv')

# Merge the DataFrames on the 'privilege_type' column
merged_df = pd.merge(df_v7, df_v23, on='privilege_type', how='outer', suffixes=('_v7', '_v23'))

# Find differences
differences = merged_df[merged_df.filter(like='_v7').ne(merged_df.filter(like='_v23')).any(axis=1)]

# Output differences to a new CSV file
differences.to_csv('differences.csv', index=False)

# Display differences
print(differences)


     organization_id_v7               privilege_type  authentication_req_v7  \
0                   1.0  ACCEPT_WARRANTY_NOT_ON_FILE                    0.0   
1                   1.0           ACCESS_OTHER_TILLS                    0.0   
2                   NaN       ACTIVATE_SELF_CHECKOUT                    NaN   
3                   1.0                   ADD_COUPON                    0.0   
4                   1.0                 ADD_DISCOUNT                    0.0   
..                  ...                          ...                    ...   
204                 1.0                VOID_DISCOUNT                    0.0   
205                 1.0                    VOID_LINE                    0.0   
206                 1.0                  VOID_TENDER                    0.0   
207                 1.0            WORKSTATION_CLOSE                    0.0   
208                 1.0             WORKSTATION_OPEN                    0.0   

                                        description

In [2]:
# Get the name of file in specific folder
import os
import glob

# Location of specific folder
folder_path = r'C:\Users\trieu.pham\OneDrive - BTM Global Consulting\Work\Data Cust'

# Get all files in the folder
files = glob.glob(folder_path + '/*.dtsx')

# Get the name of file in files, remove extension
for f in files:
    file_name = os.path.splitext(os.path.basename(f))[0]
    print(file_name)


cat_cust_acct
cat_cust_acct_journal
cat_cust_item_acct
cat_cust_item_acct_activity
cat_cust_item_acct_detail
cat_cust_item_acct_journal
cat_delivery_modifier
crm_customer_Affiliation
crm_party
crm_party_cross_reference
crm_party_email
crm_party_locale_information
crm_party_telephone
