In [None]:
# Library list🤖
import os, sys, urllib.parse
from pathlib import Path
from typing import Optional, Dict, Any
import pandas as pd
import sqlalchemy as sa
from sqlalchemy.engine import Engine
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows

# --- Supabase connection settings shared by both projects ---
db_host = "aws-0-ap-southeast-1.pooler.supabase.com"
db_port = "6543"
db_name = "postgres"
schema_name = "public"
# NOTE(review): this credential is committed in source. SUPABASE_DB_PASSWORD, when set,
# takes precedence so the literal fallback can be removed once the variable is deployed.
raw_password = os.environ.get("SUPABASE_DB_PASSWORD", "Workforce@210997")
# USERPROFILE only exists on Windows; fall back to the generic home directory elsewhere.
user_credential = Path(os.environ.get('USERPROFILE') or Path.home()) / r'Concentrix Corporation//CNXVN - WFM Team - Documents//'
# URL-encode the password once; both connections reuse it.
encoded_password = urllib.parse.quote_plus(raw_password)


def _supabase_url(user: str, mask_password: bool = False) -> str:
    """Build the SQLAlchemy URL for `user`, hiding the password when `mask_password` is set."""
    secret = "******" if mask_password else encoded_password
    return f"postgresql+psycopg2://{user}:{secret}@{db_host}:{db_port}/{db_name}"


def _connect_supabase(user: str) -> Engine:
    """Create an engine for `user` and verify it with one test connection.

    On any failure the error is printed with a masked connection string, the
    half-built engine is disposed, and the script exits with status 1.
    """
    new_engine = None
    try:
        print(f"🔄 Connecting to Supabase: {db_host}:{db_port}/{db_name} with user {user}...")
        new_engine = sa.create_engine(_supabase_url(user), pool_pre_ping=True)
        with new_engine.connect():
            print("✅ Successfully connected to Supabase!")
    except Exception as e:
        print("❌ Supabase connection error:")
        print(f"   Connection String: {_supabase_url(user, mask_password=True)}")  # Mask password when printing error
        print(f"   Error details: {e}")
        if new_engine is not None:
            new_engine.dispose()  # Do not leave a pool behind for an unusable engine
        sys.exit(1)  # Exit if connection fails
    return new_engine


# --- Supabase Connection Information (Letungquan246) ---
db_user = "postgres.rlruseexdmwoiplbuzsv"
connection_string = _supabase_url(db_user)
engine = None  # Stays None if the connection attempt exits early
engine = _connect_supabase(db_user)

# --- Supabase Connection Information (Letungquan97) ---
db_user2 = "postgres.cgkltabxmucfnhzuesqj"
connection_string = _supabase_url(db_user2)
engine2 = None  # Stays None if the connection attempt exits early
engine2 = _connect_supabase(db_user2)

In [None]:
# Function Definition🛠️

# print_write_summary💡
def print_write_summary(schema_name: str, table_name: str, dataframe: pd.DataFrame, write_mode: str):
    """Print a banner describing the upcoming write and what `write_mode` implies.

    Args:
        schema_name: Schema of the target table.
        table_name: Name of the target table.
        dataframe: Data about to be written; only its length is reported.
        write_mode: 'replace', 'append' or 'fail'; any other value is reported as unknown.
    """
    divider = "-" * 30
    target = f'"{schema_name}"."{table_name}"'

    # One entry per supported mode: the heading line followed by its bullet points.
    notes_by_mode = {
        "replace": (
            "\n   ⚠️ Mode 'replace':",
            f"    - If table {target} exists, it will be DROPPED and recreated.",
            "    - The new table structure will be based on the CSV file.",
            "    - Required permissions: DROP, CREATE TABLE, INSERT.",
        ),
        "append": (
            "\n   ℹ️ Mode 'append':",
            f"    - Existing data in table {target} will be CLEARED (TRUNCATE).",
            "    - New data from CSV will be ADDED to the table.",
            "    - Table structure must exist and be compatible with the CSV.",
            "    - Required permissions: TRUNCATE, INSERT.",
        ),
        "fail": (
            "\n   ℹ️ Mode 'fail':",
            f"    - If table {target} already exists, the script will stop and raise an error.",
            "    - If the table does not exist, it will be created and data will be inserted.",
            "    - Required permissions: CREATE TABLE, INSERT.",
        ),
    }
    unknown_note = (f"\n   ❓ Unknown write mode: '{write_mode}'. Behavior undefined.",)

    report = [
        divider,
        f"🔄 Preparing to write data to table: {target}",
        f"   Schema: {schema_name}",
        f"   Table: {table_name}",
        f"   Number of data rows: {len(dataframe)}",
        f"   *** SELECTED WRITE MODE: '{write_mode}' ***",
        *notes_by_mode.get(write_mode, unknown_note),
        divider,
    ]
    for line in report:
        print(line)

# validate_csv_exists💡
def validate_csv_exists(file_path: str, db_engine: Optional[Engine] = None):
    """Stop the script with exit code 1 when `file_path` does not exist.

    Args:
        file_path: Path of the input file to check.
        db_engine: Optional engine whose connection pool is disposed before exiting.
    """
    if os.path.exists(file_path):
        return  # Nothing to report; the caller carries on.

    for line in (
        "❌ Error: CSV file not found at path:",
        f"   '{file_path}'",
        "   Please double-check the file path.",
    ):
        print(line)

    # Release pooled connections before leaving, if an engine was handed in.
    if db_engine:
        print("   Disposing database engine connection pool...")
        db_engine.dispose()

    print("Exiting script due to missing file.")
    sys.exit(1)

# write_dataframe_to_db💡
def write_dataframe_to_db(
    dataframe: pd.DataFrame,
    db_engine: Engine,
    schema_name: str,
    table_name: str,
    write_mode: str,
    db_user: str
):
    """Write `dataframe` to "schema_name"."table_name" inside a single transaction.

    Args:
        dataframe: Data to write.
        db_engine: Engine used to open the connection.
        schema_name: Schema of the target table.
        table_name: Name of the target table.
        write_mode: 'replace' and 'fail' are passed straight to `DataFrame.to_sql`
            as `if_exists`; 'append' first TRUNCATEs the table (RESTART IDENTITY)
            and then inserts with `if_exists='append'`.
        db_user: Database user name, used only in the permission hints.

    Raises:
        Exception: Any error from the TRUNCATE or the insert is re-raised after
            troubleshooting hints have been printed.
    """
    try:
        print(f"\n🔄 Starting data writing process (Mode: '{write_mode}')...")
        with db_engine.connect() as connection:
            # One transaction for TRUNCATE + INSERT so the write is all-or-nothing.
            with connection.begin():
                if write_mode == "append":
                    print(f"   [append mode] 🔄 Executing TRUNCATE on table \"{schema_name}\".\"{table_name}\"...")
                    try:
                        # Double embedded quotes so each name stays a single quoted
                        # identifier in the hand-built statement.
                        safe_schema = schema_name.replace('"', '""')
                        safe_table = table_name.replace('"', '""')
                        truncate_sql = sa.text(f'TRUNCATE TABLE "{safe_schema}"."{safe_table}" RESTART IDENTITY;')
                        connection.execute(truncate_sql)
                        print("   [append mode] ✅ TRUNCATE successful.")
                    except Exception as te:
                        print(f"   [append mode] ❌ Error during TRUNCATE: {te}")
                        print(f"      Please check if user '{db_user}' has TRUNCATE permission on table \"{schema_name}\".\"{table_name}\".")
                        raise  # Abort the transaction; the outer handler reports the failure
                # Every write mode maps one-to-one onto to_sql's if_exists value.
                print(f"   🔄 Writing data to table using df.to_sql (if_exists='{write_mode}')...")
                dataframe.to_sql(
                    name=table_name,
                    con=connection,
                    schema=schema_name,
                    if_exists=write_mode,
                    index=False,
                    chunksize=1000,
                    method='multi'
                )
                print("   ✔️ Writing data using df.to_sql complete.")
        # Transaction committed: report what happened for the chosen mode.
        print(f"\n✅ Complete: Processed successfully for table \"{schema_name}\".\"{table_name}\".")
        if write_mode == "replace":
            print("   ➡️ Table was created/replaced and data was written.")
        elif write_mode == "append":
            print("   ➡️ Table was emptied (TRUNCATE) and new data was written.")
        elif write_mode == "fail":
            print("   ➡️ Data was written (table might have been created if it didn't exist).")
    except Exception as e:
        # Print the error together with hints for the usual causes.
        print("\n❌ Error during data writing process to Supabase:")
        print(f"   Error details: {e}")
        print("\n   Suggestions for common issues:")
        permissions_needed = {'replace': 'DROP, CREATE TABLE, INSERT', 'append': 'TRUNCATE, INSERT', 'fail': 'CREATE TABLE (if needed), INSERT'}
        print(f"    - Permissions: Does user '{db_user}' have sufficient permissions ({permissions_needed.get(write_mode, 'UNKNOWN')}) on schema '{schema_name}' and table '{table_name}'?")
        print("    - Data Types: Are the data types inferred by Pandas from CSV compatible with PostgreSQL?")
        print("    - Column/Table Names: Do names contain special characters?")
        print("    - Invalid Data: Are there any values in the CSV unsuitable for the column type?")
        print("    - Table does not exist (for 'append' mode): Target table must exist before running TRUNCATE/INSERT.")
        print("    - Network connection error/Timeout.")
        raise  # Propagate the original exception to the caller

# handle_csv_read_error💡
def handle_csv_read_error(
    exception_obj: Exception,
    file_path: str,
    db_engine: Optional[Engine] = None
):
    """Report a file-read failure, release the engine if given, and exit with code 1.

    Args:
        exception_obj: The exception raised while reading the file.
        file_path: Path of the file that could not be read.
        db_engine: Optional engine whose connection pool is disposed before exiting.
    """
    detail = str(exception_obj)
    messages = [
        f"❌ Error reading CSV file: {file_path}",
        f"   Error details: {detail}",
    ]
    # An error text mentioning "encoding" gets an extra hint about the file's charset.
    if "encoding" in detail.lower():
        messages.append(
            "   Suggestion: The CSV file might not be UTF-8. Try other encodings like 'latin1' or check the original file."
        )
    print("\n".join(messages))

    if db_engine:
        print("   Disposing database engine connection pool...")
        db_engine.dispose()

    print("Exiting script due to CSV reading error.")
    sys.exit(1)

# read_data_file💡
def read_data_file(
    file_path: str,
    db_engine: Optional[Engine] = None,
    excel_sheet_name: str = 'Sheet1'
) -> pd.DataFrame:
    """Read a .csv or .xlsx file into a DataFrame with SQL-friendly column names.

    The reader is chosen from the file extension (case-insensitive). Column names
    are lower-cased, spaces become underscores, and every character that is not
    alphanumeric or an underscore is dropped.

    Args:
        file_path: Path of the file to read (a `pathlib.Path` also works, since
            only `os.path.splitext` and the pandas readers consume it).
        db_engine: Optional engine whose connection pool is disposed before the
            script exits on a failure.
        excel_sheet_name: Sheet to read when the file is .xlsx.

    Returns:
        The loaded DataFrame with standardized column names.

    Raises:
        SystemExit: With code 1 when the extension is unsupported or reading fails.
    """
    print(f"🔄 Reading file: {file_path}...")
    file_extension = os.path.splitext(file_path)[1]
    file_extension_lower = file_extension.lower()

    if file_extension_lower not in ('.csv', '.xlsx'):
        _abort_read(
            f"❌ Error: Unsupported file type: '{file_extension}'. This script only supports .csv and .xlsx files.",
            db_engine,
        )

    try:
        if file_extension_lower == '.csv':
            kind_label = "CSV"
            print("   Detected CSV file. Reading with pd.read_csv...")
            # utf-8-sig strips a leading byte-order mark if the file has one.
            df = pd.read_csv(file_path, encoding='utf-8-sig')
        else:
            kind_label = "Excel"
            print(f"   Detected XLSX file. Reading sheet '{excel_sheet_name}' with pd.read_excel...")
            df = pd.read_excel(file_path, sheet_name=excel_sheet_name)
    except Exception as read_err:
        _abort_read(f"❌ Error reading {kind_label} file '{file_path}': {read_err}", db_engine)

    print(f"✔️ Successfully read {len(df)} rows from {file_extension_lower} file.")
    print(f"   Original columns: {df.columns.tolist()}")
    df.columns = [_to_sql_column_name(col) for col in df.columns]
    print(f"   Standardized columns for SQL: {df.columns.tolist()}")
    return df


def _to_sql_column_name(column) -> str:
    """Lower-case a column label, turn spaces into '_' and drop other punctuation."""
    lowered = str(column).lower().replace(' ', '_')
    return ''.join(ch for ch in lowered if ch.isalnum() or ch == '_')


def _abort_read(message: str, db_engine: Optional[Engine] = None) -> None:
    """Print `message`, dispose `db_engine` if given, and exit the script with code 1."""
    print(message)
    if db_engine:
        print("   Disposing database engine connection pool...")
        db_engine.dispose()
    print("Exiting script.")
    sys.exit(1)

# convert_column_date💡
def convert_column_date(
    dataframe: pd.DataFrame,
    column_name: str,
    date_format: str = '%m/%d/%Y'
):
    """Convert `column_name` of `dataframe` to datetime in place.

    Values that do not match `date_format` become NaT (`errors='coerce'`). The
    warning counts only values that turned null during the conversion, so cells
    that were already empty in the source are not reported as parse failures.

    Args:
        dataframe: DataFrame to modify in place.
        column_name: Column to convert; a missing column is skipped with a warning.
        date_format: `strftime`-style format passed to `pd.to_datetime`.
    """
    if column_name not in dataframe.columns:
        print(f"   ⚠️ Warning: Column '{column_name}' not found in DataFrame. Skipping datetime conversion.")
        return

    print(f"   🔄 Converting column '{column_name}' to datetime using format '{date_format}'...")
    try:
        # Remember how many cells were empty beforehand so they are not blamed on the parser.
        missing_before = dataframe[column_name].isnull().sum()
        dataframe[column_name] = pd.to_datetime(dataframe[column_name], format=date_format, errors='coerce')
        print(f"   ✔️ Conversion attempt for column '{column_name}' finished.")
        failed_count = dataframe[column_name].isnull().sum() - missing_before
        if failed_count > 0:
            print(f"      ⚠️ Warning: Found {failed_count} values in '{column_name}' that could not be converted using format '{date_format}' (set to Null/NaT). Please check the source data.")
    except ValueError as ve:
        print(f"   ❌ ValueError during conversion of column '{column_name}': {ve}. This might happen if the format string '{date_format}' doesn't match any data.")
    except Exception as e:
        print(f"   ❌ An unexpected error occurred during conversion of column '{column_name}': {e}.")

In [None]:
# Model_EEAAO🧠

# --- 1. Target table and source workbook ---
# Write modes: 'replace' drops the old table, recreates it and inserts the data;
# 'append' clears the existing table (TRUNCATE) and inserts the new data;
# 'fail' raises an error if the table already exists.
write_mode = "replace"  # <<<----- CHANGE MODE HERE IF NEEDED
table_name = "Model_EEAAO"
sheet_to_read = "Query1"
csv_file_path = user_credential / r'DataBase//TrainModel//EEAAO//EEAAO_MODEL.xlsx'
validate_csv_exists(csv_file_path, engine)

# --- 2. Load the workbook into a DataFrame ---
try:
    df = read_data_file(csv_file_path, engine, excel_sheet_name=sheet_to_read)
    # == Column edits for this table go here (currently none) ==
    print("   Pandas inferred data types (dtypes):")
    df.info()  # Overview of columns, dtypes and non-null counts
except Exception as e:
    # Report the failure, dispose the engine and stop the script.
    handle_csv_read_error(e, csv_file_path, engine)

# --- 3. Push the DataFrame to Supabase ---
print_write_summary(schema_name, table_name, df, write_mode)
try:
    write_dataframe_to_db(
        dataframe=df,
        db_engine=engine,
        schema_name=schema_name,
        table_name=table_name,
        write_mode=write_mode,
        db_user=db_user,
    )
    print("\nMain script: write_dataframe_to_db completed successfully.")
except Exception as main_error:
    print(f"\nMain script: An error occurred during database write operation: {main_error}")
finally:
    # Release pooled connections whether or not the write succeeded.
    if engine:
        engine.dispose()
        print("\nℹ️ Connection pool closed.")

In [None]:
# [Tony]_Outlook_Calendar💾

# --- 1. Table Information ---
# This cell writes through engine2 / db_user2, so every helper below receives engine2:
# a failure while validating or reading the file then disposes the pool this cell uses.
write_mode = "replace" # <<<----- CHANGE MODE HERE IF NEEDED
# 'replace': Drop old table (DROP), create new table, insert data. || 'append': Clear old data (TRUNCATE), insert new data into existing table. || 'fail': Raise an error if the table already exists.
csv_file_path = user_credential / r'RTA_PersonalFile//Tony//Template//Meeting schedule.xlsx'
table_name = "Outlook_Calendar"
sheet_to_read = "Sheet1"
validate_csv_exists(csv_file_path, engine2)

# --- 2. Read File ---
try:
    df = read_data_file(csv_file_path, engine2, excel_sheet_name=sheet_to_read)
    # == Column edits for this table go here (currently none) ==
    print("   Pandas inferred data types (dtypes):")
    df.info() # Provides an overview of the DataFrame
except Exception as e:
    # Report the failure, dispose the engine and stop the script.
    handle_csv_read_error(e, csv_file_path, engine2)

# --- 3. Write Data to Supabase ---
print_write_summary(schema_name, table_name, df, write_mode)
try:
    write_dataframe_to_db(dataframe=df, db_engine=engine2, schema_name=schema_name, table_name=table_name, write_mode=write_mode, db_user=db_user2)
    print("\nMain script: write_dataframe_to_db completed successfully.") # Successfully
except Exception as main_error:
    print(f"\nMain script: An error occurred during database write operation: {main_error}") # Error
finally:
    # Release pooled connections whether or not the write succeeded.
    if engine2:
        engine2.dispose()
        print("\nℹ️ Connection pool closed.")