## 1. Load the Schema Names From the `TableNames.txt` Text File

In [1]:
with open('data/output_data/TableNames.txt', 'r') as textfile:
    # Keep only the text before the first dot on each line (presumably lines look
    # like SCHEMA.TABLE -- confirm against the file). Strip surrounding whitespace
    # so a line without a dot does not keep its trailing newline, and drop blank
    # lines so they never become an (empty) schema name.
    schemas_to_convert = [
        schema_name
        for schema_name in (line.split('.')[0].strip() for line in textfile)
        if schema_name
    ]

schemas_to_convert

['CUSTOMERS_SCHEMA',
 'RANDOM_SCHEMA1',
 'PRODUCT_REVIEWS_SCHEMA',
 'RANDOM_SCHEMA3',
 'RANDOM_SCHEMA4',
 'PRODUCTS_SCHEMA',
 'PURCHASES_SCHEMA',
 'CATEGORIES_SCHEMA',
 'RANDOM_SCHEMA2']

## 2. Read the SQL File Content

In [2]:
# Pull the entire query file into memory as one string; the schema substitution
# later runs over this text in a single pass.
with open('data/SampleComplexQuery.sql') as sqlfile:
    sql_content = sqlfile.read()

## 3. Replace Schemas Using `re.sub`

In [3]:
import re

sample_sql = """INSERT INTO customers_schema.All_Shannons_View AS SELECT * FROM CUSTOMERS_SCHEMA.customers WHERE customer_name LIKE '%Shannon%';"""
# Escape every schema name before splicing it into the regex so characters that
# are special to the regex engine (e.g. `$`, `+`, `(`) are matched literally.
escaped_schemas = [re.escape(schema_name) for schema_name in schemas_to_convert]
# \b(<name1>|<name2>|...)\.  -> any listed schema used as a qualifier (followed by a dot).
pattern = r"\b(" + "|".join(escaped_schemas) + r")\."
new_sql = re.sub(pattern, "STAGING_SCHEMA.", sample_sql, flags=re.IGNORECASE)
new_sql

"INSERT INTO STAGING_SCHEMA.All_Shannons_View AS SELECT * FROM STAGING_SCHEMA.customers WHERE customer_name LIKE '%Shannon%';"

**1. Regular Expression:**

* **`r"\b(" + "|".join(schemas_to_convert) + r")\."`:** This is the heart of the pattern matching and replacement.
    * `r"..."`: A raw string literal. Python leaves the backslashes untouched, so sequences such as `\b` and `\.` reach the regex engine exactly as written.
    * `\b`: Matches a word boundary at the start of the schema name, so a name is not matched in the middle of a longer identifier (e.g., `CUSTOMERS_SCHEMA.` is not matched inside `OLD_CUSTOMERS_SCHEMA.`).
    * `(`: Starts a capturing group.
    * `|`: Pipe operator separates alternative patterns within the group.
    * `" + "|".join(schemas_to_convert) + "` dynamically inserts each schema name from the list, creating a pattern matching any of them.
    * `r")\."`: Closes the capturing group and then matches a literal dot, so a schema name is only replaced where it is used as a qualifier (i.e., followed by `.`).

**2. Replacement String:**

* `"`: Starts and ends the replacement string.
* `"STAGING_SCHEMA."`: The replacement string containing the desired target schema.

**3. Flags:**

* `re.IGNORECASE`: Makes the matching case-insensitive, so both `customers_schema.` and `CUSTOMERS_SCHEMA.` are replaced.


## 4. Apply the Pattern to the SQL File Content

In [4]:
# Compile the pattern once with case-insensitive matching, then rewrite every
# matching schema qualifier in the loaded SQL text.
schema_qualifier_re = re.compile(pattern, flags=re.IGNORECASE)
new_sql_content = schema_qualifier_re.sub("STAGING_SCHEMA.", sql_content)
# Preview only the first 1500 characters to keep the cell output short.
print(new_sql_content[:1500])

   Query: Customer Purchase Analysis
   Created: 1993-07-15
   Created by: Team_A
   Purpose: Analyzing customer purchases and loyalty
*/

WITH customer_purchases AS (
    SELECT
        -- Customer purchase details
        customer_id,
        purchase_date,
        purchase_amount,
        -- Commented out: additional column below
        -- purchase_type, // Update: 1998-05-20 - Deprecated field, kept for historical compatibility
        ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY purchase_date DESC) AS purchase_number
        -- Update: 2005-11-30 - Improved performance by optimizing window function
    FROM STAGING_SCHEMA.PURCHASES
),
customer_loyalty AS (
    SELECT
        -- Customer loyalty details
        customer_id,
        SUM(purchase_amount) AS total_spent,
        CASE
            WHEN total_spent >= 1000 THEN 'Gold'
            WHEN total_spent >= 500 THEN 'Silver'
            ELSE 'Bronze'
        END AS loyalty_level
    FROM customer_purchases
    GROUP BY 

## 5. Putting It Together

In [5]:
import re
import os

def convert_schema_names(sql_file_path, schema_names_file_path, output_file_path,
                         target_schema="STAGING_SCHEMA"):
    """
    Rewrites schema qualifiers in an SQL file so they point at a single target schema.

    Every occurrence of ``<schema>.`` (case-insensitive) whose ``<schema>`` is listed
    in the schema names file is replaced with ``<target_schema>.`` and the result is
    written to ``output_file_path``. If that file already exists, the previous
    version is kept next to it with a ``.bak`` suffix.

    Args:
        sql_file_path (str): Path to the SQL file.
        schema_names_file_path (str): Path to the text file containing schema names,
            one per line. Only the text before the first dot on each line is used;
            surrounding whitespace is ignored and blank lines are skipped.
        output_file_path (str): Path to the output file.
        target_schema (str): Schema name that replaces every matched schema.
            Defaults to "STAGING_SCHEMA".

    Returns:
        None. Errors are reported with ``print`` instead of being raised.
    """

    try:
        with open(schema_names_file_path, "r") as textfile:
            # Strip so a line without a dot does not keep its trailing newline,
            # and skip blank lines so they never become an empty alternative.
            stripped_names = (line.split('.')[0].strip() for line in textfile)
            # dict.fromkeys removes duplicates while keeping first-seen order.
            schemas_to_convert = list(dict.fromkeys(name for name in stripped_names if name))

        with open(sql_file_path, "r") as f:
            sql_content = f.read()

        if schemas_to_convert:
            # Escape each name so regex metacharacters in a schema name match literally.
            alternatives = "|".join(re.escape(name) for name in schemas_to_convert)
            pattern = r"\b(" + alternatives + r")\."
            # A callable replacement keeps backslashes in target_schema from being
            # interpreted as regex replacement escapes.
            new_sql_content = re.sub(
                pattern,
                lambda match: target_schema + ".",
                sql_content,
                flags=re.IGNORECASE,
            )
        else:
            # With no names the pattern would degenerate to r"\b()\." and rewrite
            # every dot that follows a word character; leave the SQL untouched instead.
            new_sql_content = sql_content

        # Ensure the output directory exists. dirname is "" for a bare file name,
        # and os.makedirs("") raises, so only create it when there is one.
        output_dir = os.path.dirname(output_file_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Keep the previous output (if any) as a .bak copy. os.replace overwrites
        # an older .bak on every platform, so repeated runs do not fail.
        if os.path.exists(output_file_path):
            os.replace(output_file_path, output_file_path + ".bak")

        with open(output_file_path, "w") as f:
            f.write(new_sql_content)

    except FileNotFoundError as e:
        print(f"Error: File not found: {e.filename}")
    except Exception as e:
        print(f"An error occurred: {e}")

# Inputs and output location for the end-to-end run of the converter.
sql_file_path = 'data/SampleComplexQuery.sql'
schema_names_file_path = 'data/output_data/TableNames.txt'
output_file_path = "data/output_data/modified_sql_file.sql"

# Keyword arguments make it explicit which path plays which role.
convert_schema_names(
    sql_file_path=sql_file_path,
    schema_names_file_path=schema_names_file_path,
    output_file_path=output_file_path,
)
