## Create schema-wise folders to store the corresponding scripts

In [None]:
import pandas as pd
from pathlib import Path
# Combine both exports and keep only stored procedures. For each (Schema, Object)
# pair a single row survives: rows are ordered by DatabaseName descending before
# de-duplicating, so the row with the greatest DatabaseName is the one kept.
df_definitions = (
    pd.concat(
        [pd.read_csv(csv_file) for csv_file in ("object_definitions.csv", "UAT_object_definitions.csv")],
        ignore_index=True,
    )
    .query("ObjectType == 'SQL_STORED_PROCEDURE'")
    .sort_values(by="DatabaseName", ascending=False)
    .drop_duplicates(subset=['Schema', 'Object'], keep='first')
)

# Number of stored procedures per schema, largest schema first.
# df_definitions is already restricted to stored procedures at this point.
df_schema_counts = (
    df_definitions
    .groupby('Schema')
    .size()
    .reset_index(name='Count')
    .sort_values(by='Count', ascending=False)
    # Deliberately not drop=True: the pre-sort index stays as an extra "index"
    # column, and the tuple unpacking in the loop below expects it.
    .reset_index()
)


Path("store_folders").mkdir(exist_ok=True)
# Only positions 12..32 of the count-sorted table are processed.
# Each tuple is (position after sorting, pre-sort index, Schema, Count).
for rank, _presort_index, schema, proc_count in df_schema_counts.iloc[12:33].itertuples():
    batch_name = f"{rank}_`{schema}`_batch_complete_{proc_count}"
    stitched_path = f"store_folders/{batch_name}_fol"
    Path(stitched_path).mkdir(exist_ok = True)
    Path(f"{stitched_path}/{batch_name}").mkdir(exist_ok = True)
    # Mode "w" creates the stats file empty and truncates it if it already exists.
    with open(f"{stitched_path}/{rank}_{schema}_stats.json", "w", encoding='utf-8') as stats_file:
        stats_file.write('')

## Create SQL scripts schema-wise

In [None]:
import pandas as pd
from pathlib import Path
import binascii
import re

# Combine both exports and keep only stored procedures. For each (Schema, Object)
# pair a single row survives: rows are ordered by DatabaseName descending before
# de-duplicating, so the row with the greatest DatabaseName is the one kept.
df_definitions = (
    pd.concat(
        [pd.read_csv(csv_file) for csv_file in ("object_definitions.csv", "UAT_object_definitions.csv")],
        ignore_index=True,
    )
    .query("ObjectType == 'SQL_STORED_PROCEDURE'")
    .sort_values(by="DatabaseName", ascending=False)
    .drop_duplicates(subset=['Schema', 'Object'], keep='first')
)

# Root folder that receives one sub-folder per schema.
Path("sql_scripts_py_generated/").mkdir(exist_ok=True)

# One left-to-right pass over the SQL text that matches either a quoted token
# (captured in group "quoted") or a comment. Scanning both in the same pattern
# means a "--" or "/*" inside a quoted token is never mistaken for a comment, and
# a "--" inside a block comment cannot swallow the code that follows it.
_SQL_QUOTED_OR_COMMENT = re.compile(
    r"(?P<quoted>"
    r"'(?:[^']|'')*'"         # single-quoted string literal; '' is an escaped quote
    r'|"(?:[^"]|"")*"'        # double-quoted identifier / string
    r"|\[(?:[^\]]|\]\])*\]"   # bracket-quoted identifier; ]] is an escaped bracket
    r")"
    r"|--[^\n]*"              # line comment, up to (not including) the newline
    r"|/\*.*?\*/",            # block comment; non-greedy, nesting is not handled
    re.DOTALL,
)


def robust_clean_sql(sql_query):
    """Return *sql_query* with SQL comments removed and whitespace condensed.

    Steps, in order:
      1. Literal two-character ``\\n`` / ``\\t`` sequences are turned into real
         newline / tab characters.
      2. ``-- ...`` and ``/* ... */`` comments are removed. Text inside
         single-quoted, double-quoted or bracket-quoted tokens is left alone.
      3. Runs of blank (whitespace-only) lines collapse to a single newline.
      4. Runs of spaces/tabs collapse to a single space.
      5. Leading and trailing whitespace is stripped.

    Steps 3 and 4 are applied to the whole text, so whitespace inside string
    literals is condensed as well.

    Args:
        sql_query: The script text. Non-string values are converted with
            ``str()``; ``None`` and float NaN (for example an empty CSV cell
            loaded by pandas) are treated as "no script".

    Returns:
        The cleaned script as a ``str``; ``""`` when there is no script.
    """
    # str(None) / str(nan) would otherwise produce the bogus scripts "None" / "nan".
    # NaN is the only float that is not equal to itself.
    if sql_query is None or (isinstance(sql_query, float) and sql_query != sql_query):
        return ""

    sql_text = str(sql_query)

    sql_text = sql_text.replace('\\n', '\n').replace('\\t', '\t')

    # Keep quoted tokens verbatim, drop comments.
    sql_text = _SQL_QUOTED_OR_COMMENT.sub(
        lambda match: match.group("quoted") or "", sql_text
    )

    # Replace multiple newlines with a single newline
    sql_text = re.sub(r'\n\s*\n', '\n', sql_text)
    # Collapse horizontal spaces (tabs/spaces) into one space
    sql_text = re.sub(r'[ \t]+', ' ', sql_text)

    return sql_text.strip()

# Write one cleaned .sql file per row of df_definitions, grouped into one folder
# per schema:
#   folder: sql_scripts_py_generated/<schema>---<hex of schema>
#   file:   <schema>.<object>---<hex of object>.sql
# The hex suffix is the UTF-8 bytes of the unmodified name, hex-encoded, so the
# exact original name stays recoverable from the path.
#
# Fields are read by position (itertuples puts the index at position 0): row[2]
# is used as the schema name, row[3] as the object name, row[5] as the script.
# NOTE(review): this depends on the CSV column order - confirm against the export.
for row in df_definitions.itertuples():
    schema_name, object_name, raw_script = row[2], row[3], row[5]
    schema_hex_suffix = binascii.hexlify(schema_name.encode('utf-8')).decode('utf-8')
    name_hex_suffix = binascii.hexlify(object_name.encode('utf-8')).decode('utf-8')

    # note:- any schema or object name containing `/` has it replaced with the
    # string `((forward_slash))` in the readable part of the path, because `/`
    # would otherwise be interpreted as a directory separator.
    # The replacement is done before building the f-strings: reusing the same
    # quote character inside an f-string expression is a SyntaxError before
    # Python 3.12.
    safe_schema = schema_name.replace("/", "((forward_slash))")
    safe_object = object_name.replace("/", "((forward_slash))")

    hex_suffix_path = f"sql_scripts_py_generated/{safe_schema}---{schema_hex_suffix}"
    script_path = f"{hex_suffix_path}/{safe_schema}.{safe_object}---{name_hex_suffix}.sql"

    # parents=True also creates the output root if it does not exist yet.
    Path(hex_suffix_path).mkdir(parents=True, exist_ok=True)
    cleaned_script = robust_clean_sql(raw_script)
    try:
        with open(script_path, "w", encoding='utf-8') as f:
            f.write(cleaned_script)
    except Exception as e:
        # Best effort: report the failing object and continue with the next row.
        print(f"Error writing {schema_name}.{object_name} script:\n",e)

In [None]:
# Check total number of created files
import os
# Sum the number of entries in every folder directly under the output root.
# For a per-schema breakdown, print path.split("---")[0] next to each folder's
# entry count instead of summing.
count = sum(
    len(os.listdir(f"sql_scripts_py_generated/{path}"))
    for path in os.listdir("sql_scripts_py_generated")
)
count