From 914834ac113e34426a16a301482e30b52218a2fa Mon Sep 17 00:00:00 2001 From: Vishal Shenoy Date: Fri, 31 Jan 2025 10:45:08 -0800 Subject: [PATCH 1/5] init --- examples/sqlalchemy_soft_delete/README.md | 1 + examples/sqlalchemy_soft_delete/run.py | 79 +++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 examples/sqlalchemy_soft_delete/README.md create mode 100644 examples/sqlalchemy_soft_delete/run.py diff --git a/examples/sqlalchemy_soft_delete/README.md b/examples/sqlalchemy_soft_delete/README.md new file mode 100644 index 0000000..945c9b4 --- /dev/null +++ b/examples/sqlalchemy_soft_delete/README.md @@ -0,0 +1 @@ +. \ No newline at end of file diff --git a/examples/sqlalchemy_soft_delete/run.py b/examples/sqlalchemy_soft_delete/run.py new file mode 100644 index 0000000..579cd05 --- /dev/null +++ b/examples/sqlalchemy_soft_delete/run.py @@ -0,0 +1,79 @@ +import codegen +from codegen import Codebase +from codegen.sdk.core.detached_symbols.function_call import FunctionCall +from codegen.sdk.codebase.config import CodebaseConfig, GSFeatureFlags + +codebase = Codebase("./input_repo", config=CodebaseConfig(feature_flags=GSFeatureFlags(disable_graph=True))) + +# Values for soft delete models and join methods +soft_delete_models = { + "User", + "ProductWorkflow", + "TransactionCanonical", + "BillParametersLogEntry", + "SpendEventCanonical", + "TrackingCategory", + "Payee", + "Card", + "ApprovalInstance", + "Merchant", + "Transaction", +} +join_methods = {"join", "outerjoin", "innerjoin"} + +# Loop through all files and function calls +for file in codebase.files: + for call in file.function_calls: + # Get the arguments as a list + call_args = list(call.args) + + # Skip if the function call is not a join method + if str(call.name) not in join_methods: + continue + + # Skip if the function call has no arguments + if len(call_args) == 0: + continue + + # Get the model name from the first argument + model_name = str(call_args[0].value) + + # Skip if 
the model name is not in the soft delete models + if model_name not in soft_delete_models: + continue + + # Construct the deleted_at check expression + print(f"Found join method for model {model_name} in file {file.filepath}") + deleted_at_check = f"{model_name}.deleted_at.is_(None)" + + # If there is only one argument, add the deleted_at check + if len(call_args) == 1: + print(f"Adding deleted_at check to function call {call.source}") + call_args.append(deleted_at_check) + elif len(call_args) >= 2: + # Get the second argument + second_arg = call_args[1].value + + # Skip if the second argument is already the deleted_at check + if second_arg.source == deleted_at_check: + print(f"Skipping {file.filepath} because the deleted_at check is already present") + continue + + # If the second argument is an and_ call, add the deleted_at check if it's not already present + if isinstance(second_arg, FunctionCall) and second_arg.name == "and_": + if deleted_at_check in {str(x) for x in second_arg.args}: + print(f"Skipping {file.filepath} because the deleted_at check is already present") + continue + else: + print(f"Adding deleted_at check to and_ call in {file.filepath}") + second_arg.args.append(deleted_at_check) + else: + print(f"Adding deleted_at check to {file.filepath}") + call_args[1].edit(f"and_({second_arg.source}, {deleted_at_check})") + + # Check if the file imports and_ + if any("and_" in imp.name for imp in file.imports): + print(f"File {file.filepath} imports and_") + else: + print(f"File {file.filepath} does not import and_. Adding import.") + file.add_import_from_import_string("from sqlalchemy import and_") From 4082ee04f706a9fc4477dcbb56cdf43507b788f2 Mon Sep 17 00:00:00 2001 From: Vishal Shenoy Date: Mon, 3 Feb 2025 10:53:52 -0800 Subject: [PATCH 2/5] . 
--- examples/sqlalchemy_soft_delete/run.py | 154 +++++++++++++------------ 1 file changed, 80 insertions(+), 74 deletions(-) diff --git a/examples/sqlalchemy_soft_delete/run.py b/examples/sqlalchemy_soft_delete/run.py index 579cd05..88eda6a 100644 --- a/examples/sqlalchemy_soft_delete/run.py +++ b/examples/sqlalchemy_soft_delete/run.py @@ -1,79 +1,85 @@ import codegen from codegen import Codebase from codegen.sdk.core.detached_symbols.function_call import FunctionCall -from codegen.sdk.codebase.config import CodebaseConfig, GSFeatureFlags - -codebase = Codebase("./input_repo", config=CodebaseConfig(feature_flags=GSFeatureFlags(disable_graph=True))) - -# Values for soft delete models and join methods -soft_delete_models = { - "User", - "ProductWorkflow", - "TransactionCanonical", - "BillParametersLogEntry", - "SpendEventCanonical", - "TrackingCategory", - "Payee", - "Card", - "ApprovalInstance", - "Merchant", - "Transaction", -} -join_methods = {"join", "outerjoin", "innerjoin"} - -# Loop through all files and function calls -for file in codebase.files: - for call in file.function_calls: - # Get the arguments as a list - call_args = list(call.args) - - # Skip if the function call is not a join method - if str(call.name) not in join_methods: - continue - - # Skip if the function call has no arguments - if len(call_args) == 0: - continue - - # Get the model name from the first argument - model_name = str(call_args[0].value) - - # Skip if the model name is not in the soft delete models - if model_name not in soft_delete_models: - continue - - # Construct the deleted_at check expression - print(f"Found join method for model {model_name} in file {file.filepath}") - deleted_at_check = f"{model_name}.deleted_at.is_(None)" - - # If there is only one argument, add the deleted_at check - if len(call_args) == 1: - print(f"Adding deleted_at check to function call {call.source}") - call_args.append(deleted_at_check) - elif len(call_args) >= 2: - # Get the second argument - 
second_arg = call_args[1].value - - # Skip if the second argument is already the deleted_at check - if second_arg.source == deleted_at_check: - print(f"Skipping {file.filepath} because the deleted_at check is already present") +from codegen.sdk.enums import ProgrammingLanguage + + +def should_process_join_call(call, soft_delete_models, join_methods): + """Determine if a function call should be processed for soft delete conditions.""" + if str(call.name) not in join_methods: + return False + + call_args = list(call.args) + if not call_args: + return False + + model_name = str(call_args[0].value) + return model_name in soft_delete_models + + +def add_deleted_at_check(file, call, model_name): + """Add the deleted_at check to a join call.""" + call_args = list(call.args) + deleted_at_check = f"{model_name}.deleted_at.is_(None)" + + if len(call_args) == 1: + print(f"Adding deleted_at check to function call {call.source}") + call.args.append(deleted_at_check) + return + + second_arg = call_args[1].value + if second_arg.source == deleted_at_check: + print(f"Skipping {file.filepath} because the deleted_at check is already present") + return + + if isinstance(second_arg, FunctionCall) and second_arg.name == "and_": + if deleted_at_check in {str(x) for x in second_arg.args}: + print(f"Skipping {file.filepath} because the deleted_at check is already present") + return + print(f"Adding deleted_at check to and_ call in {file.filepath}") + second_arg.args.append(deleted_at_check) + else: + print(f"Adding deleted_at check to {file.filepath}") + call_args[1].edit(f"and_({second_arg.source}, {deleted_at_check})") + + ensure_and_import(file) + + +def ensure_and_import(file): + """Ensure the file has the necessary and_ import.""" + if not any("and_" in imp.name for imp in file.imports): + print(f"File {file.filepath} does not import and_.
Adding import.") + file.add_import_from_import_string("from sqlalchemy import and_") + + +@codegen.function("sqlalchemy-soft-delete") +def process_soft_deletes(codebase): + """Process soft delete conditions for join methods in the codebase.""" + soft_delete_models = { + "User", + "Update", + "Proposal", + "Comment", + "Project", + "Team", + "SavedSession", + } + join_methods = {"join", "outerjoin", "innerjoin"} + + for file in codebase.files: + for call in file.function_calls: + if not should_process_join_call(call, soft_delete_models, join_methods): continue - # If the second argument is an and_ call, add the deleted_at check if it's not already present - if isinstance(second_arg, FunctionCall) and second_arg.name == "and_": - if deleted_at_check in {str(x) for x in second_arg.args}: - print(f"Skipping {file.filepath} because the deleted_at check is already present") - continue - else: - print(f"Adding deleted_at check to and_ call in {file.filepath}") - second_arg.args.append(deleted_at_check) - else: - print(f"Adding deleted_at check to {file.filepath}") - call_args[1].edit(f"and_({second_arg.source}, {deleted_at_check})") - - # Check if the file imports and_ - if any("and_" in imp.name for imp in file.imports): - print(f"File {file.filepath} imports and_") - else: - print(f"File {file.filepath} does not import and_. Adding import.") - file.add_import_from_import_string("from sqlalchemy import and_") + model_name = str(list(call.args)[0].value) + print(f"Found join method for model {model_name} in file {file.filepath}") + add_deleted_at_check(file, call, model_name) + + print("commit") + print(codebase.get_diff()) + + +if __name__ == "__main__": + codebase = Codebase.from_repo("hasgeek/funnel", programming_language=ProgrammingLanguage.PYTHON) + print(codebase.files) + process_soft_deletes(codebase) From 48f5302699bcfe8ca6d94bfd4ff313db4de9ee11 Mon Sep 17 00:00:00 2001 From: Vishal Shenoy Date: Mon, 3 Feb 2025 12:25:13 -0800 Subject: [PATCH 3/5] . 
--- examples/sqlalchemy_soft_delete/README.md | 140 +++++++++++++++++++++- examples/sqlalchemy_soft_delete/run.py | 26 +++- 2 files changed, 162 insertions(+), 4 deletions(-) diff --git a/examples/sqlalchemy_soft_delete/README.md b/examples/sqlalchemy_soft_delete/README.md index 945c9b4..49f815c 100644 --- a/examples/sqlalchemy_soft_delete/README.md +++ b/examples/sqlalchemy_soft_delete/README.md @@ -1 +1,139 @@ -. \ No newline at end of file +# SQLAlchemy Soft Delete Codemod + +This codemod automatically adds soft delete conditions to SQLAlchemy join queries in your codebase. It ensures that joins only include non-deleted records by adding appropriate `deleted_at` checks. + +## Overview + +The codemod analyzes your codebase and automatically adds soft delete conditions to SQLAlchemy join methods (`join`, `outerjoin`, `innerjoin`) for specified models. This helps prevent accidentally including soft-deleted records in query results. + +## How It Works + +The codemod processes your codebase in several steps: + +1. **Join Detection** + ```python + def should_process_join_call(call, soft_delete_models, join_methods): + if str(call.name) not in join_methods: + return False + + call_args = list(call.args) + if not call_args: + return False + + model_name = str(call_args[0].value) + return model_name in soft_delete_models + ``` + - Scans for SQLAlchemy join method calls (`join`, `outerjoin`, `innerjoin`) + - Identifies joins involving soft-deletable models + - Analyzes existing join conditions + +2. 
**Condition Addition** + ```python + def add_deleted_at_check(file, call, model_name): + call_args = list(call.args) + deleted_at_check = f"{model_name}.deleted_at.is_(None)" + + if len(call_args) == 1: + call_args.append(deleted_at_check) + return + + second_arg = call_args[1].value + if isinstance(second_arg, FunctionCall) and second_arg.name == "and_": + second_arg.args.append(deleted_at_check) + else: + call_args[1].edit(f"and_({second_arg.source}, {deleted_at_check})") + ``` + - Adds `deleted_at.is_(None)` checks to qualifying joins + - Handles different join condition patterns: + - Simple joins with no conditions + - Joins with existing conditions (combines using `and_`) + - Preserves existing conditions while adding soft delete checks + +3. **Import Management** + ```python + def ensure_and_import(file): + if not any("and_" in imp.name for imp in file.imports): + file.add_import_from_import_string("from sqlalchemy import and_") + ``` + - Automatically adds required SQLAlchemy imports (`and_`) + - Prevents duplicate imports + +## Configuration + +### Soft Delete Models + +The codemod processes joins for the following models: +```python +soft_delete_models = { + "User", + "Update", + "Proposal", + "Comment", + "Project", + "Team", + "SavedSession" +} +``` + +### Join Methods + +The codemod handles these SQLAlchemy join methods: +```python +join_methods = {"join", "outerjoin", "innerjoin"} +``` + +## Code Transformations + +### Simple Join +```python +# Before +query.join(User) + +# After +from sqlalchemy import and_ +query.join(User, User.deleted_at.is_(None)) +``` + +### Join with Existing Condition +```python +# Before +query.join(User, User.id == Post.user_id) + +# After +from sqlalchemy import and_ +query.join(User, and_(User.id == Post.user_id, User.deleted_at.is_(None))) +``` + +## Graph Disable Mode + +This codemod includes support for running without the graph feature enabled. 
This is useful for faster processing of large codebases and for reducing memory usage. + +To run in no-graph mode: +```python +codebase = Codebase( + str(repo_path), + programming_language=ProgrammingLanguage.PYTHON, + config=CodebaseConfig( + feature_flags=GSFeatureFlags(disable_graph=True) + ) +) +``` + +## Running the Conversion + +```bash +# Install Codegen +pip install codegen + +# Run the conversion +python run.py +``` + +## Learn More + +- [SQLAlchemy Documentation](https://docs.sqlalchemy.org/en/20/) +- [Codegen Documentation](https://docs.codegen.com) + +## Contributing + +Feel free to submit issues and enhancement requests! diff --git a/examples/sqlalchemy_soft_delete/run.py b/examples/sqlalchemy_soft_delete/run.py index 88eda6a..022af9d 100644 --- a/examples/sqlalchemy_soft_delete/run.py +++ b/examples/sqlalchemy_soft_delete/run.py @@ -2,6 +2,9 @@ from codegen import Codebase from codegen.sdk.core.detached_symbols.function_call import FunctionCall from codegen.sdk.enums import ProgrammingLanguage +import shutil +import subprocess +from pathlib import Path def should_process_join_call(call, soft_delete_models, join_methods): @@ -52,6 +55,13 @@ def ensure_and_import(file): file.add_import_from_import_string("from sqlalchemy import and_") +def clone_repo(repo_url: str, repo_path: Path) -> None: + """Clone a git repository to the specified path.""" + if repo_path.exists(): + shutil.rmtree(repo_path) + subprocess.run(["git", "clone", repo_url, str(repo_path)], check=True) + + @codegen.function("sqlalchemy-soft-delete") def process_soft_deletes(codebase): """Process soft delete conditions for join methods in the codebase.""" @@ -75,11 +85,21 @@ def process_soft_deletes(codebase): print(f"Found join method for model {model_name} in file {file.filepath}") add_deleted_at_check(file, call, model_name) + codebase.commit() print("commit") print(codebase.get_diff()) if __name__ == "__main__": - codebase = Codebase.from_repo("hasgeek/funnel",
programming_language=ProgrammingLanguage.PYTHON) - print(codebase.files) - process_soft_deletes(codebase) + from codegen.sdk.core.codebase import Codebase + from codegen.sdk.codebase.config import CodebaseConfig, GSFeatureFlags + + repo_path = Path("/tmp/core") + repo_url = "https://github.com/hasgeek/funnel.git" + + try: + clone_repo(repo_url, repo_path) + codebase = Codebase(str(repo_path), programming_language=ProgrammingLanguage.PYTHON, config=CodebaseConfig(feature_flags=GSFeatureFlags(disable_graph=True))) + process_soft_deletes(codebase) + finally: + shutil.rmtree(repo_path, ignore_errors=True) From 261755c7dc819e463039f86184d3a18335073c55 Mon Sep 17 00:00:00 2001 From: Vishal Shenoy Date: Mon, 3 Feb 2025 12:26:49 -0800 Subject: [PATCH 4/5] . --- examples/sqlalchemy_soft_delete/README.md | 26 +++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/examples/sqlalchemy_soft_delete/README.md b/examples/sqlalchemy_soft_delete/README.md index 49f815c..8117371 100644 --- a/examples/sqlalchemy_soft_delete/README.md +++ b/examples/sqlalchemy_soft_delete/README.md @@ -84,24 +84,38 @@ join_methods = {"join", "outerjoin", "innerjoin"} ## Code Transformations -### Simple Join +### Simple Join with Model Reference ```python # Before -query.join(User) +query.join(Project, Session.project) # After from sqlalchemy import and_ -query.join(User, User.deleted_at.is_(None)) +query.join(Project, and_(Session.project, Project.deleted_at.is_(None))) ``` -### Join with Existing Condition +### Join with Column Equality ```python # Before -query.join(User, User.id == Post.user_id) +query.join(Project, Session.project_id == Project.id) # After from sqlalchemy import and_ -query.join(User, and_(User.id == Post.user_id, User.deleted_at.is_(None))) +query.join(Project, and_(Session.project_id == Project.id, Project.deleted_at.is_(None))) +``` + +### Multiple Joins in Query Chain +```python +# Before +Session.query.join(Project, Session.project)\ + .join(Account,
Project.account)\ + .outerjoin(Proposal, Session.proposal) + +# After +from sqlalchemy import and_ +Session.query.join(Project, and_(Session.project, Project.deleted_at.is_(None)))\ + .join(Account, Project.account)\ + .outerjoin(Proposal, and_(Session.proposal, Proposal.deleted_at.is_(None))) ``` ## Graph Disable Mode From 0945cb7aaa4610175161bf6fb958649ea69f76d2 Mon Sep 17 00:00:00 2001 From: vishalshenoy <34020235+vishalshenoy@users.noreply.github.com> Date: Mon, 3 Feb 2025 20:28:05 +0000 Subject: [PATCH 5/5] Automated pre-commit update --- examples/sqlalchemy_soft_delete/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/sqlalchemy_soft_delete/README.md b/examples/sqlalchemy_soft_delete/README.md index 8117371..cccc0dc 100644 --- a/examples/sqlalchemy_soft_delete/README.md +++ b/examples/sqlalchemy_soft_delete/README.md @@ -15,11 +15,11 @@ The codemod processes your codebase in several steps: def should_process_join_call(call, soft_delete_models, join_methods): if str(call.name) not in join_methods: return False - + call_args = list(call.args) if not call_args: return False - + model_name = str(call_args[0].value) return model_name in soft_delete_models ``` @@ -32,11 +32,11 @@ The codemod processes your codebase in several steps: def add_deleted_at_check(file, call, model_name): call_args = list(call.args) deleted_at_check = f"{model_name}.deleted_at.is_(None)" - + if len(call_args) == 1: call_args.append(deleted_at_check) return - + second_arg = call_args[1].value if isinstance(second_arg, FunctionCall) and second_arg.name == "and_": second_arg.args.append(deleted_at_check)