From c627ece5c90886af072010f22974e639bbf3cb62 Mon Sep 17 00:00:00 2001 From: Vishal Shenoy Date: Wed, 12 Mar 2025 10:44:37 -0700 Subject: [PATCH 1/3] attributions --- .../examples/symbol-attributions/README.md | 93 +++++++++++++++++++ .../run.py} | 0 2 files changed, 93 insertions(+) create mode 100644 codegen-examples/examples/symbol-attributions/README.md rename codegen-examples/examples/{attributions/symbol_attribution.py => symbol-attributions/run.py} (100%) diff --git a/codegen-examples/examples/symbol-attributions/README.md b/codegen-examples/examples/symbol-attributions/README.md new file mode 100644 index 000000000..768bcc0fe --- /dev/null +++ b/codegen-examples/examples/symbol-attributions/README.md @@ -0,0 +1,93 @@ +# Symbol Attributions + +This example demonstrates how to analyze and track attribution information for symbols in a codebase, including identifying AI vs human contributions and tracking edit history. + +## What it does + +This script performs several key functions: + +1. **Codebase Analysis** + - Loads and parses all Python files in the repository + - Builds a dependency graph of symbols (classes, functions, etc.) + - Analyzes import relationships and dependencies + + ```python + repo_path = os.getcwd() + repo_config = RepoConfig.from_repo_path(repo_path) + repo_operator = RepoOperator(repo_config=repo_config) + + project = ProjectConfig.from_repo_operator(repo_operator=repo_operator, programming_language=ProgrammingLanguage.PYTHON) + codebase = Codebase(projects=[project]) + ``` + +2. **AI Impact Analysis** + - Identifies commits made by AI bots vs human contributors + - Calculates statistics on AI contributions: + - Percentage of AI commits + - Files with significant AI contribution + - Number of AI-touched symbols + - Identifies high-impact AI-written code + + ```python + ai_authors = ["devin[bot]", "codegen[bot]", "github-actions[bot]"] + add_attribution_to_symbols(codebase, ai_authors) + ``` + +3. 
**Symbol Attribution** + - Tracks edit history for each symbol in the codebase + - Records: + - Last editor of each symbol + - Complete editor history + - Whether the symbol was AI-authored + - Provides detailed attribution for most-used symbols + + ```python + symbols_with_usages = [] + for symbol in codebase.symbols: + if hasattr(symbol, "usages") and len(symbol.usages) > 0: + symbols_with_usages.append((symbol, len(symbol.usages))) + ``` + +## Example Output + +The script provides detailed analytics including: + +- Repository statistics (files, symbols, contributors) +- AI contribution summary (% of commits, impacted files) +- Top contributors list +- Detailed attribution for most-used symbols, showing: + - Symbol name and type + - File location + - Usage count + - Last editor + - Editor history + - AI authorship status + +## Usage + +Run the script in your repository: + +```bash +python run.py +``` + +The script will automatically: +- Use the current directory if it's a git repository +- Fall back to a sample repository if not in a git repo +- Generate comprehensive attribution analysis +- Save detailed results to `ai_impact_analysis.json` + +## Requirements + +- A Git repository +- Python codebase +- `codegen` installed + +## Learn More + +- [Codegen Symbols](https://docs.codegen.com/api-reference/core/Symbol#symbol) +- [Codegen Documentation](https://docs.codegen.com) + +## Contributing + +Feel free to submit issues and enhancement requests! 
\ No newline at end of file diff --git a/codegen-examples/examples/attributions/symbol_attribution.py b/codegen-examples/examples/symbol-attributions/run.py similarity index 100% rename from codegen-examples/examples/attributions/symbol_attribution.py rename to codegen-examples/examples/symbol-attributions/run.py From 4d731488712b164735fbe3a38d6d2fd134331bb8 Mon Sep 17 00:00:00 2001 From: vishalshenoy <34020235+vishalshenoy@users.noreply.github.com> Date: Wed, 12 Mar 2025 17:45:57 +0000 Subject: [PATCH 2/3] Automated pre-commit update --- .../examples/symbol-attributions/README.md | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/codegen-examples/examples/symbol-attributions/README.md b/codegen-examples/examples/symbol-attributions/README.md index 768bcc0fe..13a6fc68d 100644 --- a/codegen-examples/examples/symbol-attributions/README.md +++ b/codegen-examples/examples/symbol-attributions/README.md @@ -11,14 +11,14 @@ This script performs several key functions: - Builds a dependency graph of symbols (classes, functions, etc.) - Analyzes import relationships and dependencies - ```python - repo_path = os.getcwd() - repo_config = RepoConfig.from_repo_path(repo_path) - repo_operator = RepoOperator(repo_config=repo_config) +```python + repo_path = os.getcwd() + repo_config = RepoConfig.from_repo_path(repo_path) + repo_operator = RepoOperator(repo_config=repo_config) - project = ProjectConfig.from_repo_operator(repo_operator=repo_operator, programming_language=ProgrammingLanguage.PYTHON) - codebase = Codebase(projects=[project]) - ``` + project = ProjectConfig.from_repo_operator(repo_operator=repo_operator, programming_language=ProgrammingLanguage.PYTHON) + codebase = Codebase(projects=[project]) +``` 2. 
**AI Impact Analysis** - Identifies commits made by AI bots vs human contributors @@ -28,10 +28,10 @@ This script performs several key functions: - Number of AI-touched symbols - Identifies high-impact AI-written code - ```python - ai_authors = ["devin[bot]", "codegen[bot]", "github-actions[bot]"] - add_attribution_to_symbols(codebase, ai_authors) - ``` +```python + ai_authors = ["devin[bot]", "codegen[bot]", "github-actions[bot]"] + add_attribution_to_symbols(codebase, ai_authors) +``` 3. **Symbol Attribution** - Tracks edit history for each symbol in the codebase @@ -41,12 +41,12 @@ This script performs several key functions: - Whether the symbol was AI-authored - Provides detailed attribution for most-used symbols - ```python - symbols_with_usages = [] - for symbol in codebase.symbols: - if hasattr(symbol, "usages") and len(symbol.usages) > 0: - symbols_with_usages.append((symbol, len(symbol.usages))) - ``` +```python + symbols_with_usages = [] + for symbol in codebase.symbols: + if hasattr(symbol, "usages") and len(symbol.usages) > 0: + symbols_with_usages.append((symbol, len(symbol.usages))) +``` ## Example Output @@ -72,6 +72,7 @@ python run.py ``` The script will automatically: + - Use the current directory if it's a git repository - Fall back to a sample repository if not in a git repo - Generate comprehensive attribution analysis @@ -90,4 +91,4 @@ The script will automatically: ## Contributing -Feel free to submit issues and enhancement requests! \ No newline at end of file +Feel free to submit issues and enhancement requests! 
From 65dfcb75cbbf058e2f983592075a49eb67b04d69 Mon Sep 17 00:00:00 2001 From: Vishal Shenoy Date: Wed, 12 Mar 2025 15:01:42 -0700 Subject: [PATCH 3/3] update readme --- codegen-examples/examples/symbol-attributions/README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/codegen-examples/examples/symbol-attributions/README.md b/codegen-examples/examples/symbol-attributions/README.md index 13a6fc68d..54c56b432 100644 --- a/codegen-examples/examples/symbol-attributions/README.md +++ b/codegen-examples/examples/symbol-attributions/README.md @@ -12,12 +12,10 @@ This script performs several key functions: - Analyzes import relationships and dependencies ```python - repo_path = os.getcwd() - repo_config = RepoConfig.from_repo_path(repo_path) - repo_operator = RepoOperator(repo_config=repo_config) + from codegen import Codebase - project = ProjectConfig.from_repo_operator(repo_operator=repo_operator, programming_language=ProgrammingLanguage.PYTHON) - codebase = Codebase(projects=[project]) + # Initialize codebase object from a GitHub repository ("owner/name") + codebase = Codebase.from_repo("your-org/your-repo", language="python") ``` 2. **AI Impact Analysis**