From 8c6fb29b39464e4f04958c34e4aa592ba8f200d6 Mon Sep 17 00:00:00 2001 From: Robert Haase Date: Thu, 8 Aug 2024 14:49:55 +0200 Subject: [PATCH 1/2] erase outputs of generated notebooks --- src/git_bob/_ai_github_utilities.py | 16 ++++++++-------- src/git_bob/_utilities.py | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/git_bob/_ai_github_utilities.py b/src/git_bob/_ai_github_utilities.py index 95f37474..d223cc32 100644 --- a/src/git_bob/_ai_github_utilities.py +++ b/src/git_bob/_ai_github_utilities.py @@ -174,10 +174,12 @@ def create_or_modify_file(repository, issue, filename, branch_name, issue_summar The name of the branch to create or modify the file in. issue_summary : str The summary of the issue to solve. + prompt_function : function + The function to generate the file modification content. """ Log().log(f"-> create_or_modify_file({repository}, {issue}, {filename}, {branch_name})") from ._github_utilities import get_repository_file_contents, write_file_in_new_branch, create_branch, check_if_file_exists, get_file_in_repository - from ._utilities import remove_outer_markdown, split_content_and_summary + from ._utilities import remove_outer_markdown, split_content_and_summary, erase_outputs_of_code_cells original_ipynb_file_content = None @@ -187,12 +189,7 @@ def create_or_modify_file(repository, issue, filename, branch_name, issue_summar if filename.endswith('.ipynb'): print("Removing outputs from ipynb file") original_ipynb_file_content = file_content - notebook = json.loads(file_content) - for cell in notebook['cells']: - if cell['cell_type'] == 'code': - cell['outputs'] = [] - cell['execution_count'] = None - file_content = json.dumps(notebook, indent=1) + file_content = erase_outputs_of_code_cells(file_content) file_content_instruction = f""" Modify the file "{filename}" to solve the issue #{issue}. Keep your modifications absolutely minimal. 
@@ -253,8 +250,11 @@ def create_or_modify_file(repository, issue, filename, branch_name, issue_summar else: # if code is different, any future results may be different, too print("codes no longer match") break - new_content = json.dumps(new_notebook, indent=1) + + elif filename.endswith('.ipynb'): + print("Erasing outputs in generated ipynb file") + new_content = erase_outputs_of_code_cells(new_content) print("New file content", new_content) print("Summary", commit_message) diff --git a/src/git_bob/_utilities.py b/src/git_bob/_utilities.py index 6d509e5e..1dfdecd1 100644 --- a/src/git_bob/_utilities.py +++ b/src/git_bob/_utilities.py @@ -118,3 +118,22 @@ def split_content_and_summary(text): new_content = remove_outer_markdown("\n".join(remaining_content)) return new_content.strip(), summary.strip() + + +def erase_outputs_of_code_cells(file_content): + """ + Erase outputs of code cells in a Jupyter notebook. + + Parameters + ---------- + file_content : str + The notebook content as a string. 
+ """ + import json + notebook = json.loads(file_content) + for cell in notebook.get('cells', []): + if cell.get('cell_type') == 'code': + cell['outputs'] = [] + cell['execution_count'] = None + file_content = json.dumps(notebook, indent=1) + return file_content From 7096c3550741c57d497ed134ae1132a2c338d577 Mon Sep 17 00:00:00 2001 From: Robert Haase Date: Thu, 8 Aug 2024 15:00:16 +0200 Subject: [PATCH 2/2] added test --- tests/test_utilities.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/test_utilities.py b/tests/test_utilities.py index e475df4c..8a5aeb96 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -20,3 +20,27 @@ def test_split_content_and_summary(): assert content.strip() == "blabla" assert summary == "summary" + + +def test_create_or_modify_file_ipynb(): + from git_bob._utilities import erase_outputs_of_code_cells + import json + + # Mock notebook content + notebook_content = { + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "outputs": [{"output_type": "stream", "text": "Hello, World!"}] + } + ] + } + + file_content = json.dumps(notebook_content) + modified_content = erase_outputs_of_code_cells(file_content) + + # Check if output is removed and execution_count is None + modified_notebook = json.loads(modified_content) + assert modified_notebook["cells"][0]["outputs"] == [] + assert modified_notebook["cells"][0]["execution_count"] is None