# Test SimpleCache File Operations

This notebook tests the file addition and deletion functionality of AutoCoderRAGAsyncUpdateQueue.

In [1]:
import os
import tempfile
import shutil
import time
from autocoder.rag.variable_holder import VariableHolder
from tokenizers import Tokenizer

VariableHolder.TOKENIZER_PATH = "/Users/allwefantasy/Downloads/tokenizer.json"
VariableHolder.TOKENIZER_MODEL = Tokenizer.from_file(VariableHolder.TOKENIZER_PATH)


from autocoder.rag.cache.simple_cache import AutoCoderRAGAsyncUpdateQueue


# Create a temporary directory for testing
test_dir = tempfile.mkdtemp()
print(f"Created test directory: {test_dir}")

Created test directory: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc


In [2]:
# Initialize the cache manager
cache_manager = AutoCoderRAGAsyncUpdateQueue(
    path=test_dir,
    ignore_spec=None,
    required_exts=[".txt", ".py"]
)

[32m2024-12-17 14:43:13.512[0m | [1mINFO    [0m | [36mautocoder.rag.cache.simple_cache[0m:[36mprocess_queue[0m:[36m134[0m - [1m/var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test1.txt is detected to be removed[0m
[32m2024-12-17 14:44:14.746[0m | [1mINFO    [0m | [36mautocoder.rag.cache.simple_cache[0m:[36mprocess_queue[0m:[36m138[0m - [1m/var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py is detected to be updated[0m
[32m2024-12-17 14:44:14.748[0m | [1mINFO    [0m | [36mautocoder.rag.utils[0m:[36mprocess_file_local[0m:[36m124[0m - [1mLoad file /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py in 0.0012199878692626953[0m


In [3]:
# Test file addition
def test_file_addition():
    # Create a test file
    test_file = os.path.join(test_dir, "test1.txt")
    with open(test_file, "w") as f:
        f.write("This is a test file")
    
    # Wait for the cache to update
    time.sleep(2)
    
    # Get cache and check if file is added
    cache = cache_manager.get_cache()
    print("\nCache after adding file:")
    for file_path, data in cache.items():
        print(f"File: {file_path}")
        print(f"Content: {data['content']}")

test_file_addition()

2024-12-17 14:43:06.436 | INFO     | autocoder.rag.utils:process_file_in_multi_process:66 - Load file /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test1.txt in 0.0004999637603759766
[32m2024-12-17 14:43:06.520[0m | [1mINFO    [0m | [36mautocoder.rag.cache.simple_cache[0m:[36mtrigger_update[0m:[36m106[0m - [1m检查文件是否有更新.....[0m
[32m2024-12-17 14:43:06.522[0m | [1mINFO    [0m | [36mautocoder.rag.cache.simple_cache[0m:[36mtrigger_update[0m:[36m119[0m - [1mfiles_to_process: [][0m
[32m2024-12-17 14:43:06.523[0m | [1mINFO    [0m | [36mautocoder.rag.cache.simple_cache[0m:[36mtrigger_update[0m:[36m120[0m - [1mdeleted_files: set()[0m



Cache after adding file:
File: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test1.txt
Content: [{'module_name': '##File: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test1.txt', 'source_code': 'This is a test file', 'tag': '', 'tokens': 13, 'metadata': {}}]


In [6]:
# Test file deletion
def test_file_deletion():
    # Delete the test file
    test_file = os.path.join(test_dir, "test1.txt")
    os.remove(test_file)
    # trigger update
    cache = cache_manager.get_cache()
    
    # Wait for the cache to update
    time.sleep(4)
    
    # Get cache and verify file is removed
    cache = cache_manager.get_cache()
    print("\nCache after deleting file:")
    print(f"Files in cache: {list(cache.keys())}")

test_file_deletion()

FileNotFoundError: [Errno 2] No such file or directory: '/var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test1.txt'

In [7]:
# Test file update
def test_file_update():
    # Create and update a test file
    test_file = os.path.join(test_dir, "test2.py")
    
    # Create initial file
    with open(test_file, "w") as f:
        f.write("def hello():\n    print('Hello')")
    
    # Wait for initial cache update
    time.sleep(2)
    print("\nCache after initial file creation:")
    cache = cache_manager.get_cache()
    for file_path, data in cache.items():
        print(f"File: {file_path}")
        print(f"Content: {data['content']}")
    
    # Update the file
    time.sleep(1)  # Ensure modification time is different
    with open(test_file, "w") as f:
        f.write("def hello():\n    print('Hello World')")
    
    # Wait for cache update
    time.sleep(2)
    print("\nCache after file update:")
    cache = cache_manager.get_cache()
    for file_path, data in cache.items():
        print(f"File: {file_path}")
        print(f"Content: {data['content']}")

test_file_update()


Cache after initial file creation:


2024-12-17 14:44:11.507 | INFO     | autocoder.rag.utils:process_file_in_multi_process:66 - Load file /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py in 0.0006647109985351562
[32m2024-12-17 14:44:11.574[0m | [1mINFO    [0m | [36mautocoder.rag.cache.simple_cache[0m:[36mtrigger_update[0m:[36m106[0m - [1m检查文件是否有更新.....[0m
[32m2024-12-17 14:44:11.576[0m | [1mINFO    [0m | [36mautocoder.rag.cache.simple_cache[0m:[36mtrigger_update[0m:[36m119[0m - [1mfiles_to_process: [][0m
[32m2024-12-17 14:44:11.577[0m | [1mINFO    [0m | [36mautocoder.rag.cache.simple_cache[0m:[36mtrigger_update[0m:[36m120[0m - [1mdeleted_files: set()[0m


File: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py
Content: [{'module_name': '##File: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py', 'source_code': "def hello():\n    print('Hello')", 'tag': '', 'tokens': 17, 'metadata': {}}]


[32m2024-12-17 14:44:14.586[0m | [1mINFO    [0m | [36mautocoder.rag.cache.simple_cache[0m:[36mtrigger_update[0m:[36m106[0m - [1m检查文件是否有更新.....[0m
[32m2024-12-17 14:44:14.588[0m | [1mINFO    [0m | [36mautocoder.rag.cache.simple_cache[0m:[36mtrigger_update[0m:[36m119[0m - [1mfiles_to_process: [('/var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py', 'test2.py', 1734417852.5824633, 'f2bb1a30c5b271b44c14ce8d7456ac0a')][0m
[32m2024-12-17 14:44:14.589[0m | [1mINFO    [0m | [36mautocoder.rag.cache.simple_cache[0m:[36mtrigger_update[0m:[36m120[0m - [1mdeleted_files: set()[0m



Cache after file update:
File: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py
Content: [{'module_name': '##File: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py', 'source_code': "def hello():\n    print('Hello')", 'tag': '', 'tokens': 17, 'metadata': {}}]


In [7]:
# Cleanup
def cleanup():
    cache_manager.stop()
    shutil.rmtree(test_dir)
    print(f"\nCleaned up test directory: {test_dir}")

cleanup()


Cleaned up test directory: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmps40ibmpk
