In [2]:
import csv
import os
import sys
import concurrent.futures
from tqdm import tqdm  # Importing the tqdm library for the progress bar

# Add the src directory to the Python path
sys.path.append(os.path.abspath(os.path.join('..', 'src')))
# Now you can import llm_interface from llm_backend
from llm_backend import llm_interface

def _parse_name_verdict(response):
    """
    Extract ``(is_default, suggestion)`` from a raw LLM reply.

    The reply is expected to contain ``DEFAULT: <Yes/No>, SUGGESTION: <name>``.
    Anything that cannot be parsed (missing markers, non-string reply) yields
    ``(False, "")`` so one malformed reply does not abort a batch run.
    """
    try:
        verdict_text = response.split("DEFAULT:")[1].split(",")[0]
        suggestion_text = response.split("SUGGESTION:")[1]
    except (AttributeError, IndexError, TypeError):
        # Marker missing, or the backend returned something that is not a str.
        return False, ""

    # Models do not reliably keep the requested capitalisation ("yes", "YES").
    is_default = "yes" in verdict_text.lower()

    # Keep only the first line: models often append an explanation after the
    # requested single-line answer, which would otherwise land in the CSV cell.
    suggestion_lines = suggestion_text.strip().splitlines()
    suggestion = suggestion_lines[0] if suggestion_lines else ""
    # Drop decoration echoed from the format line: [name], 'name', `name`, "name."
    suggestion = suggestion.strip(" \t[]'\"`.")

    if suggestion.lower() in ("none", "none if good"):
        suggestion = ""
    return is_default, suggestion


def is_default_or_suggest_better_name(file_type, element_type, element_name, model="llama3:instruct"):
    """
    Ask the LLM whether a code element's name is a default/non-descriptive
    identifier and, if so, which snake_case name would be better.

    Args:
        file_type: Kind of source file the element came from; interpolated into the prompt.
        element_type: Kind of element (the prompt examples use "function" and "variable").
        element_name: The identifier to evaluate.
        model: Model name forwarded to ``llm_interface``.

    Returns:
        Tuple ``(is_default, suggestion)``. ``is_default`` is a bool;
        ``suggestion`` is the suggested name, or ``""`` when the model gave
        none or its reply could not be parsed.

    Exceptions raised by ``llm_interface`` itself are not caught here.
    """
    # NOTE(review): the examples in this prompt contradict its instructions:
    # the first two echo the original name back as the "better" suggestion, and
    # the last two return a name instead of 'None if good'. The text is left
    # unchanged because editing it changes model output; revisit separately.
    prompt = f"""
You are a code quality expert analyzing a {element_type} named '{element_name}' from a {file_type} file.
First, determine if this name is likely a default, temporary, or non-descriptive identifier (like a single letter,
abbreviated name, or auto-generated name).
Then, if it appears to be a default or non-descriptive name, suggest a better snake_case name that would improve
code readability based on common naming conventions for {file_type} files.
Return ONLY a single line in this exact format:
DEFAULT: [Yes/No], SUGGESTION: [suggested_snake_case_name or 'None if good']
Examples:
- For a function named 'a' -> DEFAULT: Yes, SUGGESTION: a
- For a variable named 'tmp' -> DEFAULT: Yes, SUGGESTION: tmp
- For a function named 'calculateTotal' -> DEFAULT: No, SUGGESTION: calculate_total
- For a well-named variable 'user_count' -> DEFAULT: No, SUGGESTION: user_count
    """
    # Call the LLM
    response = llm_interface(
        prompt=prompt,
        model=model,
        temperature=0.3,  # Lower temperature for more deterministic responses
        top_p=0.9,
        max_tokens=100
    )
    return _parse_name_verdict(response)


def process_code_elements_file(input_file, output_file, model="llama3:instruct"):
    """
    Read a CSV of code elements, ask the LLM about each one in parallel, and
    write a copy of the CSV with two extra columns.

    The first row of ``input_file`` is treated as the header. For every other
    non-blank row, columns 0, 1 and 2 are passed to
    ``is_default_or_suggest_better_name`` as file type, element type and
    element name. The output contains the original header plus
    ``"Is Default"`` and ``"Suggested Name"``, followed by each input row with
    its two result values appended, in the same order as the input.

    Args:
        input_file: Path of the CSV to read.
        output_file: Path of the CSV to write (created or overwritten).
        model: Model name forwarded to ``is_default_or_suggest_better_name``.

    Raises:
        ValueError: If ``input_file`` has no header row (the file is empty).
        IndexError: If a non-blank data row has fewer than three columns.
    """
    # Read the whole input before touching the output, so a bad or missing
    # input never leaves a truncated output file behind. newline='' lets the
    # csv module handle line endings itself (quoted fields may contain newlines).
    with open(input_file, 'r', newline='') as infile:
        reader = csv.reader(infile)
        header = next(reader, None)
        if header is None:
            raise ValueError(f"Input file '{input_file}' is empty; expected a CSV header row")
        # csv.reader yields [] for blank lines (e.g. a trailing empty line); skip them.
        rows = [row for row in reader if row]

    # Create output file with extended header
    with open(output_file, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(header + ["Is Default", "Suggested Name"])

        # Use ThreadPoolExecutor for parallelizing the LLM calls
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(is_default_or_suggest_better_name, row[0], row[1], row[2], model)
                for row in rows
            ]

            # Collect results in submission order so the output rows line up
            # with the input rows; the calls themselves still run concurrently.
            for row, future in tqdm(zip(rows, futures), total=len(futures), desc="Processing Code Elements", unit="rows"):
                is_default, suggestion = future.result()
                writer.writerow(row + [is_default, suggestion])

# For Jupyter notebook usage


def analyze_code_elements(input_file_path, output_file_path=None, model="llama3:instruct"):
    """
    Notebook entry point: analyze a code-elements CSV and report where the
    results were written.

    Args:
        input_file_path: Path of the CSV to analyze.
        output_file_path: Where to write the results. When omitted,
            ``_analyzed`` is inserted before the input file's extension
            (``elements.csv`` -> ``elements_analyzed.csv``).
        model: Model name forwarded to ``process_code_elements_file``.
    """
    if output_file_path is None:
        # Split on the real extension rather than str.replace('.csv', ...):
        # replace() returns the input path unchanged when it has no '.csv'
        # (so the output would overwrite the input), and it also rewrites
        # '.csv' occurrences inside directory names.
        root, extension = os.path.splitext(input_file_path)
        output_file_path = f"{root}_analyzed{extension}"

    process_code_elements_file(input_file_path, output_file_path, model)
    print(f"Analysis complete. Results saved to {output_file_path}")


# If running as a script
if __name__ == "__main__":
    # Only use sys.argv if not in a Jupyter environment
    # (inside a kernel, sys.argv holds the kernel's own launch arguments).
    if 'ipykernel' not in sys.modules:
        if len(sys.argv) < 2:
            print("Usage: python script.py <input_file> [output_file] [model]")
            sys.exit(1)

        input_file = sys.argv[1]
        if len(sys.argv) > 2:
            output_file = sys.argv[2]
        else:
            # Insert '_analyzed' before the real extension. str.replace('.csv', ...)
            # would return the input path unchanged for a non-.csv name, making
            # the output overwrite the input file.
            output_file = "{}_analyzed{}".format(*os.path.splitext(input_file))
        model = sys.argv[3] if len(sys.argv) > 3 else "llama3:instruct"

        process_code_elements_file(input_file, output_file, model)
        print(f"Analysis complete. Results saved to {output_file}")

In [4]:
# Example usage in Jupyter notebook.
# The path is relative, so the CSV must be in the kernel's working directory.
# No output path is passed, so analyze_code_elements derives one from the input name.
input_file = "code_elements_20250226_131211_js.csv"
analyze_code_elements(input_file)

Processing Code Elements: 100%|██████████| 3278/3278 [02:22<00:00, 22.93rows/s]

Analysis complete. Results saved to code_elements_20250226_131211_js_analyzed.csv



