In [None]:
import os
import zipfile
import subprocess
import tempfile
import shutil
import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI
import json
import pandas as pd
import logging
import re
import tiktoken 
import time
import uuid


from datetime import datetime

from concurrent.futures import ThreadPoolExecutor, as_completed
from context.RepositoryContext import RepositoryContext


# Configure logging once at import time. DEBUG level is verbose: it also
# surfaces debug records emitted by libraries that use the logging module.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
print("Starting script execution...", flush=True)  # flush=True so the banner is not held in a buffer

# Load API key and instantiate OpenAI client.
# load_dotenv() is called before the environment variable is read; the script
# refuses to start without a key because every LLM helper below needs `client`.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("OpenAI API key not found. Ensure it is set in the .env file.")

# Shared module-level client used by all LLM helper functions in this file.
client = OpenAI(api_key=api_key)

##############################
# LLM helper functions
##############################

def call_llm(messages, model="gpt-3.5-turbo", context: RepositoryContext = None):
    """
    Send a chat-completion request and return the reply text.

    Args:
        messages (list): List of message dictionaries with 'role' and 'content' keys.
        model (str, optional): The LLM model to use. Defaults to "gpt-3.5-turbo".
        context (RepositoryContext, optional): Repository context object that stores
            chat history. When it already holds a non-empty "chat_history", the first
            user message in ``messages`` is handed to ``context.generate_messages`` and
            the returned list replaces ``messages`` for the request.

    Returns:
        str: The stripped reply text. An empty string is returned when the API
        response carries no text content.
    """
    logger.info("Sending LLM request")

    # Combine chat history with the new message if the context already has history.
    if context and context.context.get("chat_history"):
        # Only the first user message is forwarded; other entries are ignored here.
        new_message = next((msg for msg in messages if msg['role'] == 'user'), None)
        print("The new message is: ", new_message)
        messages = context.generate_messages(message=new_message)

    print("The messages are: ", messages)

    # Send request to LLM
    response = client.chat.completions.create(
        model=model,
        messages=messages
    )
    # `content` is optional in the API response; guard so that a reply without
    # text does not raise AttributeError on `.strip()`.
    output = (response.choices[0].message.content or "").strip()

    # Record the assistant reply in the context's chat history.
    if context:
        context.add_chat_message('assistant', output)

    logger.info("Getting the LLM response")
    print("The output is: ", output)

    return output

def prefilter_text(text):
    r"""
    Heuristically keep only the command-like lines of a text.

    Use with caution: uncommon commands may not be captured. This helper is
    currently unused; it is kept as a cheap pre-filter in case LLM cost needs
    to be reduced later.

    What it does:
      - Merges multi-line commands that use a backslash (\) for line continuation.
      - Detects heredoc blocks with any delimiter, including quoted ('EOF', "EOF")
        and tab-stripping (<<-EOF) forms. Here-strings (<<<) are not heredocs.
      - Removes simple inline comments (anything after whitespace followed by #).
      - Captures inline variable assignments (allowing spaces around '=' and multiple assignments).
      - Recognizes subshell execution using both $(...) and backticks.
      - Detects alias definitions and function definitions (using both "function" and the shorthand name() {).
      - Captures control structures (if/elif/else/fi, for/while/until/do/done, case/esac) as blocks.
      - Captures command groups using braces { ... } or parentheses ( ... ).
      - Includes commands with pipes, logical operators, and redirections.

    Args:
        text (str): Raw text (e.g. a README).

    Returns:
        str: The retained lines joined with newlines ("" when nothing matched).
    """
    # First, merge lines that end with a backslash.
    merged_lines = []
    buffer = ""
    for line in text.splitlines():
        stripped = line.rstrip()
        if stripped.endswith("\\"):
            buffer += stripped[:-1] + " "
        else:
            buffer += stripped
            merged_lines.append(buffer)
            buffer = ""
    if buffer:
        merged_lines.append(buffer)

    # Remove simple inline comments (this is simplistic and may remove '#' in strings).
    def remove_inline_comments(line):
        # Naive: split on '#' only when preceded by whitespace. A robust
        # solution would need a real shell parser.
        return re.split(r'\s+#', line, maxsplit=1)[0].strip()

    merged_lines = [remove_inline_comments(line) for line in merged_lines if line.strip()]

    filtered_lines = []
    heredoc_buffer = []
    in_heredoc = False
    heredoc_delimiter = None

    multi_line_block = []
    in_block = False
    # End tokens for control blocks including if/for/while/until/case.
    block_end_tokens = {"fi", "done", "esac"}

    # Heredoc start: exactly two '<' (so '<<<' here-strings are excluded), an
    # optional '-' and an optionally quoted delimiter. The quotes are not part
    # of the terminator line, so only the bare word is captured.
    heredoc_pattern = re.compile(r"(?<!<)<<(?!<)-?\s*(['\"]?)([^\s'\"]+)\1")
    # Inline variable assignments allowing extra spaces and multiple assignments.
    inline_assignment_pattern = re.compile(r"^(?:\w+\s*=\s*\S+\s+)+\S+")
    # Function definitions: either function keyword or the shorthand pattern.
    function_pattern = re.compile(r"^(?:function\s+\w+\s*\{|[\w\-_]+\s*\(\)\s*\{)")
    # Command groups with braces or parentheses spanning the whole line.
    group_pattern = re.compile(r"^[\{\(].+[\}\)]\s*$")
    # Keywords for control structures that begin blocks.
    block_start_keywords = re.compile(r"^(if|for|while|until|case|elif|else)\b")
    # A trailing `do` / `then` only opens a block when it is a separate word;
    # a plain endswith("do") would also fire on e.g. "make todo" or "sudo".
    block_opener_suffix = re.compile(r"(?:^|[\s;])(?:do|then)$")

    # Common execution keywords (a wide set of common commands and builtins).
    execution_keywords = (
        "python", "./", "bash", "sh ", "make", "npm", "yarn", "pip",
        "git", "docker", "gcc", "java", "go ", "node", "cargo", "ruby", "perl",
        "mvn", "gradle", "rustc", "flutter", "dotnet", "kubectl", "helm", "conda",
        "eval", "exec", "nohup", "trap", "xargs", "alias"
    )

    for line in merged_lines:
        line = line.strip()

        # Heredoc handling: detect start, then capture until a line equals the delimiter.
        heredoc_match = heredoc_pattern.search(line)
        if heredoc_match and not in_heredoc:
            in_heredoc = True
            heredoc_delimiter = heredoc_match.group(2)
            heredoc_buffer.append(line)
            continue
        if in_heredoc:
            heredoc_buffer.append(line)
            if line == heredoc_delimiter:
                filtered_lines.extend(heredoc_buffer)
                heredoc_buffer = []
                in_heredoc = False
                heredoc_delimiter = None
            continue

        # Skip lines that are empty after comment removal.
        if not line:
            continue

        # Handle multi-line control structures as blocks.
        if block_start_keywords.match(line) or block_opener_suffix.search(line):
            in_block = True
            multi_line_block.append(line)
            continue
        if in_block:
            multi_line_block.append(line)
            # If the line exactly matches an end token, end the block.
            if line in block_end_tokens:
                filtered_lines.extend(multi_line_block)
                multi_line_block = []
                in_block = False
            continue

        # Capture inline variable assignments (even multiple assignments).
        if inline_assignment_pattern.match(line):
            filtered_lines.append(line)
            continue

        # Capture subshell executions: both $() and backticks.
        if "$(" in line or "`" in line:
            filtered_lines.append(line)
            continue

        # Capture alias definitions and function definitions.
        if line.startswith("alias ") or function_pattern.match(line):
            filtered_lines.append(line)
            continue

        # Capture command groups enclosed in braces or parentheses.
        if group_pattern.match(line):
            filtered_lines.append(line)
            continue

        # Capture lines that include any execution keywords.
        if any(tok in line for tok in execution_keywords):
            filtered_lines.append(line)
            continue

        # Capture general command lines that start with a word (ls, pwd, ...).
        if re.match(r"^[a-zA-Z0-9\-_]+\s", line):
            filtered_lines.append(line)
            continue

        # Capture lines with pipes, logical operators, or redirection operators.
        if any(op in line for op in ("|", "&&", ";", ">", ">>", "<")):
            filtered_lines.append(line)
            continue

    # A heredoc or control block that is still open at end of input is kept
    # rather than dropped: for a pre-filter, an extra line is cheaper than
    # silently losing every command that followed the opener.
    if in_heredoc:
        filtered_lines.extend(heredoc_buffer)
    if in_block:
        filtered_lines.extend(multi_line_block)

    return "\n".join(filtered_lines)


def split_readme_into_chunks(README, max_token_limit=16000):
    """
    Split README text into chunks of whole lines, each within a token budget.

    - Lines joined by a trailing backslash continuation are merged first, so a
      multi-line command is never split across chunks.
    - Whole lines are grouped greedily; a line is never broken apart. A single
      line that alone exceeds ``max_token_limit`` therefore becomes its own
      (oversized) chunk.
    - Token counts are per line (cl100k_base encoding); the newlines used to
      join lines are not counted.

    Args:
        README (str): Full README text.
        max_token_limit (int, optional): Token budget per chunk. Defaults to 16000.

    Returns:
        list[str]: Non-empty list entries, one per chunk ([] for empty input).
    """
    enc = tiktoken.get_encoding("cl100k_base")

    # Merge multi-line commands (backslash continuation).
    merged_lines = []
    buffer = ""

    for line in README.splitlines():
        stripped = line.rstrip()
        if stripped.endswith("\\"):
            buffer += stripped[:-1] + " "  # Drop the backslash, keep a separator
        else:
            buffer += stripped
            merged_lines.append(buffer)
            buffer = ""

    if buffer:
        merged_lines.append(buffer)

    chunks = []
    current_chunk = []
    current_token_count = 0

    for line in merged_lines:
        # disallowed_special=() makes literal special-token text found in a
        # README (e.g. "<|endoftext|>") count as ordinary text instead of raising.
        token_count = len(enc.encode(line, disallowed_special=()))

        # Only flush when there is something to flush; otherwise an oversized
        # first line would produce an empty leading chunk.
        if current_chunk and current_token_count + token_count > max_token_limit:
            chunks.append("\n".join(current_chunk))
            current_chunk = [line]
            current_token_count = token_count
        else:
            current_chunk.append(line)
            current_token_count += token_count

    if current_chunk:
        chunks.append("\n".join(current_chunk))

    return chunks

##############################
# Existing Functions (File-based)
##############################

def get_command_lines_from_readme(README, context: RepositoryContext = None):
    """
    Extract shell commands from (possibly large) README text via the LLM.

    The README is split into chunks with ``split_readme_into_chunks``; each
    chunk is sent to ``call_llm`` separately and the cleaned replies are joined
    with newlines. A chunk whose request fails is logged and skipped.

    Args:
        README (str): README text.
        context (RepositoryContext, optional): Passed through to ``call_llm``.

    Returns:
        str: Extracted commands, one per line, or the literal string
        "No README provided." when ``README`` is empty/None.
    """
    logger.info("Extracting command lines from provided README.")
    if not README:
        logger.warning("No README provided for command extraction.")
        return "No README provided."

    # NOTE(review): 16300 tokens of README plus the instruction prompt and the
    # reply may exceed the default model's context window - confirm the budget.
    chunks = split_readme_into_chunks(README, max_token_limit=16300)
    extracted_commands = []

    for idx, chunk in enumerate(chunks):
        logger.info(f"Processing chunk {idx + 1}/{len(chunks)}")

        # The backslash in rule 4 is written as '\\' because this is a non-raw
        # f-string: a bare \' is an escape for the quote and the backslash
        # would vanish from the prompt.
        prompt = f"""Extract only the valid, executable shell commands from the following README text. Follow these rules strictly:
            1. Return one valid shell command per line, without any additional commentary.
            2. Do not include any markdown formatting such as triple backticks, asterisks, or hyphens used for lists.
            3. Remove any extraneous characters, inline explanations, or documentation text.
            4. Preserve multi-line commands (using '\\' for line continuation) and command sequences (with operators like && or ;).
            5. Output only commands that can be directly executed in a Unix-like shell.
            6. If a line does not represent a valid shell command (e.g., a link, descriptive text, or a markdown heading), skip it.

            For example, if the README contains:
                - `npm i -g @saleor/cli`
                - Some text: For installation, run npm i -g @saleor/cli`
            You should output:
            npm i -g @saleor/cli
            Now, extract the commands from the following text: {chunk}"""


        messages = [
            {"role": "user", "content": prompt}
        ]

        try:
            extracted_chunk = call_llm(messages, context=context)
            # The model sometimes wraps its answer in a fenced block despite rule 2.
            cleaned_chunk = extracted_chunk.replace("```bash", "").replace("```", "").strip()
            extracted_commands.append(cleaned_chunk)
        except Exception as e:
            # Best effort: one failed chunk must not discard the others.
            logger.error(f"Error processing chunk {idx + 1}: {str(e)}")
            continue

    final_commands = "\n".join(extracted_commands).strip()
    logger.info("Completed extraction of command lines from provided text.")
    logger.debug(f"Extracted commands: {final_commands[:200]}{'...' if len(final_commands) > 200 else ''}")

    return final_commands

def run_ls(working_dir):
    """
    List the contents of ``working_dir`` with ``ls -la``.

    Returns the captured stdout of the command. If launching the command
    raises, a string of the form "Error running ls: <message>" is returned
    instead of propagating the exception.
    """
    try:
        listing = subprocess.run(
            "ls -la",
            cwd=working_dir,
            shell=True,
            text=True,
            capture_output=True,
        )
    except Exception as err:
        return f"Error running ls: {str(err)}"
    return listing.stdout

def get_executable_command_from_ls(ls_output):
    """
    Ask the LLM which command runs the repository's primary executable.

    The 'ls -la' listing is embedded in a prompt asking for the command(s)
    that run the main executable or self-test, as a way to verify that
    dependencies are installed.

    Args:
        ls_output (str): Output of 'ls -la' in the repository root.

    Returns:
        str: The model's reply with markdown code fences removed; an empty
        string when the reply carries no text content.
    """
    prompt = (
        "You are given the output of an 'ls -la' command executed in the root of a cloned repository:\n"
        f"{ls_output}\n\n"
        "Based on this listing, please provide the command-line instruction(s) that would run the primary executable "
        "or self-test of the application, which can help verify that all dependencies are properly installed. "
        "Only output the command(s) without any additional explanation or markdown formatting."
    )
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages
    )
    # `content` is optional in the API response; avoid AttributeError on None.
    command_output = (response.choices[0].message.content or "").strip()
    # Remove any markdown formatting if present
    command_output = command_output.replace("```bash", "").replace("```", "").strip()
    return command_output

def run_executable(executable_command, working_dir):
    """
    Run a shell command and stream its accumulated log.

    This is a generator: every yielded value is the full log so far (not a
    delta), so a consumer can simply display the latest value.

    Success is decided by the exit code only. stderr is merged into stdout:
    many tools print warnings or progress to stderr while succeeding, and
    draining a single pipe avoids blocking when the child fills a second,
    unread pipe.

    Args:
        executable_command (str): Command line, executed through the shell.
        working_dir (str | None): Directory to run the command in.

    Yields:
        str: The cumulative log after each new line / status message.
    """
    log_history = ""
    if not executable_command:
        log_history += "❌ No executable command provided.\n"
        yield log_history
        return

    log_history += f"Running executable command: {executable_command}\n"
    yield log_history

    try:
        # The context manager closes the pipe and waits for the child even if
        # the consumer stops iterating early.
        with subprocess.Popen(
            executable_command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            cwd=working_dir
        ) as process:
            # Stream combined output line-by-line.
            for line in process.stdout:
                log_history += line
                yield log_history

            exit_code = process.wait()

        if exit_code == 0:
            log_history += "\n✅ Executable command ran successfully.\n"
        else:
            log_history += f"\n❌ Executable command failed with exit code {exit_code}.\n"
        yield log_history

    except Exception as e:
        log_history += f"\n❌ Exception while running executable command: {str(e)}\n"
        yield log_history



##############################
# New Functions for GitHub URL-based Execution
##############################

def clone_repo(github_url, base_dir="/home/ec2-user/repo_temp/repos"):
    """
    Clone a Git repository into a fresh, uniquely named directory.

    Args:
        github_url (str): Repository URL to clone.
        base_dir (str, optional): Parent directory for clones; created if
            missing. Defaults to "/home/ec2-user/repo_temp/repos".

    Returns:
        str: Path of the cloned repository
        (``<base_dir>/<repo_name>_<unix_time>_<8 hex chars>``).

    Raises:
        Exception: If ``git clone`` exits with a non-zero status; the message
        carries git's stderr. A partially created directory is removed first.
    """
    os.makedirs(base_dir, exist_ok=True)

    # Tolerate a trailing slash and a ".git" suffix when deriving the name.
    repo_name = github_url.rstrip('/').split('/')[-1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[:-len(".git")]
    unique_id = f"{int(time.time())}_{uuid.uuid4().hex[:8]}"
    repo_dir = os.path.join(base_dir, f"{repo_name}_{unique_id}")

    # Argument list without a shell: the URL comes from external data and must
    # not be interpreted by the shell. "--" stops git from parsing a URL that
    # starts with "-" as an option.
    result = subprocess.run(
        ["git", "clone", "--", github_url, repo_dir],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )

    if result.returncode != 0:
        if os.path.exists(repo_dir):
            shutil.rmtree(repo_dir)
        raise Exception(f"Failed to clone repository: {result.stderr}")

    return repo_dir

def get_command_lines_from_text(text):
    """
    Use the LLM to extract command-line instructions from a given text.

    Args:
        text (str): README-like text.

    Returns:
        str: The model's reply with markdown code fences removed, or the
        literal string "No text provided." when ``text`` is empty/None.
    """
    if not text:
        return "No text provided."
    prompt = (
        "You are given the contents of a README file below. "
        "Please extract and print only the command-line instructions. "
        "Ignore all other text. Remove triple backticks, etc.\n\n"
        f"{text}"
    )
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages
    )
    # `content` is optional in the API response; avoid AttributeError on None.
    extracted_commands = response.choices[0].message.content or ""
    cleaned_commands = extracted_commands.replace("```bash", "").replace("```", "").strip()
    return cleaned_commands

def execute_and_analyze_command(command, repo_dir, context: RepositoryContext):
    """
    Execute a single shell command and let the LLM analyze its output.

    The command runs in ``repo_dir`` with a 60 second timeout. Its combined
    stdout+stderr is sent to the LLM (through ``call_llm`` with ``context``),
    which must answer with a JSON object describing the outcome.

    Args:
        command (str): Shell command to execute.
        repo_dir (str): Working directory for the command.
        context (RepositoryContext): Conversation context; its system message
            is replaced with the analysis instructions.

    Returns:
        tuple: ``(success, log_output, next_command, critical_failure,
        alternative_command, dependency_setup)``. If anything raises (timeout,
        request error, unparsable reply), the tuple is
        ``(False, "Error executing command: ...", None, True, None, False)``.
    """
    log_output = ""
    try:
        # subprocess.run kills the child when the timeout expires before
        # re-raising TimeoutExpired, so a hung command is not left running.
        completed = subprocess.run(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            cwd=repo_dir,
            timeout=60
        )
        log_output = completed.stdout + completed.stderr
        print("The log output is: ", log_output)
        if completed.returncode != 0:
            print(f"Command failed with return code: {completed.returncode}")
            
        # Update system message in context
        system_prompt = """You are a helpful assistant that analyzes command outputs and suggests next steps.
            When analyzing command failures:
            1. For directory operations (cd, ls, etc):
            - First try to list directory contents
            - Then suggest creating directory if needed
            2. For port conflicts:
            - Suggest using a different port
            - Or provide command to kill existing process
            3. Mark as critical_failure only if:
            - Required files are missing and can't be created
            - Dependencies can't be installed
            - System resources are unavailable
            4. Mark dependency_setup as true ONLY when ALL of these are completed:
            - Virtual environment is created AND activated (if needed)
            - ALL required packages are installed (pip, conda, npm, etc.)
            - ALL configuration files are in place
            - No remaining dependency-related commands in the instruction list
            
            IMPORTANT: dependency_setup must be false if:
            - There are any remaining package installation commands (pip, conda, etc.)
            - Any installation command failed
            - Not all commands in the original command list have been executed
            - The final verification command hasn't been run successfully
            """
        context.update_system_message(system_prompt)
        
        # Create user message
        user_message = {
            "role": "user",
            "content": f"""
                Analyze this command output and suggest the next command to run:
                Command executed: {command}
                Output:
                {log_output}

                Respond in this JSON format:
                {{
                    "success": true/false,
                    "critical_failure": true/false,
                    "dependency_setup": true/false,
                    "analysis": "brief analysis of what happened",
                    "next_command": "executable shell command or null if no further action needed",
                    "alternative_command": "executable shell command or null if no alternative needed"
                }}

                Remember: 
                1. For next_command and alternative_command, only provide actual executable shell commands, not descriptions.
                2. Set dependency_setup to true only when all dependencies are properly installed and environment is ready.

                Example of good responses:
                {{
                    "success": true,
                    "critical_failure": false,
                    "dependency_setup": true,
                    "analysis": "Successfully installed all required packages",
                    "next_command": null,
                    "alternative_command": null
                }}
                {{
                    "success": false,
                    "critical_failure": false,
                    "dependency_setup": false,
                    "analysis": "The cd command failed because directory doesn't exist",
                    "next_command": null,
                    "alternative_command": "ls -la"
                }}
            """
        }
        
        # Call LLM with the user message
        response = call_llm([user_message], context=context)
        # Models often wrap JSON in a markdown fence; strip it so a valid
        # answer is not misreported as a critical failure by json.loads.
        payload = response.strip()
        if payload.startswith("```"):
            payload = re.sub(r"^```(?:json)?\s*|\s*```$", "", payload)
        analysis = json.loads(payload)
        
        return (
            analysis["success"], 
            log_output, 
            analysis.get("next_command"),
            analysis.get("critical_failure", False),
            analysis.get("alternative_command"),
            analysis.get("dependency_setup", False)  # additional return value: dependency setup status
        )
        
    except Exception as e:
        # Any failure here is reported as critical so the caller stops.
        return (False, f"Error executing command: {str(e)}\n{log_output}", None, True, None, False)

def run_from_github(github_url, README):
    """
    Clone a repository, extract commands from its README and execute them.

    This is a generator: each yielded value is the full log so far. The steps
    are: clone the repository, extract commands from ``README`` with the LLM,
    then execute them one by one, letting the LLM analyze each result and
    optionally insert follow-up or alternative commands. The loop stops early
    when the LLM reports the dependencies as set up or a critical failure.
    The chat history is written to ``chat_history/`` and the clone is removed
    at the end, whatever happened.

    Args:
        github_url (str): Repository URL to clone.
        README (str): README text to extract commands from.

    Yields:
        str: The cumulative log.
    """
    log_history = ""  # Initialize log accumulator
    repo_dir = None
    # Defined up front: the loop below may finish without executing a single
    # command (e.g. only blank lines), and the summary after it reads this flag.
    dependency_set_up = False

    # Set up context
    context = RepositoryContext(github_url)

    try:
        # Step 1: Clone the repository
        log_history += f"Cloning repository: {github_url}\n"
        yield log_history  # Update UI
        try:
            repo_dir = clone_repo(github_url)
        except Exception as e:
            # Only genuine clone failures are reported as such.
            log_history += f"❌ Error cloning repository: {str(e)}\n"
            yield log_history
            return
        log_history += f"Repository cloned to {repo_dir}\n"
        yield log_history

        # Without a README the extractor returns a sentinel sentence; do not
        # let that sentence be executed as if it were a shell command.
        if not README:
            log_history += "⚠️ No README provided; there are no commands to execute.\n"
            yield log_history
            return

        # Step 2: Extract command-line instructions using GPT
        log_history += "Extracting command lines from README...\n"
        yield log_history
        commands = get_command_lines_from_readme(README, context=context)
        log_history += f"Extracted commands:\n{commands}\n"

        # Add chat history after command extraction
        log_history += "\n=== LLM Chat History ===\n"
        for msg in context.context['chat_history']:
            log_history += f"{msg['role'].upper()}: {msg['content']}\n"
        log_history += "=== End Chat History ===\n"
        yield log_history

        # Execute commands in a loop with GPT analysis.
        # NOTE(review): the list can grow on every iteration (LLM-suggested
        # commands are inserted), so the loop has no upper bound - consider a cap.
        command_list = commands.strip().split('\n')
        current_command = 0
        print("The command list is: ", command_list)

        while current_command < len(command_list):
            cmd = command_list[current_command].strip()
            if not cmd:
                current_command += 1
                continue

            log_history += f"\n---\nExecuting command: {cmd}\n"
            yield log_history

            success, output, next_cmd, critical_failure, alternative_cmd, dependency_set_up = execute_and_analyze_command(cmd, repo_dir, context)
            log_history += output

            # Add chat history after each command analysis
            log_history += "\n=== Command Analysis Chat History ===\n"
            recent_messages = context.context['chat_history'][-2:]
            for msg in recent_messages:
                log_history += f"{msg['role'].upper()}: {msg['content']}\n"
            log_history += "=== End Analysis History ===\n"

            # Check if dependencies are set up
            if dependency_set_up:
                log_history += "\n✅ Dependencies successfully set up. Environment is ready.\n"
                yield log_history
                break

            if not success:
                if alternative_cmd:
                    log_history += f"⚠️ Command failed. Trying alternative command: {alternative_cmd}\n"
                    command_list.insert(current_command + 1, alternative_cmd)
                elif critical_failure:
                    log_history += "❌ Critical failure. Stopping execution.\n"
                    yield log_history
                    break
                else:
                    log_history += "⚠️ Command failed but continuing execution.\n"

            # Inserted at the same index, so a suggested next command runs
            # before an alternative inserted just above.
            if next_cmd:
                command_list.insert(current_command + 1, next_cmd)

            current_command += 1
            yield log_history

        # This exact success line is what process_single_repo searches for.
        if dependency_set_up:
            log_history += "\n✅ Final Verification: The executable command ran successfully.\n"
        else:
            log_history += "\n⚠️ Finished executing all commands but dependencies might not be fully set up.\n"

        # Store the chat history. The directory is created on demand, and a
        # random suffix keeps concurrent runs that finish within the same
        # second from overwriting each other's file.
        os.makedirs("chat_history", exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        history_path = f"chat_history/chat_history_{timestamp}_{uuid.uuid4().hex[:8]}.txt"
        with open(history_path, "w") as f:
            f.write(json.dumps(context.context['chat_history']))
        yield log_history

    except Exception as e:
        log_history += f"❌ Error while processing repository: {str(e)}\n"
        yield log_history

    finally:
        if repo_dir:
            shutil.rmtree(repo_dir, ignore_errors=True)
            log_history += f"\n🧹 Cleaned up repository: {repo_dir}\n"
            yield log_history

def get_final_log(generator):
    """
    Drain ``generator`` and return its final log.

    ``next()`` is called until the generator is exhausted. The result is the
    generator's return value (carried by StopIteration) when that is not None,
    otherwise the last value it yielded, or "" if it yielded nothing.
    """
    latest = ""
    exhausted = False
    while not exhausted:
        try:
            latest = next(generator)
        except StopIteration as stop:
            exhausted = True
            # An explicit return value takes precedence over the last yield.
            if stop.value is not None:
                latest = stop.value
    return latest

def process_single_repo(url, readme):
    """
    Run the full pipeline for one repository.

    Returns ``(url, [final_log, success])`` where ``final_log`` is the last log
    produced by run_from_github and ``success`` tells whether that log contains
    the final-verification success line.
    """
    print("Process single repo, url:", url)
    final_log = get_final_log(run_from_github(url, readme))
    # The success marker must match the line emitted by run_from_github exactly.
    success_marker = "\n✅ Final Verification: The executable command ran successfully.\n"
    return url, [final_log, success_marker in final_log]

def process_repos(repo_dict, max_workers=4):
    """
    Process several repositories concurrently in a thread pool.

    ``repo_dict`` maps a GitHub URL either to a README string or to a list
    whose first element is the README string.

    Returns a dictionary keyed by GitHub URL. For a repository that was
    processed, the value is the result produced by process_single_repo
    (final log plus success flag). If processing raised, the value is the
    tuple ``("Error processing repo: <message>", False)``.
    """
    collected = {}
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Submit one job per repository and remember which URL it belongs to.
        pending = {
            pool.submit(
                process_single_repo,
                repo_url,
                payload[0] if isinstance(payload, list) else payload,
            ): repo_url
            for repo_url, payload in repo_dict.items()
        }

        # Gather results in completion order.
        for finished in as_completed(pending):
            try:
                key, outcome = finished.result()
            except Exception as exc:
                # Keep going: record the error for this repository only.
                key = pending[finished]
                outcome = (f"Error processing repo: {str(exc)}", False)
            collected[key] = outcome

    return collected


def save_results_to_file(results, filename):
    """
    Serialize ``results`` as JSON and write it to ``filename``.

    The file is opened for writing as UTF-8 (replacing any existing content)
    and the JSON is indented by 4 spaces with non-ASCII characters written
    as-is rather than escaped.

    Args:
        results (dict): Dictionary of results; must be JSON-serializable.
        filename (str): File path to save the results.
    """
    dump_options = {"indent": 4, "ensure_ascii": False}
    with open(filename, mode="w", encoding="utf-8") as out_file:
        json.dump(results, out_file, **dump_options)


# Load the repository set: a JSON object keyed by GitHub URL.
with open('repo_set/simple_repos.json', encoding='utf-8') as f:
    repo_dict = json.load(f)
print("Repo set loaded:", repo_dict)

# Keep only the first 10 entries (in file order) for this run.
repo_dict = {url: repo_dict[url] for url in list(repo_dict)[:10]}

# Process the selected repositories concurrently and persist the outcome.
results = process_repos(repo_dict)
save_results_to_file(results, "results.json")

Starting script execution...


2025-02-27 05:37:36,923 - INFO - Extracting command lines from provided README.
2025-02-27 05:37:36,925 - INFO - Processing chunk 1/1
2025-02-27 05:37:36,926 - INFO - Sending LLM request
2025-02-27 05:37:36,930 - DEBUG - Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'user', 'content': "Extract only the valid, executable shell commands from the following README text. Follow these rules strictly:\n            1. Return one valid shell command per line, without any additional commentary.\n            2. Do not include any markdown formatting such as triple backticks, asterisks, or hyphens used for lists.\n            3. Remove any extraneous characters, inline explanations, or documentation text.\n            4. Preserve multi-line commands (using '' for line continuation) and command sequences (with operators like && or ;).\n            5. Output only commands that can be directly executed in a Unix-like shell.\n       

Repo set loaded: {'https://github.com/cudbg/Kitana-e2e': ['\n\n# Kitana e2e \n\n\n## Data Augmentation for Kitana\nThis repository contains the scalable e2e implementation for data augmentation for Kitana. The code is written in Python and contains sample data, sample execution code, and the data augmentation code.\n\nPlease follow the instructions below to run the code.\n\n### Instructions\n1. Clone the repository\n2. Make sure you are in the correct directory:\n```bash\ncd kitana-e2e\n```\n3. Run the following command to install the required libraries:\n```bash\n# If you are using python venv.\npython3 -m venv venv\nsource venv/bin/activate\npip install -r requirements.txt\n```\n\n```bash\n# If you are using conda, there is a environment.yml file in the repository.\nconda env create -f environment.yml\n```\n3. Run the following command to execute the code:\n```bash\npython sample_execution.py\n``` \n## Project Structure\n- **`api/`** - Contains the interfaces for external modules to 

2025-02-27 05:37:36,931 - DEBUG - Sending HTTP Request: POST https://api.openai.com/v1/chat/completions
2025-02-27 05:37:36,932 - DEBUG - connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=5.0 socket_options=None
2025-02-27 05:37:36,946 - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x7fc030141130>
2025-02-27 05:37:36,947 - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x7fc030327a40> server_hostname='api.openai.com' timeout=5.0
2025-02-27 05:37:36,961 - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x7fc03015a850>
2025-02-27 05:37:36,962 - DEBUG - send_request_headers.started request=<Request [b'POST']>
2025-02-27 05:37:36,963 - DEBUG - send_request_headers.complete
2025-02-27 05:37:36,963 - DEBUG - send_request_body.started request=<Request [b'POST']>
2025-02-27 05:37:36,964 - DEBUG - send_request_body.complete
2025-02-27 05:37:36,964 - DEBUG - receive_respo

The messages are:  [{'role': 'user', 'content': "Extract only the valid, executable shell commands from the following README text. Follow these rules strictly:\n            1. Return one valid shell command per line, without any additional commentary.\n            2. Do not include any markdown formatting such as triple backticks, asterisks, or hyphens used for lists.\n            3. Remove any extraneous characters, inline explanations, or documentation text.\n            4. Preserve multi-line commands (using '' for line continuation) and command sequences (with operators like && or ;).\n            5. Output only commands that can be directly executed in a Unix-like shell.\n            6. If a line does not represent a valid shell command (e.g., a link, descriptive text, or a markdown heading), skip it.\n\n            For example, if the README contains:\n                - `npm i -g @saleor/cli`\n                - Some text: For installation, run npm i -g @saleor/cli`\n            Y

2025-02-27 05:37:37,382 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Thu, 27 Feb 2025 05:37:37 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-expose-headers', b'X-Request-ID'), (b'openai-organization', b'user-05klseuvvqopbuycy1u6wjop'), (b'openai-processing-ms', b'302'), (b'openai-version', b'2020-10-01'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'200000'), (b'x-ratelimit-remaining-requests', b'9941'), (b'x-ratelimit-remaining-tokens', b'199283'), (b'x-ratelimit-reset-requests', b'8m22.926s'), (b'x-ratelimit-reset-tokens', b'215ms'), (b'x-request-id', b'req_637d11898571b5cce06194fbdc331a68'), (b'strict-transport-security', b'max-age=31536000; includeSubDomains; preload'), (b'cf-cache-status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=S9DPIG4uzQIgRE.OUMjNHeODxsjtc5.B5T6K0JVoFnk-1740634657-1.0.1.1-0AkqxhAqVNKGR5lGjkCbmYlod.

The output is:  pip install Flask
python3 server.py
The command list is:  ['pip install Flask', 'python3 server.py']


2025-02-27 05:37:37,725 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Thu, 27 Feb 2025 05:37:37 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-expose-headers', b'X-Request-ID'), (b'openai-organization', b'user-05klseuvvqopbuycy1u6wjop'), (b'openai-processing-ms', b'570'), (b'openai-version', b'2020-10-01'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'200000'), (b'x-ratelimit-remaining-requests', b'9940'), (b'x-ratelimit-remaining-tokens', b'198707'), (b'x-ratelimit-reset-requests', b'8m31.494s'), (b'x-ratelimit-reset-tokens', b'387ms'), (b'x-request-id', b'req_839cb3d4f0608226526513f97b21f3d2'), (b'strict-transport-security', b'max-age=31536000; includeSubDomains; preload'), (b'cf-cache-status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=3CnFY_4qzeE14n1rGyTwtGUEz0BErbEfpk1FyEtd5B0-1740634657-1.0.1.1-SFD1567PezjryI1l67JIj4zWdd

The output is:  cd kitana-e2e
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
conda env create -f environment.yml
python sample_execution.py
The command list is:  ['cd kitana-e2e', 'python3 -m venv venv', 'source venv/bin/activate', 'pip install -r requirements.txt', 'conda env create -f environment.yml', 'python sample_execution.py']
The log output is:  /bin/sh: line 1: cd: kitana-e2e: No such file or directory

Command failed with return code: 1
The new message is:  {'role': 'user', 'content': '\n                Analyze this command output and suggest the next command to run:\n                Command executed: cd kitana-e2e\n                Output:\n                /bin/sh: line 1: cd: kitana-e2e: No such file or directory\n\n\n                Respond in this JSON format:\n                {\n                    "success": true/false,\n                    "critical_failure": true/false,\n                    "dependency_setup": true/false,\n               

2025-02-27 05:37:38,298 - INFO - Sending LLM request
2025-02-27 05:37:38,306 - DEBUG - Sending HTTP Request: POST https://api.openai.com/v1/chat/completions
2025-02-27 05:37:38,307 - DEBUG - send_request_headers.started request=<Request [b'POST']>
2025-02-27 05:37:38,309 - DEBUG - send_request_headers.complete
2025-02-27 05:37:38,309 - DEBUG - send_request_body.started request=<Request [b'POST']>
2025-02-27 05:37:38,310 - DEBUG - send_request_body.complete
2025-02-27 05:37:38,311 - DEBUG - receive_response_headers.started request=<Request [b'POST']>


The log output is:  Defaulting to user installation because normal site-packages is not writeable



2025-02-27 05:37:38,544 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Thu, 27 Feb 2025 05:37:38 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-expose-headers', b'X-Request-ID'), (b'openai-organization', b'user-05klseuvvqopbuycy1u6wjop'), (b'openai-processing-ms', b'659'), (b'openai-version', b'2020-10-01'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'200000'), (b'x-ratelimit-remaining-requests', b'9939'), (b'x-ratelimit-remaining-tokens', b'199165'), (b'x-ratelimit-reset-requests', b'8m39.406s'), (b'x-ratelimit-reset-tokens', b'250ms'), (b'x-request-id', b'req_fae5a610ade074e9119958fd23efbfa8'), (b'strict-transport-security', b'max-age=31536000; includeSubDomains; preload'), (b'cf-cache-status', b'DYNAMIC'), (b'X-Content-Type-Options', b'nosniff'), (b'Server', b'cloudflare'), (b'CF-RAY', b'9185d2f319f42306-ORD'), (b'Conte

The output is:  {
    "success": false,
    "critical_failure": false,
    "dependency_setup": false,
    "analysis": "The 'cd' command failed because the directory 'kitana-e2e' doesn't exist.",
    "next_command": null,
    "alternative_command": null
}


2025-02-27 05:37:39,062 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Thu, 27 Feb 2025 05:37:39 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-expose-headers', b'X-Request-ID'), (b'openai-organization', b'user-05klseuvvqopbuycy1u6wjop'), (b'openai-processing-ms', b'626'), (b'openai-version', b'2020-10-01'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'200000'), (b'x-ratelimit-remaining-requests', b'9938'), (b'x-ratelimit-remaining-tokens', b'198911'), (b'x-ratelimit-reset-requests', b'8m47.5s'), (b'x-ratelimit-reset-tokens', b'326ms'), (b'x-request-id', b'req_72bfb78e7952da206052724277268e7e'), (b'strict-transport-security', b'max-age=31536000; includeSubDomains; preload'), (b'CF-Cache-Status', b'DYNAMIC'), (b'X-Content-Type-Options', b'nosniff'), (b'Server', b'cloudflare'), (b'CF-RAY', b'9185d2f6795dcf43-CMH'), (b'Content

The output is:  {
    "success": true,
    "critical_failure": false,
    "dependency_setup": false,
    "analysis": "Flask and its dependencies are already installed successfully.",
    "next_command": null,
    "alternative_command": null
}
The log output is:   * Serving Flask app 'server'
 * Debug mode: on
Address already in use
Port 5000 is in use by another program. Either identify and stop that program, or start the server with a different port.

Command failed with return code: 1
The new message is:  {'role': 'user', 'content': '\n                Analyze this command output and suggest the next command to run:\n                Command executed: python3 server.py\n                Output:\n                 * Serving Flask app \'server\'\n * Debug mode: on\nAddress already in use\nPort 5000 is in use by another program. Either identify and stop that program, or start the server with a different port.\n\n\n                Respond in this JSON format:\n                {\n            

2025-02-27 05:37:40,090 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Thu, 27 Feb 2025 05:37:40 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-expose-headers', b'X-Request-ID'), (b'openai-organization', b'user-05klseuvvqopbuycy1u6wjop'), (b'openai-processing-ms', b'702'), (b'openai-version', b'2020-10-01'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'200000'), (b'x-ratelimit-remaining-requests', b'9938'), (b'x-ratelimit-remaining-tokens', b'199103'), (b'x-ratelimit-reset-requests', b'8m55.174s'), (b'x-ratelimit-reset-tokens', b'269ms'), (b'x-request-id', b'req_666a8f3b6bf74ee6072041f248f48691'), (b'strict-transport-security', b'max-age=31536000; includeSubDomains; preload'), (b'CF-Cache-Status', b'DYNAMIC'), (b'X-Content-Type-Options', b'nosniff'), (b'Server', b'cloudflare'), (b'CF-RAY', b'9185d2fc88e42306-ORD'), (b'Conte

The output is:  {
    "success": true,
    "critical_failure": false,
    "dependency_setup": false,
    "analysis": "The server failed to start due to port 5000 being already in use.",
    "next_command": null,
    "alternative_command": "python3 server.py --port 5001"
}


2025-02-27 05:37:40,969 - INFO - Sending LLM request
2025-02-27 05:37:40,976 - DEBUG - Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': "You are a helpful assistant that analyzes command outputs and suggests next steps.\n            When analyzing command failures:\n            1. For directory operations (cd, ls, etc):\n            - First try to list directory contents\n            - Then suggest creating directory if needed\n            2. For port conflicts:\n            - Suggest using a different port\n            - Or provide command to kill existing process\n            3. Mark as critical_failure only if:\n            - Required files are missing and can't be created\n            - Dependencies can't be installed\n            - System resources are unavailable\n            4. Mark dependency_setup as true ONLY when ALL of these are completed:\n            - Virtual environment is created AND

The log output is:  
The new message is:  {'role': 'user', 'content': '\n                Analyze this command output and suggest the next command to run:\n                Command executed: python3 -m venv venv\n                Output:\n                \n\n                Respond in this JSON format:\n                {\n                    "success": true/false,\n                    "critical_failure": true/false,\n                    "dependency_setup": true/false,\n                    "analysis": "brief analysis of what happened",\n                    "next_command": "executable shell command or null if no further action needed",\n                    "alternative_command": "executable shell command or null if no alternative needed"\n                }\n\n                Remember: \n                1. For next_command and alternative_command, only provide actual executable shell commands, not descriptions.\n                2. Set dependency_setup to true only when all dependencies are p

2025-02-27 05:37:41,987 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Thu, 27 Feb 2025 05:37:41 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-expose-headers', b'X-Request-ID'), (b'openai-organization', b'user-05klseuvvqopbuycy1u6wjop'), (b'openai-processing-ms', b'890'), (b'openai-version', b'2020-10-01'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'200000'), (b'x-ratelimit-remaining-requests', b'9937'), (b'x-ratelimit-remaining-tokens', b'199117'), (b'x-ratelimit-reset-requests', b'9m2.109s'), (b'x-ratelimit-reset-tokens', b'264ms'), (b'x-request-id', b'req_e019572f461b23bd188a693255ea2951'), (b'strict-transport-security', b'max-age=31536000; includeSubDomains; preload'), (b'cf-cache-status', b'DYNAMIC'), (b'X-Content-Type-Options', b'nosniff'), (b'Server', b'cloudflare'), (b'CF-RAY', b'9185d30728952306-ORD'), (b'Conten

The output is:  {
    "success": true,
    "critical_failure": false,
    "dependency_setup": false,
    "analysis": "Successfully created a virtual environment named 'venv'.",
    "next_command": "source venv/bin/activate",
    "alternative_command": null
}
The log output is:  
The new message is:  {'role': 'user', 'content': '\n                Analyze this command output and suggest the next command to run:\n                Command executed: source venv/bin/activate\n                Output:\n                \n\n                Respond in this JSON format:\n                {\n                    "success": true/false,\n                    "critical_failure": true/false,\n                    "dependency_setup": true/false,\n                    "analysis": "brief analysis of what happened",\n                    "next_command": "executable shell command or null if no further action needed",\n                    "alternative_command": "executable shell command or null if no alternative ne

2025-02-27 05:37:42,750 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Thu, 27 Feb 2025 05:37:42 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-expose-headers', b'X-Request-ID'), (b'openai-organization', b'user-05klseuvvqopbuycy1u6wjop'), (b'openai-processing-ms', b'616'), (b'openai-version', b'2020-10-01'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'200000'), (b'x-ratelimit-remaining-requests', b'9936'), (b'x-ratelimit-remaining-tokens', b'199055'), (b'x-ratelimit-reset-requests', b'9m9.712s'), (b'x-ratelimit-reset-tokens', b'283ms'), (b'x-request-id', b'req_a7ad3402f44bcd8c7e812fa71a2cefb6'), (b'strict-transport-security', b'max-age=31536000; includeSubDomains; preload'), (b'CF-Cache-Status', b'DYNAMIC'), (b'X-Content-Type-Options', b'nosniff'), (b'Server', b'cloudflare'), (b'CF-RAY', b'9185d30dad7e2306-ORD'), (b'Conten

The output is:  {
    "success": true,
    "critical_failure": false,
    "dependency_setup": false,
    "analysis": "Activated the virtual environment successfully.",
    "next_command": "pip install -r requirements.txt",
    "alternative_command": null
}


2025-02-27 05:37:43,814 - INFO - Sending LLM request
2025-02-27 05:37:43,827 - DEBUG - Sending HTTP Request: POST https://api.openai.com/v1/chat/completions
2025-02-27 05:37:43,828 - DEBUG - send_request_headers.started request=<Request [b'POST']>
2025-02-27 05:37:43,829 - DEBUG - send_request_headers.complete
2025-02-27 05:37:43,830 - DEBUG - send_request_body.started request=<Request [b'POST']>
2025-02-27 05:37:43,831 - DEBUG - send_request_body.complete
2025-02-27 05:37:43,831 - DEBUG - receive_response_headers.started request=<Request [b'POST']>


The log output is:  Defaulting to user installation because normal site-packages is not writeable



2025-02-27 05:37:44,621 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Thu, 27 Feb 2025 05:37:44 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-expose-headers', b'X-Request-ID'), (b'openai-organization', b'user-05klseuvvqopbuycy1u6wjop'), (b'openai-processing-ms', b'671'), (b'openai-version', b'2020-10-01'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'200000'), (b'x-ratelimit-remaining-requests', b'9935'), (b'x-ratelimit-remaining-tokens', b'196057'), (b'x-ratelimit-reset-requests', b'9m16.531s'), (b'x-ratelimit-reset-tokens', b'1.182s'), (b'x-request-id', b'req_5f5bab7d916328302ac3b7eab80e3061'), (b'strict-transport-security', b'max-age=31536000; includeSubDomains; preload'), (b'cf-cache-status', b'DYNAMIC'), (b'X-Content-Type-Options', b'nosniff'), (b'Server', b'cloudflare'), (b'CF-RAY', b'9185d318fd962306-ORD'), (b'Cont

The output is:  {
    "success": true,
    "critical_failure": false,
    "dependency_setup": true,
    "analysis": "Successfully installed all required packages.",
    "next_command": null,
    "alternative_command": null
}
