# RAG Answer Generation

To run this notebook successfully, you need to configure three main components:

### 1. Dataset Selection

- **Location**: Set the `dataset_folder` variable in the first code cell to the folder that contains your dataset
- **Files**: Ensure your dataset contains a `questions.json` file with the question data
- **Sample Size**: Modify `NUM_SAMPLES` to control how many questions to process

### 2. RAGnaroX System Execution

- **Endpoint Configuration**: Set the RAG system endpoint in your `.env` file:
  ```
  RAG_API_URL=http://127.0.0.1:10000/v1/chat/completions
  RAG_API_METHOD=GET
  ```
- **System Requirements**: Ensure RAGnaroX is running and accessible at the specified endpoint
- **API Format**: The system expects REST API calls with JSON payloads containing user questions

### 3. SSH Connection for System Logging

- **Remote Server**: Configure SSH credentials in `.env` file:
  ```
  SSH_PASSWORD=your_ssh_password
  ```
- **Server Details**: Update the `ssh_user_host` and `remote_dir` variables inside the `update_answers_via_rest` function
- **Logging Script**: Ensure `log_system.sh` exists in `remote_dir` on the remote server for performance monitoring, and that `sshpass` is installed locally (it is used for all SSH/SCP calls)


In [None]:
import os
import json
import requests
import time
import subprocess
from dotenv import load_dotenv
load_dotenv()
# --- Dataset selection -------------------------------------------------------
# Pick the dataset folder to evaluate, e.g. 'single-hop-RAG-dataset',
# 'multi-hop-RAG-dataset' or 'lang-RAG-dataset'.
dataset_folder = 'datasets/single-hop-RAG-dataset/'  # Change as needed
input_json = 'questions.json'  # Change as needed
output_json = '5-doc_single_qwen4B.json'

# --- Derived file locations --------------------------------------------------
# Questions are read from, and per-request artefacts are written into, the
# dataset folder.
input_json_path = os.path.join(dataset_folder, input_json)
output_json_path = os.path.join(dataset_folder, output_json)
answer_archive_path = os.path.join(dataset_folder, 'answer_generated_all.json')
logging_path = os.path.join(dataset_folder, 'logging.json')
# Local agent answer file; resolved to an absolute path once, at import time.
answer_file_path = os.path.abspath('agent_files/answer.json')
# The system log shares the output file's stem, with a '_log.csv' suffix.
log_filename = output_json.replace('.json', '_log.csv')

# --- Run parameters ----------------------------------------------------------
NUM_SAMPLES = 1000  # Maximum number of questions to process; change as needed
# Endpoint and HTTP method come from the environment (see .env), with defaults.
RAG_API_URL = os.getenv(
    'RAG_API_URL', 'http://127.0.0.1:10000/v1/chat/completions')
RAG_API_METHOD = os.getenv('RAG_API_METHOD', 'GET').upper()

In [None]:
def call_rest_api(question):
    """Send one question to the RAG endpoint and extract answer and contexts.

    Before the request is made, an existing local agent answer file
    (``answer_file_path``) is appended to ``answer_archive_path`` and then
    removed. After a successful request the raw JSON response is appended
    to ``logging_path`` on a best-effort basis.

    Args:
        question: The question text, sent as the single ``user`` message.

    Returns:
        dict with the keys
        ``response`` (the answer taken from ``content.Result.content``),
        ``retrieved_contexts`` (the ``text`` of each entry in
        ``content.Result.rag_entries``) and
        ``response_time`` (elapsed seconds, or ``None`` if the request or
        JSON decoding failed).
    """
    # Archive and remove any existing local agent answer file
    if os.path.exists(answer_file_path):
        with open(answer_file_path, 'r', encoding='utf-8') as f:
            existing_data = json.load(f)
        with open(answer_archive_path, 'a', encoding='utf-8') as f:
            json.dump(existing_data, f, indent=2, ensure_ascii=False)
            f.write('\n')
        os.remove(answer_file_path)

    headers = {'Content-Type': 'application/json'}
    payload = {'messages': [{'role': 'user', 'content': question}]}
    start_time = time.time()
    try:
        response = requests.request(
            method=RAG_API_METHOD,
            url=RAG_API_URL,
            headers=headers,
            data=json.dumps(payload),
            timeout=60
        )
        response.raise_for_status()
        response_json = response.json()
    except Exception as e:
        # Deliberately broad: a single failed request must not abort the run.
        print(f'Request failed: {e}')
        return {'response': '', 'retrieved_contexts': [], 'response_time': None}
    response_time = time.time() - start_time

    # Raw-response logging is best effort, but failures are reported instead
    # of being silently dropped.
    try:
        with open(logging_path, 'a', encoding='utf-8') as log_file:
            json.dump(response_json, log_file, indent=2, ensure_ascii=False)
            log_file.write('\n')
    except Exception as e:
        print(f'Could not write response log to {logging_path}: {e}')

    # Parse the nested 'content' -> 'Result' object defensively: any level
    # that is missing or has an unexpected type yields an empty result rather
    # than an AttributeError/TypeError that would abort the caller's loop.
    content = response_json.get('content') if isinstance(
        response_json, dict) else None
    message = content.get('Result') if isinstance(content, dict) else None
    if not isinstance(message, dict):
        message = {}
    answer = message.get('content', '')
    rag_entries = message.get('rag_entries', [])
    if not isinstance(rag_entries, list):
        rag_entries = []
    retrieved_contexts = [entry.get('text', '')
                          for entry in rag_entries if isinstance(entry, dict)]
    print(f'Retrieved {len(retrieved_contexts)} chunks for question\n')
    return {'response': answer, 'retrieved_contexts': retrieved_contexts, 'response_time': response_time}

In [None]:
def delete_remote_csv_files(ssh_user_host, ssh_password, remote_dir):
    """Remove every ``*.csv`` file directly inside ``remote_dir`` over SSH.

    Intended to run before a new logging session so old logs do not linger.
    The command is executed through ``sshpass``/``ssh``; any failure is
    printed and otherwise ignored.
    """
    import subprocess
    # -maxdepth 1 restricts the deletion to the directory itself.
    find_cmd = f"find {remote_dir} -maxdepth 1 -name '*.csv' -delete"
    argv = ["sshpass", "-p", ssh_password, "ssh", ssh_user_host, find_cmd]
    try:
        subprocess.run(argv, check=True)
        print(f"Deleted all .csv files in remote directory: {remote_dir}")
    except Exception as err:
        print(f"Failed to delete remote .csv files: {err}")


def start_remote_logging(ssh_user_host, ssh_password, remote_dir, log_filename):
    """Launch ``log_system.sh`` on the remote host without waiting for it.

    The script is started from ``remote_dir`` with ``-o log_filename`` and a
    C locale. Returns the ``subprocess.Popen`` handle of the local
    ``sshpass``/``ssh`` process, or ``None`` if it could not be started.
    """
    import subprocess
    logging_cmd = (
        'export LC_ALL=C LANG=C; '
        f'cd {remote_dir} && bash log_system.sh -o {log_filename}'
    )
    argv = ["sshpass", "-p", ssh_password, "ssh", ssh_user_host, logging_cmd]
    try:
        # Popen (not run) so the logger keeps running while answers are generated.
        handle = subprocess.Popen(argv)
        print('Started remote logging.')
        return handle
    except Exception as err:
        print(f"Failed to start remote logging: {err}")
        return None


def stop_remote_logging(ssh_user_host, ssh_password, log_filename):
    """Terminate the remote ``log_system.sh`` run that writes ``log_filename``.

    Runs ``pkill -f`` on the remote host, matching the full command line of
    the logging script. Any failure (including a non-zero exit status) is
    printed and otherwise ignored.
    """
    import subprocess
    kill_cmd = f"pkill -f 'bash log_system.sh -o {log_filename}'"
    argv = ["sshpass", "-p", ssh_password, "ssh", ssh_user_host, kill_cmd]
    try:
        subprocess.run(argv, check=True)
        print('System logging stopped.')
    except Exception as err:
        print(f"Failed to stop remote logging: {err}")


def copy_remote_log_to_local(ssh_user_host, ssh_password, remote_dir, log_filename):
    """Copy the generated CSV log file from the remote host into ``results_final``.

    Args:
        ssh_user_host: ``user@host`` target passed to ``scp``.
        ssh_password: Password handed to ``sshpass``.
        remote_dir: Directory on the remote host that contains the log.
        log_filename: Name of the log file, used remotely and locally.

    Failures (missing tools, non-zero ``scp`` exit status, directory creation
    errors) are printed and otherwise ignored.
    """
    import os
    import posixpath
    import subprocess
    local_dir = 'results_final'
    local_log_path = os.path.join(local_dir, log_filename)
    # The remote path is resolved by the remote host, so it is always joined
    # with '/' rather than the local OS separator.
    remote_log_path = posixpath.join(remote_dir, log_filename)
    scp_cmd = [
        "sshpass", "-p", ssh_password,
        "scp", f"{ssh_user_host}:{remote_log_path}", local_log_path
    ]
    try:
        # scp does not create the destination directory; make sure it exists
        # so this function also works when called on its own.
        os.makedirs(local_dir, exist_ok=True)
        subprocess.run(scp_cmd, check=True)
        print(f"Copied remote log file to {local_log_path}")
    except Exception as e:
        print(f"Failed to copy remote log file: {e}")


def update_answers_via_rest(input_path, output_path, num_examples=NUM_SAMPLES):
    """Generate answers via the REST API while remote system logging is active.

    Steps:
      1. Delete old ``*.csv`` logs in the remote directory.
      2. Start the remote logging script.
      3. Ask the RAG system every question from ``input_path`` (at most
         ``num_examples``) and write the enriched entries to
         ``results_final/<basename of output_path>``.
      4. Stop remote logging and copy the log file to ``results_final``.

    Step 4 always runs, even if step 3 fails or is interrupted.

    Args:
        input_path: JSON file holding a list of entries with a ``user_input`` key.
        output_path: Only its basename is used, as the results file name.
        num_examples: Maximum number of entries to process.
    """
    import os
    import json
    import subprocess
    ssh_user_host = "host_URL"
    # SSH_PASSWORD is read from the environment (populated from .env).
    ssh_password = os.environ.get("SSH_PASSWORD", "your_password")
    remote_dir = "/workspace/rag_systems_perforamnce_logs/rag_evaluation/"
    results_dir = 'results_final'

    # Step 1: Clean up remote logs
    delete_remote_csv_files(ssh_user_host, ssh_password, remote_dir)

    # Step 2: Start remote logging
    log_proc = start_remote_logging(
        ssh_user_host, ssh_password, remote_dir, log_filename)

    # Step 3: Generate answers
    try:
        with open(input_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        selected = data[:num_examples]
        # Report progress against the number of entries actually processed,
        # which can be smaller than num_examples.
        total = len(selected)
        updated_data = []
        for i, entry in enumerate(selected, start=1):
            question = entry.get('user_input', '')
            print(f'🔄 Generating answer for entry {i}/{total}')
            generated = call_rest_api(question)
            updated_entry = entry.copy()
            updated_entry['response'] = generated.get('response', '')
            updated_entry['retrieved_contexts'] = generated.get(
                'retrieved_contexts', [])
            updated_entry['response_time'] = generated.get(
                'response_time', None)
            updated_data.append(updated_entry)
        # Save results; create the directory that is actually written to.
        os.makedirs(results_dir, exist_ok=True)
        results_path = os.path.join(
            results_dir, os.path.basename(output_path))
        with open(results_path, 'w', encoding='utf-8') as f:
            json.dump(updated_data, f, indent=2, ensure_ascii=False)
    except Exception as e:
        print(f'Error during answer generation: {e}')
    finally:
        # Step 4: Stop remote logging and copy log file. Placed in `finally`
        # so the remote logger is also stopped on KeyboardInterrupt.
        stop_remote_logging(ssh_user_host, ssh_password, log_filename)
        if log_proc is not None:
            # Reap the local ssh process so it does not linger once the
            # remote script has been killed.
            try:
                log_proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                log_proc.terminate()
                try:
                    log_proc.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    log_proc.kill()
                    log_proc.wait()
        copy_remote_log_to_local(
            ssh_user_host, ssh_password, remote_dir, log_filename)

In [None]:
# Make sure the local results folder exists, then run the pipeline; generated
# answers and the copied system log are both saved under results_final.
os.makedirs('results_final', exist_ok=True)
update_answers_via_rest(
    input_path=input_json_path,
    output_path=output_json_path,
    num_examples=NUM_SAMPLES,
)