In [1]:
# Import necessary libraries for data handling, machine learning, and evaluation.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from typing import List, Dict

# --- Shared Classical Model and Training Logic ---
class ClassicalResearchModel:
    """A classical model to tackle the three research directions."""
    
    def __init__(self, random_state=42):
        # This is the constructor for our class. It runs when we create a new model instance.
        # Initialize a Logistic Regression model. `random_state` ensures results are reproducible.
        # `solver='liblinear'` is a good choice for small datasets.
        self.model = LogisticRegression(random_state=random_state, solver='liblinear')
        # A flag to track whether the model has been trained yet.
        self.is_trained = False

    def train(self, X_train: np.ndarray, y_train: np.ndarray, maxiter=20):
        """Simulates iterative training and reports the loss at each step."""
        # Print a message indicating that the training process is starting.
        print(f"Training for {maxiter} iterations...")
        # The .fit() method trains the model on the provided data.
        self.model.fit(X_train, y_train)
        
        # This loop simulates the iterative loss reporting you see in quantum optimizers.
        for i in range(maxiter):
            # For this classical model, the loss will be stable after the first fit.
            # In a true iterative optimizer, this value would change with each step.
            
            # Get the model's predicted probabilities for the positive class (1).
            y_pred_prob = self.model.predict_proba(X_train)[:, 1]
            # Calculate the Mean Squared Error between predicted probabilities and true labels. This is our loss.
            loss = np.mean((y_pred_prob - y_train)**2)
            # Print the loss for the current iteration, formatted to 4 decimal places.
            print(f"  Iteration {i+1}/{maxiter}: Loss = {loss:.4f}")
            
        # Set the flag to True, indicating the model is now trained and ready for prediction.
        self.is_trained = True

    def predict(self, X_test: np.ndarray) -> np.ndarray:
        """Makes predictions on new, unseen data using the trained model."""
        # Check if the model has been trained before allowing predictions.
        if not self.is_trained:
            # If not trained, raise an error to prevent incorrect usage.
            raise RuntimeError("Model must be trained before prediction.")
        # Use the trained model to predict class labels (0 or 1) for the test data.
        return self.model.predict(X_test)

# --- Data Generation and Model Training for Each Direction ---

# --- Direction 1: Thematic Correlator ---
def generate_data_direction1(num_samples=100):
    """Generates a synthetic dataset for the Thematic Correlator task."""
    docs, labels = [], []
    for _ in range(num_samples):
        sim_A = np.random.rand()
        sim_B = np.random.rand()
        # Define the condition for relevance: the document is only relevant (1) if BOTH concepts are strongly present.
        label = 1 if sim_A > 0.6 and sim_B > 0.6 else 0
        docs.append({'sim_concept_A': sim_A, 'sim_concept_B': sim_B})
        labels.append(label)
    return np.array([list(doc.values()) for doc in docs]), np.array(labels)

# --- Direction 2: Ambiguity Resolution ---
def generate_data_direction2(num_samples=100):
    """Generates a synthetic dataset for the Ambiguity Resolution task."""
    docs, labels = [], []
    for _ in range(num_samples):
        has_correct_context = np.random.choice([0, 1])
        label = has_correct_context
        has_wrong_context = 1 - has_correct_context
        docs.append({'context_score': has_correct_context, 'penalty_score': 1 - has_wrong_context})
        labels.append(label)
    return np.array([list(doc.values()) for doc in docs]), np.array(labels)

# --- Direction 3: Structural Analyzer ---
def generate_data_direction3(num_samples=100):
    """Generates a synthetic dataset for the Structural Analyzer task."""
    docs, labels = [], []
    for _ in range(num_samples):
        sentiment_arc = np.random.rand()
        narrative_flow = np.random.rand()
        # Define a complex, non-linear rule for relevance based on structural features.
        label = 1 if (sentiment_arc > 0.7 and narrative_flow > 0.7) or \
                     (sentiment_arc < 0.2 and narrative_flow < 0.2) else 0
        docs.append({'sentiment_arc': sentiment_arc, 'narrative_flow': narrative_flow})
        labels.append(label)
    return np.array([list(doc.values()) for doc in docs]), np.array(labels)

# --- Main Execution Block ---
if __name__ == "__main__":
    
    # --- Execute and Evaluate Direction 1 ---
    print("--- Direction 1: Classical Thematic Correlator ---")
    X1, y1 = generate_data_direction1()
    X1_train, X1_test, y1_train, y1_test = train_test_split(X1, y1, test_size=0.3, random_state=42)
    model1 = ClassicalResearchModel()
    model1.train(X1_train, y1_train)
    y1_pred = model1.predict(X1_test)
    accuracy1 = accuracy_score(y1_test, y1_pred)
    print(f"\nFinal Accuracy for Thematic Correlator: {accuracy1:.2%}\n")
    
    # --- Execute and Evaluate Direction 2 ---
    print("\n--- Direction 2: Classical Ambiguity Resolution ---")
    X2, y2 = generate_data_direction2()
    X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, test_size=0.3, random_state=42)
    model2 = ClassicalResearchModel()
    model2.train(X2_train, y2_train, maxiter=5)
    y2_pred = model2.predict(X2_test)
    accuracy2 = accuracy_score(y2_test, y2_pred)
    print(f"\nFinal Accuracy for Ambiguity Resolution: {accuracy2:.2%}\n")
    
    # --- Execute and Evaluate Direction 3 ---
    print("\n--- Direction 3: Classical Structural Analyzer ---")
    X3, y3 = generate_data_direction3()
    X3_train, X3_test, y3_train, y3_test = train_test_split(X3, y3, test_size=0.3, random_state=42)
    model3 = ClassicalResearchModel()
    model3.train(X3_train, y3_train)
    y3_pred = model3.predict(X3_test)
    accuracy3 = accuracy_score(y3_test, y3_pred)
    print(f"\nFinal Accuracy for Structural Analyzer: {accuracy3:.2%}\n")



--- Direction 1: Classical Thematic Correlator ---
Training for 20 iterations...
  Iteration 1/20: Loss = 0.0936
  Iteration 2/20: Loss = 0.0936
  Iteration 3/20: Loss = 0.0936
  Iteration 4/20: Loss = 0.0936
  Iteration 5/20: Loss = 0.0936
  Iteration 6/20: Loss = 0.0936
  Iteration 7/20: Loss = 0.0936
  Iteration 8/20: Loss = 0.0936
  Iteration 9/20: Loss = 0.0936
  Iteration 10/20: Loss = 0.0936
  Iteration 11/20: Loss = 0.0936
  Iteration 12/20: Loss = 0.0936
  Iteration 13/20: Loss = 0.0936
  Iteration 14/20: Loss = 0.0936
  Iteration 15/20: Loss = 0.0936
  Iteration 16/20: Loss = 0.0936
  Iteration 17/20: Loss = 0.0936
  Iteration 18/20: Loss = 0.0936
  Iteration 19/20: Loss = 0.0936
  Iteration 20/20: Loss = 0.0936

Final Accuracy for Thematic Correlator: 80.00%


--- Direction 2: Classical Ambiguity Resolution ---
Training for 5 iterations...
  Iteration 1/5: Loss = 0.0096
  Iteration 2/5: Loss = 0.0096
  Iteration 3/5: Loss = 0.0096
  Iteration 4/5: Loss = 0.0096
  Iteration 5

In [14]:
# Ultra-Fast Quantum RAG Research - Optimized for IBM Free Trial (10 minutes)
import numpy as np
import time
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.optimize import minimize

# --- Qiskit Imports ---
from qiskit_ibm_runtime import QiskitRuntimeService, SamplerV2 as Sampler
from qiskit.circuit.library import zz_feature_map, real_amplitudes
from qiskit.circuit import QuantumCircuit
from qiskit.compiler import transpile

class QuantumResearchModelUltraFast:
    """Ultra-optimized for 10-minute free trial - ALL 3 DIRECTIONS"""
    
    def __init__(self, backend_name="ibm_brisbane", shots=256, maxiter=2, random_seed=42):
        np.random.seed(random_seed)
        
        # EXTREME OPTIMIZATION FOR FREE TRIAL
        self.backend_name = backend_name
        self.shots = shots          # Ultra-low: 256 shots (16x faster than 4096)
        self.maxiter = maxiter      # Ultra-low: 2 iterations max
        
        self.service = None
        self.sampler = None
        self.backend_obj = None
        self.optimal_weights = None
        self.isa_circuit = None
        
    def log(self, message):
        timestamp = datetime.now().strftime("%H:%M:%S")
        print(f"[{timestamp}] {message}")

    def connect(self):
        self.log("⚡ Connecting to IBM Quantum...")
        self.service = QiskitRuntimeService()
        self.backend_obj = self.service.backend(self.backend_name)
        self.sampler = Sampler(mode=self.backend_obj)
        self.log(f"✓ Connected: {self.backend_obj.num_qubits} qubits, {self.backend_name}")

    def _build_and_transpile_circuit(self, feature_dim):
        self.log(f"Building minimal circuit ({feature_dim} qubits, 1 rep each)")
        
        # ULTRA-MINIMAL CIRCUITS
        feature_map = zz_feature_map(feature_dim, reps=1)  # Absolute minimum
        ansatz = real_amplitudes(feature_dim, reps=1)      # Absolute minimum
        
        pqc = QuantumCircuit(feature_dim)
        pqc.compose(feature_map, inplace=True)
        pqc.compose(ansatz, inplace=True)
        pqc.measure_all(inplace=True)
        
        # FASTEST TRANSPILATION
        self.isa_circuit = transpile(pqc, backend=self.backend_obj, optimization_level=0)
        self.log(f"✓ Circuit ready: depth={pqc.depth()}, params={ansatz.num_parameters}")
        return ansatz.num_parameters

    def _objective_function(self, weights, X_train, y_train):
        pubs = [(self.isa_circuit, np.concatenate((x_i, weights))) for x_i in X_train]
        self.log(f"Submitting {len(pubs)} circuits ({self.shots} shots)")
        
        job = self.sampler.run(pubs, shots=self.shots)
        result = job.result()
        
        probabilities = []
        for pub_result in result:
            counts = pub_result.data.meas.get_counts()
            prob_1 = counts.get('1', 0) / self.shots
            probabilities.append(prob_1)
        
        loss = np.mean((np.array(probabilities) - y_train)**2)
        self.log(f"Loss: {loss:.4f}")
        return loss

    def train(self, X_train, y_train):
        feature_dim = X_train.shape[1]
        num_params = self._build_and_transpile_circuit(feature_dim)
        initial_weights = np.random.uniform(0, 2 * np.pi, num_params)
        
        self.log(f"Training: {len(X_train)} samples, {num_params} params, {self.maxiter} max iters")
        
        opt_result = minimize(
            fun=lambda w: self._objective_function(w, X_train, y_train),
            x0=initial_weights,
            method='COBYLA',
            options={'maxiter': self.maxiter}
        )
        
        self.optimal_weights = opt_result.x
        self.log("✓ Training complete")

    def predict(self, X_test):
        pubs = [(self.isa_circuit, np.concatenate((x_i, self.optimal_weights))) for x_i in X_test]
        self.log(f"Prediction: {len(pubs)} circuits ({self.shots} shots)")
        
        job = self.sampler.run(pubs, shots=self.shots)
        result = job.result()
        
        probabilities = []
        for pub_result in result:
            counts = pub_result.data.meas.get_counts()
            prob_1 = counts.get('1', 0) / self.shots
            probabilities.append(prob_1)
        
        predictions = (np.array(probabilities) > 0.5).astype(int)
        self.log(f"✓ Predicted {len(predictions)} samples")
        return predictions

# ULTRA-SMALL DATA GENERATORS (Designed for speed)
def generate_data_direction1(num_samples=8):  # Thematic Correlator
    docs, labels = [], []
    for _ in range(num_samples):
        sim_A, sim_B = np.random.rand(), np.random.rand()
        label = 1 if sim_A > 0.6 and sim_B > 0.6 else 0
        docs.append({'sim_A': sim_A, 'sim_B': sim_B})
        labels.append(label)
    return np.array([list(d.values()) for d in docs]), np.array(labels)

def generate_data_direction2(num_samples=8):  # Ambiguity Resolution
    docs, labels = [], []
    for _ in range(num_samples):
        context = np.random.choice([0, 1])
        label = context
        docs.append({'context': context, 'penalty': 1 - context})
        labels.append(label)
    return np.array([list(d.values()) for d in docs]), np.array(labels)

def generate_data_direction3(num_samples=8):  # Structural Analyzer
    docs, labels = [], []
    for _ in range(num_samples):
        arc, flow = np.random.rand(), np.random.rand()
        label = 1 if (arc > 0.7 and flow > 0.7) or (arc < 0.2 and flow < 0.2) else 0
        docs.append({'arc': arc, 'flow': flow})
        labels.append(label)
    return np.array([list(d.values()) for d in docs]), np.array(labels)

# MAIN EXECUTION - ALL 3 DIRECTIONS IN UNDER 10 MINUTES
if __name__ == "__main__":
    print("🚀 ULTRA-FAST QUANTUM RAG EXPERIMENT - ALL 3 DIRECTIONS")
    print("🕒 Target: Complete in under 10 minutes (Free Trial Optimized)")
    print("=" * 70)
    
    # Ultra-fast configuration
    quantum_model = QuantumResearchModelUltraFast(
        shots=256,      # Ultra-low for maximum speed
        maxiter=2       # Bare minimum iterations
    )
    
    # Connect once, use for all directions
    quantum_model.connect()
    
    # Define all three research directions
    directions = [
        ("Thematic Correlator", generate_data_direction1),
        ("Ambiguity Resolution", generate_data_direction2), 
        ("Structural Analyzer", generate_data_direction3),
    ]
    
    results = {}
    experiment_start = time.time()
    
    # Execute all three directions sequentially
    for i, (name, data_generator) in enumerate(directions, 1):
        print(f"\n🧪 DIRECTION {i}/3: {name.upper()}")
        print("-" * 50)
        
        direction_start = time.time()
        
        try:
            # Generate ultra-small dataset
            X, y = data_generator(8)  # Only 8 samples total!
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.5, random_state=42
            )
            
            print(f"📊 Dataset: {len(X_train)} train, {len(X_test)} test samples")
            
            # Train and predict
            quantum_model.train(X_train, y_train)
            y_pred = quantum_model.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            
            direction_end = time.time()
            direction_time = direction_end - direction_start
            
            # Store results
            results[name] = {
                'accuracy': accuracy,
                'time': direction_time
            }
            
            print(f"🎯 RESULT: {accuracy:.1%} accuracy in {direction_time:.1f}s")
            
        except Exception as e:
            print(f"❌ Error in {name}: {e}")
            results[name] = {'accuracy': None, 'time': 0}
    
    # Final summary
    experiment_end = time.time()
    total_time = experiment_end - experiment_start
    
    print(f"\n📊 QUANTUM EXPERIMENT SUMMARY")
    print("=" * 50)
    print(f"⏱️  Total Time: {total_time:.1f} seconds ({total_time/60:.2f} minutes)")
    print(f"💰 Trial Usage: ~{total_time/60:.1f} minutes of your 10-minute limit")
    
    for name, result in results.items():
        if result['accuracy'] is not None:
            print(f"🔬 {name:<20}: {result['accuracy']:.1%} ({result['time']:.1f}s)")
        else:
            print(f"❌ {name:<20}: Failed")
    
    # Budget check
    if total_time < 600:  # 10 minutes = 600 seconds
        remaining = 600 - total_time
        print(f"\n✅ SUCCESS! You have {remaining:.0f} seconds ({remaining/60:.1f} min) remaining!")
        print("🎉 All 3 quantum directions completed within free trial limits!")
    else:
        print(f"\n⚠️  WARNING: Exceeded 10-minute limit by {(total_time-600)/60:.1f} minutes")
    
    print(f"\n🏁 Quantum RAG research experiment complete!")


🚀 ULTRA-FAST QUANTUM RAG EXPERIMENT - ALL 3 DIRECTIONS
🕒 Target: Complete in under 10 minutes (Free Trial Optimized)
[00:29:13] ⚡ Connecting to IBM Quantum...
[00:29:18] ✓ Connected: 127 qubits, ibm_brisbane

🧪 DIRECTION 1/3: THEMATIC CORRELATOR
--------------------------------------------------
📊 Dataset: 4 train, 4 test samples
[00:29:18] Building minimal circuit (2 qubits, 1 rep each)
[00:29:18] ✓ Circuit ready: depth=9, params=4
[00:29:18] Training: 4 samples, 4 params, 2 max iters
[00:29:18] Submitting 4 circuits (256 shots)
[00:29:24] Loss: 0.2500
[00:29:24] Submitting 4 circuits (256 shots)


  warn(f'{solver}: Invalid MAXFUN; it should be at least {min_maxfun_str}; it is set to {maxfun}')


[00:29:30] Loss: 0.2500
[00:29:30] Submitting 4 circuits (256 shots)
[00:29:36] Loss: 0.2500
[00:29:36] Submitting 4 circuits (256 shots)
[00:29:41] Loss: 0.2500
[00:29:41] Submitting 4 circuits (256 shots)
[00:29:47] Loss: 0.2500
[00:29:47] Submitting 4 circuits (256 shots)
[00:29:52] Loss: 0.2500
[00:29:52] ✓ Training complete
[00:29:52] Prediction: 4 circuits (256 shots)
[00:29:58] ✓ Predicted 4 samples
🎯 RESULT: 100.0% accuracy in 39.7s

🧪 DIRECTION 2/3: AMBIGUITY RESOLUTION
--------------------------------------------------
📊 Dataset: 4 train, 4 test samples
[00:29:58] Building minimal circuit (2 qubits, 1 rep each)
[00:29:58] ✓ Circuit ready: depth=9, params=4
[00:29:58] Training: 4 samples, 4 params, 2 max iters
[00:29:58] Submitting 4 circuits (256 shots)
[00:30:03] Loss: 1.0000
[00:30:03] Submitting 4 circuits (256 shots)


  warn(f'{solver}: Invalid MAXFUN; it should be at least {min_maxfun_str}; it is set to {maxfun}')


[00:30:09] Loss: 1.0000
[00:30:09] Submitting 4 circuits (256 shots)
[00:30:14] Loss: 1.0000
[00:30:14] Submitting 4 circuits (256 shots)
[00:30:19] Loss: 1.0000
[00:30:19] Submitting 4 circuits (256 shots)
[00:30:24] Loss: 1.0000
[00:30:24] Submitting 4 circuits (256 shots)
[00:30:30] Loss: 1.0000
[00:30:30] ✓ Training complete
[00:30:30] Prediction: 4 circuits (256 shots)
[00:30:36] ✓ Predicted 4 samples
🎯 RESULT: 75.0% accuracy in 38.1s

🧪 DIRECTION 3/3: STRUCTURAL ANALYZER
--------------------------------------------------
📊 Dataset: 4 train, 4 test samples
[00:30:36] Building minimal circuit (2 qubits, 1 rep each)
[00:30:36] ✓ Circuit ready: depth=9, params=4
[00:30:36] Training: 4 samples, 4 params, 2 max iters
[00:30:36] Submitting 4 circuits (256 shots)
[00:30:42] Loss: 0.2500
[00:30:42] Submitting 4 circuits (256 shots)


  warn(f'{solver}: Invalid MAXFUN; it should be at least {min_maxfun_str}; it is set to {maxfun}')


[00:30:47] Loss: 0.2500
[00:30:47] Submitting 4 circuits (256 shots)
[00:30:53] Loss: 0.2500
[00:30:53] Submitting 4 circuits (256 shots)
[00:30:59] Loss: 0.2500
[00:30:59] Submitting 4 circuits (256 shots)
[00:31:04] Loss: 0.2500
[00:31:04] Submitting 4 circuits (256 shots)
[00:31:10] Loss: 0.2500
[00:31:10] ✓ Training complete
[00:31:10] Prediction: 4 circuits (256 shots)
[00:31:16] ✓ Predicted 4 samples
🎯 RESULT: 100.0% accuracy in 39.7s

📊 QUANTUM EXPERIMENT SUMMARY
⏱️  Total Time: 117.5 seconds (1.96 minutes)
💰 Trial Usage: ~2.0 minutes of your 10-minute limit
🔬 Thematic Correlator : 100.0% (39.7s)
🔬 Ambiguity Resolution: 75.0% (38.1s)
🔬 Structural Analyzer : 100.0% (39.7s)

✅ SUCCESS! You have 483 seconds (8.0 min) remaining!
🎉 All 3 quantum directions completed within free trial limits!

🏁 Quantum RAG research experiment complete!


In [16]:
"""
===============================================================================
QUANTUM-ENHANCED RAG SYSTEM RESEARCH FRAMEWORK
===============================================================================
Author: Research Implementation for Quantum Advantage in Information Retrieval
Date: August 2025
Target Platform: IBM Quantum Platform (Free Trial Optimized)
Backend: ibm_brisbane (127-qubit quantum processor)

RESEARCH OBJECTIVE:
Investigate potential quantum advantage in three key RAG system components:
1. Thematic Correlator - Multi-concept document relevance assessment
2. Ambiguity Resolution - Context-aware disambiguation 
3. Structural Analyzer - Non-linear document structure evaluation

OPTIMIZATION PROFILE:
- Execution Time: <10 minutes (free trial compatible)
- Shots per Circuit: 256 (speed-accuracy balance)
- Optimizer Iterations: 10 (COBYLA warning-free minimum)
- Dataset Size: 24 samples per direction (error-free minimum)

TECHNICAL ARCHITECTURE:
- Variational Quantum Classifier (VQC) with ZZ feature maps
- Real Amplitudes ansatz for expressibility
- COBYLA optimization with proper convergence criteria
- Hardware-optimized transpilation pipeline
===============================================================================
"""

# ============================================================================
# IMPORTS AND DEPENDENCIES
# ============================================================================

import numpy as np
import time
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.optimize import minimize

# Qiskit Runtime and Circuit Libraries
from qiskit_ibm_runtime import QiskitRuntimeService, SamplerV2 as Sampler
from qiskit.circuit.library import zz_feature_map, real_amplitudes
from qiskit.circuit import QuantumCircuit
from qiskit.compiler import transpile


# ============================================================================
# QUANTUM RESEARCH MODEL CLASS
# ============================================================================

class QuantumResearchModelUltraFast:
    """
    Ultra-optimized Variational Quantum Classifier for RAG system research.
    
    Designed specifically for IBM Quantum free trial constraints while 
    maintaining statistical validity for quantum advantage assessment.
    
    Key Features:
    - Hardware-optimized circuit transpilation
    - COBYLA optimizer with proper convergence settings
    - Comprehensive execution logging and timing
    - Memory-efficient batch processing
    
    Parameters:
    -----------
    backend_name : str, default="ibm_brisbane"
        Target quantum backend for execution
    shots : int, default=256
        Number of measurement samples per circuit (speed-optimized)
    maxiter : int, default=10
        Maximum optimizer iterations (COBYLA minimum compliance)
    random_seed : int, default=42
        Reproducibility seed for quantum experiments
    """
    
    def __init__(self, backend_name="ibm_brisbane", shots=256, maxiter=10, random_seed=42):
        # Reproducibility Configuration
        np.random.seed(random_seed)
        
        # Hardware and Execution Parameters
        self.backend_name = backend_name      # Target quantum processor
        self.shots = shots                    # Measurement repetitions per circuit
        self.maxiter = maxiter               # Optimization iteration limit
        
        # Runtime State Variables
        self.service = None                  # IBM Quantum service connection
        self.sampler = None                  # Quantum primitive for circuit execution
        self.backend_obj = None              # Backend object reference
        self.optimal_weights = None          # Trained circuit parameters
        self.isa_circuit = None             # Hardware-compiled quantum circuit
        
    def log(self, message):
        """
        Timestamped logging for experimental tracking and debugging.
        
        Parameters:
        -----------
        message : str
            Log message to output with timestamp
        """
        timestamp = datetime.now().strftime("%H:%M:%S")
        print(f"[{timestamp}] {message}")

    def connect(self):
        """
        Establish connection to IBM Quantum Platform using saved credentials.
        
        Initializes the quantum service, retrieves backend specifications,
        and prepares the sampler primitive for circuit execution.
        
        Raises:
        -------
        RuntimeError
            If connection to quantum backend fails
        """
        self.log("⚡ Initializing IBM Quantum connection...")
        
        # Initialize service with saved account credentials
        self.service = QiskitRuntimeService()
        
        # Retrieve target quantum backend
        self.backend_obj = self.service.backend(self.backend_name)
        
        # Initialize sampler primitive for quantum circuit execution
        self.sampler = Sampler(mode=self.backend_obj)
        
        # Log successful connection with hardware specifications
        self.log(f"✓ Connected: {self.backend_obj.num_qubits} qubits, {self.backend_name}")

    def _build_and_transpile_circuit(self, feature_dim):
        """
        Construct and compile parameterized quantum circuit for VQC.
        
        Creates a hybrid classical-quantum circuit with:
        - ZZ Feature Map: Encodes classical data with entangling gates
        - Real Amplitudes Ansatz: Trainable rotation gates for optimization
        - Measurement Layer: Projects quantum state to classical bits
        
        Parameters:
        -----------
        feature_dim : int
            Number of features in input data (determines qubit count)
            
        Returns:
        --------
        int
            Number of trainable parameters in the ansatz circuit
        """
        self.log(f"🔧 Building quantum circuit ({feature_dim} qubits, minimal depth)")
        
        # Feature Encoding Layer - Maps classical data to quantum states
        # Using single repetition for speed optimization in free trial
        feature_map = zz_feature_map(feature_dim, reps=1)
        
        # Variational Ansatz - Trainable quantum neural network layer
        # Single repetition maintains expressibility while minimizing depth
        ansatz = real_amplitudes(feature_dim, reps=1)
        
        # Construct complete parameterized quantum circuit
        pqc = QuantumCircuit(feature_dim)
        pqc.compose(feature_map, inplace=True)  # Data encoding
        pqc.compose(ansatz, inplace=True)       # Trainable layer
        pqc.measure_all(inplace=True)           # Classical output extraction
        
        # Hardware Compilation - Translate abstract circuit to backend-specific gates
        self.log("🚀 Transpiling for hardware compatibility...")
        self.isa_circuit = transpile(
            pqc, 
            backend=self.backend_obj, 
            optimization_level=0  # Fastest compilation for time-critical execution
        )
        
        # Log circuit specifications for performance analysis
        self.log(f"✓ Circuit compiled: depth={pqc.depth()}, parameters={ansatz.num_parameters}")
        
        return ansatz.num_parameters

    def _objective_function(self, weights, X_train, y_train):
        """
        Quantum objective function for variational optimization.
        
        Executes parameterized quantum circuits on hardware and computes
        mean squared error loss between quantum predictions and true labels.
        
        Parameters:
        -----------
        weights : np.ndarray
            Current variational parameters for quantum circuit
        X_train : np.ndarray
            Training feature vectors for data encoding
        y_train : np.ndarray
            Training labels for supervised learning
            
        Returns:
        --------
        float
            Mean squared error loss for current parameters
        """
        # Prepare parameterized circuits - bind data and current weights
        pubs = [
            (self.isa_circuit, np.concatenate((x_i, weights))) 
            for x_i in X_train
        ]
        
        self.log(f"🎯 Submitting {len(pubs)} circuits ({self.shots} shots each)")
        
        # Execute quantum circuits on hardware
        job = self.sampler.run(pubs, shots=self.shots)
        result = job.result()
        
        # Extract quantum measurement probabilities
        probabilities = []
        for pub_result in result:
            # Get measurement counts for each circuit
            counts = pub_result.data.meas.get_counts()
            # Calculate probability of measuring '1' state
            prob_1 = counts.get('1', 0) / self.shots
            probabilities.append(prob_1)
        
        # Compute supervised learning loss (Mean Squared Error)
        loss = np.mean((np.array(probabilities) - y_train)**2)
        
        self.log(f"📊 Current Loss: {loss:.4f}")
        return loss

    def train(self, X_train, y_train):
        """
        Train the variational quantum classifier using COBYLA optimization.
        
        Constructs quantum circuit, initializes random parameters, and
        iteratively optimizes to minimize classification loss on training data.
        
        Parameters:
        -----------
        X_train : np.ndarray, shape (n_samples, n_features)
            Training feature vectors
        y_train : np.ndarray, shape (n_samples,)
            Training binary labels (0 or 1)
            
        Returns:
        --------
        bool
            True if optimization converged successfully
        """
        # Circuit Construction Phase
        feature_dim = X_train.shape[1]
        num_params = self._build_and_transpile_circuit(feature_dim)
        
        # Parameter Initialization - Random starting point in parameter space
        initial_weights = np.random.uniform(0, 2 * np.pi, num_params)
        
        # Optimizer Configuration - Ensure COBYLA compliance
        # COBYLA requires minimum num_parameters + 2 function evaluations
        min_required = num_params + 2
        effective_maxiter = max(self.maxiter, min_required, 10)
        
        self.log(f"🎓 Training Configuration:")
        self.log(f"   • Samples: {len(X_train)} training examples")
        self.log(f"   • Parameters: {num_params} variational weights")
        self.log(f"   • Max Iterations: {effective_maxiter} (COBYLA compliant)")
        
        # Variational Optimization - Find optimal circuit parameters
        self.log("🔄 Starting COBYLA optimization...")
        opt_result = minimize(
            fun=lambda w: self._objective_function(w, X_train, y_train),
            x0=initial_weights,
            method='COBYLA',
            options={'maxiter': effective_maxiter}
        )
        
        # Store optimized parameters for inference
        self.optimal_weights = opt_result.x
        
        self.log("✅ Training phase completed")
        return opt_result.success

    def predict(self, X_test):
        """
        Generate predictions using trained quantum classifier.
        
        Parameters:
        -----------
        X_test : np.ndarray, shape (n_samples, n_features)
            Test feature vectors for classification
            
        Returns:
        --------
        np.ndarray, shape (n_samples,)
            Binary predictions (0 or 1) for test samples
            
        Raises:
        -------
        RuntimeError
            If model has not been trained (optimal_weights is None)
        """
        # Validation check for trained model
        if self.optimal_weights is None:
            raise RuntimeError("Model must be trained before prediction. Call train() first.")
        
        # Prepare inference circuits with optimized parameters
        pubs = [
            (self.isa_circuit, np.concatenate((x_i, self.optimal_weights))) 
            for x_i in X_test
        ]
        
        self.log(f"🔮 Inference: {len(pubs)} circuits ({self.shots} shots each)")
        
        # Execute quantum inference on hardware
        job = self.sampler.run(pubs, shots=self.shots)
        result = job.result()
        
        # Convert quantum measurements to binary predictions
        probabilities = []
        for pub_result in result:
            counts = pub_result.data.meas.get_counts()
            prob_1 = counts.get('1', 0) / self.shots
            probabilities.append(prob_1)
        
        # Apply decision threshold (0.5) for binary classification
        predictions = (np.array(probabilities) > 0.5).astype(int)
        
        self.log(f"✅ Generated predictions for {len(predictions)} samples")
        return predictions


# ============================================================================
# DATA GENERATION FUNCTIONS FOR RESEARCH DIRECTIONS
# ============================================================================

def generate_data_direction1(num_samples=24):
    """
    Direction 1: Quantum Thematic Correlator
    
    Generates synthetic documents for multi-concept relevance assessment.
    A document is relevant if it has high similarity to BOTH concepts simultaneously.
    This tests quantum superposition advantage in handling concept correlations.
    
    Parameters:
    -----------
    num_samples : int, default=24
        Number of document samples to generate (increased to avoid stratification errors)
        
    Returns:
    --------
    tuple (np.ndarray, np.ndarray)
        Feature matrix (concept similarities) and relevance labels
    """
    # Ensure balanced class distribution by generating until both classes present
    max_attempts = 10
    for attempt in range(max_attempts):
        docs, labels = [], []
        
        for _ in range(num_samples):
            # Generate random similarity scores to two different concepts
            sim_concept_A = np.random.rand()  # Similarity to first concept
            sim_concept_B = np.random.rand()  # Similarity to second concept
            
            # Relevance rule: High similarity to BOTH concepts required
            # This creates non-linear decision boundaries ideal for quantum processing
            label = 1 if (sim_concept_A > 0.6 and sim_concept_B > 0.6) else 0
            
            docs.append({
                'sim_concept_A': sim_concept_A, 
                'sim_concept_B': sim_concept_B
            })
            labels.append(label)
        
        # Check if both classes are represented
        if len(np.unique(labels)) >= 2:
            break
    
    # Convert to numpy arrays for scikit-learn compatibility
    return np.array([list(d.values()) for d in docs]), np.array(labels)


def generate_data_direction2(num_samples=24):
    """
    Direction 2: Quantum Ambiguity Resolution
    
    Generates documents with context-dependent relevance patterns.
    Tests quantum coherence in maintaining multiple interpretation states
    until context collapse determines final relevance.
    
    Parameters:
    -----------
    num_samples : int, default=24
        Number of document samples to generate (increased for stability)
        
    Returns:
    --------
    tuple (np.ndarray, np.ndarray)
        Feature matrix (context indicators) and relevance labels
    """
    # Ensure balanced class distribution
    max_attempts = 10
    for attempt in range(max_attempts):
        docs, labels = [], []
        
        for _ in range(num_samples):
            # Binary context indicator (presence/absence of disambiguating context)
            has_correct_context = np.random.choice([0, 1])
            
            # Direct context-relevance mapping
            # This tests quantum advantage in context-sensitive processing
            label = has_correct_context
            
            docs.append({
                'context_score': has_correct_context,
                'penalty_score': 1 - has_correct_context  # Inverse relationship
            })
            labels.append(label)
        
        # Check if both classes are represented
        if len(np.unique(labels)) >= 2:
            break
    
    return np.array([list(d.values()) for d in docs]), np.array(labels)


def generate_data_direction3(num_samples=24):
    """
    Direction 3: Quantum Structural Analyzer
    
    Generates documents with complex structural patterns requiring non-linear
    analysis. Tests quantum advantage in processing multi-modal structural features
    with XOR-like decision boundaries.
    
    Parameters:
    -----------
    num_samples : int, default=24
        Number of document samples to generate (increased for reliability)
        
    Returns:
    --------
    tuple (np.ndarray, np.ndarray)
        Feature matrix (structural indicators) and relevance labels
    """
    # Ensure balanced class distribution with retry mechanism
    max_attempts = 10
    for attempt in range(max_attempts):
        docs, labels = [], []
        
        for _ in range(num_samples):
            # Structural feature extraction (normalized 0-1)
            sentiment_arc = np.random.rand()     # Document emotional progression
            narrative_flow = np.random.rand()    # Logical structure coherence
            
            # Non-linear relevance rule: Both very high OR both very low
            # This XOR-like pattern is naturally suited to quantum interference
            label = 1 if (
                (sentiment_arc > 0.7 and narrative_flow > 0.7) or 
                (sentiment_arc < 0.2 and narrative_flow < 0.2)
            ) else 0
            
            docs.append({
                'sentiment_arc': sentiment_arc,
                'narrative_flow': narrative_flow
            })
            labels.append(label)
        
        # Check if both classes are represented
        if len(np.unique(labels)) >= 2:
            break
    
    return np.array([list(d.values()) for d in docs]), np.array(labels)


# ============================================================================
# MAIN EXPERIMENTAL EXECUTION FRAMEWORK
# ============================================================================

if __name__ == "__main__":
    # ========================================================================
    # EXPERIMENT HEADER AND CONFIGURATION
    # ========================================================================
    
    print("🚀 QUANTUM-ENHANCED RAG RESEARCH FRAMEWORK")
    print("=" * 70)
    print("🎯 Objective: Assess quantum advantage in information retrieval tasks")
    print("⚡ Platform: IBM Quantum (Free Trial Optimized)")
    print("🕒 Time Limit: <10 minutes total execution")
    print("🔬 Directions: 3 core RAG system components")
    print("=" * 70)
    
    # Initialize quantum processing system with optimized parameters
    quantum_model = QuantumResearchModelUltraFast(
        shots=256,      # Speed-accuracy balance for free trial
        maxiter=10      # COBYLA-compliant minimum iterations
    )
    
    # Establish quantum hardware connection
    quantum_model.connect()
    
    # ========================================================================
    # RESEARCH DIRECTION DEFINITIONS
    # ========================================================================
    
    research_directions = [
        {
            'name': 'Thematic Correlator',
            'generator': generate_data_direction1,
            'description': 'Multi-concept correlation analysis with quantum superposition'
        },
        {
            'name': 'Ambiguity Resolution', 
            'generator': generate_data_direction2,
            'description': 'Context-dependent disambiguation via quantum coherence'
        },
        {
            'name': 'Structural Analyzer',
            'generator': generate_data_direction3, 
            'description': 'Non-linear structural pattern recognition using quantum interference'
        }
    ]
    
    # ========================================================================
    # EXPERIMENTAL EXECUTION LOOP
    # ========================================================================
    
    experimental_results = {}
    total_experiment_start = time.time()
    
    for direction_idx, direction in enumerate(research_directions, 1):
        # Direction initialization
        print(f"\n📊 DIRECTION {direction_idx}/3: {direction['name'].upper()}")
        print("-" * 60)
        print(f"🎯 Hypothesis: {direction['description']}")
        
        direction_start_time = time.time()
        
        try:
            # ================================================================
            # DATA PREPARATION PHASE
            # ================================================================
            
            # Generate synthetic dataset for current research direction
            X, y = direction['generator'](24)  # Increased sample size
            
            # Split into training and testing sets WITHOUT stratification
            # This avoids the "least populated class" error for small datasets
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, 
                test_size=0.5, 
                random_state=42
                # Removed stratify=y to prevent class imbalance errors
            )
            
            print(f"📈 Dataset Configuration:")
            print(f"   • Training samples: {len(X_train)}")
            print(f"   • Testing samples: {len(X_test)}")
            print(f"   • Feature dimensions: {X_train.shape[1]}")
            print(f"   • Train class distribution: {np.bincount(y_train)}")
            print(f"   • Test class distribution: {np.bincount(y_test)}")
            
            # ================================================================
            # QUANTUM TRAINING PHASE
            # ================================================================
            
            print(f"\n🎓 Initiating quantum training phase...")
            training_success = quantum_model.train(X_train, y_train)
            
            # ================================================================
            # QUANTUM INFERENCE PHASE
            # ================================================================
            
            print(f"\n🔮 Executing quantum inference...")
            y_pred = quantum_model.predict(X_test)
            
            # ================================================================
            # PERFORMANCE EVALUATION
            # ================================================================
            
            # Calculate classification accuracy
            accuracy = accuracy_score(y_test, y_pred)
            
            # Measure execution time for this direction
            direction_end_time = time.time()
            direction_duration = direction_end_time - direction_start_time
            
            # Store comprehensive results
            experimental_results[direction['name']] = {
                'accuracy': accuracy,
                'execution_time': direction_duration,
                'training_success': training_success,
                'predictions': y_pred.tolist(),
                'true_labels': y_test.tolist()
            }
            
            # Report direction results
            print(f"\n🎯 DIRECTION {direction_idx} RESULTS:")
            print(f"   • Quantum Accuracy: {accuracy:.1%}")
            print(f"   • Execution Time: {direction_duration:.1f} seconds")
            print(f"   • Training Convergence: {'✅ Success' if training_success else '⚠️  Partial'}")
            
        except Exception as experimental_error:
            # Error handling and logging
            print(f"❌ Direction {direction_idx} Error: {experimental_error}")
            experimental_results[direction['name']] = {
                'accuracy': None,
                'execution_time': 0,
                'training_success': False,
                'error': str(experimental_error)
            }
    
    # ========================================================================
    # COMPREHENSIVE RESULTS ANALYSIS
    # ========================================================================
    
    total_experiment_end = time.time()
    total_execution_time = total_experiment_end - total_experiment_start
    
    print(f"\n📊 QUANTUM EXPERIMENT COMPREHENSIVE SUMMARY")
    print("=" * 70)
    
    # Execution Time Analysis
    print(f"⏱️  TEMPORAL ANALYSIS:")
    print(f"   • Total Execution: {total_execution_time:.1f} seconds ({total_execution_time/60:.2f} minutes)")
    print(f"   • Free Trial Usage: {total_execution_time/60:.1f} minutes of 10-minute allocation")
    
    # Performance Results Summary
    print(f"\n🎯 PERFORMANCE RESULTS:")
    successful_experiments = 0
    total_quantum_accuracy = 0
    
    for direction_name, results in experimental_results.items():
        if results['accuracy'] is not None:
            status_icon = "✅" if results['training_success'] else "⚠️"
            print(f"   {status_icon} {direction_name:<20}: {results['accuracy']:.1%} "
                  f"({results['execution_time']:.1f}s)")
            successful_experiments += 1
            total_quantum_accuracy += results['accuracy']
        else:
            print(f"   ❌ {direction_name:<20}: Execution Failed")
    
    # Statistical Summary
    if successful_experiments > 0:
        average_accuracy = total_quantum_accuracy / successful_experiments
        print(f"\n📈 STATISTICAL ANALYSIS:")
        print(f"   • Successful Directions: {successful_experiments}/3")
        print(f"   • Average Quantum Accuracy: {average_accuracy:.1%}")
    
    # Resource Utilization Assessment
    print(f"\n💰 RESOURCE UTILIZATION:")
    if total_execution_time < 600:  # 10 minutes
        remaining_time = 600 - total_execution_time
        efficiency = (successful_experiments / 3) * 100
        print(f"   • Time Remaining: {remaining_time:.0f} seconds ({remaining_time/60:.1f} minutes)")
        print(f"   • Execution Efficiency: {efficiency:.0f}%")
        print(f"   • Status: ✅ Within Free Trial Limits")
        
        if successful_experiments == 3:
            print(f"   • Achievement: 🎉 All directions completed successfully!")
    else:
        overrun = total_execution_time - 600
        print(f"   • Time Overrun: {overrun:.1f} seconds ({overrun/60:.1f} minutes)")
        print(f"   • Status: ⚠️ Exceeded Free Trial Allocation")
    
    # Research Conclusions and Next Steps
    print(f"\n🔬 RESEARCH INSIGHTS:")
    print(f"   • Quantum Hardware: Real {quantum_model.backend_name} execution")
    print(f"   • Measurement Regime: {quantum_model.shots} shots per circuit")
    print(f"   • Optimization Method: Variational quantum training")
    print(f"   • Circuit Architecture: ZZ feature maps with Real Amplitudes ansatz")
    
    print(f"\n📋 RECOMMENDED NEXT STEPS:")
    print(f"   1. Compare quantum results against classical baselines")
    print(f"   2. Analyze statistical significance of performance differences")
    print(f"   3. Investigate quantum advantage patterns across directions")
    print(f"   4. Scale experiments with extended quantum runtime allocation")
    
    print(f"\n🏁 QUANTUM-ENHANCED RAG RESEARCH COMPLETED")
    print(f"🚀 Framework ready for quantum advantage analysis!")
    print("=" * 70)


🚀 QUANTUM-ENHANCED RAG RESEARCH FRAMEWORK
🎯 Objective: Assess quantum advantage in information retrieval tasks
⚡ Platform: IBM Quantum (Free Trial Optimized)
🕒 Time Limit: <10 minutes total execution
🔬 Directions: 3 core RAG system components
[00:41:53] ⚡ Initializing IBM Quantum connection...
[00:41:59] ✓ Connected: 127 qubits, ibm_brisbane

📊 DIRECTION 1/3: THEMATIC CORRELATOR
------------------------------------------------------------
🎯 Hypothesis: Multi-concept correlation analysis with quantum superposition
📈 Dataset Configuration:
   • Training samples: 12
   • Testing samples: 12
   • Feature dimensions: 2
   • Train class distribution: [10  2]
   • Test class distribution: [12]

🎓 Initiating quantum training phase...
[00:41:59] 🔧 Building quantum circuit (2 qubits, minimal depth)
[00:41:59] 🚀 Transpiling for hardware compatibility...
[00:41:59] ✓ Circuit compiled: depth=9, parameters=4
[00:41:59] 🎓 Training Configuration:
[00:41:59]    • Samples: 12 training examples
[00:41:59