#### 1. Importing all the libraries

In [4]:
import os
import sys
import shutil
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.collections import LineCollection
from math import ceil, log2

# For classical learner
import torch
import torch.nn as nn
import torch.optim as optim

# For quantum learner
import pennylane as qml
from pennylane import numpy as np

from sklearn import tree, metrics
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, ExtraTreesClassifier

import pennylane as qml

import time


#### 2. Define the Functions

In [2]:
# Function that create the episode data - sample randomaly
def get_data(episode_size,policy,mode):
    global dataset
    if mode=='train':
        if policy==0:
             dataset=data.sample(n=episode_size)
        else:
            dataset=data
    else:
        dataset = pd.read_csv(location + '/' + file +'_test_int.csv', index_col=0)
    return dataset

In [3]:
# Function that separate the episode data into features and label
def data_separate (dataset):
    global X
    global y    
    X = dataset.iloc[:,0:dataset.shape[1]-1]  # all rows, all the features and no labels
    y = dataset.iloc[:, -1]  # all rows, label only
    return X,y

In [4]:
# Function that split the episode data into train and test
def data_split(X,y):
    global X_train_main
    global X_test_main   
    global y_train
    global y_test  
    from sklearn.model_selection import train_test_split
    X_train_main, X_test_main, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=4)
    return X_train_main, X_test_main, y_train, y_test

In [5]:
# Function that chooses exploration or explotation method
def exploration_explotation(epsilon):
    global exploration 
    if np.random.rand() < epsilon:  
        exploration=1
    else:
        exploration=0    
    return exploration

In [6]:
# Function that returns all available actions in the state given as an argument: 
def available_actions(number_of_columns,columns,initial_state,current_state,trashold, exploration):
    global exclude
    global all_columns
#    exclude=[]
    all_columns=np.arange(number_of_columns+1)
    # remove columns that have been already selected
    exclude=columns.copy()
    # remove the initial_state and the current_state
    exclude.extend([initial_state, current_state])
    available_act = list(set(all_columns)-set(exclude))
    # remove actions that have negetiv Q value
    if exploration==0:
        index = np.where(Q[current_state,available_act] > trashold)[1]
        available_act= [available_act[i] for i in index.tolist()]
    return available_act

In [7]:
def sample_next_action(current_state, Q, available_act, exploration):
    global available_act_q_value
    available_act_q_value = [float(q) for q in np.array(Q[current_state, available_act]).reshape(-1)]
    
    if exploration == 1: 
        # Random selection
        next_action = int(np.random.choice(available_act, 1).item())
    else: 
        # Greedy selection according to max value
        maxQ = max(available_act_q_value)
        count = available_act_q_value.count(maxQ)
        
        if count > 1:
            max_columns = [i for i in range(len(available_act_q_value)) if available_act_q_value[i] == maxQ]
            i = int(np.random.choice(max_columns, 1).item())
        else:
            i = available_act_q_value.index(maxQ)
        
        next_action = available_act[i]  
    
    return next_action


In [8]:
# function that update a list with all selected columns in the episode
def update_columns(action, columns):
    update_columns=columns
    update_columns.append(action)
    return update_columns

In [9]:
def update_X_train_X_test(columns, X_train_main, X_test_main):
    # Ensure columns are valid indices
    valid_columns = [col for col in columns if col < X_train_main.shape[1]]

    if not valid_columns:
        print("⚠️ Warning: No valid columns selected. Returning empty DataFrame.")
        return pd.DataFrame(), pd.DataFrame()

    X_train = X_train_main.iloc[:, valid_columns]
    X_test = X_test_main.iloc[:, valid_columns]
    
    return X_train, X_test

In [None]:
# Function that run the learner and get the error to the current episode columns list
def Learner(X_train, X_test,y_train, y_test):
    global learner
    global y_pred
    if learner_model == 'DT':
        learner = tree.DecisionTreeClassifier()
        learner = learner.fit(X_train, y_train)
        y_pred = learner.predict(X_test)
    elif learner_model == 'KNN':
        learner = KNeighborsClassifier(metric='hamming',n_neighbors=5)
        learner = learner.fit(X_train, y_train)
        y_pred = learner.predict(X_test)        
    elif learner_model == 'SVM':
        learner = SVC()
        learner = learner.fit(X_train, y_train)
        y_pred = learner.predict(X_test)        
    elif learner_model == 'NB':
        learner = MultinomialNB()
        learner = learner.fit(X_train, y_train)
        y_pred = learner.predict(X_test)
    elif learner_model == 'AB':
        learner = AdaBoostClassifier()
        learner = learner.fit(X_train, y_train)
        y_pred = learner.predict(X_test)
    elif learner_model == 'GB':
        learner = GradientBoostingClassifier()
        learner = learner.fit(X_train, y_train)
        y_pred = learner.predict(X_test)  
    elif learner_model == 'VQC':
        learner = QuantumLearner(num_layers=2)
        learner = learner.fit(X_train, y_train)
        y_pred = learner.predict(X_test)
    elif learner_model == 'ANN':
        learner = ClassicalLearner()
        learner = learner.fit(X_train, y_train)
        y_pred = learner.predict(X_test)
    accuracy=metrics.accuracy_score(y_test, y_pred)
    error=1-accuracy
    return error


In [11]:
def q_update(current_state, action, learning_rate, reward):
    # next_state = current action
    max_index = np.where(Q[action,] == np.max(Q[action,]))[0]  # Use [0] instead of [1] for 1D arrays
    
    if max_index.shape[0] > 1:
        # Resolve tie by selecting one randomly
        max_index = int(np.random.choice(max_index, size=1).item())
    else:
        max_index = int(max_index[0])  # Convert the first element to a scalar

    max_value = Q[action, max_index]

    # Update the Q matrix
    if Q[current_state, action] == 1:
        Q[current_state, action] = learning_rate * reward
    else:
        Q[current_state, action] = Q[current_state, action] + learning_rate * (
            reward + (discount_factor * max_value) - Q[current_state, action]
        )


### Experiment mangment

#### 3. Define the parameters 

In [32]:
## for run time ##
N_features=5
N_data=1

#Experiment: 
experiment='test'
number_of_experiment=1

# Dataset parameters #
location = 'Datasets/adult'
outputlocation='Datasets'
file='adult' #adult #diabetic_data #no_show
#np.random.seed(3)

# Q learning parameter # 
learning_rate=0.005
discount_factor = 0.01 #0
epsilon = 0.1

# Learner and episode parameters #
learner_model = 'VQC' #DT #KNN #SVM
episode_size=10
internal_trashold=0
external_trashold=0
filename= file +'_int.csv'

#Experiments folder management: 
#if not os.path.exists('/Experiments'):
#    os.makedirs('/Experiments') 
if not os.path.exists('Experiments/'+ str(experiment)):
    os.makedirs('Experiments/'+ str(experiment))
else:
    shutil.rmtree('Experiments/'+ str(experiment))          #removes all the subdirectories!
    os.makedirs('Experiments/'+ str(experiment))
#writer = pd.ExcelWriter('Experiments/'+ str(experiment) + '/df.xlsx') 

text_file = open('Experiments/'+ str(experiment) +'/parameters.txt', "w")
text_file.write('experiment: ' + str(experiment)+ '\n')
text_file.write('number of experiments: ' + str(number_of_experiment)+ '\n')
text_file.write('file: ' + str(file)+ '\n')
text_file.write('learner model: ' + str(learner_model)+ '\n')
text_file.write('episode size: ' + str(episode_size)+ '\n')
#text_file.write('numbers of epocs: ' + str(epocs)+ '\n')
text_file.write('internal trashold: ' + str(internal_trashold)+ '\n')
text_file.write('external trashold: ' + str(external_trashold)+ '\n')
 
text_file.close()

In [13]:
# Classical Learner based on a simple ANN
class ClassicalLearner(nn.Module):
    def __init__(self, num_layers=2, hidden_size=5):
        super().__init__()
        self.layers = None
        self.sigmoid = nn.Sigmoid()

    def initialize_layers(self, input_size, num_layers, hidden_size=5):
        layers = [input_size] + [hidden_size] * (num_layers - 1) + [1]
        self.layers = nn.ModuleList([nn.Linear(layers[i], layers[i+1], dtype=torch.float64) for i in range(len(layers) - 1)])
    
    def forward(self, x):
        x = x.to(torch.float64)
        for layer in self.layers[:-1]:
            x = torch.relu(layer(x))
        return self.sigmoid(self.layers[-1](x))
    
    def fit(self, X_train, y_train, num_it=50, lr=0.01):
        input_size = X_train.shape[1]
        self.initialize_layers(input_size, num_layers=2)
        optimizer = optim.Adam(self.parameters(), lr=lr)
        y_train = torch.tensor(y_train.values, dtype=torch.float64).reshape(-1, 1)
        
        for epoch in range(num_it):
            optimizer.zero_grad()
            y_pred = self.forward(torch.tensor(X_train.values, dtype=torch.float64)).reshape(-1, 1)
            loss = nn.BCELoss()(y_pred, y_train)
            loss.backward()
            optimizer.step()
        
        return self
    
    def predict(self, X_test):
        with torch.no_grad():
            X_test_tensor = torch.tensor(X_test.values, dtype=torch.float64)
            y_pred = self.forward(X_test_tensor).reshape(-1, 1)
            return (y_pred.numpy().flatten() > 0.5).astype(int)


In [112]:
class QuantumLearner:
    def __init__(self, num_layers=2):
        self.num_layers = num_layers
        self.weights = None  # Weights will be initialized dynamically
        self.bias = None  # Bias will be initialized dynamically
        self.device = None  # Device will be initialized dynamically
        self.circuit = None  # Circuit will be built dynamically

    def _build_circuit(self, num_features):
        """Build the quantum circuit with parameterized weights."""
        self.device = qml.device("default.qubit", wires=num_features)

        @qml.qnode(self.device)
        def circuit(inputs, weights, bias):
            # Encode inputs into the quantum state
            for i in range(num_features):
                qml.RY(inputs[i], wires=i)

            # Apply parameterized layers
            for layer in range(self.num_layers):
                for i in range(num_features):
                    qml.RY(weights[layer, i], wires=i)
                for i in range(num_features - 1):
                    qml.CNOT(wires=[i, i + 1])

            # Apply bias rotation to the first qubit
            qml.RY(bias, wires=0)

            # Measure the expectation value of the first qubit
            return qml.expval(qml.PauliZ(0))

        return circuit

    def fit(self, X, y, num_it=10, learning_rate=0.1):
        """Train the quantum model using gradient descent."""
        # Ensure the input is a numeric numpy array
        X = np.array(X, dtype=float)
        y = np.array(y, dtype=float)

        # Dynamically adjust the number of features
        num_features = X.shape[1]
        self.weights = qml.numpy.tensor(
            np.random.uniform(-np.pi, np.pi, (self.num_layers, num_features)),
            requires_grad=True
        )
        self.bias = qml.numpy.tensor(np.random.uniform(-np.pi, np.pi), requires_grad=True)
        self.circuit = self._build_circuit(num_features)

        opt = qml.GradientDescentOptimizer(stepsize=learning_rate)

        print(f"Weights before training: {self.weights}")
        print(f"Bias before training: {self.bias}")

        for it in range(num_it):
            for i in range(len(X)):
                inputs = X[i]
                target = y[i]

                def cost_fn(weights, bias):
                    prediction = self.circuit(inputs, weights, bias)
                    return (prediction - target) ** 2  # Return a scalar value

                # Perform an optimization step
                self.weights, self.bias = opt.step(cost_fn, self.weights, self.bias)

            # Debugging information
            print(f"Iteration {it + 1}/{num_it}")
            print(f"Weights: {self.weights}")
            print(f"Bias: {self.bias}")

        return self

    def predict(self, X):
        """Predict labels for the given inputs."""
        # Ensure the input is a numeric numpy array
        X = np.array(X, dtype=float)

        predictions = []
        for inputs in X:
            output = self.circuit(inputs, self.weights, self.bias)
            predictions.append(np.sign(output))  # Convert to binary label {-1, 1}
        return np.array(predictions)

In [16]:
class QuantumFeatureSelection:
    def __init__(self, num_features, learning_rate=0.1):
        self.num_features = num_features  # Use one qubit per feature
        self.learning_rate = learning_rate  
        self.device = qml.device("default.qubit", wires=self.num_features)
        self.state = self.initialize_state()  # Initialize the quantum state

    def initialize_state(self):
        """Initialize the quantum state as an equal superposition over all 2^(num_features) basis states."""
        @qml.qnode(self.device)
        def circuit():
            # Create a state vector that is an equal superposition over all basis states.
            init_state = np.ones(2**self.num_features) / np.sqrt(2**self.num_features)
            qml.templates.embeddings.AmplitudeEmbedding(
                init_state, wires=range(self.num_features), normalize=False
            )
            return qml.state()

        # Get the state vector from the circuit and ensure normalization
        state_vector = circuit()
        state_vector = state_vector / np.linalg.norm(state_vector)
        return state_vector

    def measure(self, available_features, return_probabilities=False):
        """Measure based on the marginal probability of each qubit being in state |1>."""
        num_qubits = self.num_features
        marginals = []
        for i in available_features:
            total = 0.0
            for idx in range(2**num_qubits):
                bits = format(idx, f"0{num_qubits}b")
                if bits[i] == '1':
                    total += np.abs(self.state[idx])**2
            marginals.append(total)
        marginals = np.array(marginals, dtype=np.float64)
        marginals /= np.sum(marginals)
       
        if return_probabilities:
            return marginals
        selected_feature = np.random.choice(available_features, p=marginals)
        return selected_feature

    def unitary_update(self, action, reward):
        """Update the quantum state using a reward-driven unitary operation."""
        @qml.qnode(self.device)
        def update_circuit():
            # Embed the current state into the circuit.
            qml.templates.embeddings.AmplitudeEmbedding(
                self.state, wires=range(self.num_features), normalize=False
            )
            # Apply a reward-driven rotation to the selected qubit.
            if reward > 0:
                qml.RY(self.learning_rate * reward, wires=action)
            elif reward < 0:
                qml.RY(-self.learning_rate * abs(reward), wires=action)
            # Apply a Grover-like diffusion operator.
            #for qubit in range(self.num_features):
            #    qml.Hadamard(wires=qubit)
            #    qml.PauliZ(wires=qubit)
            #    qml.Hadamard(wires=qubit)
            return qml.state()

        # Update the quantum state.
        self.state = update_circuit()
        self.state = self.state / np.linalg.norm(self.state)
        print(f"🔍 Quantum state after update: {self.state}")


#### 4. Run all experiments

In [None]:
for e in range(number_of_experiment):
    experiment_path = f'Experiments/{experiment}/{e}'
    if not os.path.exists(experiment_path):
        os.makedirs(experiment_path)
    else:
        shutil.rmtree(experiment_path)  # Removes all the subdirectories!
        os.makedirs(experiment_path)

    print(f'Experiments {e} start')

    ########################## Experiment Setup ##########################
    # Read the data
    data = pd.read_csv(location + '/' + filename, index_col=0)

    ##### for run time - start #####
    import timeit
    start = timeit.default_timer()
    size = int(N_data * len(data.index))
    data = data.sample(n=size)
    data = data.iloc[:, -N_features - 1:]
    ##### for run time - end #####

    # Set the number of iterations:
    iterations = 10 * len(data.index) / episode_size
    # Set the number of columns excluding the class column
    number_of_columns = data.shape[1] - 1
    print(f"Number of columns: {number_of_columns} (exclude class column)")
    # Set the number of episodes
    episodes_number = iterations
    print(f"Number of episodes: {episodes_number}")
    # Initialize matrix Q as a 1-values matrix:
    Q = np.matrix(np.ones([number_of_columns + 1, number_of_columns + 1]))  # Use the last dummy column as initial state
    # Set initial_state to be the last dummy column we have created
    initial_state = number_of_columns
    # Define data frame to save episode policies results
    df = pd.DataFrame(columns=('episode', 'episode_columns', 'policy_columns', 'policy_accuracy_train', 'policy_accuracy_test'))
    print(f"Initial state number: {initial_state} (the last dummy column we have created)")

    ########################## Episode Loop ##########################
    for i in range(int(episodes_number)):
        ########## Beginning of Episode ##########
        # Initialize lists for available actions, episode columns, and policy mode & episode error
        episode_available_act = list(np.arange(number_of_columns))
        episode_columns = []
        policy = 0
        episode_error = 0
        # Initialize the error to 0.5
        episode_last_error = 0.5
        # Initialize current_state to be initial_state
        episode_current_state = initial_state
        # Create the episode data
        episode = get_data(episode_size, policy=0, mode='train')
        # Separate the episode data into features and labels
        X_episode, y_episode = data_separate(episode)
        # Split the data into train and test
        X_train_main_episode, X_test_main_episode, y_train_episode, y_test_episode = data_split(X_episode, y_episode)

        # Set epsilon and learning rate based on the episode index
        if i < episodes_number * 0.25:
            epsilon = 0.9
            learning_rate = 0.09
        elif i < episodes_number * 0.5:
            epsilon = 0.5
            learning_rate = 0.05
        elif i < episodes_number * 0.75:
            epsilon = 0.3
            learning_rate = 0.01
        else:
            epsilon = 0.1
            learning_rate = 0.005

        ########## Q-Learning Start ##########
        # Initialize Quantum RL feature selection
        quantum_rl = QuantumFeatureSelection(num_features=number_of_columns)

        while len(episode_available_act) > 0:
            print(f"\n🟢 Available features: {episode_available_act}")

            # Determine exploration vs. exploitation
            exploration = exploration_explotation(epsilon)

            # Update the available actions list
            episode_available_act = available_actions(
                number_of_columns, episode_columns, initial_state, episode_current_state, internal_trashold, exploration
            )

            # Ensure there are available features
            if not episode_available_act:
                print("❌ No available features left. Terminating episode.")
                break

            # Compute probabilities dynamically using the measure method
            try:
                probabilities = quantum_rl.measure(episode_available_act, return_probabilities=True)
            except ValueError as e:
                print(f"❌ Error during measurement: {e}")
                break

            # Validate available_features
            available_features = [f for f in episode_available_act if f < N_features]
            if not available_features:
                print("❌ No valid available features left. Terminating episode.")
                break

            # Debug probabilities and available features
            print(f"🔎 Probabilities: {probabilities}")
            print(f"🟢 Valid available features: {available_features}")

            # Select next feature using quantum measurement
            episode_action = quantum_rl.measure(available_features)
            print(f"🎯 Selected feature: {episode_action}")

            # Update the selected feature list
            episode_columns = update_columns(episode_action, episode_columns)
            print(f"📌 Updated selected features: {episode_columns}")

            # Prepare training dataset with selected features
            print(f"0 Selected episode columns: {episode_columns}")

            X_train_episode, X_test_episode = update_X_train_X_test(episode_columns, X_train_main_episode, X_test_main_episode)

            # Ensure the data is numeric
            X_train_episode = np.array(X_train_episode, dtype=float)
            X_test_episode = np.array(X_test_episode, dtype=float)
            y_train_episode = np.array(y_train_episode, dtype=float)
            y_test_episode = np.array(y_test_episode, dtype=float)

            print(f"📊 Training set shape: {X_train_episode.shape}, Test set shape: {X_test_episode.shape}")

            # Evaluate the model accuracy with the selected features
            print(f"⏳ Running Learner...", learner_model, X_train_episode, X_test_episode, y_train_episode, y_test_episode)
            episode_error = Learner(X_train_episode, X_test_episode, y_train_episode, y_test_episode)
            print(f"📉 Model error after selection: {episode_error}")

            # Compute reward based on improvement
            episode_reward = episode_last_error - episode_error
            print(f"🏆 Computed Reward: {episode_reward}")

            # Quantum RL update step
            print(f"Quantum state before update: {quantum_rl.state}")
            quantum_rl.unitary_update(episode_action, episode_reward)
            print(f"🔄 Updated Quantum State: {quantum_rl.state}")

            q_update(episode_current_state,episode_action,learning_rate, episode_reward)

            # Move to next state (selected feature)
            episode_current_state = episode_action
            episode_last_error = episode_error

        print("Q-learning End.")
        ########## Q-Learning End ##########

        # Save Q matrix periodically
        if i % 100 == 0:
            Q_save = pd.DataFrame(Q)
            Q_save.to_csv(f'{experiment_path}/Q.{i + 1}.csv')

        ########## End of Episode ##########
            
        print("Calculating policy...")
        # Calculate policy 
        policy_available_actions=list(np.arange(number_of_columns))
        policy_columns=[]
        policy_current_state=initial_state
        while len(policy_available_actions)>0:
            # Get available actions in the current state
            policy_available_actions = available_actions(number_of_columns,policy_columns,initial_state,policy_current_state, external_trashold, exploration=0)
            # # Sample next action to be performed
            if len(policy_available_actions)>0:
                policy_select_action = sample_next_action(policy_current_state, Q, policy_available_actions, exploration=0)
                # Update the episode_columns
                policy_columns=update_columns(policy_select_action,policy_columns)
                policy_current_state=policy_select_action

        print("Calculating policy_accuracy...")
        # Calculate policy_accuracy    
        if len(policy_columns) > 0:
            ## For training dataset ##
            policy_data = get_data(episode_size, policy=1, mode='train')
            X_policy, y_policy = data_separate(policy_data)
            X_train_main_policy, X_test_main_policy, y_train_policy, y_test_policy = data_split(X, y)
            print(f"1 Selected episode columns: {episode_columns}")
            X_train_policy, X_test_policy = update_X_train_X_test(policy_columns, X_train_main_policy, X_test_main_policy)
            print(f"1 X_train_episode shape: {X_train_episode.shape}, X_test_episode shape: {X_test_episode.shape}")

            # Print dataset shape before training to check if it's too large
            print(f"📊 Policy dataset shape: Train={X_train_policy.shape}, Test={X_test_policy.shape}")

            # Time the execution of the Learner function
            print("⏳ Running Learner...", learner_model, X_train_policy, X_test_policy, y_train_policy, y_test_policy)
            start_time = time.time()
            policy_error = Learner(X_train_policy, X_test_policy, y_train_policy, y_test_policy)
            end_time = time.time()

            # Print execution time
            print(f"✅ Learner execution time: {end_time - start_time:.2f} seconds")

            # Debugging y_pred
            print(f"🔍 Predictions (y_pred): {y_train_policy}")

            # Ensure predictions are valid
            if y_train_policy is None or len(y_train_policy) == 0:
                print("❌ Error: y_pred is empty or invalid.")
                policy_accuracy_train = 0
            else:
                policy_accuracy_train = 1 - policy_error

            ## For testing dataset ##
            policy_data = get_data(episode_size, policy=1, mode='test')
            X_policy, y_policy = data_separate(policy_data)
            X_train_main_policy, X_test_main_policy, y_train_policy, y_test_policy = data_split(X, y)
            X_train_policy, X_test_policy = update_X_train_X_test(policy_columns, X_train_main_policy, X_test_main_policy)

            if X_train_episode.size == 0 or X_test_episode.size == 0:
                print("❌ Error: X_train_episode or X_test_episode is empty. Skipping Learner call.")
                continue
            print(f"2 Selected episode columns: {episode_columns}")
            policy_error = Learner(X_train_policy, X_test_policy, y_train_policy, y_test_policy)
            print(f"2 X_train_episode shape: {X_train_episode.shape}, X_test_episode shape: {X_test_episode.shape}")

            # Debugging y_pred for testing
            print(f"🔍 Test Predictions (y_pred): {y_test_policy}")

            # Ensure predictions are valid
            if y_test_policy is None or len(y_test_policy) == 0:
                print("❌ Error: y_pred is empty or invalid.")
                policy_accuracy_test = 0
            else:
                policy_accuracy_test = 1 - policy_error
        else:
            policy_accuracy_train = 0
            policy_accuracy_test = 0
        #df=df.append({'episode':str(i+1), 'episode_columns':str(episode_columns),'policy_columns':str(policy_columns),'policy_accuracy_train':policy_accuracy_train,'policy_accuracy_test':policy_accuracy_test}, ignore_index=True)
        #new_row = pd.DataFrame([{'episode': str(i+1),
        #                  'episode_columns': str(episode_columns),
        #                  'policy_columns': str(policy_columns),
        #                  'policy_accuracy_train': policy_accuracy_train,
        #                  'policy_accuracy_test': policy_accuracy_test}])
        #df = pd.concat([df, new_row], ignore_index=True)
        df.loc[len(df)] = {
            'episode': str(i+1),
            'episode_columns': str(episode_columns),
            'policy_columns': str(policy_columns),
            'policy_accuracy_train': policy_accuracy_train,
            'policy_accuracy_test': policy_accuracy_test
        }

        #Prints
        print ("episode "+ str(i+1) +" start") 
        print ("episode columns: "+ str(episode_columns) + " epsilon: " + str(epsilon) + " learning rate: " + str(learning_rate) + " error: " +str(episode_error))
        print ("episode policy:" + str(policy_columns) + " train accuracy: " + str(policy_accuracy_train)  + " test accuracy: " +str(policy_accuracy_test)) 
        print ("episode "+ str(i+1) +" end") 
    ########## End of episode  ############
    #df.to_excel(writer, 'Experiment' + str(e))
    #df.to_excel(writer, sheet_name='Experiment' + str(e))
    df_plot=df[['episode','policy_accuracy_train','policy_accuracy_test']]
    plot=df_plot.plot()
    fig = plot.get_figure()
    fig.savefig('Experiments/'+ str(experiment) + '/plot_experiment_' + str(e) +'.png')
    
#writer.save()
#with pd.ExcelWriter('Experiments/'+ str(experiment) + '/df.xlsx') as writer:
#    df.to_excel(writer, sheet_name='Experiment' + str(e))

## for run time ##
stop = timeit.default_timer()
print (stop - start)
## for run time ##

Experiments 0 start
Number of columns: 5 (exclude class column)
Number of episodes: 24430.0
Initial state number: 5 (the last dummy column we have created)

🟢 Available features: [np.int64(0), np.int64(1), np.int64(2), np.int64(3), np.int64(4)]
🔎 Probabilities: [0.2 0.2 0.2 0.2 0.2]
🟢 Valid available features: [np.int64(0), np.int64(1), np.int64(2), np.int64(3), np.int64(4)]
🎯 Selected feature: 4
📌 Updated selected features: [np.int64(4)]
0 Selected episode columns: [np.int64(4)]
📊 Training set shape: (8, 1), Test set shape: (2, 1)
⏳ Running Learner... VQC [[38.]
 [ 8.]
 [38.]
 [38.]
 [38.]
 [38.]
 [38.]
 [38.]] [[38.]
 [38.]] [0. 0. 1. 1. 0. 0. 0. 1.] [1. 0.]
Weights before training: [[-0.63660422]
 [-0.007025  ]]
Bias before training: -1.7943418529245105
Iteration 1/10
Weights: [[-0.23838403]
 [ 0.39119518]]
Bias: -1.3961216723669767
Iteration 2/10
Weights: [[-0.2306359 ]
 [ 0.39894332]]
Bias: -1.388373534195576
Iteration 3/10
Weights: [[-0.23030892]
 [ 0.39927029]]
Bias: -1.38804655

In [81]:
def test_quantum_learner():
    """Test the QuantumLearner class with basic training and prediction."""
    np.random.seed(42)

    # Sample training data (binary classification, {-1, 1} labels)
    X_train = np.array([
        [0.1, 0.2], 
        [0.2, 0.3], 
        [0.3, 0.4], 
        [0.4, 0.5]
    ])
    y_train = np.array([1, -1, 1, -1])  # Binary labels

    # Initialize QuantumLearner
    ql = QuantumLearner(num_layers=2)

    # Test fitting the model
    print("🔍 Testing model training...")
    ql.fit(X_train, y_train, num_it=10, learning_rate=0.1)  # Train the model
    print("✅ Model trained successfully.")

    # Ensure the weights and bias are updated
    assert ql.weights is not None, "Weights were not initialized."
    assert ql.bias is not None, "Bias was not initialized."
    print(f"Weights after training: {ql.weights}")
    print(f"Bias after training: {ql.bias}")

    # Test prediction on new samples
    X_test = np.array([
        [0.15, 0.25],
        [0.35, 0.45]
    ])
    print("🔍 Testing model prediction...")
    predictions = ql.predict(X_test)
    print(f"Predictions: {predictions}")

    # Ensure output shape matches input shape
    assert predictions.shape == (X_test.shape[0],), "Prediction output shape mismatch."

    # Ensure predictions are valid (binary labels {-1, 1})
    assert all(pred in [-1, 1] for pred in predictions), "Predictions are not binary labels."

    print("✅ All tests passed successfully!")

# Run the test
test_quantum_learner()

🔍 Testing model training...
✅ Model trained successfully.
Weights after training: [[-0.35122669  2.87076592]
 [ 1.76817711  0.29060434]]
Bias after training: -2.2389734380856288
🔍 Testing model prediction...
Predictions: [-1. -1.]
✅ All tests passed successfully!


In [82]:
def test_quantum_learner_multiple_episodes():
    """Test the QuantumLearner class with multiple episodes to observe improvement."""
    np.random.seed(42)

    # Sample training data (binary classification, {-1, 1} labels)
    X_train = np.array([
        [0.1, 0.2], 
        [0.2, 0.3], 
        [0.3, 0.4], 
        [0.4, 0.5]
    ])
    y_train = np.array([1, -1, 1, -1])  # Binary labels

    # Initialize QuantumLearner
    ql = QuantumLearner(num_layers=2)

    # Parameters for multiple episodes
    num_episodes = 10
    epsilon = 0.9  # Initial exploration rate
    epsilon_decay = 0.1  # Decay rate for epsilon
    learning_rate = 0.1  # Reduced learning rate
    rewards = []

    for episode in range(num_episodes):
        print(f"\n🔄 Episode {episode + 1} start")

        # Simulate feature selection process
        selected_features = []
        available_features = list(range(X_train.shape[1]))
        last_error = 0.5  # Initialize error

        while available_features:
            # Exploration vs. exploitation
            explore = np.random.rand() < epsilon
            if explore:
                action = np.random.choice(available_features)  # Randomly select a feature
                print(f"🧭 Exploration: Selected feature {action}")
            else:
                action = available_features[0]  # Exploit (select the first available feature)
                print(f"📈 Exploitation: Selected feature {action}")

            # Update selected features and remove from available features
            selected_features.append(action)
            available_features.remove(action)

            # Simulate training with selected features
            X_train_selected = X_train[:, selected_features]

            # Pad the input to match the expected number of features
            num_features = len(selected_features)
            X_train_padded = np.zeros((X_train_selected.shape[0], num_features))
            X_train_padded[:, :len(selected_features)] = X_train_selected

            # Train the learner
            ql.fit(X_train_padded, y_train, num_it=50, learning_rate=learning_rate)
            predictions = ql.predict(X_train_padded)
            print(f"Predictions: {predictions}")

            # Validate predictions
            assert predictions.shape == (X_train_padded.shape[0],), "Prediction output shape mismatch."
            assert all(pred in [-1, 1] for pred in predictions), "Predictions are not binary labels."

            # Calculate error and reward
            error = 1 - np.mean(predictions == y_train)
            reward = last_error - error
            print(f"Error: {error}, Reward: {reward}")

            # Append reward and update last error
            rewards.append(reward)
            last_error = error

            print(f"🎯 Features: {selected_features}, Error: {error:.4f}, Reward: {reward:.4f}")

        # Decay epsilon to reduce exploration over time
        epsilon = max(0.1, epsilon - epsilon_decay)
        print(f"🔽 Epsilon after decay: {epsilon:.4f}")

        print(f"✅ Episode {episode + 1} end")

    # Check if rewards are not constant
    assert len(rewards) > 0, "No rewards were calculated."
    assert len(set(rewards)) > 1, "Rewards are constant over episodes."

    print("\n✅ All tests passed successfully!")

# Run the test
test_quantum_learner_multiple_episodes()


🔄 Episode 1 start
🧭 Exploration: Selected feature 0
Predictions: [-1. -1. -1. -1.]
Error: 0.5, Reward: 0.0
🎯 Features: [np.int64(0)], Error: 0.5000, Reward: 0.0000
🧭 Exploration: Selected feature 1
Predictions: [ 1. -1. -1. -1.]
Error: 0.25, Reward: 0.25
🎯 Features: [np.int64(0), np.int64(1)], Error: 0.2500, Reward: 0.2500
🔽 Epsilon after decay: 0.8000
✅ Episode 1 end

🔄 Episode 2 start
🧭 Exploration: Selected feature 1
Predictions: [-1. -1. -1. -1.]
Error: 0.5, Reward: 0.0
🎯 Features: [np.int64(1)], Error: 0.5000, Reward: 0.0000
🧭 Exploration: Selected feature 0
Predictions: [-1. -1. -1. -1.]
Error: 0.5, Reward: 0.0
🎯 Features: [np.int64(1), np.int64(0)], Error: 0.5000, Reward: 0.0000
🔽 Epsilon after decay: 0.7000
✅ Episode 2 end

🔄 Episode 3 start
🧭 Exploration: Selected feature 0
Predictions: [-1. -1. -1. -1.]
Error: 0.5, Reward: 0.0
🎯 Features: [np.int64(0)], Error: 0.5000, Reward: 0.0000
🧭 Exploration: Selected feature 1
Predictions: [ 1. -1. -1. -1.]
Error: 0.25, Reward: 0.25
🎯 

In [38]:
def test_unitary_update():
    """Test the unitary_update method of QuantumFeatureSelection."""
    num_features = 4
    learning_rate = 0.1
    qfs = QuantumFeatureSelection(num_features, learning_rate)

    # Test initialization
    print("Testing initialization...")
    initial_state = qfs.state.copy()
    assert initial_state.shape == (2**num_features,), "State shape is incorrect."
    assert np.isclose(np.sum(np.abs(initial_state)**2), 1.0), "State is not normalized."
    print("✅ Initialization test passed.")

    # Test unitary update with positive reward
    print("Testing unitary update with positive reward...")
    action = 1  # Example action
    reward = 1  # Positive reward
    qfs.unitary_update(action, reward)
    updated_state = qfs.state.copy()
    assert np.isclose(np.sum(np.abs(updated_state)**2), 1.0), "State is not normalized after update."
    assert not np.allclose(updated_state, initial_state), "State did not change after positive reward update."
    print("✅ Unitary update test (positive reward) passed.")

    # Test unitary update with negative reward
    print("Testing unitary update with negative reward...")
    initial_state = qfs.state.copy()
    reward = -1  # Negative reward
    qfs.unitary_update(action, reward)
    updated_state = qfs.state.copy()
    assert np.isclose(np.sum(np.abs(updated_state)**2), 1.0), "State is not normalized after negative reward update."
    assert not np.allclose(updated_state, initial_state), "State did not change after negative reward update."
    print("✅ Unitary update test (negative reward) passed.")

    # Debugging probabilities after updates
    print("Testing probabilities after updates...")
    available_features = [0, 1, 2, 3]
    probabilities = qfs.measure(available_features, return_probabilities=True)
    print(f"🔎 Probabilities after updates: {probabilities}")
    assert np.isclose(np.sum(probabilities), 1.0), "Probabilities are not normalized."
    print("✅ Probability test passed.")

    print("\n🎉 All unitary_update tests passed successfully!")

# Run the test
test_unitary_update()

Testing initialization...
✅ Initialization test passed.
Testing unitary update with positive reward...
🔍 Quantum state after update: [0.23719277+0.j 0.23719277+0.j 0.23719277+0.j 0.23719277+0.j
 0.26218236+0.j 0.26218236+0.j 0.26218236+0.j 0.26218236+0.j
 0.23719277+0.j 0.23719277+0.j 0.23719277+0.j 0.23719277+0.j
 0.26218236+0.j 0.26218236+0.j 0.26218236+0.j 0.26218236+0.j]
✅ Unitary update test (positive reward) passed.
Testing unitary update with negative reward...
🔍 Quantum state after update: [0.25+0.j 0.25+0.j 0.25+0.j 0.25+0.j 0.25+0.j 0.25+0.j 0.25+0.j 0.25+0.j
 0.25+0.j 0.25+0.j 0.25+0.j 0.25+0.j 0.25+0.j 0.25+0.j 0.25+0.j 0.25+0.j]
✅ Unitary update test (negative reward) passed.
Testing probabilities after updates...
🔎 Probabilities after updates: [0.25 0.25 0.25 0.25]
✅ Probability test passed.

🎉 All unitary_update tests passed successfully!


In [5]:
def test_quantum_feature_selection():
    """Test the QuantumFeatureSelection class with quantum operations."""
    num_features = 5
    learning_rate = 0.1
    qfs = QuantumFeatureSelection(num_features, learning_rate)

    # Test initialization
    print("Testing initialization...")
    expected_state_size = 2**qfs.num_qubits  # Use num_qubits instead of num_features
    assert qfs.state.shape == (expected_state_size,), "State shape is incorrect."
    assert np.isclose(np.sum(np.abs(qfs.state)**2), 1.0), "State is not normalized."
    print("✅ Initialization test passed.")

    # Test measurement
    print("Testing measurement...")
    available_features = list(range(num_features))  # Dynamically set based on num_features
    print("Available features:", available_features)
    for _ in range(10):
        action = qfs.measure(available_features)
        assert action in available_features, f"Invalid action selected: {action}"
    print("✅ Measurement test passed.")

    # Test unitary update with positive reward
    print("Testing unitary update with positive reward...")
    initial_state = qfs.state.copy()
    action = 1  # Example action
    qfs.unitary_update(action, reward=1)
    updated_state = qfs.state.copy()
    assert np.isclose(np.sum(np.abs(updated_state)**2), 1.0), "State is not normalized after update."
    assert not np.allclose(updated_state, initial_state), "State did not change after positive reward update."
    print("✅ Unitary update test (positive reward) passed.")

    # Test probabilities after update
    print("Testing probabilities after update...")
    probabilities = qfs.measure(available_features, return_probabilities=True)
    print(f"🔎 Probabilities after update: {probabilities}")
    assert np.isclose(np.sum(probabilities), 1.0), "Probabilities are not normalized."
    assert probabilities[action] > 0.25, "Probability of the rewarded action did not increase."
    print("✅ Probability test passed.")

    print("\n🎉 All tests passed successfully!")

# Run the test
test_quantum_feature_selection()

Testing initialization...
✅ Initialization test passed.
Testing measurement...
Available features: [0, 1, 2, 3, 4]
✅ Measurement test passed.
Testing unitary update with positive reward...
🔍 Quantum state after update: [0.36892948+0.j 0.40779826+0.j 0.33376543+0.j 0.36892948+0.j
 0.33376543+0.j 0.36892948+0.j 0.30195299+0.j 0.33376543+0.j]
✅ Unitary update test (positive reward) passed.
Testing probabilities after update...
🔎 Probabilities after update: [0.2058153  0.25146739 0.16845101 0.2058153  0.16845101]
✅ Probability test passed.

🎉 All tests passed successfully!


In [2]:
class QuantumFeatureSelection:
    def __init__(self, num_features, learning_rate=0.1):
        self.num_features = num_features
        self.learning_rate = learning_rate

        # Dynamically determine the number of qubits needed
        self.num_qubits = ceil(log2(num_features))  # Minimum qubits to encode num_features
        self.device = qml.device("default.qubit", wires=self.num_qubits)

        # Decision register is all qubits (since we only need to encode features)
        self.decision_register = list(range(self.num_qubits))

        # Initialize the quantum state
        self.state = self.initialize_state()

    def initialize_state(self):
        """Initialize the state as an equal superposition over all basis states."""
        @qml.qnode(self.device)
        def circuit():
            init_state = np.ones(2**self.num_qubits) / np.sqrt(2**self.num_qubits)
            qml.templates.embeddings.AmplitudeEmbedding(
                init_state, wires=range(self.num_qubits), normalize=False
            )
            return qml.state()

        state_vector = circuit()
        return state_vector / np.linalg.norm(state_vector)

    def measure(self, available_features, return_probabilities=False):
        """
        Measure the quantum state by computing the probability associated with each action.
        Each action (0,1,2,..., num_features-1) is identified with a particular projection on the decision register.
        """
        # Filter out any available features that are out of bounds.
        valid_available_features = [a for a in available_features if a < self.num_features]
        if not valid_available_features:
            raise ValueError("No valid available features provided to measurement.")

        probabilities = np.abs(self.state)**2
        feature_probs = np.zeros(self.num_features)
        for idx, prob in enumerate(probabilities):
            bits = format(idx, f"0{self.num_qubits}b")
            decision_bits = bits[-len(self.decision_register):]  # Use only decision register bits
            feature = int(decision_bits, 2)
            # Only accumulate if the feature is in the valid available list.
            if feature in valid_available_features:
                feature_probs[feature] += prob

        total = np.sum(feature_probs[valid_available_features])
        feature_probs_normalized = np.array([feature_probs[i] / total for i in valid_available_features])
        
        if return_probabilities:
            return feature_probs_normalized

        selected_feature = np.random.choice(valid_available_features, p=feature_probs_normalized)
        return selected_feature

    def unitary_update(self, action, reward):
        """
        Update the quantum state with a reward-driven unitary operation.
        
        Here we interpret the action as a number in {0,1,...,num_features-1}, which is mapped to a binary string.
        Then we update the decision register by applying RY rotations based on the reward.
        """
        # Convert action (e.g. 1) to a binary string of length num_qubits
        target = format(action, f"0{len(self.decision_register)}b")
        
        @qml.qnode(self.device)
        def update_circuit():
            # Begin by embedding the current state into the circuit.
            qml.templates.embeddings.AmplitudeEmbedding(
                self.state, wires=range(self.num_qubits), normalize=False
            )
            # For each wire in the decision register, apply a reward-driven rotation.
            # If the target bit is '1', we rotate by +learning_rate*reward; if '0', by -learning_rate*reward.
            for i, wire in enumerate(self.decision_register):
                angle = self.learning_rate * reward if target[i] == '1' else -self.learning_rate * reward
                qml.RY(angle, wires=wire)
            return qml.state()
        
        # Update and renormalize the state.
        self.state = update_circuit()
        self.state = self.state / np.linalg.norm(self.state)
        print(f"🔍 Quantum state after update: {self.state}")