### MANDATORY ASSIGNMENT 2

In [2]:
from sklearn import datasets

In [3]:
import numpy as np

In [4]:
# Load the Iris dataset that ships with scikit-learn.
iris = datasets.load_iris()

In [5]:
# Feature matrix and integer class labels of the Iris dataset.
X, Y = iris.data, iris.target

#### Task 1) data exploration

In [6]:
# Number of samples (rows) in the feature matrix.
X.shape[0]

150

In [7]:
# Shapes of the feature matrix and the label vector.
print(f"{X.shape} {Y.shape}")

(150, 4) (150,)


In [8]:
# Global value range of the features (over all columns) and of the labels.
print(X.min(), X.max())
print(Y.min(), Y.max())

0.1 7.9
0 2


In [11]:
# First we normalize the data to the range [0, pi], then we implement angle encoding.
from sklearn.preprocessing import MinMaxScaler
from qiskit import QuantumCircuit, transpile, assemble
from qiskit_aer import Aer, AerSimulator
from qiskit.visualization import plot_histogram
from qiskit.circuit import Parameter
from qiskit_algorithms.optimizers import SPSA
import random
from sklearn.metrics import log_loss # loss function
from sklearn.metrics import accuracy_score # accuracy

In [10]:
# Rescale every feature column to [0, pi] so that each value can be used
# directly as an RX rotation angle by the angle encoding.
scaler = MinMaxScaler(feature_range=(0, np.pi))
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/validation/test split further down, so the min/max of held-out samples
# leaks into the preprocessing. X is also overwritten in place by this cell.
X = scaler.fit_transform(X)

In [24]:
class QuantumMachineLearning:
    """Variational quantum classifier trained with SPSA on a shot-based simulator.

    Each sample is angle-encoded with RX rotations (one feature per qubit),
    passed through ``num_layers`` blocks of trainable RY rotations followed by
    a linear chain of CNOTs, and measured. Every measured bitstring is mapped
    to a class via ``int(bitstring, 2) % num_classes``; the relative class
    frequencies over all shots are used as predicted probabilities.

    Parameters
    ----------
    X_train, y_train
        Training samples (features already scaled to rotation angles) and their
        integer labels in ``range(num_classes)``.
    num_qubits
        Number of qubits; one feature is encoded per qubit.
    num_layers
        Number of RY + CNOT-chain layers.
    training_shots
        Shots per circuit while evaluating the loss.
    prediction_shots
        Shots per circuit when predicting.
    num_classes
        Number of target classes (labels ``0 .. num_classes - 1``).
    verbose
        When true, print the parameters and loss of every loss evaluation and
        the final optimization result.
    """

    def __init__(self, X_train, y_train, num_qubits = 4, num_layers = 3, training_shots = 100, prediction_shots = 100000, num_classes = 3, verbose = True):
        if num_qubits < 1:
            raise ValueError("num_qubits must be at least 1")
        if num_layers < 0:
            raise ValueError("num_layers must be non-negative")
        if training_shots < 1 or prediction_shots < 1:
            raise ValueError("training_shots and prediction_shots must be at least 1")
        if num_classes < 1:
            raise ValueError("num_classes must be at least 1")
        if 2 ** num_qubits < num_classes:
            # With fewer bitstrings than classes some classes could never be predicted.
            raise ValueError("num_qubits is too small to represent num_classes outcomes")

        self.X_train = X_train
        self.y_train = y_train
        self.num_qubits = num_qubits
        self.num_layers = num_layers
        self.training_shots = training_shots
        self.prediction_shots = prediction_shots #more prediction shots as it is only run once
        self.num_classes = num_classes
        self.verbose = verbose
        # Filled in by SPSA_optimize() / load_parameters().
        self.optimized_params = None
        self.min_loss = None
        # The simulator is created lazily and then reused for every circuit.
        self._backend = None

    def angle_encoding(self, qc, sample):
        """Apply ``RX(sample[i])`` to qubit ``i`` for every qubit of ``qc``.

        ``sample`` must provide at least as many values as ``qc`` has qubits.
        """
        for qubit in range(len(qc.qubits)):
            qc.rx(sample[qubit], qubit)

    def real_amplitudes(self, data_point, parameters):
        """Build the (unmeasured) classifier circuit for one sample.

        The circuit consists of the angle encoding of ``data_point`` followed by
        ``num_layers`` layers; each layer applies one RY rotation per qubit
        (consuming ``parameters`` in order) and a chain of CNOTs between
        neighbouring qubits.

        Raises
        ------
        ValueError
            If fewer than ``num_qubits * num_layers`` parameters are given.
        """
        required = self.num_qubits * self.num_layers
        if len(parameters) < required:
            raise ValueError(
                f"expected at least {required} parameters, got {len(parameters)}"
            )

        qc = QuantumCircuit(self.num_qubits)
        self.angle_encoding(qc, data_point)

        param_index = 0
        for _ in range(self.num_layers):
            for qubit in range(self.num_qubits):
                qc.ry(parameters[param_index], qubit)
                param_index += 1
            qc.barrier()

            for qubit in range(self.num_qubits - 1):
                qc.cx(qubit, qubit + 1)
            qc.barrier()

        return qc

    def data_decoding(self, output):
        """Map a measured bitstring to a class index (value modulo ``num_classes``).

        Spaces, which separate classical registers in count keys, are ignored.
        """
        return int(output.replace(" ", ""), 2) % self.num_classes

    def _get_backend(self):
        """Return the shared statevector simulator, creating it on first use."""
        backend = getattr(self, "_backend", None)
        if backend is None:
            backend = AerSimulator(method = 'statevector')
            self._backend = backend
        return backend

    def _require_parameters(self):
        """Return the optimized parameters or raise if the model is untrained."""
        params = getattr(self, "optimized_params", None)
        if params is None:
            raise RuntimeError(
                "parameters are not optimized yet; call SPSA_optimize() or load_parameters() first"
            )
        return params

    def _class_probabilities(self, data_point, parameters, shots):
        """Run the circuit for one sample and return the class frequencies.

        Returns a list of length ``num_classes`` that sums to 1.
        """
        qc = self.real_amplitudes(data_point, parameters)
        qc.measure_all()

        backend = self._get_backend()
        tqc = transpile(qc, backend)
        counts = backend.run(tqc, shots=shots).result().get_counts(tqc)

        # Accumulate integer counts and normalise once to avoid float drift.
        class_counts = [0] * self.num_classes
        for output, count in counts.items():
            class_counts[self.data_decoding(output)] += count

        total = sum(class_counts)
        return [count / total for count in class_counts]

    def objective_function(self, updated_params):
        """Return the log-loss of the training set for ``updated_params``."""
        predicted_probabilities = [
            self._class_probabilities(x, updated_params, self.training_shots)
            for x in self.X_train
        ]

        # Pass the labels explicitly so the loss is defined even when a class
        # does not occur in y_train.
        logloss = log_loss(
            self.y_train,
            predicted_probabilities,
            labels=list(range(self.num_classes)),
        )

        if self.verbose:
            print(f"Parameters: {updated_params} loss: {logloss}")
        return logloss

    def SPSA_optimize(self, maxiter = 100, seed = 42):
        """Optimize the circuit parameters with SPSA.

        The initial parameters are drawn uniformly from [0, pi) using ``seed``.
        The result is stored in ``self.optimized_params`` and ``self.min_loss``.
        """
        rng = np.random.default_rng(seed)
        initial_parameters = rng.uniform(0, np.pi, self.num_qubits * self.num_layers)
        optimizer = SPSA(maxiter=maxiter)
        # Optimize the parameters
        optimized = optimizer.minimize(fun=self.objective_function, x0=initial_parameters)

        if self.verbose:
            print("Optimized Parameters:", optimized.x)
            print("Minimum Loss:", optimized.fun)
        self.optimized_params = optimized.x
        self.min_loss = optimized.fun

    def save_parameters(self, path = 'optimized_parameters.txt'):
        """Write the optimized parameters to ``path``, one value per line.

        The values are written at full floating-point precision so that
        ``load_parameters`` (or ``numpy.loadtxt``) restores them exactly;
        ``str(array)`` would round to the print precision and elide long arrays.

        Raises
        ------
        RuntimeError
            If the parameters have not been optimized or loaded yet.
        """
        np.savetxt(path, np.asarray(self._require_parameters(), dtype=float))

    def load_parameters(self, path = 'optimized_parameters.txt'):
        """Load parameters previously written by ``save_parameters``."""
        self.optimized_params = np.atleast_1d(np.loadtxt(path))

    def predict(self, data_point):
        """Return the most frequently measured class for ``data_point``.

        Ties are resolved in favour of the lowest class index.

        Raises
        ------
        RuntimeError
            If the parameters have not been optimized or loaded yet.
        """
        params = self._require_parameters()
        probabilities = self._class_probabilities(data_point, params, self.prediction_shots)

        # Determine the predicted class by choosing the class with the highest probability
        return max(range(self.num_classes), key=probabilities.__getitem__)

    def predict_dataset(self, X):
        """Return the list of predicted classes for every sample in ``X``."""
        return [self.predict(data_point) for data_point in X]

    def performance(self, y_test, X_test):
        """Return the accuracy of the model on ``X_test`` against ``y_test``."""
        return accuracy_score(y_test, self.predict_dataset(X_test))
    
    
    

In [15]:
from sklearn.model_selection import train_test_split

In [16]:
# 70% training; the remaining 30% is held back and halved below.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, Y, test_size=0.3, random_state=42
)
# 15% validation, 15% testing
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

In [25]:
# Build the classifier on the training split with the default
# hyper-parameters (4 qubits, 3 layers, 100 shots per loss evaluation).
test = QuantumMachineLearning(X_train, y_train)
# Run SPSA (100 iterations); every loss evaluation prints its parameters and loss.
test.SPSA_optimize()

Parameters: [2.63145464 1.17877728 2.49736492 1.99084628 0.49586686 2.86500801
 2.5911909  2.66949385 0.20248085 1.61492915 0.96489635 2.71151808] loss: 1.0842891064196707
Parameters: [2.23145464 1.57877728 2.89736492 2.39084628 0.09586686 3.26500801
 2.1911909  2.26949385 0.60248085 1.21492915 1.36489635 3.11151808] loss: 1.1063533781417223
Parameters: [2.63145464 1.17877728 2.89736492 1.99084628 0.49586686 3.26500801
 2.5911909  2.66949385 0.20248085 1.21492915 0.96489635 2.71151808] loss: 1.0139889539639317
Parameters: [2.23145464 1.57877728 2.49736492 2.39084628 0.09586686 2.86500801
 2.1911909  2.26949385 0.60248085 1.61492915 1.36489635 3.11151808] loss: 1.1038922440614394
Parameters: [2.63145464 1.17877728 2.49736492 1.99084628 0.09586686 2.86500801
 2.1911909  2.26949385 0.60248085 1.21492915 1.36489635 3.11151808] loss: 1.1296802811815068
Parameters: [2.23145464 1.57877728 2.89736492 2.39084628 0.49586686 3.26500801
 2.5911909  2.66949385 0.20248085 1.61492915 0.96489635 2.711

In [26]:
# Persist the optimized parameters to 'optimized_parameters.txt'.
test.save_parameters()
# Accuracy on the held-out test split (note the argument order: labels first).
test.performance(y_test, X_test)

0.9130434782608695