In [1]:
# Importing the Qiskit Lab. Drive:
import sys
sys.path.insert(0, '..')
from qiskit import BasicAer
from qiskit.utils import QuantumInstance, algorithm_globals
from qiskit.algorithms.optimizers import COBYLA
from qiskit.algorithms.optimizers import SPSA
from qiskit.circuit.library import TwoLocal, ZZFeatureMap
from qiskit_machine_learning.algorithms import VQC
from qiskit_machine_learning.datasets import ad_hoc_data



In [2]:
# Listing 2.1: Load the data from the csv‐files
import pandas as pd

train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [3]:
# Listing 2.2: The shapes of the Titanic datasets
print('train has {} rows and {} columns'.format(*train.shape))
print('test has {} rows and {} columns'.format(*test.shape))

train has 891 rows and 12 columns
test has 418 rows and 11 columns


In [4]:
################################################
## 2. Data Preparation and Cleaning
################################################

In [5]:
# Listing 2.6: Cope with missing values
# option 1
# We only have two passengers without it. This is bearable
train = train.dropna(subset=["Embarked"])

# option 2
# We only have very few information about the cabin, let's drop it
train = train.drop("Cabin", axis=1) 

# option 3
# The age misses quite a few times. But intuition
# says it might be important for someone's chance to survive.
mean = train["Age"].mean()
train["Age"] = train["Age"].fillna(mean)

train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 889 entries, 0 to 890
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  889 non-null    int64  
 1   Survived     889 non-null    int64  
 2   Pclass       889 non-null    int64  
 3   Name         889 non-null    object 
 4   Sex          889 non-null    object 
 5   Age          889 non-null    float64
 6   SibSp        889 non-null    int64  
 7   Parch        889 non-null    int64  
 8   Ticket       889 non-null    object 
 9   Fare         889 non-null    float64
 10  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(4)
memory usage: 83.3+ KB


In [6]:
# Listing 2.7: Unique values in columns
print('There are {} different (unique) PassengerIds in the data'
    .format(train["PassengerId"].nunique()))
print('There are {} different (unique) names in the data'
    .format(train["Name"].nunique()))
print('There are {} different (unique) ticket numbers in the data'
    .format(train["Ticket"].nunique()))

There are 889 different (unique) PassengerIds in the data
There are 889 different (unique) names in the data
There are 680 different (unique) ticket numbers in the data


In [7]:
# Listing 2.8: Remove identifying data
train = train.drop("PassengerId", axis=1)
train = train.drop("Name", axis=1) 
train = train.drop("Ticket", axis=1) 

train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 889 entries, 0 to 890
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Survived  889 non-null    int64  
 1   Pclass    889 non-null    int64  
 2   Sex       889 non-null    object 
 3   Age       889 non-null    float64
 4   SibSp     889 non-null    int64  
 5   Parch     889 non-null    int64  
 6   Fare      889 non-null    float64
 7   Embarked  889 non-null    object 
dtypes: float64(2), int64(4), object(2)
memory usage: 62.5+ KB


In [8]:
# Listing 2.9: Transforming textual data into numbers
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

for col in ['Sex', 'Embarked']:
    le.fit(train[col])
    train[col] = le.transform(train[col])

train.head()


Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,1,22.0,1,0,7.25,2
1,1,1,0,38.0,1,0,71.2833,0
2,1,3,0,26.0,0,0,7.925,2
3,1,1,0,35.0,1,0,53.1,2
4,0,3,1,35.0,0,0,8.05,2


In [9]:
# Listing 2.11: Normalization of the data.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(train)
train = scaler.transform(train)

print('The minimum value is {} and the maximum value is {}'
    .format(train.min(), train.max()))

The minimum value is 0.0 and the maximum value is 1.0


In [10]:
################################################
## Separating train and test sets
###################################################

In [11]:
# Listing 2.12: Separating input from labels and training from testing sets
from sklearn.model_selection import train_test_split

input_data = train[:, 1:8]
labels = train[:, 0]

train_input, test_input, train_labels, test_labels = train_test_split(
    input_data, labels, test_size =0.2)

print('We have {} training and {} testing rows'.format(train_input.shape[0], test_input.shape[0]))
print('There are {} input columns'.format(train_input.shape[1]))

We have 711 training and 178 testing rows
There are 7 input columns


In [12]:
# Listing 2.13: Save the data to the filesystem
import numpy as np

with open('train.npy', 'wb') as f:
    np.save(f, train_input)
    np.save(f, train_labels)

with open('test.npy', 'wb') as f:
    np.save(f, test_input)
    np.save(f, test_labels)
#CAPTION Save the data to the filesystem

In [13]:
################################################
# 3. Firts Classifiers
## 3.1 A Random Classifier
################################################

In [14]:
# se modificó el formato de las etiquetas, en forma de matrix
labels = np.zeros((train_labels.shape[0],2))
for i in range(train_labels.shape[0]):
    if train_labels[i] == 0:
        labels[i,0] = 1
    else:
        labels[i,1] = 1

train_labels2 = labels

In [15]:
# definicion del clasificar cuántico variacional

 # semilla aleatoria
seed = 1376          
algorithm_globals.random_seed = seed
# dimensiones de cada característica
feature_dim = 7  
training_size = 711
test_size = 178

# mapeo de características usando zzfeaturemap
feature_map = ZZFeatureMap(feature_dimension=feature_dim, reps=2, entanglement="linear")
# Usando la función twolocal para relizar el entrelazamiento en el circuito
ansatz = TwoLocal(feature_map.num_qubits, ['ry', 'rz'], 'cz', reps=3)
# definir el clasificador cuántico variacional 
vqc = VQC(feature_map=feature_map,
      ansatz=ansatz,
      optimizer=SPSA(maxiter=100),  # se utiliza como optimizador SPSA
      quantum_instance=QuantumInstance(BasicAer.get_backend('statevector_simulator'),
                                       shots=1024,
                                       seed_simulator=seed,
                                       seed_transpiler=seed)
      )
# entrenamiento del modelo clasificador
vqc.fit(train_input, train_labels2)


  vqc = VQC(feature_map=feature_map,


<qiskit_machine_learning.algorithms.classifiers.vqc.VQC at 0x7f64b3d10340>

In [16]:
# definicion de la función clasificadora
def predict_quantum(item):
    pred = vqc.predict(item)
    if (pred[0,0] == 1):
        return 0
    else:
        return 1

In [17]:
# Listing 2.15: The classification runner
def run(f_classify, x):
    return list(map(f_classify, x))

In [18]:
################################################
## 5. Giving the Classification Report
################################################

In [19]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

# Listing 2.22: The specificity and the npv
def specificity(matrix):
    return matrix[0][0]/(matrix[0][0]+matrix[0][1]) if (matrix[0][0]+matrix[0][1] > 0) else 0

def npv(matrix):
    return matrix[0][0]/(matrix[0][0]+matrix[1][0]) if (matrix[0][0]+matrix[1][0] > 0) else 0


In [20]:
# Listing 2.31: A reusable function to unmask the classifier
def classifier_report(name, run, classify, input, labels):
    cr_predictions = run(classify, input)
    cr_cm = confusion_matrix(labels, cr_predictions)

    cr_precision = precision_score(labels, cr_predictions)
    cr_recall = recall_score(labels, cr_predictions)
    cr_specificity = specificity(cr_cm)
    cr_npv = npv(cr_cm)
    cr_level = 0.25*(cr_precision + cr_recall + cr_specificity + cr_npv)

    print('The precision score of the {} classifier is {:.2f}'
        .format(name, cr_precision))
    print('The recall score of the {} classifier is {:.2f}'
        .format(name, cr_recall))
    print('The specificity score of the {} classifier is {:.2f}'
        .format(name, cr_specificity))
    print('The npv score of the {} classifier is {:.2f}'
        .format(name, cr_npv))
    print('The information level is: {:.2f}'
        .format(cr_level))
    print('Matriz de confusión: ')
    print(cr_cm)


In [21]:
# Listing 2.32: The report of the random classifier
classifier_report(
    "Clasificador cuántico variacional", 
    run,
    predict_quantum,
    train_input,
    train_labels)

The precision score of the Clasificador cuántico variacional classifier is 0.61
The recall score of the Clasificador cuántico variacional classifier is 0.22
The specificity score of the Clasificador cuántico variacional classifier is 0.91
The npv score of the Clasificador cuántico variacional classifier is 0.65
The information level is: 0.60
Matriz de confusión: 
[[399  38]
 [214  60]]


In [22]:
# se modificó el formato de las etiquetas, en forma de matrix
labels = np.zeros((test_labels.shape[0],2))
for i in range(test_labels.shape[0]):
    if test_labels[i] == 0:
        labels[i,0] = 1
    else:
        labels[i,1] = 1

test_labels2 = labels
# se utiliza la métrica de desempeño vqc.score para determinar la exactítud del modelo
score = vqc.score(test_input, test_labels2)
print(f"Testing accuracy: {score:0.2f}")

Testing accuracy: 0.65
