In [145]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [146]:
train = pd.read_csv(r'C:\Users\kamata3\Work\QML\titanic\train.csv')
test = pd.read_csv(r'C:\Users\kamata3\Work\QML\titanic\test.csv')
print('trian has {} rows and {} columns' .format(*train.shape))
print('Test has {} rows and {}  columns'.format(*test.shape))
train.info()
test.info()

trian has 891 rows and 12 columns
Test has 418 rows and 11  columns
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       -----------

In [147]:
# data cleaing and preparation
train = train.dropna(subset=['Embarked'])
train = train.drop('Cabin', axis = 1)
mean = train['Age'].mean()
train['Age'] = train['Age'].fillna(mean)

train = train.drop("PassengerId", axis=1)
train = train.drop("Name", axis=1)
train = train.drop("Ticket", axis=1)

# handling text and catogorical data
le = LabelEncoder()
for col in ['Sex', 'Embarked']:
    le.fit(train[col])
    train[col] = le.transform(train[col])
train.head()

#train.info()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,1,22.0,1,0,7.25,2
1,1,1,0,38.0,1,0,71.2833,0
2,1,3,0,26.0,0,0,7.925,2
3,1,1,0,35.0,1,0,53.1,2
4,0,3,1,35.0,0,0,8.05,2


In [148]:
print('The maximum age is {}'.format(train["Age"].max()))
print('The maximum fare is {}'.format(train["Fare"].max()))

The maximum age is 80.0
The maximum fare is 512.3292


In [149]:
# Normalization
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(train)
train = scaler.transform(train)
print('The minimum value is {} and the maximum value is {}'.format(train.min(), train.max()))

The minimum value is 0.0 and the maximum value is 1.0


In [150]:
from sklearn.model_selection import train_test_split
input_data = train[:, 1:8]
labels = train[:, 0]

train_input, test_input, train_labels, test_labels = train_test_split(input_data, labels, test_size = 0.2)
print('We have {} training and {} testing rows'.format(train_input.shape[0], test_input.shape[0]))
print('There are {} input columns'.format(train_input.shape[1]))

We have 711 training and 178 testing rows
There are 7 input columns


In [151]:
# save the data for future use
import numpy as np
with open('titanic/train.npy', 'wb') as f:
    np.save(f, train_input)
    np.save(f, train_labels)
with open('titanic/test.npy', 'wb') as f:
    np.save(f, test_input)
    np.save(f, test_labels)

In [152]:
with open('titanic/train.npy', 'rb') as f:
    train_input = np.load(f)
    train_labels = np.load(f)

with open('titanic/test.npy', 'rb') as f:
    test_input = np.load(f)
    test_labels = np.load(f)

In [153]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score

In [154]:
def specificity(matrix):
    return matrix[0][0]/(matrix[0][0]+matrix[0][1]) if (matrix[0][0]+matrix[0][1] > 0) else 0

def npv(matrix):
    return matrix[0][0]/(matrix[0][0]+matrix[1][0]) if (matrix[0][0]+matrix[1][0] > 0) else 0

def run(f_classify, x):
    return list(map(f_classify, x))

In [155]:
from qiskit import execute, QuantumCircuit, Aer

In [156]:
def pqc_classify(passanger_state, backend):
    qc = QuantumCircuit(1)
    qc.initialize(passanger_state, 0)
    qc.measure_all()
    results = execute(qc, backend).result()
    counts = results.get_counts()
    #print(counts)
    #return counts
    return int(list(map(lambda item: item[0], counts.items()))[0])

In [157]:
def classifier_report(name, run, classify, input, labels):
    cr_prediction = run(classify, input)
    # print(cr_prediction)
    cr_cm =confusion_matrix(labels, cr_prediction)
    cr_precision = precision_score(labels, cr_prediction)
    cr_recall = recall_score(labels, cr_prediction)
    cr_specificity = specificity(cr_cm)
    cr_npv = npv(cr_cm)
    cr_level = 0.25*(cr_precision + cr_recall + cr_specificity + cr_npv)
    print('The precision score of the Random PQC classifier is', cr_precision)
    print('The recall score of the Random PQC classifier is', cr_recall)
    print('The specificity of the Random PQC classifier is', cr_specificity)
    print('The NPV score of the Random PQC classifier is', cr_npv)
    print('The information level Random PQC classifier is', cr_level)

In [158]:
qc = QuantumCircuit(1)
backend = Aer.get_backend('statevector_simulator')
# Specify the quantum state that results in either 0 or 1
initial_state = [1/np.sqrt(2), 1/np.sqrt(2)]

classifier_report("Random PQC",run,lambda passenger: pqc_classify( initial_state, backend),train_input,train_labels)

The precision score of the Random PQC classifier is 0.410958904109589
The recall score of the Random PQC classifier is 0.5376344086021505
The specificity of the Random PQC classifier is 0.5023148148148148
The NPV score of the Random PQC classifier is 0.6271676300578035
The information level Random PQC classifier is 0.5195189393960895


Variational Hybrid Quantum‐Classical Algorithm

In [159]:
def pre_process(passanger):
    quantum_state = [1/np.sqrt(2), 1/np.sqrt(2)]
    return quantum_state

def pqc(backend, quantum_state):
    qc = QuantumCircuit(1)
    qc.initialize(quantum_state, 0)
    qc.measure_all()
    results = execute(qc,backend).result()
    counts = results.get_counts()
    return counts

def post_process(counts):
    # return int(list(lambda item: item[0], counts.item())[0])
    return int(list(map(lambda item: item[0],counts.items()))[0])
    

In [160]:
# Tell Qiskit how to simulate our circuit
backend = Aer.get_backend('statevector_simulator')
classifier_report("Variational",run,lambda passenger: post_process(pqc(backend,
 pre_process(passenger))),train_input,train_labels)

The precision score of the Random PQC classifier is 0.38243626062322944
The recall score of the Random PQC classifier is 0.4838709677419355
The specificity of the Random PQC classifier is 0.49537037037037035
The NPV score of the Random PQC classifier is 0.5977653631284916
The information level Random PQC classifier is 0.4898607404660067


In [161]:
def weigh_feature(feature, weight):
    return feature*weight

from functools import reduce 

def get_overall_probability(features, weights):
    return reduce(lambda result, data: result+weigh_feature(*data), zip(features, weights), 0)


In [162]:
# using correlation as a weights 
from scipy.stats import spearmanr
columns = [list(map(lambda passenger: passenger[i], train_input)) for i
in range(0,7)]
correlations = list(map(lambda col: spearmanr(col, train_labels)[0],columns))
correlations

[-0.31901567204795245,
 -0.5454925198242138,
 -0.06448989253745993,
 0.08703603878695323,
 0.12768580890741563,
 0.31258559525823626,
 -0.18565973139726472]

In [163]:
from math import pi, sin, cos
def get_state (theta):
    return [cos(theta/2), sin(theta/2)]
def pre_process_weighted(passenger):
    mu = get_overall_probability(passenger, correlations)
    quantum_state = get_state((1-mu)*pi)
    return quantum_state

In [171]:
# rerun the PQC with weighted features
backend = Aer.get_backend('statevector_simulator')
classifier_report("Variational",run,lambda passenger: post_process(pqc(backend,
 pre_process_weighted(passenger))),train_input,train_labels)

# test 
print('testing...')
classifier_report("Variational-test",run,lambda passenger: post_process(pqc(backend,
 pre_process_weighted(passenger))),test_input,test_labels)

The precision score of the Random PQC classifier is 0.7035398230088495
The recall score of the Random PQC classifier is 0.5698924731182796
The specificity of the Random PQC classifier is 0.8449074074074074
The NPV score of the Random PQC classifier is 0.7525773195876289
The information level Random PQC classifier is 0.7177292557805414
testing...
The precision score of the Random PQC classifier is 0.6415094339622641
The recall score of the Random PQC classifier is 0.5573770491803278
The specificity of the Random PQC classifier is 0.8376068376068376
The NPV score of the Random PQC classifier is 0.784
The information level Random PQC classifier is 0.7051233301873574
