In [1]:
import pennylane as qml
from pennylane import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
data = pd.read_csv('PS_20174392719_1491204439457_log.csv')
data = data.drop(['oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest'], axis=1)

type_encoder = OneHotEncoder(sparse_output=False)
type_encoded = type_encoder.fit_transform(data[['type']])

features = np.hstack((
    data[['amount', 'isFlaggedFraud']].values,
    type_encoded
))


labels = data['isFraud'].values.astype(float)

#normalize
scaler = StandardScaler()
features = scaler.fit_transform(features)

features = np.array(features, requires_grad=False)
labels = np.array(labels, requires_grad=False)


In [3]:
n_qubits = 7  

dev = qml.device('default.qubit', wires=n_qubits)


In [4]:
def qnn_layer(weights):
    for i in range(n_qubits):
        qml.RY(weights[i], wires=i)
    for i in range(n_qubits - 1):
        qml.CNOT(wires=[i, i + 1])


In [5]:
def quantum_neural_network(weights, x):
    #encode the features into quantum states
    for i in range(len(x)):
        qml.RX(x[i], wires=i)
    
    #apply variational layers
    qnn_layer(weights[0])
    qnn_layer(weights[1])

In [6]:
@qml.qnode(dev)
def circuit(weights, x):
    quantum_neural_network(weights, x)
    return qml.expval(qml.PauliZ(0))

In [7]:
#number of layers and parameters
num_layers = 2
weight_shapes = {"weights": (num_layers, n_qubits)}

@qml.qnode(dev)
def circuit(weights, x):
    quantum_neural_network(weights, x)
    return qml.expval(qml.PauliZ(0))


In [8]:
def cost(weights, X, Y):
    predictions = [circuit(weights, x) for x in X]
    predictions = np.array(predictions)
    predictions = (predictions + 1) / 2  #scale predictions to [0, 1]
    loss = np.mean(-(Y * np.log(predictions + 1e-6) + (1 - Y) * np.log(1 - predictions + 1e-6)))
    return loss


In [9]:
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)


In [10]:
np.random.seed(42)
weights = np.random.uniform(size=(num_layers, n_qubits), requires_grad=True)


In [11]:
opt = qml.GradientDescentOptimizer(stepsize=0.1)


In [None]:
epochs = 50

for epoch in range(epochs):
    weights, loss = opt.step_and_cost(lambda w: cost(w, X_train, y_train), weights)
    print(f"Epoch {epoch}, Loss: {loss}")
#     if epoch % 5 == 0:
        


In [None]:
predictions = [circuit(weights, x) for x in X_test]
predictions = np.array(predictions)
predictions = (predictions + 1) / 2  
predicted_labels = (predictions > 0.5).astype(int)


In [None]:
accuracy = accuracy_score(y_test, predicted_labels)
print(f"\nTest set accuracy: {accuracy:.2f}")


In [None]:
# on subset

In [19]:
epochs = 20

for epoch in range(epochs):
    weights, loss = opt.step_and_cost(lambda w: cost(w, X_train, y_train), weights)
    if epoch % 5 == 0:
        print(f"Epoch {epoch}, Loss: {loss}")


Epoch 0, Loss: 1.0945996450214717
Epoch 5, Loss: 0.4086709169270803
Epoch 10, Loss: 0.22889948228547524
Epoch 15, Loss: 0.16505991181800142


In [20]:
predictions = [circuit(weights, x) for x in X_test]
predictions = np.array(predictions)
predictions = (predictions + 1) / 2  #scale to [0, 1]
predicted_labels = (predictions > 0.5).astype(int)


In [21]:
accuracy = accuracy_score(y_test, predicted_labels)
print(f"\nTest set accuracy: {accuracy:.2f}")



Test set accuracy: 0.98
