In [1]:
import pennylane as qml
from pennylane import numpy as np
from pennylane.optimize import NesterovMomentumOptimizer
import math

import pandas as pd
import numpy as np
import pennylane as qml
from pennylane import numpy as qnp
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

KeyboardInterrupt: 

# Utilities

In [None]:
# Seed the random generators of both numpy namespaces so that the random
# weight initialisation used during training is repeatable.
SEED = 42
np.random.seed(SEED)
qnp.random.seed(SEED)

In [None]:
def plot_training_results(hist_no, hist_yes, cm_no, cm_yes, village_name, figsize_1=(12, 4), figsize_2=(10, 4)):
    """Plot learning curves and confusion matrices for two training runs.

    Two figures are shown: the first holds the accuracy and loss curves of
    both runs side by side, the second holds one confusion-matrix heatmap
    per run.

    Args:
        hist_no: History of the run without re-uploading; a mapping with the
            keys 'train_acc', 'test_acc' and 'loss', each a per-epoch sequence.
        hist_yes: Same mapping for the run with re-uploading.
        cm_no: 2x2 integer confusion matrix of the run without re-uploading.
            The axis labels assume the layout built by this notebook's
            confusion_matrix ([[tn, fp], [fn, tp]]: rows are true labels,
            columns are predicted labels).
        cm_yes: Confusion matrix of the run with re-uploading.
        village_name: Name shown in the titles of the curve plots.
        figsize_1: Size of the learning-curve figure.
        figsize_2: Size of the confusion-matrix figure.
    """
    runs = (('No Re-up', hist_no), ('With Re-up', hist_yes))

    # Figure 1: accuracy (left) and loss (right) per epoch.
    fig_curves, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=figsize_1)
    for tag, hist in runs:
        ax_acc.plot(hist['train_acc'], label=f'Train ({tag})', linestyle='--')
        ax_acc.plot(hist['test_acc'], label=f'Test ({tag})')
        ax_loss.plot(hist['loss'], label=f'Loss ({tag})')

    ax_acc.set_title(f'Accuracy: {village_name}')
    ax_acc.set_xlabel('Epoch')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.legend()

    ax_loss.set_title(f'Loss: {village_name}')
    ax_loss.set_xlabel('Epoch')
    ax_loss.set_ylabel('Loss')
    ax_loss.legend()

    fig_curves.tight_layout()
    plt.show()

    # Figure 2: one heatmap per run, drawn on explicit axes instead of the
    # implicit "current axes" of the pyplot state machine.
    fig_cm, cm_axes = plt.subplots(1, 2, figsize=figsize_2)
    panels = (
        (cm_no, 'Blues', "CM: No Re-upload"),
        (cm_yes, 'Greens', "CM: With Re-upload"),
    )
    for ax, (matrix, cmap, title) in zip(cm_axes, panels):
        sns.heatmap(matrix, annot=True, fmt='d', cmap=cmap, ax=ax)
        ax.set_title(title)
        ax.set_xlabel('Predicted label')
        ax.set_ylabel('True label')

    fig_cm.tight_layout()
    plt.show()


### Data Loading

 We load three separate CSV files. Each dataset represents a different village and contains 500 entries.

In [None]:
# Village identifiers; each one doubles as the stem of its CSV file name.
gluehwein, krampus, lebkuchen = 'gluehweindorf', 'krampuskogel', 'lebkuchenstadt'
villages = [gluehwein, krampus, lebkuchen]

# One DataFrame per village, read from "<village>.csv" relative to the
# current working directory.
datasets = {village: pd.read_csv(f'{village}.csv') for village in villages}

Each dataset has 500 entries and contains no null values.

In [None]:
# Summary of the 'gluehweindorf' frame: columns, dtypes and non-null counts.
datasets[gluehwein].info()

In [None]:
# Summary of the 'krampuskogel' frame: columns, dtypes and non-null counts.
datasets[krampus].info()

In [None]:
# Summary of the 'lebkuchenstadt' frame: columns, dtypes and non-null counts.
datasets[lebkuchen].info()

### Feature Scaling

The features are later used as rotation angles in the angle encoding (see the Encoding section). To make them fit these rotation gates, we scale each feature column to the range $[-\pi, \pi]$ using `MinMaxScaler`.

In [None]:
# Report the min/max of both feature columns for every village.
# The column name is looked up outside the f-string: reusing the enclosing
# quote character inside a replacement field is a SyntaxError before
# Python 3.12.
for name in (gluehwein, krampus, lebkuchen):
    for col in ('carol_singing', 'snowball_energy'):
        column = datasets[name][col]
        print(f'{name}-{col}: min: {column.min()}, max: {column.max()}')
    print()

In [None]:
scaler = MinMaxScaler(feature_range=(-np.pi, np.pi))

# The single scaler instance is re-fit for every (village, column) pair, so
# each column is stretched to span the full target range on its own.
# NOTE(review): the fit uses all rows, i.e. it happens before the train/test
# split performed in train_and_evaluate, so test rows influence the scaling.
for name in (gluehwein, krampus, lebkuchen):
    village_df = datasets[name]
    for col in ('carol_singing', 'snowball_energy'):
        # MinMaxScaler expects a 2-D array, hence the single-column reshape.
        column_2d = village_df[col].to_numpy().reshape(-1, 1)
        village_df[col] = scaler.fit_transform(column_2d).ravel()

In [None]:
# Report the min/max of both feature columns again, to check the scaling.
# The column name is looked up outside the f-string: reusing the enclosing
# quote character inside a replacement field is a SyntaxError before
# Python 3.12.
for name in (gluehwein, krampus, lebkuchen):
    for col in ('carol_singing', 'snowball_energy'):
        column = datasets[name][col]
        print(f'{name}-{col}: min: {column.min()}, max: {column.max()}')
    print()

Label values

In [None]:
# Show the distinct label values of every village.
for name in (gluehwein, krampus, lebkuchen):
    # Index by the loop variable; the previous version always read the
    # gluehweindorf frame, so all three lines reported the same labels.
    labels = datasets[name]['label'].unique()
    print(f'{name}-labels:  {labels}')


In [None]:
# Per-village feature matrices (two columns) and label vectors.
# Despite the "_train" suffix these hold every row of each dataset; the
# train/test split happens later, inside train_and_evaluate.
X_train = {
    name: datasets[name][['carol_singing', 'snowball_energy']].values
    for name in (gluehwein, krampus, lebkuchen)
}
y_train = {
    name: datasets[name]['label'].values
    for name in (gluehwein, krampus, lebkuchen)
}

# Encoding

We use only angle encoding, because it worked best in previous tests.

In [None]:
def angle_encoding_reuploading(inputs, wires):
    """Angle-encode one two-feature sample on every given wire.

    Each wire receives an RX rotation by the first feature followed by an RY
    rotation by the second feature, so all wires carry the same encoding.

    Args:
        inputs: Indexable holding the two rotation angles (positions 0 and 1).
        wires: Iterable of wire labels to encode on.
    """
    angle_x, angle_y = inputs[0], inputs[1]
    for wire in wires:
        qml.RX(angle_x, wires=wire)
        qml.RY(angle_y, wires=wire)

# Entanglement

## Linear Entanglement

In [None]:
def linear_entanglement(wires):
    """Apply a chain of CNOTs between each wire and its successor.

    For wires (w0, w1, ..., wn) this queues CNOT(w0, w1), CNOT(w1, w2), ...,
    CNOT(w(n-1), wn). Nothing is applied for fewer than two wires.

    Args:
        wires: Sequence of wire labels, in chain order.
    """
    for position in range(1, len(wires)):
        control, target = wires[position - 1], wires[position]
        qml.CNOT(wires=[control, target])

## Circular Entanglement

In [None]:
def circular_entanglement(wires):
    """Apply a ring of CNOTs: the linear chain plus a last-to-first CNOT.

    For two wires this yields CNOT(w0, w1) followed by CNOT(w1, w0). With
    fewer than two wires there is nothing to entangle and no gate is applied.

    Args:
        wires: Sequence of wire labels, in ring order.
    """
    linear_entanglement(wires)
    # The closing gate needs two distinct wires; with a single wire the
    # control and target would be the same wire.
    if len(wires) > 1:
        qml.CNOT(wires=[wires[-1], wires[0]])


## Full Entanglement

In [None]:
def full_entanglement(wires):
    """Apply a CNOT between every ordered pair of wires (earlier -> later).

    The control is always the wire that appears earlier in `wires`; pairs are
    visited control-first, then by target position.

    Args:
        wires: Sequence of wire labels.
    """
    wire_count = len(wires)
    pairs = [
        (wires[first], wires[second])
        for first in range(wire_count)
        for second in range(first + 1, wire_count)
    ]
    for control, target in pairs:
        qml.CNOT(wires=[control, target])


# QML Circuit

The final circuit follows a repeating pattern of encoding (which becomes data re-uploading when repeated multiple times), entangling, and rotations (whose weights need to be learned). At the end we measure the expectation value of Pauli-Z on the first qubit.

In [None]:
def make_qml_circuit(dev):
    """Build a QNode on `dev` implementing the layered classifier circuit.

    The returned QNode takes (weights, inputs, encoding_fn, entanglement_fn,
    reuploading_count) and returns the expectation value of PauliZ on wire 0.
    Each of the `reuploading_count` layers encodes the inputs, entangles the
    wires and then applies one trainable `Rot` gate per wire.

    Args:
        dev: PennyLane device; its number of wires sets the circuit width.

    Returns:
        The QNode described above.
    """
    @qml.qnode(dev)
    def qml_circuit(weights, inputs, encoding_fn, entanglement_fn, reuploading_count):
        num_qubits = len(dev.wires)
        all_wires = range(num_qubits)

        for layer in range(reuploading_count):
            # Encode (a repeated encode is the "re-upload"), then entangle.
            encoding_fn(inputs, wires=all_wires)
            entanglement_fn(wires=all_wires)

            # Trainable rotations: weights is indexed [layer, wire, angle].
            for qubit in all_wires:
                phi = weights[layer, qubit, 0]
                theta = weights[layer, qubit, 1]
                omega = weights[layer, qubit, 2]
                qml.Rot(phi, theta, omega, wires=qubit)

        return qml.expval(qml.PauliZ(0))

    return qml_circuit

# Cost

We use Binary Cross-Entropy Loss.

In [None]:
def cost(weights, params):
    """Binary cross-entropy of the circuit's predictions over a data set.

    Args:
        weights: Trainable circuit weights, forwarded to the circuit.
        params: Sequence (X, Y, circuit, encoding_fn, entang_fn, reupload_cnt)
            holding the samples, their 0/1 labels, the QNode and its
            structural arguments.

    Returns:
        The mean binary cross-entropy over all samples in X.
    """
    X, Y, circuit, encoding_fn, entang_fn, reupload_cnt = params

    def prob_of_label_one(sample):
        # Map the expectation value from [-1, 1] onto [0, 1].
        return circuit(weights, sample, encoding_fn, entang_fn, reupload_cnt) * 0.5 + 0.5

    predictions = qnp.array([prob_of_label_one(x) for x in X])

    # Small offset keeps the logarithms finite when a prediction hits 0 or 1.
    eps = 1e-7
    log_likelihood = Y * qnp.log(predictions + eps) + (1 - Y) * qnp.log(1 - predictions + eps)
    return -qnp.mean(log_likelihood)

# Predicting

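Functions to convert the quantum expectation value in $[-1, 1]$ into a probability in $[0, 1]$ (`predict_proba`) or into a boolean class prediction (`predict`, true when the expectation value is positive), plus an accuracy helper.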

In [None]:
def predict_proba(weights, X, qml_circuit, encoding_fn, entanglement_fn, reuploading_count):
    """Return, per sample in X, the circuit output rescaled from [-1, 1] to [0, 1].

    Args:
        weights: Circuit weights.
        X: Iterable of samples.
        qml_circuit: QNode called as
            qml_circuit(weights, x, encoding_fn, entanglement_fn, reuploading_count).
        encoding_fn: Encoding routine forwarded to the circuit.
        entanglement_fn: Entangling routine forwarded to the circuit.
        reuploading_count: Number of layers forwarded to the circuit.

    Returns:
        Array with one value (expval + 1) / 2 per sample.
    """
    probabilities = []
    for sample in X:
        expval = qml_circuit(weights, sample, encoding_fn, entanglement_fn, reuploading_count)
        probabilities.append((expval + 1) / 2)
    return qnp.array(probabilities)


def predict(weights, X, circuit, encoding_fn, entang_fn, reupload_cnt):
    """Return a boolean array: True where the circuit's expectation value is > 0.

    Args:
        weights: Circuit weights.
        X: Iterable of samples.
        circuit: QNode called as
            circuit(weights, x, encoding_fn, entang_fn, reupload_cnt).
        encoding_fn: Encoding routine forwarded to the circuit.
        entang_fn: Entangling routine forwarded to the circuit.
        reupload_cnt: Number of layers forwarded to the circuit.
    """
    expvals = qnp.array([
        circuit(weights, sample, encoding_fn, entang_fn, reupload_cnt)
        for sample in X
    ])
    return expvals > 0

def accuracy(y_true, y_pred):
    """Return the fraction of positions where y_true equals y_pred."""
    matches = (y_true == y_pred)
    return qnp.mean(matches)


# Confusion matrix

A manual implementation to evaluate true positives, true negatives, and errors.

In [None]:
def confusion_matrix(y_true, y_pred):
    """Build the 2x2 confusion matrix [[tn, fp], [fn, tp]] for binary labels.

    Rows correspond to the true label (0, then 1) and columns to the predicted
    label (0, then 1). Note that this definition shadows the `confusion_matrix`
    imported from sklearn.metrics at the top of the notebook.

    Args:
        y_true: Array of true labels, compared against 0 and 1.
        y_pred: Array of predicted labels, compared against 0 and 1.

    Returns:
        2x2 array of counts.
    """
    actual_pos, actual_neg = (y_true == 1), (y_true == 0)
    predicted_pos, predicted_neg = (y_pred == 1), (y_pred == 0)

    tn = qnp.sum(actual_neg & predicted_neg)
    fp = qnp.sum(actual_neg & predicted_pos)
    fn = qnp.sum(actual_pos & predicted_neg)
    tp = qnp.sum(actual_pos & predicted_pos)

    return qnp.array([[tn, fp], [fn, tp]])

# Evaluation

For each village, we compare two scenarios:
- No Re-upload: A simple single-layer quantum circuit.
- With Re-upload: A deeper circuit where the data is re-encoded multiple times. The number of re-upload layers varies from village to village.

In [None]:
def train_and_evaluate(reupload, epochs, village_name, learning_rate, num_qubits,
                       encoding_fn=None, entanglement_fn=None):
    """Train the layered circuit on one village and evaluate it on a hold-out split.

    The village's data is split 80/20 with a fixed random state, the weights
    are drawn uniformly from [0, 2*pi) and optimised with Adam on the
    binary cross-entropy `cost`.

    Args:
        reupload: Number of encode/entangle/rotate layers (1 = no re-upload).
        epochs: Number of optimiser steps; each step uses the full training split.
        village_name: Key into the module-level X_train / y_train dicts.
        learning_rate: Adam step size.
        num_qubits: Number of wires of the simulated device.
        encoding_fn: Encoding routine called as encoding_fn(inputs, wires=...).
            Defaults to angle_encoding_reuploading.
        entanglement_fn: Entangling routine called as entanglement_fn(wires=...).
            Defaults to circular_entanglement.

    Returns:
        (history, cm): `history` maps 'loss', 'train_acc' and 'test_acc' to
        per-epoch lists of floats; `cm` is the confusion matrix of the final
        weights on the test split. Each recorded loss is the value returned by
        `step_and_cost`, while the accuracies are computed with the weights
        after that step.
    """
    # Resolve defaults at call time so the previous hard-coded choices remain
    # the behaviour for existing callers.
    if encoding_fn is None:
        encoding_fn = angle_encoding_reuploading
    if entanglement_fn is None:
        entanglement_fn = circular_entanglement

    dev = qml.device("default.qubit", wires=num_qubits)
    circuit = make_qml_circuit(dev)

    # Data Split
    X_v = qnp.array(X_train[village_name], requires_grad=False)
    Y_v = qnp.array(y_train[village_name], requires_grad=False)
    X_tr, X_te, Y_tr, Y_te = train_test_split(X_v, Y_v, test_size=0.2, random_state=12317240)

    # One (3-angle) Rot gate per layer and wire. Drawn from the global numpy
    # RNG, so the result depends on the seed and on earlier random draws.
    weights = qnp.array(
        np.random.uniform(0, 2 * np.pi, size=(reupload, num_qubits, 3)),
        requires_grad=True
    )

    opt = qml.AdamOptimizer(stepsize=learning_rate)

    history = {'loss': [], 'train_acc': [], 'test_acc': []}

    print(f"Starting Training: {village_name} | Re-upload: {reupload}")

    # Everything the cost needs besides the weights is constant across epochs.
    params = [X_tr, Y_tr, circuit, encoding_fn, entanglement_fn, reupload]

    for epoch in range(epochs):
        # `params` is passed by keyword, so only `weights` is optimised.
        weights, current_cost = opt.step_and_cost(cost, weights, params=params)

        # Predictions for metrics
        pred_tr = predict(weights, X_tr, circuit, encoding_fn, entanglement_fn, reupload)
        pred_te = predict(weights, X_te, circuit, encoding_fn, entanglement_fn, reupload)

        # Store plain floats so the history is independent of tensor types.
        current_cost = float(current_cost)
        train_acc = float(accuracy(Y_tr, pred_tr))
        test_acc = float(accuracy(Y_te, pred_te))

        history['loss'].append(current_cost)
        history['train_acc'].append(train_acc)
        history['test_acc'].append(test_acc)

        print(f"Epoch {epoch+1:3d} | Cost: {current_cost:.4f} | Accuracy train: {train_acc:.3f} | Accuracy test: {test_acc:.3f}")

    # Final confusion matrix (recomputed so it also works when epochs == 0).
    final_preds = predict(weights, X_te, circuit, encoding_fn, entanglement_fn, reupload)
    cm = confusion_matrix(Y_te, final_preds)

    return history, cm

## Gluehweindorf

In [None]:
# Gluehweindorf: train once without re-uploading (1 layer) and once with
# 5 layers, keeping every other setting identical.
num_qubits = 2
learning_rate = 0.1
village_name = gluehwein

hist_no, cm_no = train_and_evaluate(
    reupload=1,
    epochs=30,
    village_name=village_name,
    learning_rate=learning_rate,
    num_qubits=num_qubits,
)
hist_yes, cm_yes = train_and_evaluate(
    reupload=5,
    epochs=30,
    village_name=village_name,
    learning_rate=learning_rate,
    num_qubits=num_qubits,
)

In [None]:
# The title previously contained a mis-decoded "ü" ("Gl√ºhweindorf").
plot_training_results(hist_no, hist_yes, cm_no, cm_yes, village_name="Glühweindorf")

## Krampus

In [None]:
# Krampuskogel: train once without re-uploading (1 layer) and once with
# 10 layers, keeping every other setting identical.
num_qubits = 2
learning_rate = 0.1
village_name = krampus

hist_no_2, cm_no_2 = train_and_evaluate(
    reupload=1,
    epochs=30,
    village_name=village_name,
    learning_rate=learning_rate,
    num_qubits=num_qubits,
)
hist_yes_2, cm_yes_2 = train_and_evaluate(
    reupload=10,
    epochs=30,
    village_name=village_name,
    learning_rate=learning_rate,
    num_qubits=num_qubits,
)


In [None]:
# Learning curves and confusion matrices for the two Krampuskogel runs.
plot_training_results(
    hist_no_2,
    hist_yes_2,
    cm_no_2,
    cm_yes_2,
    village_name="Krampuskogel",
)

## Lebkuchen

In [None]:
# Lebkuchenstadt: train once without re-uploading (1 layer) and once with
# 3 layers, keeping every other setting identical.
num_qubits = 2
learning_rate = 0.1
village_name = lebkuchen

hist_no_3, cm_no_3 = train_and_evaluate(
    reupload=1,
    epochs=30,
    village_name=village_name,
    learning_rate=learning_rate,
    num_qubits=num_qubits,
)
hist_yes_3, cm_yes_3 = train_and_evaluate(
    reupload=3,
    epochs=30,
    village_name=village_name,
    learning_rate=learning_rate,
    num_qubits=num_qubits,
)

In [None]:
# Learning curves and confusion matrices for the two Lebkuchenstadt runs.
plot_training_results(
    hist_no_3,
    hist_yes_3,
    cm_no_3,
    cm_yes_3,
    village_name="Lebkuchenstadt",
)