# Layerwise learning for Quantum Neural Nets

In [1]:
import random
import collections
import matplotlib.pyplot as plt

# Pennylane
import pennylane as qml
from pennylane import numpy as np

# Pytorch
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler

In [2]:
torch.cuda.is_available()

  return torch._C._cuda_getDeviceCount() > 0


False

### Parameters

In [3]:
n_qubits = 4
n_layer_steps = 5
n_layers_to_add = 2
batch_size = 128
epochs = 20

### Data pre-processing

In [4]:
data_transforms = transforms.Compose([transforms.Resize(2), #resize to a 2x2 image
                                      transforms.ToTensor(), #convert to tensor
                                      transforms.Lambda(lambda x: torch.flatten(x)) #obtain a one dimensional vector
                                     ])

In [5]:
train_set = datasets.MNIST(root='./data', train=True, download=True, transform=data_transforms)
test_set = datasets.MNIST(root='./data', train=False, download=True, transform=data_transforms)

# Change labels of digits '3' and '6' to be 0 and 1, respectively.
train_set.targets[train_set.targets == 1] = 10
train_set.targets[train_set.targets == 0] = 10
train_set.targets[train_set.targets == 3] = 0
train_set.targets[train_set.targets == 6] = 1

test_set.targets[test_set.targets == 1] = 10
test_set.targets[test_set.targets == 0] = 10
test_set.targets[test_set.targets == 3] = 0
test_set.targets[test_set.targets == 6] = 1

# Filter to just images of '3's and '6's
subset_indices_train = ((train_set.targets == 0) + (train_set.targets == 1)).nonzero().view(-1)
subset_indices_test = ((test_set.targets == 0) + (test_set.targets == 1)).nonzero().view(-1)

print(len(subset_indices_test))

# Select just a subset of the training set
NUM_EXAMPLES = 128
subset_indices_train = subset_indices_train[:NUM_EXAMPLES]

# DataLoaders
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=False,
                                          sampler=SubsetRandomSampler(subset_indices_train))
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False,
                                         sampler=SubsetRandomSampler(subset_indices_test))

1968


	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:882.)
  app.launch_new_instance()


In [6]:
for x, y in train_loader:
    print(x)
    print(y)

tensor([[0.1686, 0.1608, 0.2157, 0.2275],
        [0.0863, 0.1098, 0.1176, 0.1490],
        [0.1373, 0.1569, 0.2118, 0.2196],
        [0.1216, 0.1529, 0.1647, 0.1490],
        [0.0980, 0.2235, 0.1961, 0.1569],
        [0.1882, 0.1490, 0.1451, 0.2235],
        [0.1765, 0.1529, 0.1882, 0.2157],
        [0.1294, 0.1412, 0.1451, 0.1961],
        [0.1804, 0.2588, 0.2706, 0.2431],
        [0.1373, 0.2196, 0.2471, 0.1922],
        [0.1137, 0.2118, 0.2000, 0.1412],
        [0.2588, 0.2706, 0.2706, 0.3608],
        [0.1216, 0.2627, 0.2078, 0.1804],
        [0.1216, 0.1608, 0.1255, 0.1608],
        [0.2196, 0.2471, 0.1725, 0.2353],
        [0.2078, 0.1529, 0.1373, 0.2196],
        [0.1608, 0.2941, 0.2275, 0.2235],
        [0.1294, 0.2510, 0.2314, 0.1529],
        [0.1765, 0.1412, 0.1020, 0.2314],
        [0.1059, 0.2353, 0.1804, 0.1569],
        [0.2196, 0.3529, 0.2784, 0.3020],
        [0.1529, 0.2588, 0.2235, 0.1569],
        [0.1294, 0.1176, 0.1294, 0.2118],
        [0.1412, 0.1569, 0.1725, 0

### Utility functions

In [7]:
def set_random_gates(n_qubits):
    """Utility function for creating a list
    of random gates chosen from gate_set.
    
    The returned list has a length of n_qubits.
    
    Arguments:
        n_qubits (int): Integer number indicating
            the number of qubits of the quantum
            circuit.
            
    Returns:
        chosen_gates (list): List of length equal
            to n_qubits containing RX, RY and RZ
            rotations randomly chosen.
    """
    
    gate_set = [qml.RX, qml.RY, qml.RZ]
    chosen_gates = []
    for i in range(n_qubits):
        chosen_gate = random.choice(gate_set)
        chosen_gates.append(chosen_gate)
    return chosen_gates

def total_elements(array_list):
    """Utility function that returns the total number
    of elements in a list of lists.
    
    Arguments:
        array_list (list[list]): List of lists.
    
    Returns:
        (int): Total number of elements in array_list.
    """

    flattened = [val for sublist in array_list for val in sublist]
    return len(flattened)

### Variational circuit

We first define some quantum layers that will compose the quantum circuit.

In [8]:
dev = qml.device("default.qubit", wires=n_qubits)

## Phase I

In [9]:
def apply_layer(gates, weights):
    """Function to apply the layer composed of
    of RX, RY and RZ to each qubit in the circuit
    (just one gate per qubit, randomly chosen) with
    their respective parameters. Then, apply CZ gates
    in a ladder structure.
    
    Arguments:
        gates: List of single qubit gates to apply in
            the circuit. Length equal to the number
            of qubits of the circuit.
        
        weights: List of parameters to apply in each
            gate from gates. Length equal to the 
            number of qubits of the circuit.
        
    Returns:
        None
    """
    
    # Apply single qubit gates with their weights
    for i in range(n_qubits): 
        gates[i](weights[i], wires = i)
    
    # Create the tuples with the ladder structure
    tuples = [(i,i+1) for i in range(n_qubits-1)]

    # Apply CZ gates to each pair of qubits in tuples.
    for tup in tuples:
        qml.CZ(wires=[tup[0], tup[1]])
        
#Function for non-trainable part of the quantum circuit
def frozen_layers(frozen_layer_gates, frozen_layer_weights):
    """Function that applies multiple layers to the quantum
    circuit. The main purpose of this function is to use it
    for applying the layers already trained during Phase I of
    layerwise learning.
    
    Arguments:
        frozen_layer_gates: List of lists containing the qubit
            rotations per layer to apply to the circuit.
            List of "shape" (number layers, number qubits).
        
        frozen_layer_weights: List of lists containing the
            parameters (angles) to each rotation in
            frozenlayer_gates. List of "shape" (number layers, number qubits).
    
    Returns:
        None
    """

    for i in range(len(frozen_layer_gates)):
        apply_layer(frozen_layer_gates[i], frozen_layer_weights[i])

@qml.qnode(dev, interface="torch")
def quantum_net(inputs, new_weights):
    """Quantum network to train during Phase I of
    layerwise learning. The data inputs are encoded
    using an Angle Embedding with X rotations. Then, 
    we apply the non-trainable layers or frozen layers
    using the two lists called layer_gates and layer_weights
    that store the randomly selected single qubit rotations
    and their trained weights in previous steps of layerwise
    learning. Finally, n_layers_to_add is an integer number that
    indicates the number of trainable layers to add in
    each step of Phase I.
    
    Arguments:
        inputs: Tensor data.
        new_weights: New paramters to be train of shape
            (n_layers_to_add, n_qubits).
            
    Returns:
        (float): Expectation value of an Z measurement in the
            last qubit of the circuit.
    """
    
    wirelist = [i for i in range(n_qubits)]

    # Encode the data with Angle Embedding
    qml.templates.AngleEmbedding(inputs, wires=wirelist, rotation='X')
    
    # Apply frozen layers
    frozen_layers(layer_gates, layer_weights)
    
    # Apply layers with trainable parameters
    for i in range(n_layers_to_add):
        apply_layer(new_gates[i], new_weights[i])
        
    # Expectation value of the last qubit
    return qml.expval(qml.PauliZ(n_qubits-1))

In [10]:
# Lists to update the new gates and trained weights
layer_gates = []
layer_weights = []

# Sigmoid function and Binary Cross Entropy loss
sigmoid = nn.Sigmoid()
loss = nn.BCELoss()

for step in range(n_layer_steps):
    
    print(f"Phase I step: {step+1}")
    
    # Obtain random gates for each new layer.
    new_gates = [set_random_gates(n_qubits) for i in range(n_layers_to_add)]
    
    # Define shape of the weights
    weight_shapes = {"new_weights": (n_layers_to_add, n_qubits)}
    
    # Quantum net as a TorchLayer
    qlayer = qml.qnn.TorchLayer(quantum_net, weight_shapes, init_method = nn.init.zeros_)
    
    # Create Sequential Model
    model = torch.nn.Sequential(qlayer, sigmoid)
    
    # Optimizer
    opt = optim.Adam(model.parameters(), lr=0.01)
    
    batches = NUM_EXAMPLES // batch_size
    for epoch in range(epochs):
        running_loss = 0
        for x, y in train_loader:
            opt.zero_grad()
            y = y.to(torch.float32)
            loss_evaluated = loss(model(x), y)
            loss_evaluated.backward()
            running_loss += loss_evaluated

            opt.step()
        avg_loss = running_loss / batches
        print("Average loss over epoch {}: {:.4f}".format(epoch + 1, avg_loss))
    
    # Extract weights after optimization to be save in layer_weights
    for param in model.parameters():
        new_weights = param.data
    new_weights = new_weights.tolist()
    print(f"Trained parameters: {total_elements(new_weights)}")

    layer_gates += new_gates
    layer_weights += new_weights
    print(f"Layer weights: {total_elements(layer_weights)}")
    print(f"Number of layers: {len(layer_gates)}")
    print("")

Phase I step: 1
Average loss over epoch 1: 0.8246
Average loss over epoch 2: 0.8246
Average loss over epoch 3: 0.8246
Average loss over epoch 4: 0.8246
Average loss over epoch 5: 0.8246
Average loss over epoch 6: 0.8246
Average loss over epoch 7: 0.8246
Average loss over epoch 8: 0.8246
Average loss over epoch 9: 0.8246
Average loss over epoch 10: 0.8246
Average loss over epoch 11: 0.8246
Average loss over epoch 12: 0.8246
Average loss over epoch 13: 0.8246
Average loss over epoch 14: 0.8246
Average loss over epoch 15: 0.8246
Average loss over epoch 16: 0.8246
Average loss over epoch 17: 0.8246
Average loss over epoch 18: 0.8246
Average loss over epoch 19: 0.8246
Average loss over epoch 20: 0.8246
Trained parameters: 8
Layer weights: 8
Number of layers: 2

Phase I step: 2
Average loss over epoch 1: 0.8246
Average loss over epoch 2: 0.8241
Average loss over epoch 3: 0.8237
Average loss over epoch 4: 0.8232
Average loss over epoch 5: 0.8227
Average loss over epoch 6: 0.8221
Average loss 

## Phase II

In [11]:
# Define partition of the circuit to train in each step.
# Here we train the circuit by half.
partition_percentage = 0.5
partition_size = int(n_layer_steps*n_layers_to_add*partition_percentage)
n_partition_weights = partition_size*n_qubits
n_sweeps = 2

In [12]:
@qml.qnode(dev, interface="torch")
def train_partition(inputs, partition_weights):
    """Qnode defined to train just a partition of 
    the quantum circuit after Phase I. This function
    supports just a partition in two pieces of the
    circuit. If partition == 1 is going to treat as
    trainable the first portion of the circuit and if
    partition == 2, the second portion is going to be
    trainable.
    
    Arguments:
        inputs: Tensor data.
        partition_weights: Partition of the weights to be
            trained. Shape (len(partition_weights, n_qubits).
    
    Returns:
        (float): Expectation value of an Z measurement in the
            last qubit of the circuit.
    """
    
    wirelist = [i for i in range(n_qubits)]

    #Encode the data with Angle Embedding
    qml.templates.AngleEmbedding(inputs, wires=wirelist, rotation='X')
    
    if partition == 1:
        # Apply trainable partition first
        for i in range(len(layer_gates[:partition_size])):
            apply_layer(layer_gates[:partition_size][i], partition_weights[i])
        
        #Apply non-trainable partition
        for i in range(len(layer_gates[partition_size:])):
            apply_layer(layer_gates[partition_size:][i], layer_weights[partition_size:][i])
    
    elif partition == 2:
        # Apply non-trainable partition first
        for i in range(len(layer_gates[:partition_size])):
            apply_layer(layer_gates[:partition_size][i], layer_weights[:partition_size][i])
        
        # Apply trainable partition
        for i in range(len(layer_gates[partition_size:])):
            apply_layer(layer_gates[partition_size:][i], partition_weights[i])
    
    # Expectation value of the last qubit
    return qml.expval(qml.PauliZ(n_qubits-1))

In [13]:
for sweep in range(n_sweeps):
    
    partition = 1
    print(f"Sweep: {sweep+1}, partition: {partition}")
    # First half to the weights to train
    trainable_weights = layer_weights[:partition_size]
    
    # Define shape of the weights
    weight_shapes = {"partition_weights": (len(trainable_weights), n_qubits)}
    
    # Quantum net as a TorchLayer
    qlayer = qml.qnn.TorchLayer(train_partition, weight_shapes, init_method = nn.init.zeros_)
    
    init_weights = nn.Parameter(torch.tensor(trainable_weights))
    
    # Create Sequential Model
    model = torch.nn.Sequential(qlayer, sigmoid)
    
    # Edit model initial parameters to be init_weights
    old_params = {}
    for name, params in model.named_parameters():
        old_params[name] = params.clone()
    
    old_params["0.partition_weights"] = init_weights
    
    for name, params in model.named_parameters():
        params.data.copy_(old_params[name])

    # Optimizer
    opt = optim.Adam(model.parameters(), lr=0.01)
    
    batches = NUM_EXAMPLES // batch_size
    for epoch in range(epochs):
        running_loss = 0
        for x, y in train_loader:
            opt.zero_grad()
            y = y.to(torch.float32)
            loss_evaluated = loss(model(x), y)
            loss_evaluated.backward()
            running_loss += loss_evaluated

            opt.step()
        avg_loss = running_loss / batches
        print("Average loss over epoch {}: {:.4f}".format(epoch + 1, avg_loss))
    
    for param in model.parameters():
        trained_weights = param.data
    trained_weights = trained_weights.tolist()
    print(f"Trained parameters: {total_elements(trained_weights)}")

    layer_weights[:partition_size] = trained_weights
    
    partition = 2
    print(f"Sweep: {sweep+1}, partition: {partition}")
    # Second half of the weights to train
    trainable_weights = layer_weights[partition_size:]
    
    # Define shape of the weights
    weight_shapes = {"partition_weights": (len(trainable_weights), n_qubits)}

    # Quantum net as a TorchLayer
    qlayer = qml.qnn.TorchLayer(train_partition, weight_shapes, init_method = nn.init.zeros_)
    
    init_weights = nn.Parameter(torch.tensor(trainable_weights))

    # Create Sequential Model
    model = torch.nn.Sequential(qlayer, sigmoid)
    
    # Edit model initial parameters to be init_weights
    old_params = {}
    for name, params in model.named_parameters():
        old_params[name] = params.clone()
    
    old_params["0.partition_weights"] = init_weights
    
    for name, params in model.named_parameters():
        params.data.copy_(old_params[name])
    
    # Optimizer
    opt = optim.Adam(model.parameters(), lr=0.01)
    
    batches = NUM_EXAMPLES // batch_size
    for epoch in range(epochs):
        running_loss = 0
        for x, y in train_loader:
            opt.zero_grad()
            y = y.to(torch.float32)
            loss_evaluated = loss(model(x), y)
            loss_evaluated.backward()
            running_loss += loss_evaluated

            opt.step()
        avg_loss = running_loss / batches
        print("Average loss over epoch {}: {:.4f}".format(epoch + 1, avg_loss))
    
    for param in model.parameters():
        trained_weights = param.data
    trained_weights = trained_weights.tolist()
    print(f"Trained parameters: {total_elements(trained_weights)}")

    layer_weights[partition_size:] = trained_weights

Sweep: 1, partition: 1
Average loss over epoch 1: 0.7900
Average loss over epoch 2: 0.7888
Average loss over epoch 3: 0.7876
Average loss over epoch 4: 0.7865
Average loss over epoch 5: 0.7853
Average loss over epoch 6: 0.7840
Average loss over epoch 7: 0.7828
Average loss over epoch 8: 0.7816
Average loss over epoch 9: 0.7803
Average loss over epoch 10: 0.7791
Average loss over epoch 11: 0.7778
Average loss over epoch 12: 0.7766
Average loss over epoch 13: 0.7753
Average loss over epoch 14: 0.7740
Average loss over epoch 15: 0.7727
Average loss over epoch 16: 0.7715
Average loss over epoch 17: 0.7702
Average loss over epoch 18: 0.7689
Average loss over epoch 19: 0.7676
Average loss over epoch 20: 0.7662
Trained parameters: 20
Sweep: 1, partition: 2
Average loss over epoch 1: 0.7649
Average loss over epoch 2: 0.7636
Average loss over epoch 3: 0.7623
Average loss over epoch 4: 0.7611
Average loss over epoch 5: 0.7598
Average loss over epoch 6: 0.7585
Average loss over epoch 7: 0.7572
Av