# ML4FG Interim Report HQNN Code
### By: Austin Stiefelmaier 11/11/23

## Imports and Settings

In [None]:
# Inspired by quantum ML code from:
# https://pennylane.ai/qml/demos/tutorial_quantum_transfer_learning.html
# And classical ML code from:
# https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

In [1]:
# General imports
import os
import copy
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# PyTorch imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn.functional as F
from torch.utils.data import TensorDataset

# Pennylane imports
import pennylane as qml
from pennylane import numpy as np

In [2]:
# Seed the PyTorch and NumPy generators so repeated runs draw the same numbers
torch.manual_seed(7)
np.random.seed(7)
# os.environ["OMP_NUM_THREADS"] = "1"
# Prefer the first CUDA device when one is visible, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cuda:0


## Load Data

In [3]:
# Data download
# The dataset directory can be overridden with the TCGA_PANCAN_DIR environment
# variable; the default is the original location used when this notebook was written.
data_dir = os.environ.get(
    'TCGA_PANCAN_DIR',
    '/home/as6734/ml4fg_class_project/TCGA-PANCAN-HiSeq-801x20531',
)
# data.csv holds the per-sample feature columns, labels.csv the class of each sample
df_x = pd.read_csv(os.path.join(data_dir, 'data.csv'))
df_y = pd.read_csv(os.path.join(data_dir, 'labels.csv'))

In [4]:
# Preview the first rows of the raw feature table
df_x.head()

Unnamed: 0.1,Unnamed: 0,gene_0,gene_1,gene_2,gene_3,gene_4,gene_5,gene_6,gene_7,gene_8,...,gene_20521,gene_20522,gene_20523,gene_20524,gene_20525,gene_20526,gene_20527,gene_20528,gene_20529,gene_20530
0,sample_0,0.0,2.017209,3.265527,5.478487,10.431999,0.0,7.175175,0.591871,0.0,...,4.926711,8.210257,9.723516,7.22003,9.119813,12.003135,9.650743,8.921326,5.286759,0.0
1,sample_1,0.0,0.592732,1.588421,7.586157,9.623011,0.0,6.816049,0.0,0.0,...,4.593372,7.323865,9.740931,6.256586,8.381612,12.674552,10.517059,9.397854,2.094168,0.0
2,sample_2,0.0,3.511759,4.327199,6.881787,9.87073,0.0,6.97213,0.452595,0.0,...,5.125213,8.127123,10.90864,5.401607,9.911597,9.045255,9.788359,10.09047,1.683023,0.0
3,sample_3,0.0,3.663618,4.507649,6.659068,10.196184,0.0,7.843375,0.434882,0.0,...,6.076566,8.792959,10.14152,8.942805,9.601208,11.392682,9.694814,9.684365,3.292001,0.0
4,sample_4,0.0,2.655741,2.821547,6.539454,9.738265,0.0,6.566967,0.360982,0.0,...,5.996032,8.891425,10.37379,7.181162,9.84691,11.922439,9.217749,9.461191,5.110372,0.0


In [5]:
# Preview the first rows of the label table
df_y.head()

Unnamed: 0.1,Unnamed: 0,Class
0,sample_0,PRAD
1,sample_1,LUAD
2,sample_2,PRAD
3,sample_3,PRAD
4,sample_4,BRCA


In [6]:
# Clean data

# Drop the first column, which only holds the sample name (it can be
# reconstructed from the index if need be).
# Non-inplace drop with errors='ignore' keeps this cell idempotent: re-running it
# after the column is already gone is a no-op instead of a KeyError.
df_x = df_x.drop(columns=['Unnamed: 0'], errors='ignore')
df_y = df_y.drop(columns=['Unnamed: 0'], errors='ignore')
# Remove columns with all 0.0 values (keep a column if any entry is non-zero)
df_x = df_x.loc[:, (df_x != 0).any(axis=0)]

In [7]:
# Pre-process data

# Standardise each feature column: subtract the column mean, divide by the column std
# NOTE(review): the statistics are computed over all samples, i.e. before the
# train/validation/test split that follows — confirm this is intended.
df_x = df_x.sub(df_x.mean()).div(df_x.std())
# One-hot encode the class labels; `enc` is kept so the encoding can be inspected later
enc = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
y = enc.fit_transform(df_y.values)

In [8]:
# Re-inspect the feature table after cleaning and standardisation
df_x.head()

Unnamed: 0,gene_0,gene_1,gene_2,gene_3,gene_4,gene_6,gene_7,gene_8,gene_9,gene_10,...,gene_20521,gene_20522,gene_20523,gene_20524,gene_20525,gene_20526,gene_20527,gene_20528,gene_20529,gene_20530
0,-0.194678,-0.827513,0.159701,-1.947061,1.220812,-0.207838,0.180797,-0.125297,-0.065592,-0.082063,...,-1.299388,-0.921179,-0.87729,0.995625,-1.165344,0.389198,-0.869023,-1.187196,-0.11641,-0.261738
1,-0.194678,-2.013759,-1.414158,1.352264,-0.376283,-0.53189,-0.982474,-0.125297,-0.065592,-0.586397,...,-1.745985,-2.39072,-0.831373,0.59128,-2.548006,1.390759,0.623162,-0.342063,-1.655854,-0.261738
2,-0.194678,0.417087,1.156013,0.249651,0.112761,-0.391053,-0.092937,-0.125297,-0.065592,-0.586397,...,-1.033442,-1.059008,2.247399,0.232456,0.317682,-4.023107,-0.631986,0.886307,-1.854106,-0.261738
3,-0.194678,0.543549,1.325354,-0.098991,0.755269,0.395101,-0.127752,-0.125297,-0.065592,-0.586397,...,0.241148,0.044877,0.224815,1.718651,-0.263682,-0.521421,-0.793113,0.16607,-1.078268,-0.261738
4,-0.194678,-0.29577,-0.256947,-0.286234,-0.14875,-0.756645,-0.272995,-0.125297,-0.065592,-0.586397,...,0.133251,0.208122,0.837216,0.979312,0.196522,0.268824,-1.614832,-0.229734,-0.201463,-0.261738


In [9]:
# First five one-hot encoded label rows
y[0:5]

array([[0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0.]])

In [11]:
# Two-stage stratified split with a fixed seed:
#   1) hold out 20% of all samples as the test set,
#   2) hold out 20% of the remainder as the validation set,
# which leaves roughly 64% train / 16% validation / 20% test.
x_train, x_test, y_train, y_test = train_test_split(
    df_x.values,
    y,
    test_size=0.2,
    train_size=0.8,
    random_state=7,
    stratify=y,
)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    train_size=0.8,
    random_state=7,
    stratify=y_train,
)
# Number of samples per split, looked up later by split name
dataset_sizes = {
    split_name: len(split_x)
    for split_name, split_x in (('train', x_train), ('validation', x_valid), ('test', x_test))
}
print(dataset_sizes)

In [13]:
# Convert to DataLoaders
# Features become float32 tensors. Labels are converted from one-hot rows to
# int64 class indices (argmax over the class axis): that is the target format
# nn.CrossEntropyLoss / F.nll_loss expect, and it lets predictions from
# torch.max / output.max be compared to the labels directly.
x_train_to_tensor = torch.from_numpy(x_train).to(torch.float32)
y_train_to_tensor = torch.from_numpy(y_train).argmax(dim=1)
x_valid_to_tensor = torch.from_numpy(x_valid).to(torch.float32)
y_valid_to_tensor = torch.from_numpy(y_valid).argmax(dim=1)
x_test_to_tensor = torch.from_numpy(x_test).to(torch.float32)
y_test_to_tensor = torch.from_numpy(y_test).argmax(dim=1)

# Second step: Creating TensorDataset for Dataloader
train_set = TensorDataset(x_train_to_tensor, y_train_to_tensor)
valid_set = TensorDataset(x_valid_to_tensor, y_valid_to_tensor)
test_set = TensorDataset(x_test_to_tensor, y_test_to_tensor)

# Create DataLoader
# Train/validation use mini-batches of 8; the test loader yields one sample at a
# time because the test routine calls .item() on each prediction and label.
dataloaders = {
    'train': torch.utils.data.DataLoader(train_set, batch_size=8, shuffle=True),
    'validation': torch.utils.data.DataLoader(valid_set, batch_size=8),
    'test': torch.utils.data.DataLoader(test_set, shuffle=True, batch_size=1)
}

## Quantum Circuit Architecture

In [14]:
# Set model architecture/training params
qubit_count = 5
step = 0.0004  # Initial learning rate
# Taken from the training DataLoader so this value cannot drift from the batch
# size that is actually used (it was previously a separate literal, 50, while
# the loaders were built with 8).
batch_size = dataloaders['train'].batch_size
num_epochs = 10
circ_repeats = 6  # How many times to repeat RY and CNOT gates
gamma_lr_scheduler = 0.1  # Learning rate decay param
q_delta = 0.01  # Param for quantum circuit weight initialization

# Run quantum circuit on Pennylane default simulator
dev = qml.device('default.qubit', wires=qubit_count)

In [15]:
# Helper functions to construct quantum circuit
def ry_gates(w):
    """Apply one RY rotation per entry of `w`.

    The k-th entry of `w` is used as the rotation angle on wire k.
    """
    wire = 0
    for angle in w:
        qml.RY(angle, wires=wire)
        wire += 1

# Adds alternating CNOT layer to entangle qubits
def entangling_layer(nqubits):
    """Apply CNOTs between neighbouring wires in two passes.

    The first pass couples pairs starting on even wires (0-1, 2-3, ...), the
    second pass couples pairs starting on odd wires (1-2, 3-4, ...).
    """
    for first_control in (0, 1):  # 0 -> even pass, 1 -> odd pass
        for control in range(first_control, nqubits - 1, 2):
            qml.CNOT(wires=[control, control + 1])

In [16]:
# Function to construct quantum circuit to plug into PyTorch
@qml.qnode(dev, interface='torch')
def quantum_circuit(inputs, q_weights_flat):
    """Variational circuit used as the quantum layer.

    `inputs` supplies one RY angle per wire (the data encoding) and
    `q_weights_flat` is a flat vector of circ_repeats * qubit_count trainable
    RY angles. Returns a tuple with the Pauli-Z expectation value of every wire.
    """
    # View the flat weight vector as one row of angles per repeated layer
    layer_weights = q_weights_flat.reshape(circ_repeats, qubit_count)
    # Hadamard on every wire first, so the circuit does not start out biased
    # towards the 0 or 1 computational-basis state
    for wire in range(qubit_count):
        qml.Hadamard(wires=wire)
    # Data encoding: the inputs act as the first set of RY angles
    ry_gates(inputs)
    # Trainable part: each repeat entangles neighbouring wires with CNOTs and
    # then applies that layer's RY rotations
    for layer_angles in layer_weights:
        entangling_layer(qubit_count)
        ry_gates(layer_angles)
    # Layer output: expectation value of Pauli-Z measured on each wire
    return tuple(qml.expval(qml.PauliZ(wire)) for wire in range(qubit_count))

In [17]:
# Example visualization codes with nonsense weights
# Helps to see what the quantum circuit looks like
# Everything here is throwaway: random circuit weights, one random 512-feature
# input row and a freshly initialised Linear layer, mirroring the
# Linear -> tanh -> scale-by-pi/2 encoding used in the model's forward pass.
q_params_print = nn.Parameter(q_delta * torch.randn(circ_repeats * qubit_count))
input_test = torch.randn(1,512)
pre_net_test = nn.Linear(512, qubit_count)
pre_out_test = pre_net_test(input_test)
# tanh bounds the values to (-1, 1); scaling by pi/2 turns them into RY angles
q_in_test = torch.tanh(pre_out_test) * np.pi / 2.0
# One drawing per input row (a single row here)
for elem in q_in_test:
    print(qml.draw(quantum_circuit)(elem, q_params_print))

0: ──H──RY(-0.15)─╭●──RY(-0.01)────────────╭●──RY(0.00)────────────╭●──RY(-0.01)────────────╭●
1: ──H──RY(0.69)──╰X─╭●──────────RY(0.00)──╰X─╭●─────────RY(-0.01)─╰X─╭●──────────RY(-0.00)─╰X
2: ──H──RY(0.21)──╭●─╰X──────────RY(0.01)──╭●─╰X─────────RY(-0.01)─╭●─╰X──────────RY(0.01)──╭●
3: ──H──RY(-0.84)─╰X─╭●──────────RY(-0.01)─╰X─╭●─────────RY(0.01)──╰X─╭●──────────RY(0.01)──╰X
4: ──H──RY(-0.98)────╰X──────────RY(-0.00)────╰X─────────RY(-0.01)────╰X──────────RY(-0.01)───

───RY(0.01)────────────╭●──RY(0.01)────────────╭●──RY(0.01)────────────┤  <Z>
──╭●─────────RY(-0.00)─╰X─╭●─────────RY(-0.01)─╰X─╭●─────────RY(0.02)──┤  <Z>
──╰X─────────RY(-0.00)─╭●─╰X─────────RY(0.01)──╭●─╰X─────────RY(-0.01)─┤  <Z>
──╭●─────────RY(0.00)──╰X─╭●─────────RY(0.01)──╰X─╭●─────────RY(-0.00)─┤  <Z>
──╰X─────────RY(0.02)─────╰X─────────RY(-0.01)────╰X─────────RY(-0.00)─┤  <Z>


## HQNN Architecture

In [None]:
# HQNN Model Class
class HybridQuantumNet(nn.Module):
    """Hybrid classical/quantum classifier: Linear -> quantum circuit -> Linear.

    Args:
        n_features: Width of each input row. Defaults to the number of columns
            of the notebook's training matrix `x_train`.
        n_classes: Number of output logits. Defaults to the number of one-hot
            columns of the notebook's `y_train`.

    The layer sizes used to be hard-coded to 512 inputs and 2 outputs, which does
    not match the feature matrix and the one-hot labels prepared in this notebook.
    """

    # Initialize layers
    def __init__(self, n_features=None, n_classes=None):
        super().__init__()
        if n_features is None:
            n_features = x_train.shape[1]
        if n_classes is None:
            n_classes = y_train.shape[1]
        # Layers before quantum circuit: compress the features to one value per qubit
        self.pre_quant = nn.Linear(n_features, qubit_count)
        # Quantum Circuit params: small random RY angles, one per (repeat, qubit)
        self.q_params = nn.Parameter(q_delta * torch.randn(circ_repeats * qubit_count))
        # Layer(s) after quantum circuit: map the expectation values to class logits
        self.post_quant = nn.Linear(qubit_count, n_classes)

    # Forward pass procedure
    def forward(self, input_features):
        """Return class logits of shape (batch, n_classes) for a batch of rows."""
        # Map the input features to qubit_count values
        pre_out = self.pre_quant(input_features)
        # Squash to (-pi/2, pi/2) for use as RY rotation gate params
        q_in = torch.tanh(pre_out) * np.pi / 2.0
        # The circuit takes one sample at a time, so run it per row and collect
        # the results (stacked once at the end instead of repeated torch.cat)
        q_rows = []
        for elem in q_in:
            q_res = quantum_circuit(elem, self.q_params)
            # Depending on the PennyLane version a QNode with several
            # measurements returns either a tuple of scalars or one tensor
            if isinstance(q_res, (tuple, list)):
                q_res = torch.stack(tuple(q_res))
            q_rows.append(q_res.float().to(q_in.device))
        if q_rows:
            q_out = torch.stack(q_rows)
        else:
            # Empty batch: keep the (0, qubit_count) shape the linear layer expects
            q_out = q_in.new_zeros((0, qubit_count))
        # Map quantum circuit output using linear layer to classes
        return self.post_quant(q_out)

In [None]:
# Build the hybrid network and place it on the selected device
# (the GPU when available, otherwise the CPU)
model_hybrid = HybridQuantumNet().to(device)
# Trailing expression so the notebook prints the layer summary
model_hybrid

## Train HQNN

In [None]:
# Set up loss, optimizer, and learning rate decay manager
loss_func = nn.CrossEntropyLoss()
# HybridQuantumNet has no `fc` submodule (its members are pre_quant, q_params and
# post_quant), so optimise every parameter of the model.
optimizer_hybrid = optim.Adam(model_hybrid.parameters(), lr=step)
# Multiply the learning rate by gamma_lr_scheduler every 10 scheduler steps
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer_hybrid, step_size=10, gamma=gamma_lr_scheduler
)

In [None]:
# Function to train model(s)
def train(model, loss_func, optimizer, scheduler, num_epochs):
    """Train `model` and return it loaded with its best validation weights.

    Each epoch runs a 'train' pass followed by a 'validation' pass over the
    notebook-level `dataloaders`, with tensors moved to the notebook-level `device`.

    Args:
        model: Network to optimise; called as model(X) and expected to return logits.
        loss_func: Callable taking (outputs, class-index targets).
        optimizer: Optimiser stepped after every training batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        num_epochs: Number of train+validation passes.

    Returns:
        The same model object, with the weights of the epoch that achieved the
        highest validation accuracy loaded.
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_loss = float('inf')
    best_acc_train = 0.0
    best_loss_train = float('inf')
    print('Training started:')
    for epoch in tqdm(range(num_epochs)):
        for phase in ['train', 'validation']:
            is_train = phase == 'train'
            if is_train:
                # Set model to train mode
                model.train()
            else:
                # Set model to eval mode
                model.eval()
            current_loss = 0.0
            current_corrects = 0
            n_seen = 0
            # Ask the loader for its batch count rather than deriving it from a
            # separately configured batch size
            n_batches = len(dataloaders[phase])
            for batch_idx, (X, Y) in enumerate(dataloaders[phase]):
                batch_len = len(X)
                X = X.to(device)
                Y = Y.to(device)
                # Accept one-hot label rows as well as class indices; the loss and
                # the accuracy comparison below both work on int64 class indices
                if Y.dim() > 1:
                    Y = Y.argmax(dim=1)
                Y = Y.long()
                optimizer.zero_grad()  # Reset gradients
                # Track gradients (and update weights) only in the train phase
                with torch.set_grad_enabled(is_train):
                    outputs = model(X)
                    _, preds = torch.max(outputs, 1)
                    loss = loss_func(outputs, Y)
                    if is_train:
                        loss.backward()
                        optimizer.step()
                # Accumulate sample-weighted loss and the number of correct predictions
                current_loss += loss.item() * batch_len
                current_corrects += torch.sum(preds == Y).item()
                n_seen += batch_len
                # Print iteration results
                print('Phase: {} Epoch: {}/{} Iter: {}/{}'.format(phase, epoch+1, num_epochs, batch_idx+1, n_batches),
                    end="\r",
                    flush=True)

            # Get epoch stats and print
            epoch_loss = current_loss / max(n_seen, 1)
            epoch_acc = current_corrects / max(n_seen, 1)
            print('Phase: {} Epoch: {}/{} Loss: {:.4f} Acc: {:.4f}        '.format(
                    'train' if phase == 'train' else 'validation  ',
                    epoch + 1,
                    num_epochs,
                    epoch_loss,
                    epoch_acc,
                )
            )
            # Update best var
            if is_train:
                best_acc_train = max(best_acc_train, epoch_acc)
                best_loss_train = min(best_loss_train, epoch_loss)
                # Advance the learning-rate schedule once per epoch
                scheduler.step()
            else:
                best_loss = min(best_loss, epoch_loss)
                if epoch_acc > best_acc:
                    # New best validation accuracy: snapshot these weights
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

    # Restore the best validation snapshot and report the best figures seen
    model.load_state_dict(best_model_wts)
    print('Training completed. Best validation loss: {:.4f} | Best validation accuracy: {:.4f}'.format(best_loss, best_acc))
    print('Best train loss: {:.4f} | Best train accuracy: {:.4f}'.format(best_loss_train, best_acc_train))
    return model

In [None]:
# Run the training loop; the name is rebound to whatever train() returns
model_hybrid = train(
    model_hybrid,
    loss_func,
    optimizer_hybrid,
    exp_lr_scheduler,
    num_epochs=num_epochs,
)

In [None]:
# Save model weights locally in case of GCP crash
# Create the target directory first; torch.save does not create missing folders
os.makedirs('./weights', exist_ok=True)
torch.save(model_hybrid.state_dict(), './weights/hybrid_10epochs.pt')

## Test HQNN

In [None]:
# Load in saved weights, put in model, and then set to eval mode
# map_location remaps the stored tensors onto the current device, so weights
# saved from a GPU session can still be loaded on a CPU-only machine
best_weights = torch.load('./weights/hybrid_10epochs.pt', map_location=device)
model_hybrid.load_state_dict(best_weights)
model_hybrid.eval()

In [None]:
# Tests model (if epsilon=0, standard test set images)
# Otherwise perturbs using FGSM attack
# Also stores example images where the perturbation caused a mislabel
def test(model, device, test_loader, epsilon):
    # Accuracy counter
    correct = 0
    adv_examples = []
    # Loop over all examples in test set
    for data, target in tqdm(test_loader):
        # Send the data and label to GPU
        data, target = data.to(device), target.to(device)
        # Set requires_grad attribute of tensor. Important for Attack
        data.requires_grad = True
        # Forward pass the data through the model
        output = model(data)
        init_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
        # Don't care to attack if already wrong
        if init_pred.item() != target.item():
            continue
        # Get loss backprop for gradient values
        loss = F.nll_loss(output, target)
        model.zero_grad()
        loss.backward()
        # Collect gradients
        data_grad = data.grad.data
        # Call FGSM function
        perturbed_data = fgsm_attack(data, epsilon, data_grad)
        # Re-classify the perturbed image
        output = model(perturbed_data)
        # Check for success
        final_pred = output.max(1, keepdim=True)[1]
        if final_pred.item() == target.item():
            correct += 1
            # Special case for saving 0 epsilon examples
            if epsilon == 0 and len(adv_examples) < 5:
                adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
                adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) )
        else:
            # Save some adv examples for visualization later
            if len(adv_examples) < 5:
                adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
                adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) )
    # Get accuracy metric
    final_acc = correct / float(dataset_sizes['test'])
    print('Epsilon: {}\tTest Accuracy = {} / {} = {}'.format(epsilon, correct, len(test_loader), final_acc))
    # Return the accuracy and an adversarial example
    return final_acc, adv_examples