# ML4FG Final Report HQNN Code
### By: Austin Stiefelmaier 12/14/23

## Imports and Settings

In [None]:
# Inspired by quantum ML code from:
# https://pennylane.ai/qml/demos/tutorial_quantum_transfer_learning.html
# And classical ML code from:
# https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

In [1]:
# General imports
import os
import copy
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# PyTorch imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn.functional as F
from torch.utils.data import TensorDataset

# Pennylane imports
import pennylane as qml
from pennylane import numpy as np

In [2]:
# Fix random num generation for reproducibility 
torch.manual_seed(7)
np.random.seed(7)
# os.environ["OMP_NUM_THREADS"] = "1"
# If GPU available, set to run on it
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device = 'cpu'
print(device)

cpu


## Load Data

In [3]:
# Data download
df_x = pd.read_csv('/home/as6734/ml4fg_class_project/TCGA-PANCAN-HiSeq-801x20531/data.csv')
df_y = pd.read_csv('/home/as6734/ml4fg_class_project/TCGA-PANCAN-HiSeq-801x20531/labels.csv')

In [4]:
# Clean data

# Drop first column that only notes sample number (which can be reconstructed from index if need be
df_x.drop(columns=['Unnamed: 0'], axis=1, inplace=True)
df_y.drop(columns=['Unnamed: 0'], axis=1, inplace=True)
# Remove columns with all 0.0 values
df_x = df_x.loc[:, (df_x != 0).any(axis=0)]

In [5]:
# Pre-process data

# Normalize values by mean
df_x=(df_x-df_x.mean())/df_x.std()
# Encode classes
enc = OneHotEncoder(handle_unknown='ignore', sparse_output=False).fit(df_y.values)
y = enc.transform(df_y.values)

In [6]:
# Split data, final percentages are approximately 64% train, 16% validation, 20% test
x_train, x_test, y_train, y_test = train_test_split(df_x.values, y, test_size=0.2, train_size=0.8, random_state=7, stratify=y)
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.2, train_size=0.8, random_state=7, stratify=y_train)
dataset_sizes = {'train': len(x_train), 'validation': len(x_valid), 'test': len(x_test)}
print(dataset_sizes)

{'train': 512, 'validation': 128, 'test': 161}


In [7]:
# Training data reductions to match percentages noted in future cells, 
# NOTE: all reduced slightly to %8
x_train = x_train[0:int(dataset_sizes['train']*0.8-(dataset_sizes['train']*0.8)%8)]
y_train = y_train[0:int(dataset_sizes['train']*0.8-(dataset_sizes['train']*0.8)%8)]

# x_train = x_train[0:int(dataset_sizes['train']*0.6-(dataset_sizes['train']*0.6)%8)]
# y_train = y_train[0:int(dataset_sizes['train']*0.6-(dataset_sizes['train']*0.6)%8)]

# x_train = x_train[0:int(dataset_sizes['train']*0.4-(dataset_sizes['train']*0.4)%8)]
# y_train = y_train[0:int(dataset_sizes['train']*0.4-(dataset_sizes['train']*0.4)%8)]

# x_train = x_train[0:int(dataset_sizes['train']*0.2-(dataset_sizes['train']*0.2)%8)]
# y_train = y_train[0:int(dataset_sizes['train']*0.2-(dataset_sizes['train']*0.2)%8)]

In [8]:
# Convert to DataLoaders
x_train_to_tensor = torch.from_numpy(x_train).to(torch.float32).to(device)
y_train_to_tensor = torch.from_numpy(y_train).to(torch.float32).to(device)
x_valid_to_tensor = torch.from_numpy(x_valid).to(torch.float32).to(device)
y_valid_to_tensor = torch.from_numpy(y_valid).to(torch.float32).to(device)
x_test_to_tensor = torch.from_numpy(x_test).to(torch.float32).to(device)
y_test_to_tensor = torch.from_numpy(y_test).to(torch.float32).to(device)

# Second step: Creating TensorDataset for Dataloader
train_set = TensorDataset(x_train_to_tensor, y_train_to_tensor)
valid_set = TensorDataset(x_valid_to_tensor, y_valid_to_tensor)
test_set = TensorDataset(x_test_to_tensor, y_test_to_tensor)

# Create DataLoaders
dataloaders = {
    'train': torch.utils.data.DataLoader(train_set, batch_size=8, shuffle=True),
    'validation': torch.utils.data.DataLoader(valid_set, batch_size=8),
    'test': torch.utils.data.DataLoader(test_set, shuffle=True, batch_size=1)
}

In [9]:
# # Data downsampling helper function
# def downsample_train(x, y, percentage):
#     # Get number of datapoints to use in training set
#     total = round(dataset_sizes['train'] * (percentage / 100))
#     # Deepcopy
#     x_train = np.copy(x)
#     y_train = np.copy(y)
#     # Reduce samples
#     x_train = x_train[0:total]
#     y_train = y_train[0:total]
#     x_train_to_tensor = torch.from_numpy(x_train).to(torch.float32).to(device)
#     y_train_to_tensor = torch.from_numpy(y_train).to(torch.float32).to(device)
#     train_set = TensorDataset(x_train_to_tensor, y_train_to_tensor)
#     return torch.utils.data.DataLoader(train_set, batch_size=8, shuffle=True)

## Quantum Circuit Architecture

In [10]:
# Quantum circuit params
qubit_count = 5
circ_repeats = 6  # How many times to repeat RY and CNOT gates
q_delta = 0.01  # Param for quantum circuit weight initialization

# Run quantum circuit on Pennylane default simulator
dev = qml.device('default.qubit', wires=qubit_count)

In [11]:
# Helper functions to construct quantum circuit
def ry_gates(w):
    # Apply rotation gate RY w/ given weights
    for i, weight in enumerate(w):
        qml.RY(weight, wires=i)

# Adds alternating CNOT layer to entangle qubits
def entangling_layer(nqubits):
    for i in range(0, nqubits - 1, 2):  # Evens
        qml.CNOT(wires=[i, i + 1])
    for i in range(1, nqubits - 1, 2):  # Odds
        qml.CNOT(wires=[i, i + 1])

In [12]:
# Function to construct quantum circuit to plug into PyTorch
@qml.qnode(dev, interface='torch')
def quantum_circuit(inputs, q_weights_flat):
    # Reshape weights
    q_weights = q_weights_flat.reshape(circ_repeats, qubit_count)
    # Initialize w/ H gates so orthogonal to computational basis
    # This helps start w/out bias towards 0 or 1 states
    for i in range(qubit_count):
        qml.Hadamard(wires=i)
    # Take given inputs and apply to quantum circuit as first weights
    ry_gates(inputs)
    # Repeat CNOT and RY gates to add more weights to train and
    # CNOT "convolutions" (really entangles)
    for k in range(circ_repeats):
        entangling_layer(qubit_count)
        ry_gates(q_weights[k])
    # Use Pennylane sim to get expected value after applying Z gate
    # which returns to standard computational basis, this is layer output
    expected_vals = [qml.expval(qml.PauliZ(entry)) for entry in range(qubit_count)]
    return tuple(expected_vals)

In [13]:
# Example visualization codes with nonsense weights
# Helps to see what the quantum circuit looks like
q_params_print = nn.Parameter(q_delta * torch.randn(circ_repeats * qubit_count))
input_test = torch.randn(1,512)
pre_net_test = nn.Linear(512, qubit_count)
pre_out_test = pre_net_test(input_test)
q_in_test = torch.tanh(pre_out_test) * np.pi / 2.0
for elem in q_in_test:
    print(qml.draw(quantum_circuit)(elem, q_params_print))

0: ──H──RY(-0.15)─╭●──RY(-0.01)────────────╭●──RY(0.00)────────────╭●──RY(-0.01)────────────╭●
1: ──H──RY(0.69)──╰X─╭●──────────RY(0.00)──╰X─╭●─────────RY(-0.01)─╰X─╭●──────────RY(-0.00)─╰X
2: ──H──RY(0.21)──╭●─╰X──────────RY(0.01)──╭●─╰X─────────RY(-0.01)─╭●─╰X──────────RY(0.01)──╭●
3: ──H──RY(-0.84)─╰X─╭●──────────RY(-0.01)─╰X─╭●─────────RY(0.01)──╰X─╭●──────────RY(0.01)──╰X
4: ──H──RY(-0.98)────╰X──────────RY(-0.00)────╰X─────────RY(-0.01)────╰X──────────RY(-0.01)───

───RY(0.01)────────────╭●──RY(0.01)────────────╭●──RY(0.01)────────────┤  <Z>
──╭●─────────RY(-0.00)─╰X─╭●─────────RY(-0.01)─╰X─╭●─────────RY(0.02)──┤  <Z>
──╰X─────────RY(-0.00)─╭●─╰X─────────RY(0.01)──╭●─╰X─────────RY(-0.01)─┤  <Z>
──╭●─────────RY(0.00)──╰X─╭●─────────RY(0.01)──╰X─╭●─────────RY(-0.00)─┤  <Z>
──╰X─────────RY(0.02)─────╰X─────────RY(-0.01)────╰X─────────RY(-0.00)─┤  <Z>


## HQNN Architecture

In [14]:
# HQNN Model Class
class HybridQuantumNet(nn.Module):
    # Initialize layers
    def __init__(self, num_feature):
        super().__init__()
        # Layers before quantum circuit
        self.layer_1 = nn.Linear(num_feature, 512)
        self.batchnorm1 = nn.BatchNorm1d(512)
        self.layer_2 = nn.Linear(512, 128)
        self.batchnorm2 = nn.BatchNorm1d(128)
        self.layer_3 = nn.Linear(128, 64)
        self.batchnorm3 = nn.BatchNorm1d(64)
        self.layer_4 = nn.Linear(64, qubit_count)
        # Quantum Circuit params
        self.q_params = nn.Parameter(q_delta * torch.randn(circ_repeats * qubit_count))
        # Layer(s) after quantum circuit
        # self.post_quant = torch.nn.Softmax(dim=qubit_count)
        # self.post_quant = nn.Linear(qubit_count, 5)
        # Misc
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)

    # Forward pass procedure
    def forward(self, x):
        # First layer
        x = self.layer_1(x)
        x = self.batchnorm1(x)
        x = self.relu(x)

        # Second layer
        x = self.layer_2(x)
        x = self.batchnorm2(x)
        x = self.relu(x)
        x = self.dropout(x)

        # Third layer
        x = self.layer_3(x)
        x = self.batchnorm3(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        # Map classical 64 output to qubit_count output
        pre_out = self.layer_4(x)
        
        # Convert to radians for use as RY rotation gate params
        q_in = torch.tanh(pre_out) * np.pi / 2.0
        # Apply quantum circuit set to run on GPU
        q_out = torch.Tensor(0, qubit_count)
        q_out = q_out.to(device)
        for elem in q_in:
            q_out_elem = quantum_circuit(elem, self.q_params)
            q_out_elem = torch.stack(list(q_out_elem), dim=0)
            q_out_elem = q_out_elem.float().unsqueeze(0)
            q_out = torch.cat((q_out, q_out_elem))
        # Map quantum circuit output using softmax to classes
        # return self.post_quant(q_out)
        return q_out

In [15]:
# Instantiate model
model_hybrid = HybridQuantumNet(num_feature=len(df_x.columns))
# Make sure set to use GPU
model_hybrid = model_hybrid.to(device)
# Show model architecture summary
print(model_hybrid)

HybridQuantumNet(
  (layer_1): Linear(in_features=20264, out_features=512, bias=True)
  (batchnorm1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer_2): Linear(in_features=512, out_features=128, bias=True)
  (batchnorm2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer_3): Linear(in_features=128, out_features=64, bias=True)
  (batchnorm3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer_4): Linear(in_features=64, out_features=5, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.2, inplace=False)
)


## Train HQNN

In [16]:
step = 0.0004  # Initial learning rate
batch_size = 8
num_epochs = 50
gamma_lr_scheduler = 0.1  # Learning rate decay param

In [17]:
# Set up loss, optimizer, and learning rate decay manager
loss_func = nn.CrossEntropyLoss()
optimizer_hybrid = optim.Adam(model_hybrid.parameters(), lr=step)
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer_hybrid, step_size=10, gamma=gamma_lr_scheduler
)

In [18]:
# Function to train model(s)
def train(model, loss_func, optimizer, scheduler, num_epochs):
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 100000.0
    best_loss_train = 100000.0
    print('Training started:')
    for epoch in tqdm(range(num_epochs)):
        for phase in ['train', 'validation']:
            if phase == 'train':
                # Set model to train mode
                model.train()
            else:
                # Set model to eval mode
                model.eval()
            current_loss = 0.0
            n_batches = dataset_sizes[phase] // batch_size
            iter = 0
            for X, Y in dataloaders[phase]:
                batch_len = len(X)
                X = X.to(device)
                optimizer.zero_grad()  # Reset gradients
                # If in train mode, get loss, step optimizer
                with torch.set_grad_enabled(phase == "train"):
                    outputs = model(X)
                    _, preds = torch.max(outputs, 1)
                    loss = loss_func(outputs, Y)
                    if phase == "train":
                        loss.backward()
                        optimizer.step()
                # Print iteration results
                current_loss += loss.item() * batch_len
                print('Phase: {} Epoch: {}/{} Iter: {}/{}'.format(phase, epoch+1, num_epochs, iter+1, n_batches+1),
                    end="\r",
                    flush=True)
                iter += 1

            # Get epoch stats and print
            epoch_loss = current_loss / dataset_sizes[phase]
            print('Phase: {} Epoch: {}/{} Loss: {:.4f}        '.format(
                    'train' if phase == 'train' else 'validation  ',
                    epoch + 1,
                    num_epochs,
                    epoch_loss,
                )
            )
            # Update best vars and make copy of best weights
            # if phase == "validation":
            #     best_model_wts = copy.deepcopy(model.state_dict())
            if phase == "validation" and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == "train" and epoch_loss < best_loss_train:
                best_loss_train = epoch_loss
            # Decay learning rate
            if phase == "train":
                scheduler.step()
    # Print final results
    model.load_state_dict(best_model_wts)
    print('Best test loss: {:.4f}'.format(best_loss))
    return model

In [19]:
model = train(model_hybrid, loss_func, optimizer_hybrid, exp_lr_scheduler, num_epochs=num_epochs)
torch.save(model.state_dict(), './weights/updated60_hybrid_50epochs.pt')

Training started:


  0%|          | 0/50 [00:00<?, ?it/s]

Phase: train Epoch: 1/50 Loss: 0.9154        
Phase: validation   Epoch: 1/50 Loss: 0.7488        
Phase: train Epoch: 2/50 Loss: 0.6526        
Phase: validation   Epoch: 2/50 Loss: 0.6211        
Phase: train Epoch: 3/50 Loss: 0.5696        
Phase: validation   Epoch: 3/50 Loss: 0.5339        
Phase: train Epoch: 4/50 Loss: 0.5389        
Phase: validation   Epoch: 4/50 Loss: 0.4921        
Phase: train Epoch: 5/50 Loss: 0.4797        
Phase: validation   Epoch: 5/50 Loss: 0.4730        
Phase: train Epoch: 6/50 Loss: 0.4450        
Phase: validation   Epoch: 6/50 Loss: 0.4585        
Phase: train Epoch: 7/50 Loss: 0.4584        
Phase: validation   Epoch: 7/50 Loss: 0.4635        
Phase: train Epoch: 8/50 Loss: 0.4222        
Phase: validation   Epoch: 8/50 Loss: 0.4525        
Phase: train Epoch: 9/50 Loss: 0.4095        
Phase: validation   Epoch: 9/50 Loss: 0.4437        
Phase: train Epoch: 10/50 Loss: 0.4211        
Phase: validation   Epoch: 10/50 Loss: 0.4472        
Phase: t

In [21]:
preds, targets = test(model_hybrid, 'cpu', dataloaders['test'])
print(accuracy_score(y_true=targets, y_pred=preds))
print(classification_report(y_true=targets, y_pred=preds))

  0%|          | 0/161 [00:00<?, ?it/s]

1.0
              precision    recall  f1-score   support

        BRCA       1.00      1.00      1.00        60
        COAD       1.00      1.00      1.00        16
        KIRC       1.00      1.00      1.00        30
        LUAD       1.00      1.00      1.00        28
        PRAD       1.00      1.00      1.00        27

    accuracy                           1.00       161
   macro avg       1.00      1.00      1.00       161
weighted avg       1.00      1.00      1.00       161



In [22]:
# Modify training set to be fractional amounts and train/save various models
percentages = [10, 20, 40, 60, 80]
# percentages = [1, 5]
for percentage in percentages:
    # Modify training dataloader to only contain a fraction of the original
    dataloaders['train'] = downsample_train(x_train, y_train, percentage)
    # Train model
    model_hybrid = train(model_hybrid, loss_func, optimizer_hybrid, exp_lr_scheduler, num_epochs=num_epochs)
    # Save model weights locally in case of GCP crash
    torch.save(model_hybrid.state_dict(), f'./weights/{percentage}_hybrid_50epochs.pt')

Training started:


  0%|          | 0/50 [00:00<?, ?it/s]

Phase: train Epoch: 1/50 Loss: 0.0088        
Phase: validation   Epoch: 1/50 Loss: 0.7548        
Phase: train Epoch: 2/50 Loss: 0.0093        
Phase: validation   Epoch: 2/50 Loss: 0.7611        
Phase: train Epoch: 3/50 Loss: 0.0096        
Phase: validation   Epoch: 3/50 Loss: 0.7622        
Phase: train Epoch: 4/50 Loss: 0.0083        
Phase: validation   Epoch: 4/50 Loss: 0.7665        
Phase: train Epoch: 5/50 Loss: 0.0080        
Phase: validation   Epoch: 5/50 Loss: 0.7725        
Phase: train Epoch: 6/50 Loss: 0.0089        
Phase: validation   Epoch: 6/50 Loss: 0.7783        
Phase: train Epoch: 7/50 Loss: 0.0084        
Phase: validation   Epoch: 7/50 Loss: 0.7837        
Phase: train Epoch: 8/50 Loss: 0.0071        
Phase: validation   Epoch: 8/50 Loss: 0.7864        
Phase: train Epoch: 9/50 Loss: 0.0083        
Phase: validation   Epoch: 9/50 Loss: 0.7905        
Phase: train Epoch: 10/50 Loss: 0.0075        
Phase: validation   Epoch: 10/50 Loss: 0.7954        
Phase: t

  0%|          | 0/50 [00:00<?, ?it/s]

Phase: train Epoch: 1/50 Loss: 0.0404        
Phase: validation   Epoch: 1/50 Loss: 0.8245        
Phase: train Epoch: 2/50 Loss: 0.0431        
Phase: validation   Epoch: 2/50 Loss: 0.8487        
Phase: train Epoch: 3/50 Loss: 0.0399        
Phase: validation   Epoch: 3/50 Loss: 0.8595        
Phase: train Epoch: 4/50 Loss: 0.0376        
Phase: validation   Epoch: 4/50 Loss: 0.8661        
Phase: train Epoch: 5/50 Loss: 0.0422        
Phase: validation   Epoch: 5/50 Loss: 0.8600        
Phase: train Epoch: 6/50 Loss: 0.0379        
Phase: validation   Epoch: 6/50 Loss: 0.8602        
Phase: train Epoch: 7/50 Loss: 0.0434        
Phase: validation   Epoch: 7/50 Loss: 0.8520        
Phase: train Epoch: 8/50 Loss: 0.0427        
Phase: validation   Epoch: 8/50 Loss: 0.8553        
Phase: train Epoch: 9/50 Loss: 0.0421        
Phase: validation   Epoch: 9/50 Loss: 0.8387        
Phase: train Epoch: 10/50 Loss: 0.0423        
Phase: validation   Epoch: 10/50 Loss: 0.8410        
Phase: t

## Test HQNN

In [20]:
# Helper function to test model
def test(model, device, test_loader):
    preds = []
    targets = []
    with torch.no_grad():
        model.eval()
        for data, target in tqdm(test_loader):
            # Send the data and target to device
            data, target = data.to(device), target.to(device)
            output = model(data)
            preds.append(enc.inverse_transform(output)[0])
            targets.append(enc.inverse_transform(target)[0])
    return preds, targets

# Helper function to load in weights and send to test function
def load_and_test(percentage, model_type):
    best_weights = torch.load(f'./weights/{percentage}_{model_type}_50epochs.pt')
    model_hybrid.load_state_dict(best_weights)
    model_hybrid.eval()
    preds, targets = test(model_hybrid, 'cpu', dataloaders['test'])
    print(classification_report(y_true=targets, y_pred=preds))
    return accuracy_score(y_true=targets, y_pred=preds)

In [24]:
# percentages = [10, 20, 40, 60, 80]
percentages = [1, 5]
acc_hybrid = []
for percentage in percentages:
    acc_hybrid.append(load_and_test(percentage, 'hybrid'))
    print()
acc_hybrid

  0%|          | 0/161 [00:00<?, ?it/s]

              precision    recall  f1-score   support

        BRCA       0.98      1.00      0.99        60
        COAD       1.00      0.81      0.90        16
        KIRC       1.00      1.00      1.00        30
        LUAD       0.90      0.93      0.91        28
        PRAD       0.96      1.00      0.98        27

    accuracy                           0.97       161
   macro avg       0.97      0.95      0.96       161
weighted avg       0.97      0.97      0.97       161




  0%|          | 0/161 [00:00<?, ?it/s]

              precision    recall  f1-score   support

        BRCA       0.94      1.00      0.97        60
        COAD       1.00      0.56      0.72        16
        KIRC       0.97      1.00      0.98        30
        LUAD       0.90      0.93      0.91        28
        PRAD       0.96      1.00      0.98        27

    accuracy                           0.94       161
   macro avg       0.95      0.90      0.91       161
weighted avg       0.95      0.94      0.94       161




[0.968944099378882, 0.9440993788819876]