In [1]:
import torch 
import torchvision
import torch.nn as nn
import numpy as np
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn import svm
import pandas as pd
from IPython.display import display

from __future__ import print_function
import os
import random
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.utils as vutils
import math

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [41]:
def setup_data(dir, num_circuits, file_prefix, dir_with_gates, separate_bits, norm = 'max_norm', pick_file = 'False', unique_circ_id = 'True'):
  onlyfiles = next(os.walk(dir))[2] #dir is your directory path as string
  print(pick_file)

  # Currently, we only count files with frequencies
  if not pick_file:
    print("bsdjbc")
    if dir_with_gates:
      num_files = int(math.ceil((len(onlyfiles) - 1)/3))
    else:
      num_files = len(onlyfiles)
  else:
    num_files = 1
  print(pick_file)
  for i in range(num_files): # Only FC files matter and there is an extra IDs file
    # Give unique indexes to each circuit in each file      
    idx = i*num_circuits
    d = np.load(dir + "/" + file_prefix + str(i) + ".npy", allow_pickle=True)
    if not unique_circ_id:
      circuits = np.concatenate(d[:,0]).reshape(d.shape[0],2)
    else:
      circuits = np.concatenate(d[:,0]).reshape(d.shape[0],2) + np.array([idx,idx+ int(num_circuits/2)]) 

    # Separate out cicruits and freq
    d = d[:,1:]

    # Stack
    if i == 0:
      det_c = np.copy(d)
      circ = np.copy(circuits)
    else:
      det_c = np.vstack((det_c, d))
      circ = np.vstack((circ, circuits))

  # Separate out frequencies by setting on each bit (instead of settings on pairs)
  if separate_bits == "True":
    det_c_copy = np.empty(det_c.shape)

    # 0 on qubit_0
    det_c_copy[:,0] = det_c[:,0] + det_c[:,2]

    # 1 on qubit_0
    det_c_copy[:,1] = det_c[:,1] + det_c[:,3]

    # 0 on qubit_1
    det_c_copy[:,2] = det_c[:,0] + det_c[:,1]

    # 1 on qubit_1
    det_c_copy[:,3] = det_c[:,2] + det_c[:,3]

    det_c = det_c_copy



  # Normalize frequencies
  if norm == "max_norm":
    det_c = det_c/np.max(det_c)
  elif norm == "col_max_norm":
    det_c = det_c/det_c.max(axis = 0)
  elif norm == "row_norm":
    det_c = det_c/np.sum(det_c[0,:])

  # Used only for testing
  elif norm == "no_norm":
    det_c = det_c

  return circ, det_c

def create_labels(label, size):
  return np.full((size,), label)

def create_dataset(features, label):
  y = create_labels(label, features.shape[0])
  return features, y

def combine_datasets(x1, y1, x2, y2):
  return np.vstack((x1,x2)), np.concatenate((y1,y2))

def binary_comparison_data_pipeline(freq_1, freq_2, test_frac = 0.2):
  x1, y1 = create_dataset(freq_1, 1)
  x2, y2 = create_dataset(freq_2, 0)
  x, y = combine_datasets(x1,y1, x2, y2)
  X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
  return X_train, X_test, y_train, y_test

def svm_expt(freq_1, freq_2, test_size = 0.2):
  X_train, X_test, y_train, y_test = binary_comparison_data_pipeline(freq_1, freq_2, test_frac = test_size)

  clf = svm.SVC()
  clf.fit(X_train, y_train)

  test_acc = (clf.predict(X_test) == y_test).mean()

  return test_acc

# Apply binary expt to all possible combinations of data_list and pretty print
def create_expt_matrix(dataset_list, data_list, binary_expt):
  num_datasets = len(data_list)

  # get names of datasets
  dataset_names = [d['dir_name'] for d in dataset_list]

  expt_matrix = np.zeros((num_datasets, num_datasets))
  for i in range(num_datasets):
    for j in range(num_datasets):
      if (j >= i) :
        expt_matrix[i][j] = binary_expt(data_list[i][1], data_list[j][1])

  # pretty printing
  df = pd.DataFrame(expt_matrix, index=dataset_names, columns=dataset_names)
  display(df)

  return expt_matrix

# Setup a list of data to be experimented on

# dir_prefix - path to directory where all data is stored
# f_prefix - prefix of file names with frequency data
# dir_with_gates - True if data directory has gates included
# sep_bits - if True, setup data such that frequency is calculated on each result bit instead of result pairs
# normalize - Normalization scheme used
def setup_datalist(datasets, dir_prefix, f_prefix, sep_bits, normalize):
  data_list = []
  data_list.append(setup_data(dir_prefix + datasets[0]['dir_name'],  num_circuits = datasets[0]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[0]['dir_with_gates'], separate_bits = sep_bits, norm = normalize))
  data_list.append(setup_data(dir_prefix + datasets[1]['dir_name'],  num_circuits = datasets[1]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[1]['dir_with_gates'], separate_bits = sep_bits, norm = normalize))
  data_list.append(setup_data(dir_prefix + datasets[2]['dir_name'],  num_circuits = datasets[2]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[2]['dir_with_gates'], separate_bits = sep_bits, norm = normalize))
  data_list.append(setup_data(dir_prefix + datasets[3]['dir_name'],  num_circuits = datasets[3]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[3]['dir_with_gates'], separate_bits = sep_bits, norm = normalize))
  data_list.append(setup_data(dir_prefix + datasets[4]['dir_name'],  num_circuits = datasets[4]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[4]['dir_with_gates'], separate_bits = sep_bits, norm = normalize))
  data_list.append(setup_data(dir_prefix + datasets[5]['dir_name'],  num_circuits = datasets[5]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[5]['dir_with_gates'], separate_bits = sep_bits, norm = normalize))
  data_list.append(setup_data(dir_prefix + datasets[6]['dir_name'],  num_circuits = datasets[6]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[6]['dir_with_gates'], separate_bits = sep_bits, norm = normalize))
  data_list.append(setup_data(dir_prefix + datasets[7]['dir_name'],  num_circuits = datasets[7]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[7]['dir_with_gates'], separate_bits = sep_bits, norm = normalize))
  data_list.append(setup_data(dir_prefix + datasets[8]['dir_name'],  num_circuits = datasets[8]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[8]['dir_with_gates'], separate_bits = sep_bits, norm = normalize))

  return data_list

In [4]:
datasets = [
            {
              'dir_name':"IdealGates",
              'num_circuits':20,
              'dir_with_gates':True
            },
            {
              'dir_name':"Operation Crosstalk 1 (a)",
              'num_circuits':20,
              'dir_with_gates':False
            },
            {
              'dir_name':"Operation Crosstalk 2 (b)",
              'num_circuits':20,
              'dir_with_gates':False
            },
            {
              'dir_name':"Detection Crosstalk (c)",
              'num_circuits':40,
              'dir_with_gates':False
            },
            {
              'dir_name':"XT1WithGates",
              'num_circuits':20,
              'dir_with_gates':True
            },
            {
              'dir_name':"XT2WithGates",
              'num_circuits':20,
              'dir_with_gates':True
            },
            {
              'dir_name':"DetX400",
              'num_circuits':40,
              'dir_with_gates':True
            },
            {
              'dir_name':"DetXWithGates",
              'num_circuits':40,
              'dir_with_gates':True
            },
            {
              'dir_name':"DetX100",
              'num_circuits':20,
              'dir_with_gates':True
            }
]

In [7]:
dir_prefix = "gdrive/MyDrive/QuSense/Papers/Simulated-Data-Sarovar/"
f_prefix = "FCTexts_"


In [43]:
sep_bits = False
normalize = "no_norm"
pf = 'False'
ucid = 'True'
ideal_circuits, ideal_freq = setup_data(dir_prefix + datasets[0]['dir_name'],  num_circuits = datasets[0]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[0]['dir_with_gates'], separate_bits = sep_bits, norm = normalize, pick_file = pf, unique_circ_id=ucid)

False
False


In [26]:
ideal_freq.shape

(100, 4)

In [5]:
ideal_circuits, ideal_freq = setup_data(dir_prefix + datasets[0]['dir_name'],  num_circuits = datasets[0]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[0]['dir_with_gates'], separate_bits = sep_bits, norm = normalize)
setup_data(dir_prefix + datasets[1]['dir_name'],  num_circuits = datasets[1]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[1]['dir_with_gates'], separate_bits = sep_bits, norm = normalize)
setup_data(dir_prefix + datasets[2]['dir_name'],  num_circuits = datasets[2]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[2]['dir_with_gates'], separate_bits = sep_bits, norm = normalize)
setup_data(dir_prefix + datasets[3]['dir_name'],  num_circuits = datasets[3]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[3]['dir_with_gates'], separate_bits = sep_bits, norm = normalize)
setup_data(dir_prefix + datasets[4]['dir_name'],  num_circuits = datasets[4]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[4]['dir_with_gates'], separate_bits = sep_bits, norm = normalize)
setup_data(dir_prefix + datasets[5]['dir_name'],  num_circuits = datasets[5]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[5]['dir_with_gates'], separate_bits = sep_bits, norm = normalize)
setup_data(dir_prefix + datasets[6]['dir_name'],  num_circuits = datasets[6]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[6]['dir_with_gates'], separate_bits = sep_bits, norm = normalize)
setup_data(dir_prefix + datasets[7]['dir_name'],  num_circuits = datasets[7]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[7]['dir_with_gates'], separate_bits = sep_bits, norm = normalize)
setup_data(dir_prefix + datasets[8]['dir_name'],  num_circuits = datasets[8]['num_circuits'], file_prefix = f_prefix, dir_with_gates = datasets[8]['dir_with_gates'], separate_bits = sep_bits, norm = normalize)

TypeError: ignored

In [11]:
def gen_data_2(data, circ, frac):
  data = data * (frac)
  data = data.astype('int')
  ds_size =  np.sum(data)
  ds_size_column = np.cumsum(data, axis = 1)
  arr = np.zeros((ds_size, 4))

  running_sum = 0
  for i in range(circ.shape[0]):
    arr[running_sum :running_sum + ds_size_column[i][0],:] = [circ[i][0],0,circ[i][1],0]
    arr[running_sum + ds_size_column[i][0]:running_sum + ds_size_column[i][1],:] = [circ[i][0],1,circ[i][1],0]
    arr[running_sum + ds_size_column[i][1]:running_sum + ds_size_column[i][2],:] = [circ[i][0],0,circ[i][1],1]
    arr[running_sum + ds_size_column[i][2]:running_sum + ds_size_column[i][3],:] = [circ[i][0],1,circ[i][1],1]
    running_sum += np.sum(data[i])
  np.random.shuffle(arr)
  return arr

In [14]:
a = gen_data_2(ideal_freq[:100], ideal_circuits[:100], 0.01)

In [15]:
a[:100]

array([[ 8.,  0., 17.,  1.],
       [ 7.,  0., 15.,  1.],
       [ 9.,  0., 16.,  1.],
       [ 0.,  1., 16.,  0.],
       [ 9.,  0., 17.,  1.],
       [ 5.,  0., 14.,  1.],
       [ 5.,  0., 16.,  1.],
       [ 4.,  1., 18.,  0.],
       [ 7.,  0., 10.,  1.],
       [ 5.,  1., 14.,  0.],
       [ 2.,  1., 10.,  0.],
       [ 7.,  0., 12.,  0.],
       [ 3.,  0., 12.,  1.],
       [ 4.,  0., 13.,  0.],
       [ 3.,  1., 19.,  1.],
       [ 1.,  1., 11.,  1.],
       [ 7.,  0., 16.,  0.],
       [ 3.,  0., 18.,  1.],
       [ 8.,  1., 15.,  1.],
       [ 3.,  1., 11.,  1.],
       [ 3.,  1., 12.,  1.],
       [ 3.,  1., 19.,  1.],
       [ 3.,  0., 15.,  0.],
       [ 3.,  1., 19.,  1.],
       [ 9.,  0., 15.,  1.],
       [ 4.,  1., 10.,  0.],
       [ 6.,  0., 14.,  0.],
       [ 3.,  1., 16.,  1.],
       [ 7.,  0., 15.,  1.],
       [ 8.,  1., 12.,  1.],
       [ 7.,  0., 10.,  1.],
       [ 5.,  1., 15.,  1.],
       [ 5.,  0., 15.,  1.],
       [ 5.,  1., 17.,  1.],
       [ 9.,  