In [1]:
from __future__ import print_function
import os
import numpy as np
import h5py

In [2]:
#######################################
# Dataset class template 
#######################################
class Qubit_Readout_Dataset(object):
    """In-memory dataset of I/Q readout traces loaded from one HDF5 file.

    The file must contain the datasets ``'g_data'`` and ``'e_data'``
    (presumably ground- and excited-state records -- confirm with the data
    producer). The leading axis of each is dropped with ``[0]``; what remains
    is indexed as ``(record, channel, sample)``, where channel 0 is used as I
    and channel 1 as Q.

    Attributes:
        data: float array of shape ``(n_g + n_e, 2 * sr)``. Each row holds
            the selected samples interleaved as ``I0, Q0, I1, Q1, ...``.
            Rows from ``g_data`` come first, followed by rows from ``e_data``.
        labels: float array of shape ``(n_g + n_e,)``; 0 for rows taken from
            ``g_data`` and 1 for rows taken from ``e_data``.
    """

    def __init__(self, file_path, csr, sr):
        """Load one file and keep only the samples selected by ``csr``.

        Args:
            file_path: path of the HDF5 file to read.
            csr: index (e.g. a ``range``) selecting samples along the last axis.
            sr: number of samples selected by ``csr``; must match the width of
                the selection, otherwise numpy raises ``ValueError`` when the
                traces are copied into the output array.
        """
        with h5py.File(file_path, 'r') as f:
            g_data = np.array(f['g_data'])[0]
            e_data = np.array(f['e_data'])[0]

        # Pick the channel first, then restrict to the requested samples.
        I_g = g_data[:, 0, :][:, csr]
        Q_g = g_data[:, 1, :][:, csr]
        I_e = e_data[:, 0, :][:, csr]
        Q_e = e_data[:, 1, :][:, csr]

        # Track both counts separately so files with a different number of
        # g and e records are handled instead of failing on assignment.
        n_g = I_g.shape[0]
        n_e = I_e.shape[0]
        n_total = n_g + n_e

        data = np.zeros((n_total, sr, 2))
        data[:n_g, :, 0] = I_g
        data[:n_g, :, 1] = Q_g
        data[n_g:, :, 0] = I_e
        data[n_g:, :, 1] = Q_e

        labels = np.zeros(n_total)
        labels[n_g:] = 1

        # Flattening (sr, 2) -> sr*2 interleaves I and Q sample by sample.
        self.data = data.reshape(n_total, sr * 2)
        self.labels = labels

    def __len__(self):
        """Return the number of rows (traces) in the dataset."""
        return self.labels.shape[0]

    def __getitem__(self, idx):
        """Return ``(data[idx], labels[idx])``; ``idx`` may be any numpy index."""
        return self.data[idx], self.labels[idx]

    def __add__(self, other):
        """Return a NEW dataset holding the rows of ``self`` followed by ``other``.

        Neither operand is modified. The result is created without calling
        ``__init__`` because no file needs to be read.
        """
        combined = type(self).__new__(type(self))
        combined.data = np.concatenate((self.data, other.data), 0)
        combined.labels = np.concatenate((self.labels, other.labels), 0)
        return combined

    def __iadd__(self, other):
        """Append the rows of ``other`` to this dataset in place (``a += b``)."""
        self.data = np.concatenate((self.data, other.data), 0)
        self.labels = np.concatenate((self.labels, other.labels), 0)
        return self

In [3]:
### Load and prepare the data ###
def process_data(start_window, end_window, data_dir, num_files, train_test_split=0.9, verbose=True):
    """Load the readout window from a series of HDF5 files and split it.

    Files are read from ``data_dir`` under the names
    ``00000_IQ_RAW_ADC_training.h5``, ``00001_...`` up to ``num_files - 1``.

    Args:
        start_window: first sample index of the window (inclusive).
        end_window: last sample index of the window (exclusive).
        data_dir: directory containing the HDF5 files.
        num_files: number of consecutively numbered files to load.
        train_test_split: fraction of rows placed in the training set.
        verbose: when truthy, print each file name as it is processed.

    Returns:
        ``(X_train, y_train, X_test, y_test)``.

    Raises:
        ValueError: if ``num_files`` yields no files to load.

    Note:
        The split is NOT shuffled: rows stay in file order, so the training
        set is the leading ``train_test_split`` fraction and the test set is
        the tail.
    """
    csr = range(start_window, end_window)
    sr = len(csr)

    #######################################
    # Load dataset
    #######################################
    # Collect per-file arrays and concatenate once at the end; growing one
    # array file by file would re-copy everything loaded so far each time.
    data_parts = []
    label_parts = []
    for i in range(num_files):  # Loop over files
        filename = os.path.join(data_dir, f'{i:05d}_IQ_RAW_ADC_training.h5')
        if verbose:
            print('Processing', filename)
        file_dataset = Qubit_Readout_Dataset(filename, csr, sr)
        data_parts.append(file_dataset.data)
        label_parts.append(file_dataset.labels)

    if not data_parts:
        raise ValueError(f'No files to process: num_files={num_files!r}')

    data = np.concatenate(data_parts, 0)
    labels = np.concatenate(label_parts, 0)

    #######################################
    # Partition into train-test split
    #######################################
    split = int(train_test_split * len(labels))

    X_train, y_train = data[:split], labels[:split]
    X_test, y_test = data[split:], labels[split:]
    return X_train, y_train, X_test, y_test

In [4]:
# Readout window in ADC sample indices; passed to process_data as
# start_window / end_window (start inclusive, end exclusive).
START_WINDOW = 100
START_WIDOW = START_WINDOW  # misspelled alias, kept because other cells still reference it
END_WINDOW = 150
# Input directory with the raw .h5 files and output directory for .npy arrays.
DATA_DIR = '../data/malab_05272024/raw/'
SAVE_DIR = '../data/malab_05272024/npz/'
# Number of consecutively numbered .h5 files to load from DATA_DIR.
NUM_H5_FILES = 100

In [54]:
# start processing h5 files
X_train, y_train, X_test, y_test = process_data(
    data_dir=DATA_DIR,
    start_window=START_WIDOW,
    end_window=END_WINDOW,
    num_files=NUM_H5_FILES
)

# save to disk
# Create the output directory first so np.save does not fail on a fresh checkout.
os.makedirs(SAVE_DIR, exist_ok=True)
np.save(os.path.join(SAVE_DIR, f'X_train_{START_WIDOW}_{END_WINDOW}.npy'), X_train)
# Label files carry no window suffix: labels do not depend on the window.
np.save(os.path.join(SAVE_DIR, 'y_train.npy'), y_train)
np.save(os.path.join(SAVE_DIR, f'X_test_{START_WIDOW}_{END_WINDOW}.npy'), X_test)
np.save(os.path.join(SAVE_DIR, 'y_test.npy'), y_test)


Processing ../data/malab_05272024/raw/00000_IQ_RAW_ADC_training.h5
Processing ../data/malab_05272024/raw/00001_IQ_RAW_ADC_training.h5
Processing ../data/malab_05272024/raw/00002_IQ_RAW_ADC_training.h5
Processing ../data/malab_05272024/raw/00003_IQ_RAW_ADC_training.h5
Processing ../data/malab_05272024/raw/00004_IQ_RAW_ADC_training.h5
Processing ../data/malab_05272024/raw/00005_IQ_RAW_ADC_training.h5
Processing ../data/malab_05272024/raw/00006_IQ_RAW_ADC_training.h5
Processing ../data/malab_05272024/raw/00007_IQ_RAW_ADC_training.h5
Processing ../data/malab_05272024/raw/00008_IQ_RAW_ADC_training.h5
Processing ../data/malab_05272024/raw/00009_IQ_RAW_ADC_training.h5
Processing ../data/malab_05272024/raw/00010_IQ_RAW_ADC_training.h5
Processing ../data/malab_05272024/raw/00011_IQ_RAW_ADC_training.h5
Processing ../data/malab_05272024/raw/00012_IQ_RAW_ADC_training.h5
Processing ../data/malab_05272024/raw/00013_IQ_RAW_ADC_training.h5
Processing ../data/malab_05272024/raw/00014_IQ_RAW_ADC_trainin

In [5]:
# Sample range covered by the cached full-length arrays on disk
# (X_train_0_770.npy / X_test_0_770.npy). These files must already exist in
# SAVE_DIR, i.e. the processing cell has to have been run once for this range.
FULL_START, FULL_END = 0, 770

end_windows = list(range(150, 751, 50))    # [150, 200, 250... 750]
start_windows = [100] * len(end_windows)   # [100, 100, 100... 100]

X_train = np.load(os.path.join(SAVE_DIR, f'X_train_{FULL_START}_{FULL_END}.npy'))
y_train = np.load(os.path.join(SAVE_DIR, 'y_train.npy'))
X_test = np.load(os.path.join(SAVE_DIR, f'X_test_{FULL_START}_{FULL_END}.npy'))
y_test = np.load(os.path.join(SAVE_DIR, 'y_test.npy'))

for start, end in zip(start_windows, end_windows):
    if not (FULL_START <= start < end <= FULL_END):
        raise ValueError(
            f'Window [{start}, {end}] is outside the cached range '
            f'[{FULL_START}, {FULL_END}]'
        )
    print(f'Processing window: [{start}, {end}]')

    # Each sample occupies two columns (I and Q interleaved), so column
    # offsets are twice the sample offset relative to the cached range start.
    col_lo = (start - FULL_START) * 2
    col_hi = (end - FULL_START) * 2
    x_train_window = X_train[:, col_lo:col_hi]
    x_test_window = X_test[:, col_lo:col_hi]

    # save to disk
    np.save(os.path.join(SAVE_DIR, f'X_train_{start}_{end}.npy'), x_train_window)
    np.save(os.path.join(SAVE_DIR, f'X_test_{start}_{end}.npy'), x_test_window)


Processing window: [100, 150]
Processing window: [100, 200]
Processing window: [100, 250]
Processing window: [100, 300]
Processing window: [100, 350]
Processing window: [100, 400]
Processing window: [100, 450]
Processing window: [100, 500]
Processing window: [100, 550]
Processing window: [100, 600]
Processing window: [100, 650]
Processing window: [100, 700]
Processing window: [100, 750]


In [None]:
# Disabled alternative: re-read the raw .h5 files once per window instead of
# slicing cached full-range arrays. Kept as a string so it does not execute.
"""
end_windows = list(range(150, 751, 50))    # [150, 200, 250... 750]
start_windows = [100] * len(end_windows)   # [100, 100, 100... 100]

for start, end in zip(start_windows, end_windows):
    # start processing h5 files  
    X_train, y_train, X_test, y_test = process_data(
        data_dir=DATA_DIR,
        start_window=start,
        end_window=end,
        num_files=NUM_H5_FILES,
        verbose=False
    )

    # save to disk (name files after the loop window so iterations do not overwrite each other)
    np.save(os.path.join(SAVE_DIR, f'X_train_{start}_{end}.npy'), X_train)
    np.save(os.path.join(SAVE_DIR, f'y_train_{start}_{end}.npy'), y_train)
    np.save(os.path.join(SAVE_DIR, f'X_test_{start}_{end}.npy'), X_test)
    np.save(os.path.join(SAVE_DIR, f'y_test_{start}_{end}.npy'), y_test)
"""