## 2000 bit QPU classification

**This notebook loads the training data, applies the preprocessing methods from the project modules, and runs the resulting features through gradient boosting (GB) and multilayer perceptron (MLP) classifiers.**

In [10]:
#imports

from sklearn.model_selection import train_test_split
from modules.classification_model_skeletons import *
from modules.data_preprocessing import *


In [11]:
# Read the raw bitstring samples from the text file. The second argument is
# presumably the list of label columns attached to each sample -- confirm
# against load_data_into_df in modules/data_preprocessing.
df = load_data_into_df(
    'data/IBM_Superposition_QRNG_100qubit.txt',
    ['quantum', 'QPU'],
)

In [12]:
# Restrict the frame to the bitstring column and the QPU label
# (see filter_by_label in modules/data_preprocessing for the exact contract).
df_testing = filter_by_label(
    df,
    ['binary_number', 'QPU'],
)

# QPU classification only: keep the first 40000 rows, which are the QRNG
# samples (the next 40000 rows are PRNG data). Comment this line out when
# classifying by anything other than QPU.
df_testing = df_testing[:40000]

df_testing

Unnamed: 0,binary_number,QPU
0,0001011100100011110100100001101111000011010101...,ibm_brisbane
1,0110101010000001110100000100111101101111110111...,ibm_brisbane
2,1001001010000000001111010010111000100011110000...,ibm_brisbane
3,0101100101110010001001011000011010111011101101...,ibm_brisbane
4,1000101001000000111110011000010111010011111110...,ibm_brisbane
...,...,...
39995,0010010110101100011000101110101101010110000011...,ibm_sherbrooke
39996,1101001111000011101100001011011000010001100011...,ibm_sherbrooke
39997,0010000101001010011001111001100100011011100000...,ibm_sherbrooke
39998,0010110101100001001111101000111100010111111000...,ibm_sherbrooke


In [13]:
# Build longer inputs from the base samples (base length = 100 bits).
# With a factor of 20 each row presumably becomes a 20 x 100 = 2000-bit
# string; change the second argument to test other input lengths.
df_testing_concatenated = concatenate_data(
    df_testing,
    20,
)
df_testing_concatenated

Unnamed: 0,Concatenated_Data,QPU
0,0001011100100011110100100001101111000011010101...,ibm_brisbane
1,0001100011000111101001010110010011110111010100...,ibm_brisbane
2,1011011110011000000100011001001101111111001010...,ibm_brisbane
3,1101101001011000010101100110110001001110010111...,ibm_brisbane
4,1101011100111101100101100110110110011101111111...,ibm_brisbane
...,...,...
1995,0010100110010001101000010011011110101100001001...,ibm_sherbrooke
1996,0111100111000001111001000110000010011110110000...,ibm_sherbrooke
1997,1011100111111111101101100111010001100111100001...,ibm_sherbrooke
1998,0111010110101000010000101100001000111100001001...,ibm_sherbrooke


In [14]:
# Applying preprocessing feature extraction tests:
tests = ['counts']
'''tests = ['counts', 'runs', 'unq_subsq', 'longest_run']'''

# See modules directory for function documentation
df_testing_features = apply_individual_qubit_functions(df_testing_concatenated, tests, 100)
df_testing_features

  df[f'{function}_qb_{qb}'] = qubitRes
  df[f'{function}_qb_{qb}'] = qubitRes


Unnamed: 0,Concatenated_Data,QPU,counts_qb_0,counts_qb_1,counts_qb_2,counts_qb_3,counts_qb_4,counts_qb_5,counts_qb_6,counts_qb_7,...,counts_qb_90,counts_qb_91,counts_qb_92,counts_qb_93,counts_qb_94,counts_qb_95,counts_qb_96,counts_qb_97,counts_qb_98,counts_qb_99
0,0001011100100011110100100001101111000011010101...,ibm_brisbane,11,12,10,7,9,10,6,8,...,9,12,10,11,12,8,9,6,8,12
1,0001100011000111101001010110010011110111010100...,ibm_brisbane,7,13,12,9,11,13,14,10,...,11,9,8,8,11,10,6,6,9,14
2,1011011110011000000100011001001101111111001010...,ibm_brisbane,9,9,9,9,8,15,9,7,...,11,10,9,8,16,7,12,8,7,10
3,1101101001011000010101100110110001001110010111...,ibm_brisbane,7,9,10,9,8,14,9,10,...,6,11,12,12,8,11,7,12,9,9
4,1101011100111101100101100110110110011101111111...,ibm_brisbane,9,9,12,13,9,9,9,14,...,9,9,9,10,12,13,9,14,9,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0010100110010001101000010011011110101100001001...,ibm_sherbrooke,13,10,6,9,9,13,13,5,...,11,9,14,12,14,8,7,9,8,12
1996,0111100111000001111001000110000010011110110000...,ibm_sherbrooke,8,10,13,8,5,14,10,12,...,14,7,8,11,14,8,11,9,12,13
1997,1011100111111111101101100111010001100111100001...,ibm_sherbrooke,9,9,10,13,11,14,11,9,...,11,9,13,15,13,10,11,11,12,11
1998,0111010110101000010000101100001000111100001001...,ibm_sherbrooke,16,11,9,12,10,11,6,8,...,10,14,11,11,8,8,14,7,9,11


In [15]:
# Optional alternative: use every individual bit as a feature. Keep this
# disabled when frequency ('counts') is the only feature, to avoid redundancy.
# df_testing_features = make_bit_features(df_testing_concatenated)

# Remove the raw bitstring column so only the label and the numeric features
# remain. errors='ignore' keeps this cell safe to re-run: the frame is
# reassigned to itself, so on a second execution the column is already gone
# and a plain drop would raise KeyError.
df_testing_features = df_testing_features.drop(
    columns='Concatenated_Data',
    errors='ignore',
)
df_testing_features

Unnamed: 0,QPU,counts_qb_0,counts_qb_1,counts_qb_2,counts_qb_3,counts_qb_4,counts_qb_5,counts_qb_6,counts_qb_7,counts_qb_8,...,counts_qb_90,counts_qb_91,counts_qb_92,counts_qb_93,counts_qb_94,counts_qb_95,counts_qb_96,counts_qb_97,counts_qb_98,counts_qb_99
0,ibm_brisbane,11,12,10,7,9,10,6,8,12,...,9,12,10,11,12,8,9,6,8,12
1,ibm_brisbane,7,13,12,9,11,13,14,10,12,...,11,9,8,8,11,10,6,6,9,14
2,ibm_brisbane,9,9,9,9,8,15,9,7,10,...,11,10,9,8,16,7,12,8,7,10
3,ibm_brisbane,7,9,10,9,8,14,9,10,13,...,6,11,12,12,8,11,7,12,9,9
4,ibm_brisbane,9,9,12,13,9,9,9,14,12,...,9,9,9,10,12,13,9,14,9,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,ibm_sherbrooke,13,10,6,9,9,13,13,5,8,...,11,9,14,12,14,8,7,9,8,12
1996,ibm_sherbrooke,8,10,13,8,5,14,10,12,12,...,14,7,8,11,14,8,11,9,12,13
1997,ibm_sherbrooke,9,9,10,13,11,14,11,9,8,...,11,9,13,15,13,10,11,11,12,11
1998,ibm_sherbrooke,16,11,9,12,10,11,6,8,9,...,10,14,11,11,8,8,14,7,9,11


In [16]:
# Splitting into training and testing data: 80% / 20% split.
# The first column of the feature frame holds the class label; every other
# column is used as a model input.
label_column = df_testing_features.columns[0]
X = df_testing_features.drop(columns=label_column).values
y = df_testing_features[label_column].values

# Fixed random_state so the split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### GB model

In [17]:
# Train the gradient boosting classifier on the training split and score it
# on the held-out test split; the helper's return value is reported as the
# accuracy.
accuracy = sk_learn_gradient_boosting(
    X_train, y_train,
    X_test, y_test,
)
print('GB accuracy: ', accuracy)

fitting model
model fitted, now making predictions on test data
GB accuracy:  0.9425


### Basic MLP model 
**nodes in hidden layer = size of input layer (for consistency)**

In [18]:
# Number of model inputs: every column of the feature frame except the label.
# Passed to the MLP helper, presumably as the hidden-layer width (the section
# note says hidden nodes = input layer size).
num_features = len(df_testing_features.columns) - 1
accuracy = sk_learn_MLP(
    X_train, y_train,
    X_test, y_test,
    num_features,
)
print('MLP accuracy: ', accuracy)

fitting model
model fitted, now making predictions on test data
MLP accuracy:  0.955
