## 4000-bit QPU classification

**This notebook loads the training data, applies the preprocessing methods from `modules`, and runs the resulting features through gradient boosting (GB) and multilayer perceptron (MLP) classifiers.**

In [1]:
#imports

from sklearn.model_selection import train_test_split
from modules.classification_model_skeletons import *
from modules.data_preprocessing import *


In [2]:
# Load the raw bit-string data file via the project helper.
# NOTE(review): the second argument presumably lists the label column names
# attached to each sample — confirm against the helper's documentation in modules.
df = load_data_into_df('data/IBM_Superposition_QRNG_100qubit.txt', ['quantum', 'QPU'])

In [3]:
# Build the working frame from the loaded data; the displayed result has the
# 'binary_number' and 'QPU' columns requested here.
df_testing = filter_by_label(df, ['binary_number', 'QPU'])

# Run the line below when classifying by QPU; otherwise comment it out
# (the first 40000 lines are QRNG data, the next 40000 lines are PRNG data).
df_testing = df_testing.head(40000)

df_testing

Unnamed: 0,binary_number,QPU
0,0001011100100011110100100001101111000011010101...,ibm_brisbane
1,0110101010000001110100000100111101101111110111...,ibm_brisbane
2,1001001010000000001111010010111000100011110000...,ibm_brisbane
3,0101100101110010001001011000011010111011101101...,ibm_brisbane
4,1000101001000000111110011000010111010011111110...,ibm_brisbane
...,...,...
39995,0010010110101100011000101110101101010110000011...,ibm_sherbrooke
39996,1101001111000011101100001011011000010001100011...,ibm_sherbrooke
39997,0010000101001010011001111001100100011011100000...,ibm_sherbrooke
39998,0010110101100001001111101000111100010111111000...,ibm_sherbrooke


In [4]:
# For testing with different input lengths (base length = 100).
# NOTE(review): 40 is presumably the number of base bit strings joined into one
# sample (40 x 100 = 4000 bits, matching the notebook title) — confirm in modules.

df_testing_concatenated = concatenate_data(df_testing, 40)
df_testing_concatenated

Unnamed: 0,Concatenated_Data,QPU
0,0001011100100011110100100001101111000011010101...,ibm_brisbane
1,1011011110011000000100011001001101111111001010...,ibm_brisbane
2,1101011100111101100101100110110110011101111111...,ibm_brisbane
3,0011101101111000011110110101111100100100110000...,ibm_brisbane
4,1000100100100000111111010101111011110001111010...,ibm_brisbane
...,...,...
995,1011111110111010000101111011111110001110001000...,ibm_sherbrooke
996,1110100101000000111011000110101001111111011011...,ibm_sherbrooke
997,0111000000000011100010000011101001010100101001...,ibm_sherbrooke
998,0111100111000001111001000110000010011110110000...,ibm_sherbrooke


In [5]:
# Applying preprocessing feature extraction tests.
# Only the per-qubit 'counts' test is enabled. To try the larger feature set,
# swap in the alternative list instead:
# tests = ['counts', 'runs', 'unq_subsq', 'longest_run']
tests = ['counts']

# See modules directory for function documentation.
# NOTE(review): 100 presumably is the number of qubits per base bit string
# (the result has columns counts_qb_0 .. counts_qb_99) — confirm in modules.
df_testing_features = apply_individual_qubit_functions(df_testing_concatenated, tests, 100)
df_testing_features

  df[f'{function}_qb_{qb}'] = qubitRes
  df[f'{function}_qb_{qb}'] = qubitRes


Unnamed: 0,Concatenated_Data,QPU,counts_qb_0,counts_qb_1,counts_qb_2,counts_qb_3,counts_qb_4,counts_qb_5,counts_qb_6,counts_qb_7,...,counts_qb_90,counts_qb_91,counts_qb_92,counts_qb_93,counts_qb_94,counts_qb_95,counts_qb_96,counts_qb_97,counts_qb_98,counts_qb_99
0,0001011100100011110100100001101111000011010101...,ibm_brisbane,18,25,22,16,20,23,20,18,...,20,21,18,19,23,18,15,12,17,26
1,1011011110011000000100011001001101111111001010...,ibm_brisbane,16,18,19,18,16,29,18,17,...,17,21,21,20,24,18,19,20,16,19
2,1101011100111101100101100110110110011101111111...,ibm_brisbane,20,19,24,22,19,17,21,26,...,16,20,19,20,22,24,18,23,22,23
3,0011101101111000011110110101111100100100110000...,ibm_brisbane,25,14,19,12,14,18,21,23,...,20,21,24,16,18,17,21,24,22,18
4,1000100100100000111111010101111011110001111010...,ibm_brisbane,21,21,18,7,22,22,21,17,...,18,22,14,17,24,26,14,17,16,15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,1011111110111010000101111011111110001110001000...,ibm_sherbrooke,20,21,21,19,21,21,20,23,...,20,16,26,21,21,17,20,13,19,14
996,1110100101000000111011000110101001111111011011...,ibm_sherbrooke,20,15,14,17,18,22,21,19,...,21,22,21,26,20,19,14,21,20,26
997,0111000000000011100010000011101001010100101001...,ibm_sherbrooke,25,21,15,18,19,25,21,14,...,21,21,25,18,26,22,16,17,21,23
998,0111100111000001111001000110000010011110110000...,ibm_sherbrooke,17,19,23,21,16,28,21,21,...,25,16,21,26,27,18,22,20,24,24


In [6]:
# For counting individual bits as features: if just using frequency
# as the only feature, leave the next line commented out to avoid redundancy:
# df_testing_features = make_bit_features(df_testing_concatenated)

# Remove the raw bit-string column so that only the label and the extracted
# feature columns remain. errors='ignore' makes the cell safe to re-run: without
# it, a second execution raises KeyError because the column is already gone.
df_testing_features = df_testing_features.drop(columns='Concatenated_Data', errors='ignore')
df_testing_features

Unnamed: 0,QPU,counts_qb_0,counts_qb_1,counts_qb_2,counts_qb_3,counts_qb_4,counts_qb_5,counts_qb_6,counts_qb_7,counts_qb_8,...,counts_qb_90,counts_qb_91,counts_qb_92,counts_qb_93,counts_qb_94,counts_qb_95,counts_qb_96,counts_qb_97,counts_qb_98,counts_qb_99
0,ibm_brisbane,18,25,22,16,20,23,20,18,24,...,20,21,18,19,23,18,15,12,17,26
1,ibm_brisbane,16,18,19,18,16,29,18,17,23,...,17,21,21,20,24,18,19,20,16,19
2,ibm_brisbane,20,19,24,22,19,17,21,26,24,...,16,20,19,20,22,24,18,23,22,23
3,ibm_brisbane,25,14,19,12,14,18,21,23,19,...,20,21,24,16,18,17,21,24,22,18
4,ibm_brisbane,21,21,18,7,22,22,21,17,11,...,18,22,14,17,24,26,14,17,16,15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,ibm_sherbrooke,20,21,21,19,21,21,20,23,18,...,20,16,26,21,21,17,20,13,19,14
996,ibm_sherbrooke,20,15,14,17,18,22,21,19,18,...,21,22,21,26,20,19,14,21,20,26
997,ibm_sherbrooke,25,21,15,18,19,25,21,14,18,...,21,21,25,18,26,22,16,17,21,23
998,ibm_sherbrooke,17,19,23,21,16,28,21,21,20,...,25,16,21,26,27,18,22,20,24,24


In [7]:
# Splitting into training and testing data: 80% and 20% split.
# The first column of the feature frame is the class label; every other
# column is used as a model input.
label_column = df_testing_features.columns[0]
y = df_testing_features[label_column].values
X = df_testing_features.drop(columns=label_column).values

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible
)

### GB model

In [8]:
# Run the gradient boosting helper on the train/test split and print the score
# it returns (the helper logs its own fit/predict progress messages).
# NOTE(review): the return value is presumably test-set accuracy — confirm in modules.
accuracy = sk_learn_gradient_boosting(X_train, y_train, X_test, y_test)
print('GB accuracy: ', accuracy)

fitting model
model fitted, now making predictions on test data
GB accuracy:  0.985


### Basic MLP model 
**Number of nodes in the hidden layer = size of the input layer (for consistency)**

In [9]:
# Size the hidden layer to match the input layer. The width is read from the
# training matrix itself rather than from the feature DataFrame, so it always
# equals the number of inputs the model is actually fitted on, even if the
# DataFrame has been re-bound or gained columns since the split was made.
num_features = X_train.shape[1]
accuracy = sk_learn_MLP(X_train, y_train, X_test, y_test, num_features)
print('MLP accuracy: ', accuracy)

fitting model
model fitted, now making predictions on test data
MLP accuracy:  0.99
