## 4000-bit QRNG vs PRNG classification

**This notebook contains the code for loading data from the training sets, applying preprocessing methods from the project modules, and running the data through gradient boosting (GB) and multilayer perceptron (MLP) classifiers.**

In [82]:
#imports

from sklearn.model_selection import train_test_split
from modules.classification_model_skeletons import *
from modules.data_preprocessing import *


In [83]:
# Load the raw dataset from disk. The second argument is presumably the list
# of label column names -- confirm against load_data_into_df in the modules directory.
qrng_dataset_path = 'data/IBM_Superposition_QRNG_100qubit.txt'
df = load_data_into_df(qrng_dataset_path, ['quantum', 'QPU'])

In [84]:
# Columns handed to filter_by_label; the displayed result contains the bit
# strings ('binary_number') and the 'quantum' label.
columns_to_keep = ['binary_number', 'quantum']
df_testing = filter_by_label(df, columns_to_keep)

# Classifying by QPU instead? Uncomment the slice below; otherwise leave it
# commented out. (Rows 0-39999 are QRNG data, the following 40000 rows are PRNG data.)
# df_testing = df_testing[:40000]

df_testing

Unnamed: 0,binary_number,quantum
0,0001011100100011110100100001101111000011010101...,quantum
1,0110101010000001110100000100111101101111110111...,quantum
2,1001001010000000001111010010111000100011110000...,quantum
3,0101100101110010001001011000011010111011101101...,quantum
4,1000101001000000111110011000010111010011111110...,quantum
...,...,...
79995,0011111011100101010101000110001001011000010001...,non-quantum
79996,0001011001010010110001010111010010011110100000...,non-quantum
79997,1000001100011000110001111000010100100110111000...,non-quantum
79998,0101101010110011010011110011100011010000001101...,non-quantum


In [85]:
# Input length experiments: each raw row is 100 bits long (the base length).
# A factor of 40 corresponds to the 4000-bit inputs named in the notebook title;
# change the factor to test other input lengths.
concatenation_factor = 40
df_testing_concatenated = concatenate_data(df_testing, concatenation_factor)
df_testing_concatenated

Unnamed: 0,Concatenated_Data,quantum
0,0001011100100011110100100001101111000011010101...,quantum
1,1011011110011000000100011001001101111111001010...,quantum
2,1101011100111101100101100110110110011101111111...,quantum
3,0011101101111000011110110101111100100100110000...,quantum
4,1000100100100000111111010101111011110001111010...,quantum
...,...,...
1995,0001011011001010100011010111101110101011101111...,non-quantum
1996,0010100010101001111011110110101010101101000010...,non-quantum
1997,1010000111010100000001100101011010010101101001...,non-quantum
1998,0010100011100111111101010111111111011010010011...,non-quantum


In [86]:
# Feature-extraction tests applied during preprocessing.
# Alternative selection with more tests:
# tests = ['counts', 'runs', 'unq_subsq', 'longest_run']
tests = ['counts']

# Function documentation lives in the modules directory.
# The trailing 100 is presumably the number of qubits (the output columns run
# from counts_qb_0 to counts_qb_99) -- confirm against the module.
df_testing_features = apply_individual_qubit_functions(
    df_testing_concatenated,
    tests,
    100,
)
df_testing_features

  df[f'{function}_qb_{qb}'] = qubitRes
  df[f'{function}_qb_{qb}'] = qubitRes


Unnamed: 0,Concatenated_Data,quantum,counts_qb_0,counts_qb_1,counts_qb_2,counts_qb_3,counts_qb_4,counts_qb_5,counts_qb_6,counts_qb_7,...,counts_qb_90,counts_qb_91,counts_qb_92,counts_qb_93,counts_qb_94,counts_qb_95,counts_qb_96,counts_qb_97,counts_qb_98,counts_qb_99
0,0001011100100011110100100001101111000011010101...,quantum,18,25,22,16,20,23,20,18,...,20,21,18,19,23,18,15,12,17,26
1,1011011110011000000100011001001101111111001010...,quantum,16,18,19,18,16,29,18,17,...,17,21,21,20,24,18,19,20,16,19
2,1101011100111101100101100110110110011101111111...,quantum,20,19,24,22,19,17,21,26,...,16,20,19,20,22,24,18,23,22,23
3,0011101101111000011110110101111100100100110000...,quantum,25,14,19,12,14,18,21,23,...,20,21,24,16,18,17,21,24,22,18
4,1000100100100000111111010101111011110001111010...,quantum,21,21,18,7,22,22,21,17,...,18,22,14,17,24,26,14,17,16,15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0001011011001010100011010111101110101011101111...,non-quantum,25,20,24,18,23,20,16,22,...,18,16,18,17,22,17,17,25,22,17
1996,0010100010101001111011110110101010101101000010...,non-quantum,16,22,18,21,17,24,19,24,...,20,18,24,21,17,18,21,21,19,20
1997,1010000111010100000001100101011010010101101001...,non-quantum,23,22,22,12,22,27,18,22,...,26,23,17,18,27,23,18,17,19,16
1998,0010100011100111111101010111111111011010010011...,non-quantum,21,19,23,21,20,19,18,24,...,20,19,11,25,14,18,20,22,22,20


In [87]:
# Optional: use every individual bit as a feature. Leave the line below
# commented out when frequency ('counts') is the only feature, to avoid
# redundancy.
# df_testing_features = make_bit_features(df_testing_concatenated)
# df_testing_features

# Remove the raw bit-string column so that only the label and the numeric
# feature columns remain.
# errors='ignore' keeps this cell idempotent: re-running it after the column
# has already been dropped no longer raises a KeyError.
df_testing_features = df_testing_features.drop(
    columns='Concatenated_Data',
    errors='ignore',
)
df_testing_features

Unnamed: 0,quantum,counts_qb_0,counts_qb_1,counts_qb_2,counts_qb_3,counts_qb_4,counts_qb_5,counts_qb_6,counts_qb_7,counts_qb_8,...,counts_qb_90,counts_qb_91,counts_qb_92,counts_qb_93,counts_qb_94,counts_qb_95,counts_qb_96,counts_qb_97,counts_qb_98,counts_qb_99
0,quantum,18,25,22,16,20,23,20,18,24,...,20,21,18,19,23,18,15,12,17,26
1,quantum,16,18,19,18,16,29,18,17,23,...,17,21,21,20,24,18,19,20,16,19
2,quantum,20,19,24,22,19,17,21,26,24,...,16,20,19,20,22,24,18,23,22,23
3,quantum,25,14,19,12,14,18,21,23,19,...,20,21,24,16,18,17,21,24,22,18
4,quantum,21,21,18,7,22,22,21,17,11,...,18,22,14,17,24,26,14,17,16,15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,non-quantum,25,20,24,18,23,20,16,22,24,...,18,16,18,17,22,17,17,25,22,17
1996,non-quantum,16,22,18,21,17,24,19,24,24,...,20,18,24,21,17,18,21,21,19,20
1997,non-quantum,23,22,22,12,22,27,18,22,22,...,26,23,17,18,27,23,18,17,19,16
1998,non-quantum,21,19,23,21,20,19,18,24,21,...,20,19,11,25,14,18,20,22,22,20


In [88]:
# Splitting into training and testing data: 80% / 20% split.
# The label is the first column of the feature frame; every remaining column
# is used as a model input.
label_column = df_testing_features.columns[0]
y = df_testing_features[label_column].values
X = df_testing_features.drop(columns=label_column).values
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible
)

### GB model

In [89]:
# Fit the gradient boosting classifier on the training split and report its
# accuracy on the held-out test split.
accuracy = sk_learn_gradient_boosting(
    X_train, y_train,
    X_test, y_test,
)
print('GB accuracy: ', accuracy)

fitting model
model fitted, now making predictions on test data
GB accuracy:  0.935


### Basic MLP model 
**nodes in hidden layer = size of input layer (for consistency)**

In [90]:
# Hidden layer width = number of input features, i.e. every column of the
# feature frame except the label column.
num_features = len(df_testing_features.columns) - 1
accuracy = sk_learn_MLP(
    X_train, y_train,
    X_test, y_test,
    num_features,
)
print('MLP accuracy: ', accuracy)

fitting model
model fitted, now making predictions on test data
MLP accuracy:  0.8925
